{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 0,
  "global_step": 1042,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0009596928982725527,
      "grad_norm": 0.068359375,
      "learning_rate": 9.990403071017275e-06,
      "loss": 1.3835,
      "step": 1
    },
    {
      "epoch": 0.0019193857965451055,
      "grad_norm": 0.0673828125,
      "learning_rate": 9.98080614203455e-06,
      "loss": 1.3302,
      "step": 2
    },
    {
      "epoch": 0.0028790786948176585,
      "grad_norm": 0.0703125,
      "learning_rate": 9.971209213051824e-06,
      "loss": 1.3785,
      "step": 3
    },
    {
      "epoch": 0.003838771593090211,
      "grad_norm": 0.07080078125,
      "learning_rate": 9.961612284069098e-06,
      "loss": 1.3093,
      "step": 4
    },
    {
      "epoch": 0.0047984644913627635,
      "grad_norm": 0.07373046875,
      "learning_rate": 9.952015355086372e-06,
      "loss": 1.4606,
      "step": 5
    },
    {
      "epoch": 0.005758157389635317,
      "grad_norm": 0.07958984375,
      "learning_rate": 9.942418426103647e-06,
      "loss": 1.4186,
      "step": 6
    },
    {
      "epoch": 0.0067178502879078695,
      "grad_norm": 0.08544921875,
      "learning_rate": 9.932821497120923e-06,
      "loss": 1.4227,
      "step": 7
    },
    {
      "epoch": 0.007677543186180422,
      "grad_norm": 0.07763671875,
      "learning_rate": 9.923224568138197e-06,
      "loss": 1.2789,
      "step": 8
    },
    {
      "epoch": 0.008637236084452975,
      "grad_norm": 0.0888671875,
      "learning_rate": 9.913627639155471e-06,
      "loss": 1.4264,
      "step": 9
    },
    {
      "epoch": 0.009596928982725527,
      "grad_norm": 0.08544921875,
      "learning_rate": 9.904030710172746e-06,
      "loss": 1.3867,
      "step": 10
    },
    {
      "epoch": 0.01055662188099808,
      "grad_norm": 0.08837890625,
      "learning_rate": 9.89443378119002e-06,
      "loss": 1.3162,
      "step": 11
    },
    {
      "epoch": 0.011516314779270634,
      "grad_norm": 0.091796875,
      "learning_rate": 9.884836852207294e-06,
      "loss": 1.351,
      "step": 12
    },
    {
      "epoch": 0.012476007677543186,
      "grad_norm": 0.09765625,
      "learning_rate": 9.875239923224569e-06,
      "loss": 1.3704,
      "step": 13
    },
    {
      "epoch": 0.013435700575815739,
      "grad_norm": 0.0986328125,
      "learning_rate": 9.865642994241843e-06,
      "loss": 1.3264,
      "step": 14
    },
    {
      "epoch": 0.014395393474088292,
      "grad_norm": 0.1025390625,
      "learning_rate": 9.856046065259119e-06,
      "loss": 1.3761,
      "step": 15
    },
    {
      "epoch": 0.015355086372360844,
      "grad_norm": 0.10546875,
      "learning_rate": 9.846449136276392e-06,
      "loss": 1.3366,
      "step": 16
    },
    {
      "epoch": 0.016314779270633396,
      "grad_norm": 0.10009765625,
      "learning_rate": 9.836852207293666e-06,
      "loss": 1.2195,
      "step": 17
    },
    {
      "epoch": 0.01727447216890595,
      "grad_norm": 0.10888671875,
      "learning_rate": 9.82725527831094e-06,
      "loss": 1.3383,
      "step": 18
    },
    {
      "epoch": 0.018234165067178502,
      "grad_norm": 0.1142578125,
      "learning_rate": 9.817658349328216e-06,
      "loss": 1.4293,
      "step": 19
    },
    {
      "epoch": 0.019193857965451054,
      "grad_norm": 0.12158203125,
      "learning_rate": 9.80806142034549e-06,
      "loss": 1.3755,
      "step": 20
    },
    {
      "epoch": 0.02015355086372361,
      "grad_norm": 0.119140625,
      "learning_rate": 9.798464491362765e-06,
      "loss": 1.4084,
      "step": 21
    },
    {
      "epoch": 0.02111324376199616,
      "grad_norm": 0.1259765625,
      "learning_rate": 9.78886756238004e-06,
      "loss": 1.2938,
      "step": 22
    },
    {
      "epoch": 0.022072936660268713,
      "grad_norm": 0.1328125,
      "learning_rate": 9.779270633397314e-06,
      "loss": 1.3799,
      "step": 23
    },
    {
      "epoch": 0.023032629558541268,
      "grad_norm": 0.1328125,
      "learning_rate": 9.769673704414588e-06,
      "loss": 1.3184,
      "step": 24
    },
    {
      "epoch": 0.02399232245681382,
      "grad_norm": 0.13671875,
      "learning_rate": 9.760076775431862e-06,
      "loss": 1.367,
      "step": 25
    },
    {
      "epoch": 0.02495201535508637,
      "grad_norm": 0.1884765625,
      "learning_rate": 9.750479846449137e-06,
      "loss": 1.4573,
      "step": 26
    },
    {
      "epoch": 0.025911708253358926,
      "grad_norm": 0.169921875,
      "learning_rate": 9.740882917466411e-06,
      "loss": 1.5804,
      "step": 27
    },
    {
      "epoch": 0.026871401151631478,
      "grad_norm": 0.1669921875,
      "learning_rate": 9.731285988483687e-06,
      "loss": 1.4974,
      "step": 28
    },
    {
      "epoch": 0.02783109404990403,
      "grad_norm": 0.1455078125,
      "learning_rate": 9.721689059500961e-06,
      "loss": 1.4201,
      "step": 29
    },
    {
      "epoch": 0.028790786948176585,
      "grad_norm": 0.15625,
      "learning_rate": 9.712092130518234e-06,
      "loss": 1.3526,
      "step": 30
    },
    {
      "epoch": 0.029750479846449136,
      "grad_norm": 0.15234375,
      "learning_rate": 9.702495201535508e-06,
      "loss": 1.3501,
      "step": 31
    },
    {
      "epoch": 0.030710172744721688,
      "grad_norm": 0.1591796875,
      "learning_rate": 9.692898272552784e-06,
      "loss": 1.3327,
      "step": 32
    },
    {
      "epoch": 0.03166986564299424,
      "grad_norm": 0.162109375,
      "learning_rate": 9.683301343570059e-06,
      "loss": 1.3058,
      "step": 33
    },
    {
      "epoch": 0.03262955854126679,
      "grad_norm": 0.1669921875,
      "learning_rate": 9.673704414587333e-06,
      "loss": 1.3515,
      "step": 34
    },
    {
      "epoch": 0.03358925143953935,
      "grad_norm": 0.177734375,
      "learning_rate": 9.664107485604607e-06,
      "loss": 1.4229,
      "step": 35
    },
    {
      "epoch": 0.0345489443378119,
      "grad_norm": 0.158203125,
      "learning_rate": 9.654510556621882e-06,
      "loss": 1.2888,
      "step": 36
    },
    {
      "epoch": 0.03550863723608445,
      "grad_norm": 0.189453125,
      "learning_rate": 9.644913627639156e-06,
      "loss": 1.4405,
      "step": 37
    },
    {
      "epoch": 0.036468330134357005,
      "grad_norm": 0.169921875,
      "learning_rate": 9.63531669865643e-06,
      "loss": 1.3298,
      "step": 38
    },
    {
      "epoch": 0.03742802303262956,
      "grad_norm": 0.1806640625,
      "learning_rate": 9.625719769673705e-06,
      "loss": 1.3527,
      "step": 39
    },
    {
      "epoch": 0.03838771593090211,
      "grad_norm": 0.193359375,
      "learning_rate": 9.61612284069098e-06,
      "loss": 1.3846,
      "step": 40
    },
    {
      "epoch": 0.03934740882917467,
      "grad_norm": 0.181640625,
      "learning_rate": 9.606525911708255e-06,
      "loss": 1.4143,
      "step": 41
    },
    {
      "epoch": 0.04030710172744722,
      "grad_norm": 0.197265625,
      "learning_rate": 9.59692898272553e-06,
      "loss": 1.3999,
      "step": 42
    },
    {
      "epoch": 0.04126679462571977,
      "grad_norm": 0.1865234375,
      "learning_rate": 9.587332053742802e-06,
      "loss": 1.3248,
      "step": 43
    },
    {
      "epoch": 0.04222648752399232,
      "grad_norm": 0.1796875,
      "learning_rate": 9.577735124760078e-06,
      "loss": 1.2913,
      "step": 44
    },
    {
      "epoch": 0.04318618042226487,
      "grad_norm": 0.1884765625,
      "learning_rate": 9.568138195777352e-06,
      "loss": 1.3192,
      "step": 45
    },
    {
      "epoch": 0.044145873320537425,
      "grad_norm": 0.1923828125,
      "learning_rate": 9.558541266794627e-06,
      "loss": 1.3543,
      "step": 46
    },
    {
      "epoch": 0.045105566218809984,
      "grad_norm": 0.19140625,
      "learning_rate": 9.548944337811901e-06,
      "loss": 1.2851,
      "step": 47
    },
    {
      "epoch": 0.046065259117082535,
      "grad_norm": 0.201171875,
      "learning_rate": 9.539347408829175e-06,
      "loss": 1.3718,
      "step": 48
    },
    {
      "epoch": 0.04702495201535509,
      "grad_norm": 0.2021484375,
      "learning_rate": 9.52975047984645e-06,
      "loss": 1.4037,
      "step": 49
    },
    {
      "epoch": 0.04798464491362764,
      "grad_norm": 0.1923828125,
      "learning_rate": 9.520153550863724e-06,
      "loss": 1.2992,
      "step": 50
    },
    {
      "epoch": 0.04894433781190019,
      "grad_norm": 0.1943359375,
      "learning_rate": 9.510556621880998e-06,
      "loss": 1.3151,
      "step": 51
    },
    {
      "epoch": 0.04990403071017274,
      "grad_norm": 0.1923828125,
      "learning_rate": 9.500959692898273e-06,
      "loss": 1.2815,
      "step": 52
    },
    {
      "epoch": 0.0508637236084453,
      "grad_norm": 0.19921875,
      "learning_rate": 9.491362763915549e-06,
      "loss": 1.3421,
      "step": 53
    },
    {
      "epoch": 0.05182341650671785,
      "grad_norm": 0.1923828125,
      "learning_rate": 9.481765834932823e-06,
      "loss": 1.3319,
      "step": 54
    },
    {
      "epoch": 0.052783109404990404,
      "grad_norm": 0.1787109375,
      "learning_rate": 9.472168905950097e-06,
      "loss": 1.2811,
      "step": 55
    },
    {
      "epoch": 0.053742802303262956,
      "grad_norm": 0.1982421875,
      "learning_rate": 9.46257197696737e-06,
      "loss": 1.3407,
      "step": 56
    },
    {
      "epoch": 0.05470249520153551,
      "grad_norm": 0.181640625,
      "learning_rate": 9.452975047984646e-06,
      "loss": 1.1884,
      "step": 57
    },
    {
      "epoch": 0.05566218809980806,
      "grad_norm": 0.1904296875,
      "learning_rate": 9.44337811900192e-06,
      "loss": 1.2598,
      "step": 58
    },
    {
      "epoch": 0.05662188099808062,
      "grad_norm": 0.193359375,
      "learning_rate": 9.433781190019195e-06,
      "loss": 1.3258,
      "step": 59
    },
    {
      "epoch": 0.05758157389635317,
      "grad_norm": 0.2060546875,
      "learning_rate": 9.424184261036469e-06,
      "loss": 1.3675,
      "step": 60
    },
    {
      "epoch": 0.05854126679462572,
      "grad_norm": 0.1884765625,
      "learning_rate": 9.414587332053743e-06,
      "loss": 1.2389,
      "step": 61
    },
    {
      "epoch": 0.05950095969289827,
      "grad_norm": 0.1826171875,
      "learning_rate": 9.404990403071018e-06,
      "loss": 1.281,
      "step": 62
    },
    {
      "epoch": 0.060460652591170824,
      "grad_norm": 0.1904296875,
      "learning_rate": 9.395393474088292e-06,
      "loss": 1.3178,
      "step": 63
    },
    {
      "epoch": 0.061420345489443376,
      "grad_norm": 0.1708984375,
      "learning_rate": 9.385796545105566e-06,
      "loss": 1.215,
      "step": 64
    },
    {
      "epoch": 0.06238003838771593,
      "grad_norm": 0.1669921875,
      "learning_rate": 9.376199616122842e-06,
      "loss": 1.1806,
      "step": 65
    },
    {
      "epoch": 0.06333973128598848,
      "grad_norm": 0.1767578125,
      "learning_rate": 9.366602687140117e-06,
      "loss": 1.1896,
      "step": 66
    },
    {
      "epoch": 0.06429942418426103,
      "grad_norm": 0.1728515625,
      "learning_rate": 9.357005758157391e-06,
      "loss": 1.1949,
      "step": 67
    },
    {
      "epoch": 0.06525911708253358,
      "grad_norm": 0.21484375,
      "learning_rate": 9.347408829174665e-06,
      "loss": 1.3085,
      "step": 68
    },
    {
      "epoch": 0.06621880998080615,
      "grad_norm": 0.173828125,
      "learning_rate": 9.33781190019194e-06,
      "loss": 1.1966,
      "step": 69
    },
    {
      "epoch": 0.0671785028790787,
      "grad_norm": 0.1787109375,
      "learning_rate": 9.328214971209214e-06,
      "loss": 1.2213,
      "step": 70
    },
    {
      "epoch": 0.06813819577735125,
      "grad_norm": 0.17578125,
      "learning_rate": 9.318618042226488e-06,
      "loss": 1.2269,
      "step": 71
    },
    {
      "epoch": 0.0690978886756238,
      "grad_norm": 0.17578125,
      "learning_rate": 9.309021113243763e-06,
      "loss": 1.2402,
      "step": 72
    },
    {
      "epoch": 0.07005758157389635,
      "grad_norm": 0.1787109375,
      "learning_rate": 9.299424184261039e-06,
      "loss": 1.2418,
      "step": 73
    },
    {
      "epoch": 0.0710172744721689,
      "grad_norm": 0.166015625,
      "learning_rate": 9.289827255278311e-06,
      "loss": 1.1873,
      "step": 74
    },
    {
      "epoch": 0.07197696737044146,
      "grad_norm": 0.185546875,
      "learning_rate": 9.280230326295585e-06,
      "loss": 1.2642,
      "step": 75
    },
    {
      "epoch": 0.07293666026871401,
      "grad_norm": 0.177734375,
      "learning_rate": 9.27063339731286e-06,
      "loss": 1.2479,
      "step": 76
    },
    {
      "epoch": 0.07389635316698656,
      "grad_norm": 0.16015625,
      "learning_rate": 9.261036468330134e-06,
      "loss": 1.178,
      "step": 77
    },
    {
      "epoch": 0.07485604606525911,
      "grad_norm": 0.1552734375,
      "learning_rate": 9.25143953934741e-06,
      "loss": 1.1754,
      "step": 78
    },
    {
      "epoch": 0.07581573896353166,
      "grad_norm": 0.1611328125,
      "learning_rate": 9.241842610364684e-06,
      "loss": 1.2165,
      "step": 79
    },
    {
      "epoch": 0.07677543186180422,
      "grad_norm": 0.16796875,
      "learning_rate": 9.232245681381959e-06,
      "loss": 1.1865,
      "step": 80
    },
    {
      "epoch": 0.07773512476007678,
      "grad_norm": 0.1748046875,
      "learning_rate": 9.222648752399233e-06,
      "loss": 1.2512,
      "step": 81
    },
    {
      "epoch": 0.07869481765834933,
      "grad_norm": 0.240234375,
      "learning_rate": 9.213051823416507e-06,
      "loss": 1.4117,
      "step": 82
    },
    {
      "epoch": 0.07965451055662189,
      "grad_norm": 0.1455078125,
      "learning_rate": 9.203454894433782e-06,
      "loss": 1.1267,
      "step": 83
    },
    {
      "epoch": 0.08061420345489444,
      "grad_norm": 0.166015625,
      "learning_rate": 9.193857965451056e-06,
      "loss": 1.2428,
      "step": 84
    },
    {
      "epoch": 0.08157389635316699,
      "grad_norm": 0.16796875,
      "learning_rate": 9.18426103646833e-06,
      "loss": 1.1984,
      "step": 85
    },
    {
      "epoch": 0.08253358925143954,
      "grad_norm": 0.150390625,
      "learning_rate": 9.174664107485606e-06,
      "loss": 1.1621,
      "step": 86
    },
    {
      "epoch": 0.08349328214971209,
      "grad_norm": 0.14453125,
      "learning_rate": 9.16506717850288e-06,
      "loss": 1.1578,
      "step": 87
    },
    {
      "epoch": 0.08445297504798464,
      "grad_norm": 0.146484375,
      "learning_rate": 9.155470249520153e-06,
      "loss": 1.1536,
      "step": 88
    },
    {
      "epoch": 0.0854126679462572,
      "grad_norm": 0.173828125,
      "learning_rate": 9.145873320537428e-06,
      "loss": 1.2703,
      "step": 89
    },
    {
      "epoch": 0.08637236084452975,
      "grad_norm": 0.158203125,
      "learning_rate": 9.136276391554704e-06,
      "loss": 1.1967,
      "step": 90
    },
    {
      "epoch": 0.0873320537428023,
      "grad_norm": 0.23828125,
      "learning_rate": 9.126679462571978e-06,
      "loss": 1.4335,
      "step": 91
    },
    {
      "epoch": 0.08829174664107485,
      "grad_norm": 0.1513671875,
      "learning_rate": 9.117082533589252e-06,
      "loss": 1.1648,
      "step": 92
    },
    {
      "epoch": 0.0892514395393474,
      "grad_norm": 0.140625,
      "learning_rate": 9.107485604606527e-06,
      "loss": 1.1267,
      "step": 93
    },
    {
      "epoch": 0.09021113243761997,
      "grad_norm": 0.1259765625,
      "learning_rate": 9.097888675623801e-06,
      "loss": 1.1272,
      "step": 94
    },
    {
      "epoch": 0.09117082533589252,
      "grad_norm": 0.138671875,
      "learning_rate": 9.088291746641075e-06,
      "loss": 1.158,
      "step": 95
    },
    {
      "epoch": 0.09213051823416507,
      "grad_norm": 0.1484375,
      "learning_rate": 9.07869481765835e-06,
      "loss": 1.2036,
      "step": 96
    },
    {
      "epoch": 0.09309021113243762,
      "grad_norm": 0.1279296875,
      "learning_rate": 9.069097888675624e-06,
      "loss": 1.1793,
      "step": 97
    },
    {
      "epoch": 0.09404990403071017,
      "grad_norm": 0.1357421875,
      "learning_rate": 9.0595009596929e-06,
      "loss": 1.1281,
      "step": 98
    },
    {
      "epoch": 0.09500959692898273,
      "grad_norm": 0.1376953125,
      "learning_rate": 9.049904030710174e-06,
      "loss": 1.145,
      "step": 99
    },
    {
      "epoch": 0.09596928982725528,
      "grad_norm": 0.12451171875,
      "learning_rate": 9.040307101727449e-06,
      "loss": 1.0348,
      "step": 100
    },
    {
      "epoch": 0.09692898272552783,
      "grad_norm": 0.13671875,
      "learning_rate": 9.030710172744721e-06,
      "loss": 1.1187,
      "step": 101
    },
    {
      "epoch": 0.09788867562380038,
      "grad_norm": 0.150390625,
      "learning_rate": 9.021113243761996e-06,
      "loss": 1.2216,
      "step": 102
    },
    {
      "epoch": 0.09884836852207293,
      "grad_norm": 0.2060546875,
      "learning_rate": 9.011516314779272e-06,
      "loss": 1.136,
      "step": 103
    },
    {
      "epoch": 0.09980806142034548,
      "grad_norm": 0.1337890625,
      "learning_rate": 9.001919385796546e-06,
      "loss": 1.0826,
      "step": 104
    },
    {
      "epoch": 0.10076775431861804,
      "grad_norm": 0.1435546875,
      "learning_rate": 8.99232245681382e-06,
      "loss": 1.1926,
      "step": 105
    },
    {
      "epoch": 0.1017274472168906,
      "grad_norm": 0.1396484375,
      "learning_rate": 8.982725527831095e-06,
      "loss": 1.1645,
      "step": 106
    },
    {
      "epoch": 0.10268714011516315,
      "grad_norm": 0.1298828125,
      "learning_rate": 8.973128598848369e-06,
      "loss": 1.1451,
      "step": 107
    },
    {
      "epoch": 0.1036468330134357,
      "grad_norm": 0.12890625,
      "learning_rate": 8.963531669865643e-06,
      "loss": 1.1533,
      "step": 108
    },
    {
      "epoch": 0.10460652591170826,
      "grad_norm": 0.11474609375,
      "learning_rate": 8.953934740882918e-06,
      "loss": 1.1095,
      "step": 109
    },
    {
      "epoch": 0.10556621880998081,
      "grad_norm": 0.1484375,
      "learning_rate": 8.944337811900192e-06,
      "loss": 1.0983,
      "step": 110
    },
    {
      "epoch": 0.10652591170825336,
      "grad_norm": 0.1162109375,
      "learning_rate": 8.934740882917468e-06,
      "loss": 1.0713,
      "step": 111
    },
    {
      "epoch": 0.10748560460652591,
      "grad_norm": 0.1337890625,
      "learning_rate": 8.925143953934742e-06,
      "loss": 1.1187,
      "step": 112
    },
    {
      "epoch": 0.10844529750479846,
      "grad_norm": 0.11767578125,
      "learning_rate": 8.915547024952017e-06,
      "loss": 1.0156,
      "step": 113
    },
    {
      "epoch": 0.10940499040307101,
      "grad_norm": 0.12060546875,
      "learning_rate": 8.905950095969291e-06,
      "loss": 1.1281,
      "step": 114
    },
    {
      "epoch": 0.11036468330134357,
      "grad_norm": 0.12451171875,
      "learning_rate": 8.896353166986565e-06,
      "loss": 1.1185,
      "step": 115
    },
    {
      "epoch": 0.11132437619961612,
      "grad_norm": 0.1552734375,
      "learning_rate": 8.88675623800384e-06,
      "loss": 1.1278,
      "step": 116
    },
    {
      "epoch": 0.11228406909788867,
      "grad_norm": 0.1279296875,
      "learning_rate": 8.877159309021114e-06,
      "loss": 1.0986,
      "step": 117
    },
    {
      "epoch": 0.11324376199616124,
      "grad_norm": 0.10693359375,
      "learning_rate": 8.867562380038388e-06,
      "loss": 1.0399,
      "step": 118
    },
    {
      "epoch": 0.11420345489443379,
      "grad_norm": 0.1220703125,
      "learning_rate": 8.857965451055663e-06,
      "loss": 1.1677,
      "step": 119
    },
    {
      "epoch": 0.11516314779270634,
      "grad_norm": 0.1259765625,
      "learning_rate": 8.848368522072937e-06,
      "loss": 1.1291,
      "step": 120
    },
    {
      "epoch": 0.11612284069097889,
      "grad_norm": 0.111328125,
      "learning_rate": 8.838771593090211e-06,
      "loss": 1.0135,
      "step": 121
    },
    {
      "epoch": 0.11708253358925144,
      "grad_norm": 0.1103515625,
      "learning_rate": 8.829174664107486e-06,
      "loss": 1.1206,
      "step": 122
    },
    {
      "epoch": 0.118042226487524,
      "grad_norm": 0.1240234375,
      "learning_rate": 8.819577735124762e-06,
      "loss": 1.0963,
      "step": 123
    },
    {
      "epoch": 0.11900191938579655,
      "grad_norm": 0.119140625,
      "learning_rate": 8.809980806142036e-06,
      "loss": 1.1072,
      "step": 124
    },
    {
      "epoch": 0.1199616122840691,
      "grad_norm": 0.12060546875,
      "learning_rate": 8.80038387715931e-06,
      "loss": 1.1213,
      "step": 125
    },
    {
      "epoch": 0.12092130518234165,
      "grad_norm": 0.10400390625,
      "learning_rate": 8.790786948176585e-06,
      "loss": 1.0651,
      "step": 126
    },
    {
      "epoch": 0.1218809980806142,
      "grad_norm": 0.11572265625,
      "learning_rate": 8.781190019193859e-06,
      "loss": 1.056,
      "step": 127
    },
    {
      "epoch": 0.12284069097888675,
      "grad_norm": 0.11767578125,
      "learning_rate": 8.771593090211133e-06,
      "loss": 1.082,
      "step": 128
    },
    {
      "epoch": 0.1238003838771593,
      "grad_norm": 0.1533203125,
      "learning_rate": 8.761996161228408e-06,
      "loss": 1.2184,
      "step": 129
    },
    {
      "epoch": 0.12476007677543186,
      "grad_norm": 0.193359375,
      "learning_rate": 8.752399232245682e-06,
      "loss": 1.0084,
      "step": 130
    },
    {
      "epoch": 0.1257197696737044,
      "grad_norm": 0.1298828125,
      "learning_rate": 8.742802303262956e-06,
      "loss": 1.1232,
      "step": 131
    },
    {
      "epoch": 0.12667946257197696,
      "grad_norm": 0.1044921875,
      "learning_rate": 8.73320537428023e-06,
      "loss": 1.0711,
      "step": 132
    },
    {
      "epoch": 0.1276391554702495,
      "grad_norm": 0.1513671875,
      "learning_rate": 8.723608445297505e-06,
      "loss": 1.2007,
      "step": 133
    },
    {
      "epoch": 0.12859884836852206,
      "grad_norm": 0.11767578125,
      "learning_rate": 8.71401151631478e-06,
      "loss": 1.1375,
      "step": 134
    },
    {
      "epoch": 0.1295585412667946,
      "grad_norm": 0.12109375,
      "learning_rate": 8.704414587332054e-06,
      "loss": 1.0473,
      "step": 135
    },
    {
      "epoch": 0.13051823416506717,
      "grad_norm": 0.1240234375,
      "learning_rate": 8.69481765834933e-06,
      "loss": 1.1487,
      "step": 136
    },
    {
      "epoch": 0.13147792706333974,
      "grad_norm": 0.099609375,
      "learning_rate": 8.685220729366604e-06,
      "loss": 1.072,
      "step": 137
    },
    {
      "epoch": 0.1324376199616123,
      "grad_norm": 0.107421875,
      "learning_rate": 8.675623800383878e-06,
      "loss": 1.0678,
      "step": 138
    },
    {
      "epoch": 0.13339731285988485,
      "grad_norm": 0.10205078125,
      "learning_rate": 8.666026871401153e-06,
      "loss": 1.1066,
      "step": 139
    },
    {
      "epoch": 0.1343570057581574,
      "grad_norm": 0.1220703125,
      "learning_rate": 8.656429942418427e-06,
      "loss": 1.1308,
      "step": 140
    },
    {
      "epoch": 0.13531669865642995,
      "grad_norm": 0.1220703125,
      "learning_rate": 8.646833013435701e-06,
      "loss": 1.0877,
      "step": 141
    },
    {
      "epoch": 0.1362763915547025,
      "grad_norm": 0.1083984375,
      "learning_rate": 8.637236084452976e-06,
      "loss": 1.0643,
      "step": 142
    },
    {
      "epoch": 0.13723608445297505,
      "grad_norm": 0.1015625,
      "learning_rate": 8.62763915547025e-06,
      "loss": 0.9934,
      "step": 143
    },
    {
      "epoch": 0.1381957773512476,
      "grad_norm": 0.12109375,
      "learning_rate": 8.618042226487526e-06,
      "loss": 1.1454,
      "step": 144
    },
    {
      "epoch": 0.13915547024952016,
      "grad_norm": 0.10595703125,
      "learning_rate": 8.6084452975048e-06,
      "loss": 1.0282,
      "step": 145
    },
    {
      "epoch": 0.1401151631477927,
      "grad_norm": 0.1865234375,
      "learning_rate": 8.598848368522073e-06,
      "loss": 1.2197,
      "step": 146
    },
    {
      "epoch": 0.14107485604606526,
      "grad_norm": 0.11328125,
      "learning_rate": 8.589251439539347e-06,
      "loss": 1.0509,
      "step": 147
    },
    {
      "epoch": 0.1420345489443378,
      "grad_norm": 0.10693359375,
      "learning_rate": 8.579654510556623e-06,
      "loss": 1.0721,
      "step": 148
    },
    {
      "epoch": 0.14299424184261036,
      "grad_norm": 0.119140625,
      "learning_rate": 8.570057581573898e-06,
      "loss": 1.1034,
      "step": 149
    },
    {
      "epoch": 0.14395393474088292,
      "grad_norm": 0.1162109375,
      "learning_rate": 8.560460652591172e-06,
      "loss": 1.0192,
      "step": 150
    },
    {
      "epoch": 0.14491362763915547,
      "grad_norm": 0.11767578125,
      "learning_rate": 8.550863723608446e-06,
      "loss": 1.0542,
      "step": 151
    },
    {
      "epoch": 0.14587332053742802,
      "grad_norm": 0.107421875,
      "learning_rate": 8.54126679462572e-06,
      "loss": 1.0556,
      "step": 152
    },
    {
      "epoch": 0.14683301343570057,
      "grad_norm": 0.10595703125,
      "learning_rate": 8.531669865642995e-06,
      "loss": 1.0191,
      "step": 153
    },
    {
      "epoch": 0.14779270633397312,
      "grad_norm": 0.1064453125,
      "learning_rate": 8.522072936660269e-06,
      "loss": 0.9925,
      "step": 154
    },
    {
      "epoch": 0.14875239923224567,
      "grad_norm": 0.1591796875,
      "learning_rate": 8.512476007677543e-06,
      "loss": 1.1719,
      "step": 155
    },
    {
      "epoch": 0.14971209213051823,
      "grad_norm": 0.11181640625,
      "learning_rate": 8.502879078694818e-06,
      "loss": 1.1286,
      "step": 156
    },
    {
      "epoch": 0.15067178502879078,
      "grad_norm": 0.12255859375,
      "learning_rate": 8.493282149712094e-06,
      "loss": 1.1383,
      "step": 157
    },
    {
      "epoch": 0.15163147792706333,
      "grad_norm": 0.1611328125,
      "learning_rate": 8.483685220729368e-06,
      "loss": 1.2015,
      "step": 158
    },
    {
      "epoch": 0.15259117082533588,
      "grad_norm": 0.11767578125,
      "learning_rate": 8.47408829174664e-06,
      "loss": 1.0849,
      "step": 159
    },
    {
      "epoch": 0.15355086372360843,
      "grad_norm": 0.09375,
      "learning_rate": 8.464491362763915e-06,
      "loss": 0.944,
      "step": 160
    },
    {
      "epoch": 0.15451055662188098,
      "grad_norm": 0.12353515625,
      "learning_rate": 8.454894433781191e-06,
      "loss": 1.0966,
      "step": 161
    },
    {
      "epoch": 0.15547024952015356,
      "grad_norm": 0.10498046875,
      "learning_rate": 8.445297504798465e-06,
      "loss": 0.9607,
      "step": 162
    },
    {
      "epoch": 0.15642994241842612,
      "grad_norm": 0.11669921875,
      "learning_rate": 8.43570057581574e-06,
      "loss": 1.1095,
      "step": 163
    },
    {
      "epoch": 0.15738963531669867,
      "grad_norm": 0.10205078125,
      "learning_rate": 8.426103646833014e-06,
      "loss": 1.0191,
      "step": 164
    },
    {
      "epoch": 0.15834932821497122,
      "grad_norm": 0.11083984375,
      "learning_rate": 8.416506717850288e-06,
      "loss": 1.0218,
      "step": 165
    },
    {
      "epoch": 0.15930902111324377,
      "grad_norm": 0.0966796875,
      "learning_rate": 8.406909788867563e-06,
      "loss": 0.9931,
      "step": 166
    },
    {
      "epoch": 0.16026871401151632,
      "grad_norm": 0.091796875,
      "learning_rate": 8.397312859884837e-06,
      "loss": 0.9998,
      "step": 167
    },
    {
      "epoch": 0.16122840690978887,
      "grad_norm": 0.10107421875,
      "learning_rate": 8.387715930902111e-06,
      "loss": 1.0155,
      "step": 168
    },
    {
      "epoch": 0.16218809980806143,
      "grad_norm": 0.115234375,
      "learning_rate": 8.378119001919387e-06,
      "loss": 0.9811,
      "step": 169
    },
    {
      "epoch": 0.16314779270633398,
      "grad_norm": 0.11376953125,
      "learning_rate": 8.368522072936662e-06,
      "loss": 0.9923,
      "step": 170
    },
    {
      "epoch": 0.16410748560460653,
      "grad_norm": 0.11083984375,
      "learning_rate": 8.358925143953936e-06,
      "loss": 1.0559,
      "step": 171
    },
    {
      "epoch": 0.16506717850287908,
      "grad_norm": 0.1083984375,
      "learning_rate": 8.34932821497121e-06,
      "loss": 1.0016,
      "step": 172
    },
    {
      "epoch": 0.16602687140115163,
      "grad_norm": 0.1025390625,
      "learning_rate": 8.339731285988485e-06,
      "loss": 0.9755,
      "step": 173
    },
    {
      "epoch": 0.16698656429942418,
      "grad_norm": 0.09423828125,
      "learning_rate": 8.330134357005759e-06,
      "loss": 1.0032,
      "step": 174
    },
    {
      "epoch": 0.16794625719769674,
      "grad_norm": 0.1318359375,
      "learning_rate": 8.320537428023033e-06,
      "loss": 1.1055,
      "step": 175
    },
    {
      "epoch": 0.1689059500959693,
      "grad_norm": 0.11865234375,
      "learning_rate": 8.310940499040308e-06,
      "loss": 1.0221,
      "step": 176
    },
    {
      "epoch": 0.16986564299424184,
      "grad_norm": 0.10986328125,
      "learning_rate": 8.301343570057582e-06,
      "loss": 1.0432,
      "step": 177
    },
    {
      "epoch": 0.1708253358925144,
      "grad_norm": 0.1552734375,
      "learning_rate": 8.291746641074856e-06,
      "loss": 1.0867,
      "step": 178
    },
    {
      "epoch": 0.17178502879078694,
      "grad_norm": 0.1083984375,
      "learning_rate": 8.28214971209213e-06,
      "loss": 1.0279,
      "step": 179
    },
    {
      "epoch": 0.1727447216890595,
      "grad_norm": 0.10888671875,
      "learning_rate": 8.272552783109405e-06,
      "loss": 1.0319,
      "step": 180
    },
    {
      "epoch": 0.17370441458733205,
      "grad_norm": 0.0966796875,
      "learning_rate": 8.26295585412668e-06,
      "loss": 1.0082,
      "step": 181
    },
    {
      "epoch": 0.1746641074856046,
      "grad_norm": 0.1025390625,
      "learning_rate": 8.253358925143955e-06,
      "loss": 0.9761,
      "step": 182
    },
    {
      "epoch": 0.17562380038387715,
      "grad_norm": 0.09765625,
      "learning_rate": 8.24376199616123e-06,
      "loss": 0.9492,
      "step": 183
    },
    {
      "epoch": 0.1765834932821497,
      "grad_norm": 0.1142578125,
      "learning_rate": 8.234165067178504e-06,
      "loss": 1.0052,
      "step": 184
    },
    {
      "epoch": 0.17754318618042225,
      "grad_norm": 0.1083984375,
      "learning_rate": 8.224568138195778e-06,
      "loss": 0.9921,
      "step": 185
    },
    {
      "epoch": 0.1785028790786948,
      "grad_norm": 0.1328125,
      "learning_rate": 8.214971209213053e-06,
      "loss": 1.0567,
      "step": 186
    },
    {
      "epoch": 0.17946257197696738,
      "grad_norm": 0.1181640625,
      "learning_rate": 8.205374280230327e-06,
      "loss": 0.9777,
      "step": 187
    },
    {
      "epoch": 0.18042226487523993,
      "grad_norm": 0.0966796875,
      "learning_rate": 8.195777351247601e-06,
      "loss": 1.0348,
      "step": 188
    },
    {
      "epoch": 0.1813819577735125,
      "grad_norm": 0.0966796875,
      "learning_rate": 8.186180422264876e-06,
      "loss": 0.9495,
      "step": 189
    },
    {
      "epoch": 0.18234165067178504,
      "grad_norm": 0.10595703125,
      "learning_rate": 8.176583493282152e-06,
      "loss": 1.0123,
      "step": 190
    },
    {
      "epoch": 0.1833013435700576,
      "grad_norm": 0.14453125,
      "learning_rate": 8.166986564299424e-06,
      "loss": 1.1257,
      "step": 191
    },
    {
      "epoch": 0.18426103646833014,
      "grad_norm": 0.166015625,
      "learning_rate": 8.157389635316699e-06,
      "loss": 1.1254,
      "step": 192
    },
    {
      "epoch": 0.1852207293666027,
      "grad_norm": 0.10205078125,
      "learning_rate": 8.147792706333973e-06,
      "loss": 1.0055,
      "step": 193
    },
    {
      "epoch": 0.18618042226487524,
      "grad_norm": 0.1064453125,
      "learning_rate": 8.138195777351249e-06,
      "loss": 1.0149,
      "step": 194
    },
    {
      "epoch": 0.1871401151631478,
      "grad_norm": 0.09619140625,
      "learning_rate": 8.128598848368523e-06,
      "loss": 0.9892,
      "step": 195
    },
    {
      "epoch": 0.18809980806142035,
      "grad_norm": 0.09130859375,
      "learning_rate": 8.119001919385798e-06,
      "loss": 0.9941,
      "step": 196
    },
    {
      "epoch": 0.1890595009596929,
      "grad_norm": 0.1298828125,
      "learning_rate": 8.109404990403072e-06,
      "loss": 1.0089,
      "step": 197
    },
    {
      "epoch": 0.19001919385796545,
      "grad_norm": 0.09619140625,
      "learning_rate": 8.099808061420346e-06,
      "loss": 0.9446,
      "step": 198
    },
    {
      "epoch": 0.190978886756238,
      "grad_norm": 0.1015625,
      "learning_rate": 8.09021113243762e-06,
      "loss": 0.9734,
      "step": 199
    },
    {
      "epoch": 0.19193857965451055,
      "grad_norm": 0.11279296875,
      "learning_rate": 8.080614203454895e-06,
      "loss": 1.0315,
      "step": 200
    },
    {
      "epoch": 0.1928982725527831,
      "grad_norm": 0.12890625,
      "learning_rate": 8.07101727447217e-06,
      "loss": 1.0699,
      "step": 201
    },
    {
      "epoch": 0.19385796545105566,
      "grad_norm": 0.1494140625,
      "learning_rate": 8.061420345489444e-06,
      "loss": 1.0639,
      "step": 202
    },
    {
      "epoch": 0.1948176583493282,
      "grad_norm": 0.10498046875,
      "learning_rate": 8.05182341650672e-06,
      "loss": 1.0076,
      "step": 203
    },
    {
      "epoch": 0.19577735124760076,
      "grad_norm": 0.08984375,
      "learning_rate": 8.042226487523992e-06,
      "loss": 0.9557,
      "step": 204
    },
    {
      "epoch": 0.1967370441458733,
      "grad_norm": 0.103515625,
      "learning_rate": 8.032629558541267e-06,
      "loss": 1.0174,
      "step": 205
    },
    {
      "epoch": 0.19769673704414586,
      "grad_norm": 0.10986328125,
      "learning_rate": 8.023032629558541e-06,
      "loss": 1.0005,
      "step": 206
    },
    {
      "epoch": 0.19865642994241842,
      "grad_norm": 0.126953125,
      "learning_rate": 8.013435700575817e-06,
      "loss": 1.0216,
      "step": 207
    },
    {
      "epoch": 0.19961612284069097,
      "grad_norm": 0.1064453125,
      "learning_rate": 8.003838771593091e-06,
      "loss": 1.0296,
      "step": 208
    },
    {
      "epoch": 0.20057581573896352,
      "grad_norm": 0.09619140625,
      "learning_rate": 7.994241842610366e-06,
      "loss": 0.9604,
      "step": 209
    },
    {
      "epoch": 0.20153550863723607,
      "grad_norm": 0.0947265625,
      "learning_rate": 7.98464491362764e-06,
      "loss": 0.9319,
      "step": 210
    },
    {
      "epoch": 0.20249520153550865,
      "grad_norm": 0.09423828125,
      "learning_rate": 7.975047984644914e-06,
      "loss": 0.959,
      "step": 211
    },
    {
      "epoch": 0.2034548944337812,
      "grad_norm": 0.1259765625,
      "learning_rate": 7.965451055662189e-06,
      "loss": 1.0316,
      "step": 212
    },
    {
      "epoch": 0.20441458733205375,
      "grad_norm": 0.11279296875,
      "learning_rate": 7.955854126679463e-06,
      "loss": 0.9858,
      "step": 213
    },
    {
      "epoch": 0.2053742802303263,
      "grad_norm": 0.1025390625,
      "learning_rate": 7.946257197696737e-06,
      "loss": 0.9937,
      "step": 214
    },
    {
      "epoch": 0.20633397312859886,
      "grad_norm": 0.09521484375,
      "learning_rate": 7.936660268714013e-06,
      "loss": 1.0125,
      "step": 215
    },
    {
      "epoch": 0.2072936660268714,
      "grad_norm": 0.111328125,
      "learning_rate": 7.927063339731288e-06,
      "loss": 1.0136,
      "step": 216
    },
    {
      "epoch": 0.20825335892514396,
      "grad_norm": 0.0888671875,
      "learning_rate": 7.91746641074856e-06,
      "loss": 0.9332,
      "step": 217
    },
    {
      "epoch": 0.2092130518234165,
      "grad_norm": 0.0908203125,
      "learning_rate": 7.907869481765835e-06,
      "loss": 0.9504,
      "step": 218
    },
    {
      "epoch": 0.21017274472168906,
      "grad_norm": 0.099609375,
      "learning_rate": 7.89827255278311e-06,
      "loss": 1.0072,
      "step": 219
    },
    {
      "epoch": 0.21113243761996162,
      "grad_norm": 0.0849609375,
      "learning_rate": 7.888675623800385e-06,
      "loss": 0.9329,
      "step": 220
    },
    {
      "epoch": 0.21209213051823417,
      "grad_norm": 0.11328125,
      "learning_rate": 7.87907869481766e-06,
      "loss": 0.9958,
      "step": 221
    },
    {
      "epoch": 0.21305182341650672,
      "grad_norm": 0.10009765625,
      "learning_rate": 7.869481765834934e-06,
      "loss": 1.0125,
      "step": 222
    },
    {
      "epoch": 0.21401151631477927,
      "grad_norm": 0.1015625,
      "learning_rate": 7.859884836852208e-06,
      "loss": 0.9808,
      "step": 223
    },
    {
      "epoch": 0.21497120921305182,
      "grad_norm": 0.1083984375,
      "learning_rate": 7.850287907869482e-06,
      "loss": 0.9976,
      "step": 224
    },
    {
      "epoch": 0.21593090211132437,
      "grad_norm": 0.11376953125,
      "learning_rate": 7.840690978886757e-06,
      "loss": 1.0249,
      "step": 225
    },
    {
      "epoch": 0.21689059500959693,
      "grad_norm": 0.1025390625,
      "learning_rate": 7.83109404990403e-06,
      "loss": 0.9899,
      "step": 226
    },
    {
      "epoch": 0.21785028790786948,
      "grad_norm": 0.1298828125,
      "learning_rate": 7.821497120921305e-06,
      "loss": 1.1065,
      "step": 227
    },
    {
      "epoch": 0.21880998080614203,
      "grad_norm": 0.1083984375,
      "learning_rate": 7.811900191938581e-06,
      "loss": 1.0697,
      "step": 228
    },
    {
      "epoch": 0.21976967370441458,
      "grad_norm": 0.1337890625,
      "learning_rate": 7.802303262955856e-06,
      "loss": 0.9401,
      "step": 229
    },
    {
      "epoch": 0.22072936660268713,
      "grad_norm": 0.119140625,
      "learning_rate": 7.79270633397313e-06,
      "loss": 1.052,
      "step": 230
    },
    {
      "epoch": 0.22168905950095968,
      "grad_norm": 0.1591796875,
      "learning_rate": 7.783109404990402e-06,
      "loss": 1.154,
      "step": 231
    },
    {
      "epoch": 0.22264875239923224,
      "grad_norm": 0.1376953125,
      "learning_rate": 7.773512476007678e-06,
      "loss": 1.0706,
      "step": 232
    },
    {
      "epoch": 0.2236084452975048,
      "grad_norm": 0.09375,
      "learning_rate": 7.763915547024953e-06,
      "loss": 0.9314,
      "step": 233
    },
    {
      "epoch": 0.22456813819577734,
      "grad_norm": 0.08056640625,
      "learning_rate": 7.754318618042227e-06,
      "loss": 0.9448,
      "step": 234
    },
    {
      "epoch": 0.2255278310940499,
      "grad_norm": 0.10107421875,
      "learning_rate": 7.744721689059501e-06,
      "loss": 1.0048,
      "step": 235
    },
    {
      "epoch": 0.22648752399232247,
      "grad_norm": 0.1103515625,
      "learning_rate": 7.735124760076776e-06,
      "loss": 1.0153,
      "step": 236
    },
    {
      "epoch": 0.22744721689059502,
      "grad_norm": 0.10302734375,
      "learning_rate": 7.72552783109405e-06,
      "loss": 0.921,
      "step": 237
    },
    {
      "epoch": 0.22840690978886757,
      "grad_norm": 0.08984375,
      "learning_rate": 7.715930902111324e-06,
      "loss": 0.8776,
      "step": 238
    },
    {
      "epoch": 0.22936660268714013,
      "grad_norm": 0.08544921875,
      "learning_rate": 7.706333973128599e-06,
      "loss": 0.9054,
      "step": 239
    },
    {
      "epoch": 0.23032629558541268,
      "grad_norm": 0.111328125,
      "learning_rate": 7.696737044145875e-06,
      "loss": 0.9415,
      "step": 240
    },
    {
      "epoch": 0.23128598848368523,
      "grad_norm": 0.1357421875,
      "learning_rate": 7.687140115163149e-06,
      "loss": 1.0546,
      "step": 241
    },
    {
      "epoch": 0.23224568138195778,
      "grad_norm": 0.1064453125,
      "learning_rate": 7.677543186180423e-06,
      "loss": 0.989,
      "step": 242
    },
    {
      "epoch": 0.23320537428023033,
      "grad_norm": 0.091796875,
      "learning_rate": 7.667946257197698e-06,
      "loss": 0.9687,
      "step": 243
    },
    {
      "epoch": 0.23416506717850288,
      "grad_norm": 0.09228515625,
      "learning_rate": 7.658349328214972e-06,
      "loss": 0.9352,
      "step": 244
    },
    {
      "epoch": 0.23512476007677544,
      "grad_norm": 0.0908203125,
      "learning_rate": 7.648752399232246e-06,
      "loss": 0.9592,
      "step": 245
    },
    {
      "epoch": 0.236084452975048,
      "grad_norm": 0.1025390625,
      "learning_rate": 7.63915547024952e-06,
      "loss": 1.0028,
      "step": 246
    },
    {
      "epoch": 0.23704414587332054,
      "grad_norm": 0.09375,
      "learning_rate": 7.629558541266795e-06,
      "loss": 0.9823,
      "step": 247
    },
    {
      "epoch": 0.2380038387715931,
      "grad_norm": 0.0966796875,
      "learning_rate": 7.61996161228407e-06,
      "loss": 0.9399,
      "step": 248
    },
    {
      "epoch": 0.23896353166986564,
      "grad_norm": 0.10546875,
      "learning_rate": 7.610364683301345e-06,
      "loss": 0.9947,
      "step": 249
    },
    {
      "epoch": 0.2399232245681382,
      "grad_norm": 0.09326171875,
      "learning_rate": 7.600767754318619e-06,
      "loss": 1.0125,
      "step": 250
    },
    {
      "epoch": 0.24088291746641075,
      "grad_norm": 0.0927734375,
      "learning_rate": 7.591170825335893e-06,
      "loss": 0.8915,
      "step": 251
    },
    {
      "epoch": 0.2418426103646833,
      "grad_norm": 0.08544921875,
      "learning_rate": 7.581573896353167e-06,
      "loss": 0.9549,
      "step": 252
    },
    {
      "epoch": 0.24280230326295585,
      "grad_norm": 0.099609375,
      "learning_rate": 7.571976967370443e-06,
      "loss": 0.9493,
      "step": 253
    },
    {
      "epoch": 0.2437619961612284,
      "grad_norm": 0.11279296875,
      "learning_rate": 7.562380038387716e-06,
      "loss": 0.9959,
      "step": 254
    },
    {
      "epoch": 0.24472168905950095,
      "grad_norm": 0.1044921875,
      "learning_rate": 7.5527831094049905e-06,
      "loss": 0.9799,
      "step": 255
    },
    {
      "epoch": 0.2456813819577735,
      "grad_norm": 0.1025390625,
      "learning_rate": 7.543186180422265e-06,
      "loss": 0.9498,
      "step": 256
    },
    {
      "epoch": 0.24664107485604606,
      "grad_norm": 0.10546875,
      "learning_rate": 7.53358925143954e-06,
      "loss": 0.8909,
      "step": 257
    },
    {
      "epoch": 0.2476007677543186,
      "grad_norm": 0.10302734375,
      "learning_rate": 7.523992322456814e-06,
      "loss": 0.9247,
      "step": 258
    },
    {
      "epoch": 0.24856046065259116,
      "grad_norm": 0.10400390625,
      "learning_rate": 7.514395393474089e-06,
      "loss": 0.9861,
      "step": 259
    },
    {
      "epoch": 0.2495201535508637,
      "grad_norm": 0.0888671875,
      "learning_rate": 7.504798464491363e-06,
      "loss": 0.949,
      "step": 260
    },
    {
      "epoch": 0.2504798464491363,
      "grad_norm": 0.1025390625,
      "learning_rate": 7.495201535508638e-06,
      "loss": 0.973,
      "step": 261
    },
    {
      "epoch": 0.2514395393474088,
      "grad_norm": 0.103515625,
      "learning_rate": 7.4856046065259125e-06,
      "loss": 0.9665,
      "step": 262
    },
    {
      "epoch": 0.2523992322456814,
      "grad_norm": 0.10400390625,
      "learning_rate": 7.476007677543187e-06,
      "loss": 0.9601,
      "step": 263
    },
    {
      "epoch": 0.2533589251439539,
      "grad_norm": 0.126953125,
      "learning_rate": 7.466410748560461e-06,
      "loss": 1.0331,
      "step": 264
    },
    {
      "epoch": 0.2543186180422265,
      "grad_norm": 0.109375,
      "learning_rate": 7.456813819577736e-06,
      "loss": 0.9773,
      "step": 265
    },
    {
      "epoch": 0.255278310940499,
      "grad_norm": 0.0927734375,
      "learning_rate": 7.447216890595011e-06,
      "loss": 0.9897,
      "step": 266
    },
    {
      "epoch": 0.2562380038387716,
      "grad_norm": 0.09814453125,
      "learning_rate": 7.437619961612285e-06,
      "loss": 0.8944,
      "step": 267
    },
    {
      "epoch": 0.2571976967370441,
      "grad_norm": 0.10791015625,
      "learning_rate": 7.4280230326295585e-06,
      "loss": 1.031,
      "step": 268
    },
    {
      "epoch": 0.2581573896353167,
      "grad_norm": 0.10888671875,
      "learning_rate": 7.4184261036468345e-06,
      "loss": 0.9817,
      "step": 269
    },
    {
      "epoch": 0.2591170825335892,
      "grad_norm": 0.107421875,
      "learning_rate": 7.408829174664108e-06,
      "loss": 0.999,
      "step": 270
    },
    {
      "epoch": 0.2600767754318618,
      "grad_norm": 0.095703125,
      "learning_rate": 7.399232245681382e-06,
      "loss": 0.9768,
      "step": 271
    },
    {
      "epoch": 0.26103646833013433,
      "grad_norm": 0.11376953125,
      "learning_rate": 7.389635316698657e-06,
      "loss": 1.0208,
      "step": 272
    },
    {
      "epoch": 0.2619961612284069,
      "grad_norm": 0.107421875,
      "learning_rate": 7.380038387715931e-06,
      "loss": 0.9654,
      "step": 273
    },
    {
      "epoch": 0.2629558541266795,
      "grad_norm": 0.1025390625,
      "learning_rate": 7.370441458733206e-06,
      "loss": 0.9188,
      "step": 274
    },
    {
      "epoch": 0.263915547024952,
      "grad_norm": 0.09912109375,
      "learning_rate": 7.3608445297504805e-06,
      "loss": 0.98,
      "step": 275
    },
    {
      "epoch": 0.2648752399232246,
      "grad_norm": 0.10595703125,
      "learning_rate": 7.351247600767755e-06,
      "loss": 0.9927,
      "step": 276
    },
    {
      "epoch": 0.2658349328214971,
      "grad_norm": 0.123046875,
      "learning_rate": 7.341650671785029e-06,
      "loss": 1.0048,
      "step": 277
    },
    {
      "epoch": 0.2667946257197697,
      "grad_norm": 0.10791015625,
      "learning_rate": 7.332053742802304e-06,
      "loss": 0.9771,
      "step": 278
    },
    {
      "epoch": 0.2677543186180422,
      "grad_norm": 0.115234375,
      "learning_rate": 7.322456813819579e-06,
      "loss": 1.0225,
      "step": 279
    },
    {
      "epoch": 0.2687140115163148,
      "grad_norm": 0.11767578125,
      "learning_rate": 7.312859884836853e-06,
      "loss": 0.9999,
      "step": 280
    },
    {
      "epoch": 0.2696737044145873,
      "grad_norm": 0.09521484375,
      "learning_rate": 7.3032629558541264e-06,
      "loss": 0.9115,
      "step": 281
    },
    {
      "epoch": 0.2706333973128599,
      "grad_norm": 0.0859375,
      "learning_rate": 7.2936660268714024e-06,
      "loss": 0.9408,
      "step": 282
    },
    {
      "epoch": 0.2715930902111324,
      "grad_norm": 0.095703125,
      "learning_rate": 7.284069097888676e-06,
      "loss": 0.8829,
      "step": 283
    },
    {
      "epoch": 0.272552783109405,
      "grad_norm": 0.1357421875,
      "learning_rate": 7.27447216890595e-06,
      "loss": 0.9513,
      "step": 284
    },
    {
      "epoch": 0.27351247600767753,
      "grad_norm": 0.09423828125,
      "learning_rate": 7.264875239923225e-06,
      "loss": 0.8901,
      "step": 285
    },
    {
      "epoch": 0.2744721689059501,
      "grad_norm": 0.09375,
      "learning_rate": 7.2552783109405e-06,
      "loss": 0.9344,
      "step": 286
    },
    {
      "epoch": 0.27543186180422263,
      "grad_norm": 0.09521484375,
      "learning_rate": 7.245681381957774e-06,
      "loss": 0.9692,
      "step": 287
    },
    {
      "epoch": 0.2763915547024952,
      "grad_norm": 0.09814453125,
      "learning_rate": 7.236084452975048e-06,
      "loss": 0.954,
      "step": 288
    },
    {
      "epoch": 0.27735124760076774,
      "grad_norm": 0.1181640625,
      "learning_rate": 7.226487523992323e-06,
      "loss": 1.0483,
      "step": 289
    },
    {
      "epoch": 0.2783109404990403,
      "grad_norm": 0.1083984375,
      "learning_rate": 7.216890595009598e-06,
      "loss": 1.0348,
      "step": 290
    },
    {
      "epoch": 0.27927063339731284,
      "grad_norm": 0.0966796875,
      "learning_rate": 7.207293666026872e-06,
      "loss": 0.9232,
      "step": 291
    },
    {
      "epoch": 0.2802303262955854,
      "grad_norm": 0.09228515625,
      "learning_rate": 7.1976967370441466e-06,
      "loss": 0.9508,
      "step": 292
    },
    {
      "epoch": 0.28119001919385794,
      "grad_norm": 0.10986328125,
      "learning_rate": 7.188099808061421e-06,
      "loss": 0.9915,
      "step": 293
    },
    {
      "epoch": 0.2821497120921305,
      "grad_norm": 0.0859375,
      "learning_rate": 7.178502879078696e-06,
      "loss": 0.8884,
      "step": 294
    },
    {
      "epoch": 0.28310940499040305,
      "grad_norm": 0.09326171875,
      "learning_rate": 7.16890595009597e-06,
      "loss": 0.9609,
      "step": 295
    },
    {
      "epoch": 0.2840690978886756,
      "grad_norm": 0.10205078125,
      "learning_rate": 7.159309021113245e-06,
      "loss": 0.9797,
      "step": 296
    },
    {
      "epoch": 0.28502879078694815,
      "grad_norm": 0.09521484375,
      "learning_rate": 7.149712092130518e-06,
      "loss": 0.9791,
      "step": 297
    },
    {
      "epoch": 0.28598848368522073,
      "grad_norm": 0.1064453125,
      "learning_rate": 7.1401151631477925e-06,
      "loss": 0.8977,
      "step": 298
    },
    {
      "epoch": 0.2869481765834933,
      "grad_norm": 0.10302734375,
      "learning_rate": 7.130518234165068e-06,
      "loss": 0.8998,
      "step": 299
    },
    {
      "epoch": 0.28790786948176583,
      "grad_norm": 0.1103515625,
      "learning_rate": 7.120921305182342e-06,
      "loss": 0.9845,
      "step": 300
    },
    {
      "epoch": 0.2888675623800384,
      "grad_norm": 0.107421875,
      "learning_rate": 7.111324376199616e-06,
      "loss": 0.967,
      "step": 301
    },
    {
      "epoch": 0.28982725527831094,
      "grad_norm": 0.11181640625,
      "learning_rate": 7.101727447216891e-06,
      "loss": 0.9695,
      "step": 302
    },
    {
      "epoch": 0.2907869481765835,
      "grad_norm": 0.109375,
      "learning_rate": 7.092130518234166e-06,
      "loss": 0.9427,
      "step": 303
    },
    {
      "epoch": 0.29174664107485604,
      "grad_norm": 0.109375,
      "learning_rate": 7.08253358925144e-06,
      "loss": 0.9487,
      "step": 304
    },
    {
      "epoch": 0.2927063339731286,
      "grad_norm": 0.1396484375,
      "learning_rate": 7.0729366602687145e-06,
      "loss": 0.9567,
      "step": 305
    },
    {
      "epoch": 0.29366602687140114,
      "grad_norm": 0.1494140625,
      "learning_rate": 7.063339731285989e-06,
      "loss": 1.039,
      "step": 306
    },
    {
      "epoch": 0.2946257197696737,
      "grad_norm": 0.103515625,
      "learning_rate": 7.053742802303264e-06,
      "loss": 0.9293,
      "step": 307
    },
    {
      "epoch": 0.29558541266794625,
      "grad_norm": 0.103515625,
      "learning_rate": 7.044145873320538e-06,
      "loss": 0.9293,
      "step": 308
    },
    {
      "epoch": 0.2965451055662188,
      "grad_norm": 0.11572265625,
      "learning_rate": 7.034548944337813e-06,
      "loss": 1.0326,
      "step": 309
    },
    {
      "epoch": 0.29750479846449135,
      "grad_norm": 0.09619140625,
      "learning_rate": 7.024952015355086e-06,
      "loss": 0.9228,
      "step": 310
    },
    {
      "epoch": 0.29846449136276393,
      "grad_norm": 0.1171875,
      "learning_rate": 7.015355086372362e-06,
      "loss": 1.0134,
      "step": 311
    },
    {
      "epoch": 0.29942418426103645,
      "grad_norm": 0.09521484375,
      "learning_rate": 7.005758157389636e-06,
      "loss": 0.8992,
      "step": 312
    },
    {
      "epoch": 0.30038387715930903,
      "grad_norm": 0.10400390625,
      "learning_rate": 6.99616122840691e-06,
      "loss": 0.9474,
      "step": 313
    },
    {
      "epoch": 0.30134357005758156,
      "grad_norm": 0.11767578125,
      "learning_rate": 6.986564299424184e-06,
      "loss": 0.9854,
      "step": 314
    },
    {
      "epoch": 0.30230326295585414,
      "grad_norm": 0.087890625,
      "learning_rate": 6.9769673704414595e-06,
      "loss": 0.9244,
      "step": 315
    },
    {
      "epoch": 0.30326295585412666,
      "grad_norm": 0.08740234375,
      "learning_rate": 6.967370441458734e-06,
      "loss": 0.9114,
      "step": 316
    },
    {
      "epoch": 0.30422264875239924,
      "grad_norm": 0.095703125,
      "learning_rate": 6.957773512476008e-06,
      "loss": 0.9245,
      "step": 317
    },
    {
      "epoch": 0.30518234165067176,
      "grad_norm": 0.09228515625,
      "learning_rate": 6.9481765834932824e-06,
      "loss": 0.9606,
      "step": 318
    },
    {
      "epoch": 0.30614203454894434,
      "grad_norm": 0.1318359375,
      "learning_rate": 6.938579654510558e-06,
      "loss": 1.056,
      "step": 319
    },
    {
      "epoch": 0.30710172744721687,
      "grad_norm": 0.11083984375,
      "learning_rate": 6.928982725527832e-06,
      "loss": 0.9928,
      "step": 320
    },
    {
      "epoch": 0.30806142034548945,
      "grad_norm": 0.09765625,
      "learning_rate": 6.919385796545106e-06,
      "loss": 0.9356,
      "step": 321
    },
    {
      "epoch": 0.30902111324376197,
      "grad_norm": 0.08837890625,
      "learning_rate": 6.909788867562381e-06,
      "loss": 0.8933,
      "step": 322
    },
    {
      "epoch": 0.30998080614203455,
      "grad_norm": 0.0927734375,
      "learning_rate": 6.900191938579655e-06,
      "loss": 0.9043,
      "step": 323
    },
    {
      "epoch": 0.31094049904030713,
      "grad_norm": 0.09765625,
      "learning_rate": 6.89059500959693e-06,
      "loss": 0.9646,
      "step": 324
    },
    {
      "epoch": 0.31190019193857965,
      "grad_norm": 0.10400390625,
      "learning_rate": 6.8809980806142044e-06,
      "loss": 0.9541,
      "step": 325
    },
    {
      "epoch": 0.31285988483685223,
      "grad_norm": 0.0908203125,
      "learning_rate": 6.871401151631478e-06,
      "loss": 0.901,
      "step": 326
    },
    {
      "epoch": 0.31381957773512476,
      "grad_norm": 0.103515625,
      "learning_rate": 6.861804222648752e-06,
      "loss": 0.9409,
      "step": 327
    },
    {
      "epoch": 0.31477927063339733,
      "grad_norm": 0.1259765625,
      "learning_rate": 6.852207293666027e-06,
      "loss": 1.0008,
      "step": 328
    },
    {
      "epoch": 0.31573896353166986,
      "grad_norm": 0.09423828125,
      "learning_rate": 6.842610364683302e-06,
      "loss": 0.9406,
      "step": 329
    },
    {
      "epoch": 0.31669865642994244,
      "grad_norm": 0.09912109375,
      "learning_rate": 6.833013435700576e-06,
      "loss": 0.9532,
      "step": 330
    },
    {
      "epoch": 0.31765834932821496,
      "grad_norm": 0.1083984375,
      "learning_rate": 6.82341650671785e-06,
      "loss": 0.9948,
      "step": 331
    },
    {
      "epoch": 0.31861804222648754,
      "grad_norm": 0.1220703125,
      "learning_rate": 6.8138195777351256e-06,
      "loss": 0.9763,
      "step": 332
    },
    {
      "epoch": 0.31957773512476007,
      "grad_norm": 0.09228515625,
      "learning_rate": 6.8042226487524e-06,
      "loss": 0.8708,
      "step": 333
    },
    {
      "epoch": 0.32053742802303264,
      "grad_norm": 0.0859375,
      "learning_rate": 6.794625719769674e-06,
      "loss": 0.922,
      "step": 334
    },
    {
      "epoch": 0.32149712092130517,
      "grad_norm": 0.0810546875,
      "learning_rate": 6.7850287907869485e-06,
      "loss": 0.9056,
      "step": 335
    },
    {
      "epoch": 0.32245681381957775,
      "grad_norm": 0.09716796875,
      "learning_rate": 6.775431861804224e-06,
      "loss": 0.9891,
      "step": 336
    },
    {
      "epoch": 0.32341650671785027,
      "grad_norm": 0.10400390625,
      "learning_rate": 6.765834932821498e-06,
      "loss": 0.9317,
      "step": 337
    },
    {
      "epoch": 0.32437619961612285,
      "grad_norm": 0.1123046875,
      "learning_rate": 6.756238003838772e-06,
      "loss": 1.0119,
      "step": 338
    },
    {
      "epoch": 0.3253358925143954,
      "grad_norm": 0.10498046875,
      "learning_rate": 6.746641074856046e-06,
      "loss": 0.9928,
      "step": 339
    },
    {
      "epoch": 0.32629558541266795,
      "grad_norm": 0.10986328125,
      "learning_rate": 6.737044145873322e-06,
      "loss": 1.0083,
      "step": 340
    },
    {
      "epoch": 0.3272552783109405,
      "grad_norm": 0.0966796875,
      "learning_rate": 6.727447216890595e-06,
      "loss": 0.9153,
      "step": 341
    },
    {
      "epoch": 0.32821497120921306,
      "grad_norm": 0.1025390625,
      "learning_rate": 6.71785028790787e-06,
      "loss": 0.9593,
      "step": 342
    },
    {
      "epoch": 0.3291746641074856,
      "grad_norm": 0.1064453125,
      "learning_rate": 6.708253358925144e-06,
      "loss": 0.9167,
      "step": 343
    },
    {
      "epoch": 0.33013435700575816,
      "grad_norm": 0.080078125,
      "learning_rate": 6.698656429942419e-06,
      "loss": 0.8708,
      "step": 344
    },
    {
      "epoch": 0.3310940499040307,
      "grad_norm": 0.0947265625,
      "learning_rate": 6.6890595009596935e-06,
      "loss": 0.9276,
      "step": 345
    },
    {
      "epoch": 0.33205374280230326,
      "grad_norm": 0.107421875,
      "learning_rate": 6.679462571976968e-06,
      "loss": 1.0077,
      "step": 346
    },
    {
      "epoch": 0.3330134357005758,
      "grad_norm": 0.11376953125,
      "learning_rate": 6.669865642994242e-06,
      "loss": 1.0066,
      "step": 347
    },
    {
      "epoch": 0.33397312859884837,
      "grad_norm": 0.10595703125,
      "learning_rate": 6.6602687140115165e-06,
      "loss": 0.8746,
      "step": 348
    },
    {
      "epoch": 0.33493282149712095,
      "grad_norm": 0.1044921875,
      "learning_rate": 6.650671785028792e-06,
      "loss": 0.9443,
      "step": 349
    },
    {
      "epoch": 0.33589251439539347,
      "grad_norm": 0.0888671875,
      "learning_rate": 6.641074856046066e-06,
      "loss": 0.9567,
      "step": 350
    },
    {
      "epoch": 0.33685220729366605,
      "grad_norm": 0.1376953125,
      "learning_rate": 6.63147792706334e-06,
      "loss": 1.1195,
      "step": 351
    },
    {
      "epoch": 0.3378119001919386,
      "grad_norm": 0.0830078125,
      "learning_rate": 6.621880998080615e-06,
      "loss": 0.9184,
      "step": 352
    },
| { | |
| "epoch": 0.33877159309021115, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 6.61228406909789e-06, | |
| "loss": 0.9047, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.3397312859884837, | |
| "grad_norm": 0.0703125, | |
| "learning_rate": 6.602687140115164e-06, | |
| "loss": 0.8174, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.34069097888675626, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 6.593090211132438e-06, | |
| "loss": 0.894, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.3416506717850288, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 6.583493282149712e-06, | |
| "loss": 0.9048, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.34261036468330136, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 6.573896353166987e-06, | |
| "loss": 0.9497, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.3435700575815739, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 6.5642994241842614e-06, | |
| "loss": 0.9741, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.34452975047984646, | |
| "grad_norm": 0.109375, | |
| "learning_rate": 6.554702495201536e-06, | |
| "loss": 0.9669, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.345489443378119, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 6.54510556621881e-06, | |
| "loss": 0.9659, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.34644913627639157, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 6.535508637236085e-06, | |
| "loss": 0.8867, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 0.3474088291746641, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 6.52591170825336e-06, | |
| "loss": 0.9019, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.34836852207293667, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 6.516314779270634e-06, | |
| "loss": 0.8726, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.3493282149712092, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 6.506717850287908e-06, | |
| "loss": 0.895, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.3502879078694818, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 6.497120921305183e-06, | |
| "loss": 0.8961, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.3512476007677543, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 6.487523992322458e-06, | |
| "loss": 0.889, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.3522072936660269, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 6.477927063339732e-06, | |
| "loss": 0.8605, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.3531669865642994, | |
| "grad_norm": 0.11865234375, | |
| "learning_rate": 6.4683301343570056e-06, | |
| "loss": 0.9347, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.354126679462572, | |
| "grad_norm": 0.1455078125, | |
| "learning_rate": 6.4587332053742816e-06, | |
| "loss": 1.0203, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.3550863723608445, | |
| "grad_norm": 0.115234375, | |
| "learning_rate": 6.449136276391556e-06, | |
| "loss": 1.0568, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3560460652591171, | |
| "grad_norm": 0.12890625, | |
| "learning_rate": 6.439539347408829e-06, | |
| "loss": 1.0349, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.3570057581573896, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 6.429942418426104e-06, | |
| "loss": 0.8929, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 0.3579654510556622, | |
| "grad_norm": 0.1572265625, | |
| "learning_rate": 6.420345489443378e-06, | |
| "loss": 1.0427, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.35892514395393477, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 6.410748560460653e-06, | |
| "loss": 0.8672, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.3598848368522073, | |
| "grad_norm": 0.12255859375, | |
| "learning_rate": 6.4011516314779275e-06, | |
| "loss": 0.8907, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.36084452975047987, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 6.391554702495202e-06, | |
| "loss": 0.9077, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.3618042226487524, | |
| "grad_norm": 0.0791015625, | |
| "learning_rate": 6.381957773512476e-06, | |
| "loss": 0.8758, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.362763915547025, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 6.372360844529751e-06, | |
| "loss": 0.8745, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.3637236084452975, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 6.362763915547026e-06, | |
| "loss": 0.8887, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.3646833013435701, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 6.3531669865643e-06, | |
| "loss": 0.8776, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3656429942418426, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 6.343570057581574e-06, | |
| "loss": 0.9301, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.3666026871401152, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 6.3339731285988495e-06, | |
| "loss": 0.8878, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.3675623800383877, | |
| "grad_norm": 0.1279296875, | |
| "learning_rate": 6.324376199616124e-06, | |
| "loss": 0.8949, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.3685220729366603, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 6.314779270633397e-06, | |
| "loss": 0.9057, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.3694817658349328, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 6.305182341650672e-06, | |
| "loss": 0.8729, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.3704414587332054, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 6.295585412667947e-06, | |
| "loss": 0.9009, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.3714011516314779, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 6.285988483685221e-06, | |
| "loss": 0.8487, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.3723608445297505, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 6.2763915547024955e-06, | |
| "loss": 0.862, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.373320537428023, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 6.26679462571977e-06, | |
| "loss": 0.936, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.3742802303262956, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 6.257197696737045e-06, | |
| "loss": 0.9083, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.3752399232245681, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 6.247600767754319e-06, | |
| "loss": 0.9089, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.3761996161228407, | |
| "grad_norm": 0.16015625, | |
| "learning_rate": 6.238003838771594e-06, | |
| "loss": 1.1005, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.3771593090211132, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 6.228406909788868e-06, | |
| "loss": 0.8175, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.3781190019193858, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 6.218809980806143e-06, | |
| "loss": 0.8441, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.3790786948176583, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 6.2092130518234175e-06, | |
| "loss": 0.9371, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.3800383877159309, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 6.199616122840692e-06, | |
| "loss": 1.0151, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.3809980806142035, | |
| "grad_norm": 0.130859375, | |
| "learning_rate": 6.190019193857965e-06, | |
| "loss": 0.9341, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.381957773512476, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 6.18042226487524e-06, | |
| "loss": 0.9144, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.3829174664107486, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 6.170825335892516e-06, | |
| "loss": 0.9452, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.3838771593090211, | |
| "grad_norm": 0.1103515625, | |
| "learning_rate": 6.161228406909789e-06, | |
| "loss": 1.0361, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3848368522072937, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 6.151631477927063e-06, | |
| "loss": 0.9116, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.3857965451055662, | |
| "grad_norm": 0.11328125, | |
| "learning_rate": 6.142034548944338e-06, | |
| "loss": 0.8891, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.3867562380038388, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 6.132437619961613e-06, | |
| "loss": 0.8676, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.3877159309021113, | |
| "grad_norm": 0.11083984375, | |
| "learning_rate": 6.122840690978887e-06, | |
| "loss": 0.9296, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.3886756238003839, | |
| "grad_norm": 0.119140625, | |
| "learning_rate": 6.1132437619961616e-06, | |
| "loss": 0.9911, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.3896353166986564, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 6.103646833013436e-06, | |
| "loss": 0.9256, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.390595009596929, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 6.094049904030711e-06, | |
| "loss": 0.8496, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.3915547024952015, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 6.084452975047985e-06, | |
| "loss": 0.8917, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.3925143953934741, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 6.07485604606526e-06, | |
| "loss": 0.9327, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.3934740882917466, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 6.065259117082534e-06, | |
| "loss": 0.8882, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.3944337811900192, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 6.055662188099809e-06, | |
| "loss": 0.9488, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.39539347408829173, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 6.0460652591170836e-06, | |
| "loss": 0.9024, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.3963531669865643, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 6.036468330134357e-06, | |
| "loss": 0.9393, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.39731285988483683, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 6.026871401151631e-06, | |
| "loss": 0.8741, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.3982725527831094, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 6.0172744721689065e-06, | |
| "loss": 0.9609, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.39923224568138194, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 6.007677543186181e-06, | |
| "loss": 0.9116, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.4001919385796545, | |
| "grad_norm": 0.109375, | |
| "learning_rate": 5.998080614203455e-06, | |
| "loss": 0.9607, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.40115163147792704, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 5.9884836852207295e-06, | |
| "loss": 0.9153, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.4021113243761996, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 5.978886756238005e-06, | |
| "loss": 0.8719, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.40307101727447214, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 5.969289827255279e-06, | |
| "loss": 0.9437, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.4040307101727447, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 5.959692898272553e-06, | |
| "loss": 0.9401, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.4049904030710173, | |
| "grad_norm": 0.115234375, | |
| "learning_rate": 5.950095969289828e-06, | |
| "loss": 0.9458, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.4059500959692898, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 5.940499040307102e-06, | |
| "loss": 1.0006, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.4069097888675624, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 5.930902111324377e-06, | |
| "loss": 0.8507, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.40786948176583493, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 5.9213051823416515e-06, | |
| "loss": 0.9471, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.4088291746641075, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 5.911708253358925e-06, | |
| "loss": 0.9143, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.40978886756238003, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 5.902111324376199e-06, | |
| "loss": 0.9904, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.4107485604606526, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 5.892514395393475e-06, | |
| "loss": 0.9509, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.41170825335892514, | |
| "grad_norm": 0.10791015625, | |
| "learning_rate": 5.882917466410749e-06, | |
| "loss": 0.9433, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.4126679462571977, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 5.873320537428023e-06, | |
| "loss": 0.9041, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.41362763915547024, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 5.8637236084452975e-06, | |
| "loss": 0.8894, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.4145873320537428, | |
| "grad_norm": 0.11572265625, | |
| "learning_rate": 5.854126679462573e-06, | |
| "loss": 0.9713, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.41554702495201534, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 5.844529750479847e-06, | |
| "loss": 0.8676, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.4165067178502879, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 5.834932821497121e-06, | |
| "loss": 0.9207, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.41746641074856045, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 5.825335892514396e-06, | |
| "loss": 1.011, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.418426103646833, | |
| "grad_norm": 0.1318359375, | |
| "learning_rate": 5.815738963531671e-06, | |
| "loss": 0.9231, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.41938579654510555, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 5.806142034548945e-06, | |
| "loss": 0.941, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.42034548944337813, | |
| "grad_norm": 0.2001953125, | |
| "learning_rate": 5.7965451055662194e-06, | |
| "loss": 1.0724, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.42130518234165065, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 5.786948176583494e-06, | |
| "loss": 0.9221, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.42226487523992323, | |
| "grad_norm": 0.12060546875, | |
| "learning_rate": 5.777351247600769e-06, | |
| "loss": 1.0565, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.42322456813819576, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 5.767754318618043e-06, | |
| "loss": 0.984, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.42418426103646834, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 5.758157389635317e-06, | |
| "loss": 0.9393, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.42514395393474086, | |
| "grad_norm": 0.111328125, | |
| "learning_rate": 5.748560460652591e-06, | |
| "loss": 0.9928, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.42610364683301344, | |
| "grad_norm": 0.11376953125, | |
| "learning_rate": 5.738963531669866e-06, | |
| "loss": 1.0521, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.42706333973128596, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 5.7293666026871406e-06, | |
| "loss": 0.9167, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.42802303262955854, | |
| "grad_norm": 0.109375, | |
| "learning_rate": 5.719769673704415e-06, | |
| "loss": 0.959, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.4289827255278311, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 5.710172744721689e-06, | |
| "loss": 0.8617, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.42994241842610365, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 5.7005758157389635e-06, | |
| "loss": 0.9925, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.4309021113243762, | |
| "grad_norm": 0.1259765625, | |
| "learning_rate": 5.690978886756239e-06, | |
| "loss": 1.026, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.43186180422264875, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 5.681381957773513e-06, | |
| "loss": 0.8937, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.43282149712092133, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 5.671785028790787e-06, | |
| "loss": 0.8959, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.43378119001919385, | |
| "grad_norm": 0.119140625, | |
| "learning_rate": 5.662188099808062e-06, | |
| "loss": 0.9824, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.43474088291746643, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 5.652591170825337e-06, | |
| "loss": 0.912, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.43570057581573896, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 5.642994241842611e-06, | |
| "loss": 0.8847, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.43666026871401153, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 5.6333973128598855e-06, | |
| "loss": 0.8793, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.43761996161228406, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 5.623800383877159e-06, | |
| "loss": 0.8788, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.43857965451055664, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 5.614203454894435e-06, | |
| "loss": 0.9116, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.43953934740882916, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 5.6046065259117085e-06, | |
| "loss": 0.8685, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.44049904030710174, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 5.595009596928983e-06, | |
| "loss": 0.8572, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.44145873320537427, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 5.585412667946257e-06, | |
| "loss": 0.8928, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.44241842610364684, | |
| "grad_norm": 0.1943359375, | |
| "learning_rate": 5.575815738963532e-06, | |
| "loss": 0.8916, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.44337811900191937, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 5.566218809980807e-06, | |
| "loss": 0.8626, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.44433781190019195, | |
| "grad_norm": 0.10693359375, | |
| "learning_rate": 5.556621880998081e-06, | |
| "loss": 0.9158, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.44529750479846447, | |
| "grad_norm": 0.10888671875, | |
| "learning_rate": 5.547024952015355e-06, | |
| "loss": 0.8666, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.44625719769673705, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 5.5374280230326305e-06, | |
| "loss": 0.9601, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.4472168905950096, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 5.527831094049905e-06, | |
| "loss": 0.9191, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.44817658349328215, | |
| "grad_norm": 0.11181640625, | |
| "learning_rate": 5.518234165067179e-06, | |
| "loss": 1.1148, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.4491362763915547, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 5.5086372360844535e-06, | |
| "loss": 0.8935, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.45009596928982726, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 5.499040307101729e-06, | |
| "loss": 0.9715, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.4510556621880998, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 5.489443378119003e-06, | |
| "loss": 0.9087, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.45201535508637236, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 5.4798464491362765e-06, | |
| "loss": 0.912, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.45297504798464494, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 5.470249520153551e-06, | |
| "loss": 0.9182, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.45393474088291746, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 5.460652591170825e-06, | |
| "loss": 0.8728, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.45489443378119004, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 5.4510556621881e-06, | |
| "loss": 0.9494, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.45585412667946257, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 5.441458733205375e-06, | |
| "loss": 0.8172, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.45681381957773515, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 5.431861804222649e-06, | |
| "loss": 0.9106, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.45777351247600767, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 5.422264875239923e-06, | |
| "loss": 0.958, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.45873320537428025, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 5.4126679462571984e-06, | |
| "loss": 0.8861, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.4596928982725528, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 5.403071017274473e-06, | |
| "loss": 0.8868, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.46065259117082535, | |
| "grad_norm": 0.10986328125, | |
| "learning_rate": 5.393474088291747e-06, | |
| "loss": 0.954, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.4616122840690979, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 5.383877159309021e-06, | |
| "loss": 0.9156, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.46257197696737046, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 5.374280230326297e-06, | |
| "loss": 0.8546, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.463531669865643, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 5.364683301343571e-06, | |
| "loss": 0.8904, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.46449136276391556, | |
| "grad_norm": 0.1064453125, | |
| "learning_rate": 5.355086372360845e-06, | |
| "loss": 0.92, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.4654510556621881, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 5.345489443378119e-06, | |
| "loss": 0.9414, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.46641074856046066, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 5.335892514395395e-06, | |
| "loss": 0.852, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.4673704414587332, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 5.326295585412668e-06, | |
| "loss": 0.8742, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.46833013435700577, | |
| "grad_norm": 0.1142578125, | |
| "learning_rate": 5.3166986564299425e-06, | |
| "loss": 0.9099, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.4692898272552783, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 5.307101727447217e-06, | |
| "loss": 0.8932, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.47024952015355087, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 5.297504798464492e-06, | |
| "loss": 0.9031, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.4712092130518234, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 5.287907869481766e-06, | |
| "loss": 0.9625, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.472168905950096, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 5.278310940499041e-06, | |
| "loss": 0.8983, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.4731285988483685, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 5.268714011516315e-06, | |
| "loss": 0.8959, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.4740882917466411, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 5.25911708253359e-06, | |
| "loss": 0.8927, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.4750479846449136, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 5.2495201535508645e-06, | |
| "loss": 0.9237, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.4760076775431862, | |
| "grad_norm": 0.10791015625, | |
| "learning_rate": 5.239923224568139e-06, | |
| "loss": 0.8485, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.47696737044145876, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 5.230326295585413e-06, | |
| "loss": 0.8559, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.4779270633397313, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 5.220729366602687e-06, | |
| "loss": 0.9235, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.47888675623800386, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 5.211132437619963e-06, | |
| "loss": 0.8661, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.4798464491362764, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 5.201535508637236e-06, | |
| "loss": 0.8836, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.48080614203454897, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 5.1919385796545105e-06, | |
| "loss": 0.8703, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.4817658349328215, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 5.182341650671785e-06, | |
| "loss": 0.8234, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.48272552783109407, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 5.17274472168906e-06, | |
| "loss": 0.9394, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.4836852207293666, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 5.163147792706334e-06, | |
| "loss": 0.9818, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.4846449136276392, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 5.153550863723609e-06, | |
| "loss": 0.8652, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.4856046065259117, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 5.143953934740883e-06, | |
| "loss": 0.8937, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.4865642994241843, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 5.134357005758158e-06, | |
| "loss": 0.8381, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.4875239923224568, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 5.1247600767754325e-06, | |
| "loss": 0.9158, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.4884836852207294, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 5.115163147792707e-06, | |
| "loss": 0.897, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.4894433781190019, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 5.105566218809981e-06, | |
| "loss": 0.8864, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.4904030710172745, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 5.095969289827256e-06, | |
| "loss": 0.8694, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.491362763915547, | |
| "grad_norm": 0.119140625, | |
| "learning_rate": 5.086372360844531e-06, | |
| "loss": 1.0052, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.4923224568138196, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 5.076775431861805e-06, | |
| "loss": 0.8901, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.4932821497120921, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 5.0671785028790784e-06, | |
| "loss": 0.8759, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.4942418426103647, | |
| "grad_norm": 0.11865234375, | |
| "learning_rate": 5.0575815738963544e-06, | |
| "loss": 0.9608, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.4952015355086372, | |
| "grad_norm": 0.08056640625, | |
| "learning_rate": 5.047984644913628e-06, | |
| "loss": 0.8765, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.4961612284069098, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 5.038387715930902e-06, | |
| "loss": 0.819, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.4971209213051823, | |
| "grad_norm": 0.119140625, | |
| "learning_rate": 5.028790786948177e-06, | |
| "loss": 0.9235, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.4980806142034549, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 5.019193857965452e-06, | |
| "loss": 0.898, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.4990403071017274, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 5.009596928982726e-06, | |
| "loss": 0.8986, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 5e-06, | |
| "loss": 0.888, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.5009596928982726, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 4.990403071017275e-06, | |
| "loss": 0.8961, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.5019193857965452, | |
| "grad_norm": 0.10888671875, | |
| "learning_rate": 4.980806142034549e-06, | |
| "loss": 0.8657, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.5028790786948176, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 4.971209213051823e-06, | |
| "loss": 0.9533, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.5038387715930902, | |
| "grad_norm": 0.1259765625, | |
| "learning_rate": 4.9616122840690986e-06, | |
| "loss": 0.8836, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.5047984644913628, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 4.952015355086373e-06, | |
| "loss": 0.9135, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.5057581573896354, | |
| "grad_norm": 0.142578125, | |
| "learning_rate": 4.942418426103647e-06, | |
| "loss": 0.9559, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.5067178502879078, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 4.9328214971209215e-06, | |
| "loss": 0.8754, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.5076775431861804, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 4.923224568138196e-06, | |
| "loss": 0.8919, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.508637236084453, | |
| "grad_norm": 0.11572265625, | |
| "learning_rate": 4.91362763915547e-06, | |
| "loss": 1.0779, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5095969289827256, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 4.904030710172745e-06, | |
| "loss": 0.9135, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.510556621880998, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 4.89443378119002e-06, | |
| "loss": 0.8455, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.5115163147792706, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 4.884836852207294e-06, | |
| "loss": 0.8528, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.5124760076775432, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 4.875239923224568e-06, | |
| "loss": 0.932, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.5134357005758158, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 4.8656429942418435e-06, | |
| "loss": 0.8596, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.5143953934740882, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 4.856046065259117e-06, | |
| "loss": 0.9044, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.5153550863723608, | |
| "grad_norm": 0.1484375, | |
| "learning_rate": 4.846449136276392e-06, | |
| "loss": 0.8278, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.5163147792706334, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 4.8368522072936665e-06, | |
| "loss": 0.8511, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.517274472168906, | |
| "grad_norm": 0.11181640625, | |
| "learning_rate": 4.827255278310941e-06, | |
| "loss": 0.8579, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.5182341650671785, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 4.817658349328215e-06, | |
| "loss": 0.9334, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.519193857965451, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 4.80806142034549e-06, | |
| "loss": 0.897, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.5201535508637236, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 4.798464491362765e-06, | |
| "loss": 0.8945, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.5211132437619962, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 4.788867562380039e-06, | |
| "loss": 0.938, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.5220729366602687, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 4.779270633397313e-06, | |
| "loss": 0.879, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.5230326295585412, | |
| "grad_norm": 0.11328125, | |
| "learning_rate": 4.769673704414588e-06, | |
| "loss": 0.921, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.5239923224568138, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 4.760076775431862e-06, | |
| "loss": 0.9038, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.5249520153550864, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 4.750479846449136e-06, | |
| "loss": 0.8522, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.525911708253359, | |
| "grad_norm": 0.1064453125, | |
| "learning_rate": 4.7408829174664115e-06, | |
| "loss": 0.8692, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.5268714011516314, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 4.731285988483685e-06, | |
| "loss": 0.9022, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.527831094049904, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 4.72168905950096e-06, | |
| "loss": 0.9225, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5287907869481766, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 4.7120921305182344e-06, | |
| "loss": 0.9088, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.5297504798464492, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 4.702495201535509e-06, | |
| "loss": 0.8757, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.5307101727447217, | |
| "grad_norm": 0.12890625, | |
| "learning_rate": 4.692898272552783e-06, | |
| "loss": 0.9915, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.5316698656429942, | |
| "grad_norm": 0.1376953125, | |
| "learning_rate": 4.683301343570058e-06, | |
| "loss": 0.9778, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.5326295585412668, | |
| "grad_norm": 0.1064453125, | |
| "learning_rate": 4.673704414587333e-06, | |
| "loss": 0.9743, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.5335892514395394, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 4.664107485604607e-06, | |
| "loss": 0.9756, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.5345489443378119, | |
| "grad_norm": 0.146484375, | |
| "learning_rate": 4.654510556621881e-06, | |
| "loss": 0.9832, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.5355086372360844, | |
| "grad_norm": 0.1064453125, | |
| "learning_rate": 4.644913627639156e-06, | |
| "loss": 0.9348, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.536468330134357, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 4.63531669865643e-06, | |
| "loss": 0.8675, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.5374280230326296, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 4.625719769673705e-06, | |
| "loss": 0.9037, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5383877159309021, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 4.616122840690979e-06, | |
| "loss": 0.9621, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.5393474088291746, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 4.606525911708254e-06, | |
| "loss": 0.9018, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.5403071017274472, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 4.596928982725528e-06, | |
| "loss": 0.8998, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.5412667946257198, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 4.587332053742803e-06, | |
| "loss": 0.8812, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.5422264875239923, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 4.577735124760077e-06, | |
| "loss": 0.8627, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.5431861804222649, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 4.568138195777352e-06, | |
| "loss": 0.9064, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.5441458733205374, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 4.558541266794626e-06, | |
| "loss": 0.85, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.54510556621881, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 4.5489443378119005e-06, | |
| "loss": 0.9558, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.5460652591170825, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 4.539347408829175e-06, | |
| "loss": 0.9183, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.5470249520153551, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 4.52975047984645e-06, | |
| "loss": 0.9178, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.5479846449136276, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 4.520153550863724e-06, | |
| "loss": 0.9022, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.5489443378119002, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 4.510556621880998e-06, | |
| "loss": 0.858, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.5499040307101728, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 4.500959692898273e-06, | |
| "loss": 0.8855, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.5508637236084453, | |
| "grad_norm": 0.1181640625, | |
| "learning_rate": 4.491362763915547e-06, | |
| "loss": 1.0215, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.5518234165067178, | |
| "grad_norm": 0.1689453125, | |
| "learning_rate": 4.481765834932822e-06, | |
| "loss": 1.0886, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.5527831094049904, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 4.472168905950096e-06, | |
| "loss": 0.9371, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.553742802303263, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 4.462571976967371e-06, | |
| "loss": 0.9896, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.5547024952015355, | |
| "grad_norm": 0.1435546875, | |
| "learning_rate": 4.4529750479846455e-06, | |
| "loss": 0.9882, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.555662188099808, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 4.44337811900192e-06, | |
| "loss": 0.839, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.5566218809980806, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 4.433781190019194e-06, | |
| "loss": 0.8676, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.5575815738963532, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 4.4241842610364685e-06, | |
| "loss": 0.9514, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.5585412667946257, | |
| "grad_norm": 0.12158203125, | |
| "learning_rate": 4.414587332053743e-06, | |
| "loss": 0.9222, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.5595009596928983, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 4.404990403071018e-06, | |
| "loss": 0.8855, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.5604606525911708, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 4.395393474088292e-06, | |
| "loss": 0.8979, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.5614203454894434, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 4.385796545105567e-06, | |
| "loss": 0.8857, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.5623800383877159, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 4.376199616122841e-06, | |
| "loss": 0.8976, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.5633397312859885, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 4.366602687140115e-06, | |
| "loss": 0.8973, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.564299424184261, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 4.35700575815739e-06, | |
| "loss": 0.9081, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.5652591170825336, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 4.347408829174665e-06, | |
| "loss": 0.9515, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.5662188099808061, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 4.337811900191939e-06, | |
| "loss": 0.9527, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.5671785028790787, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 4.3282149712092134e-06, | |
| "loss": 1.024, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.5681381957773513, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 4.318618042226488e-06, | |
| "loss": 0.9031, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.5690978886756238, | |
| "grad_norm": 0.1455078125, | |
| "learning_rate": 4.309021113243763e-06, | |
| "loss": 0.9823, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.5700575815738963, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 4.299424184261036e-06, | |
| "loss": 0.8912, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.5710172744721689, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 4.289827255278312e-06, | |
| "loss": 0.8746, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.5719769673704415, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 4.280230326295586e-06, | |
| "loss": 0.8366, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.572936660268714, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 4.27063339731286e-06, | |
| "loss": 0.9169, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.5738963531669866, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 4.2610364683301346e-06, | |
| "loss": 0.8725, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.5748560460652591, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 4.251439539347409e-06, | |
| "loss": 0.8822, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.5758157389635317, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 4.241842610364684e-06, | |
| "loss": 0.8658, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.5767754318618042, | |
| "grad_norm": 0.11181640625, | |
| "learning_rate": 4.2322456813819576e-06, | |
| "loss": 0.9315, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.5777351247600768, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 4.222648752399233e-06, | |
| "loss": 0.915, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.5786948176583493, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 4.213051823416507e-06, | |
| "loss": 0.8608, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.5796545105566219, | |
| "grad_norm": 0.1416015625, | |
| "learning_rate": 4.203454894433781e-06, | |
| "loss": 0.9974, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.5806142034548945, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 4.193857965451056e-06, | |
| "loss": 0.8526, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.581573896353167, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 4.184261036468331e-06, | |
| "loss": 0.8498, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.5825335892514395, | |
| "grad_norm": 0.16796875, | |
| "learning_rate": 4.174664107485605e-06, | |
| "loss": 1.011, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.5834932821497121, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 4.1650671785028795e-06, | |
| "loss": 0.8683, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.5844529750479847, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 4.155470249520154e-06, | |
| "loss": 0.8471, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.5854126679462572, | |
| "grad_norm": 0.130859375, | |
| "learning_rate": 4.145873320537428e-06, | |
| "loss": 0.9454, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.5863723608445297, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 4.1362763915547025e-06, | |
| "loss": 0.9189, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.5873320537428023, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 4.126679462571978e-06, | |
| "loss": 0.8792, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.5882917466410749, | |
| "grad_norm": 0.1337890625, | |
| "learning_rate": 4.117082533589252e-06, | |
| "loss": 1.0256, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.5892514395393474, | |
| "grad_norm": 0.1064453125, | |
| "learning_rate": 4.107485604606526e-06, | |
| "loss": 0.8726, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.5902111324376199, | |
| "grad_norm": 0.080078125, | |
| "learning_rate": 4.097888675623801e-06, | |
| "loss": 0.8566, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.5911708253358925, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 4.088291746641076e-06, | |
| "loss": 0.8521, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.5921305182341651, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 4.078694817658349e-06, | |
| "loss": 0.8877, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.5930902111324377, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 4.0690978886756245e-06, | |
| "loss": 0.8607, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.5940499040307101, | |
| "grad_norm": 0.166015625, | |
| "learning_rate": 4.059500959692899e-06, | |
| "loss": 1.0081, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.5950095969289827, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 4.049904030710173e-06, | |
| "loss": 0.8838, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.5959692898272553, | |
| "grad_norm": 0.1416015625, | |
| "learning_rate": 4.0403071017274475e-06, | |
| "loss": 0.9923, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.5969289827255279, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 4.030710172744722e-06, | |
| "loss": 0.8822, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.5978886756238004, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 4.021113243761996e-06, | |
| "loss": 0.8951, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.5988483685220729, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 4.0115163147792705e-06, | |
| "loss": 0.8574, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.5998080614203455, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 4.001919385796546e-06, | |
| "loss": 0.9205, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.6007677543186181, | |
| "grad_norm": 0.10986328125, | |
| "learning_rate": 3.99232245681382e-06, | |
| "loss": 0.8943, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.6017274472168906, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 3.982725527831094e-06, | |
| "loss": 0.8803, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.6026871401151631, | |
| "grad_norm": 0.1181640625, | |
| "learning_rate": 3.973128598848369e-06, | |
| "loss": 0.9062, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.6036468330134357, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 3.963531669865644e-06, | |
| "loss": 0.8578, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.6046065259117083, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 3.953934740882917e-06, | |
| "loss": 0.8856, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6055662188099808, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 3.9443378119001924e-06, | |
| "loss": 0.8826, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.6065259117082533, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 3.934740882917467e-06, | |
| "loss": 0.8445, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.6074856046065259, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 3.925143953934741e-06, | |
| "loss": 0.8739, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.6084452975047985, | |
| "grad_norm": 0.11181640625, | |
| "learning_rate": 3.915547024952015e-06, | |
| "loss": 0.9497, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.6094049904030711, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 3.905950095969291e-06, | |
| "loss": 0.8306, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.6103646833013435, | |
| "grad_norm": 0.130859375, | |
| "learning_rate": 3.896353166986565e-06, | |
| "loss": 0.9456, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.6113243761996161, | |
| "grad_norm": 0.11181640625, | |
| "learning_rate": 3.886756238003839e-06, | |
| "loss": 0.9411, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.6122840690978887, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 3.8771593090211136e-06, | |
| "loss": 0.9256, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.6132437619961613, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 3.867562380038388e-06, | |
| "loss": 0.8409, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.6142034548944337, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 3.857965451055662e-06, | |
| "loss": 0.9718, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6151631477927063, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 3.848368522072937e-06, | |
| "loss": 0.9014, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.6161228406909789, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 3.838771593090212e-06, | |
| "loss": 0.9303, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.6170825335892515, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 3.829174664107486e-06, | |
| "loss": 0.918, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.6180422264875239, | |
| "grad_norm": 0.138671875, | |
| "learning_rate": 3.81957773512476e-06, | |
| "loss": 0.8488, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.6190019193857965, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 3.809980806142035e-06, | |
| "loss": 0.892, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.6199616122840691, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 3.8003838771593095e-06, | |
| "loss": 0.865, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.6209213051823417, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 3.7907869481765834e-06, | |
| "loss": 0.8873, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.6218809980806143, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 3.781190019193858e-06, | |
| "loss": 0.8258, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.6228406909788867, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 3.7715930902111324e-06, | |
| "loss": 0.8526, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.6238003838771593, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 3.761996161228407e-06, | |
| "loss": 0.8948, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6247600767754319, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 3.7523992322456815e-06, | |
| "loss": 0.9332, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.6257197696737045, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 3.7428023032629563e-06, | |
| "loss": 0.8915, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.6266794625719769, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 3.7332053742802306e-06, | |
| "loss": 0.8898, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.6276391554702495, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 3.7236084452975053e-06, | |
| "loss": 0.9066, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.6285988483685221, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 3.7140115163147792e-06, | |
| "loss": 0.8427, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.6295585412667947, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 3.704414587332054e-06, | |
| "loss": 0.8633, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.6305182341650671, | |
| "grad_norm": 0.115234375, | |
| "learning_rate": 3.6948176583493283e-06, | |
| "loss": 0.9561, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.6314779270633397, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 3.685220729366603e-06, | |
| "loss": 0.8768, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.6324376199616123, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 3.6756238003838774e-06, | |
| "loss": 0.8322, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.6333973128598849, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 3.666026871401152e-06, | |
| "loss": 0.9367, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6343570057581573, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 3.6564299424184265e-06, | |
| "loss": 0.9112, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.6353166986564299, | |
| "grad_norm": 0.10888671875, | |
| "learning_rate": 3.6468330134357012e-06, | |
| "loss": 0.9364, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.6362763915547025, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 3.637236084452975e-06, | |
| "loss": 0.8825, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.6372360844529751, | |
| "grad_norm": 0.10693359375, | |
| "learning_rate": 3.62763915547025e-06, | |
| "loss": 0.927, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.6381957773512476, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 3.618042226487524e-06, | |
| "loss": 0.9676, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.6391554702495201, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 3.608445297504799e-06, | |
| "loss": 0.7565, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.6401151631477927, | |
| "grad_norm": 0.08154296875, | |
| "learning_rate": 3.5988483685220733e-06, | |
| "loss": 0.8226, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.6410748560460653, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 3.589251439539348e-06, | |
| "loss": 0.9409, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.6420345489443378, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 3.5796545105566224e-06, | |
| "loss": 0.8866, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.6429942418426103, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 3.5700575815738963e-06, | |
| "loss": 0.8802, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6439539347408829, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 3.560460652591171e-06, | |
| "loss": 0.8443, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.6449136276391555, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 3.5508637236084453e-06, | |
| "loss": 0.9877, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.6458733205374281, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 3.54126679462572e-06, | |
| "loss": 0.8619, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.6468330134357005, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 3.5316698656429944e-06, | |
| "loss": 0.9202, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.6477927063339731, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 3.522072936660269e-06, | |
| "loss": 0.8944, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.6487523992322457, | |
| "grad_norm": 0.0849609375, | |
| "learning_rate": 3.512476007677543e-06, | |
| "loss": 0.8625, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.6497120921305183, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 3.502879078694818e-06, | |
| "loss": 0.8948, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.6506717850287908, | |
| "grad_norm": 0.1171875, | |
| "learning_rate": 3.493282149712092e-06, | |
| "loss": 0.9505, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.6516314779270633, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 3.483685220729367e-06, | |
| "loss": 0.8514, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.6525911708253359, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 3.4740882917466412e-06, | |
| "loss": 0.9246, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.6535508637236085, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 3.464491362763916e-06, | |
| "loss": 0.8808, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.654510556621881, | |
| "grad_norm": 0.1220703125, | |
| "learning_rate": 3.4548944337811903e-06, | |
| "loss": 0.9993, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.6554702495201535, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 3.445297504798465e-06, | |
| "loss": 0.8682, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.6564299424184261, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 3.435700575815739e-06, | |
| "loss": 0.8945, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.6573896353166987, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 3.4261036468330137e-06, | |
| "loss": 0.8457, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.6583493282149712, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 3.416506717850288e-06, | |
| "loss": 0.8959, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.6593090211132437, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 3.4069097888675628e-06, | |
| "loss": 0.8913, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.6602687140115163, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 3.397312859884837e-06, | |
| "loss": 0.9114, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.6612284069097889, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 3.387715930902112e-06, | |
| "loss": 0.8474, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.6621880998080614, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 3.378119001919386e-06, | |
| "loss": 0.877, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.663147792706334, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 3.368522072936661e-06, | |
| "loss": 0.878, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.6641074856046065, | |
| "grad_norm": 0.1435546875, | |
| "learning_rate": 3.358925143953935e-06, | |
| "loss": 1.0548, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.6650671785028791, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 3.3493282149712096e-06, | |
| "loss": 0.8348, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.6660268714011516, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 3.339731285988484e-06, | |
| "loss": 0.9428, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.6669865642994242, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 3.3301343570057582e-06, | |
| "loss": 0.8574, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.6679462571976967, | |
| "grad_norm": 0.1474609375, | |
| "learning_rate": 3.320537428023033e-06, | |
| "loss": 1.0041, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.6689059500959693, | |
| "grad_norm": 0.083984375, | |
| "learning_rate": 3.3109404990403073e-06, | |
| "loss": 0.9344, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.6698656429942419, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 3.301343570057582e-06, | |
| "loss": 0.8579, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.6708253358925144, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 3.291746641074856e-06, | |
| "loss": 0.8861, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.6717850287907869, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 3.2821497120921307e-06, | |
| "loss": 0.841, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.6727447216890595, | |
| "grad_norm": 0.1142578125, | |
| "learning_rate": 3.272552783109405e-06, | |
| "loss": 0.9662, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.6737044145873321, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 3.26295585412668e-06, | |
| "loss": 0.9112, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.6746641074856046, | |
| "grad_norm": 0.1298828125, | |
| "learning_rate": 3.253358925143954e-06, | |
| "loss": 1.0551, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.6756238003838771, | |
| "grad_norm": 0.1171875, | |
| "learning_rate": 3.243761996161229e-06, | |
| "loss": 0.8602, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.6765834932821497, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 3.2341650671785028e-06, | |
| "loss": 0.9373, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.6775431861804223, | |
| "grad_norm": 0.1357421875, | |
| "learning_rate": 3.224568138195778e-06, | |
| "loss": 0.9555, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.6785028790786948, | |
| "grad_norm": 0.142578125, | |
| "learning_rate": 3.214971209213052e-06, | |
| "loss": 0.97, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.6794625719769674, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 3.2053742802303266e-06, | |
| "loss": 0.833, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.6804222648752399, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 3.195777351247601e-06, | |
| "loss": 0.8036, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.6813819577735125, | |
| "grad_norm": 0.11328125, | |
| "learning_rate": 3.1861804222648757e-06, | |
| "loss": 0.9721, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.682341650671785, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 3.17658349328215e-06, | |
| "loss": 0.8539, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.6833013435700576, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 3.1669865642994248e-06, | |
| "loss": 0.8504, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.6842610364683301, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 3.1573896353166987e-06, | |
| "loss": 0.878, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.6852207293666027, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 3.1477927063339734e-06, | |
| "loss": 0.8945, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.6861804222648752, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 3.1381957773512477e-06, | |
| "loss": 0.9088, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.6871401151631478, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 3.1285988483685225e-06, | |
| "loss": 0.9088, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.6880998080614203, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 3.119001919385797e-06, | |
| "loss": 0.8658, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.6890595009596929, | |
| "grad_norm": 0.11865234375, | |
| "learning_rate": 3.1094049904030716e-06, | |
| "loss": 0.8723, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.6900191938579654, | |
| "grad_norm": 0.154296875, | |
| "learning_rate": 3.099808061420346e-06, | |
| "loss": 0.9998, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.690978886756238, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 3.09021113243762e-06, | |
| "loss": 0.8432, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.6919385796545106, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 3.0806142034548945e-06, | |
| "loss": 0.8783, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.6928982725527831, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 3.071017274472169e-06, | |
| "loss": 0.8529, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.6938579654510557, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 3.0614203454894436e-06, | |
| "loss": 0.8756, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.6948176583493282, | |
| "grad_norm": 0.109375, | |
| "learning_rate": 3.051823416506718e-06, | |
| "loss": 0.9314, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.6957773512476008, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 3.0422264875239927e-06, | |
| "loss": 0.8719, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.6967370441458733, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 3.032629558541267e-06, | |
| "loss": 0.881, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.6976967370441459, | |
| "grad_norm": 0.1552734375, | |
| "learning_rate": 3.0230326295585418e-06, | |
| "loss": 0.97, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.6986564299424184, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 3.0134357005758157e-06, | |
| "loss": 0.8983, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.699616122840691, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 3.0038387715930904e-06, | |
| "loss": 0.8501, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.7005758157389635, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 2.9942418426103648e-06, | |
| "loss": 0.8754, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7015355086372361, | |
| "grad_norm": 0.1083984375, | |
| "learning_rate": 2.9846449136276395e-06, | |
| "loss": 0.9421, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.7024952015355086, | |
| "grad_norm": 0.1328125, | |
| "learning_rate": 2.975047984644914e-06, | |
| "loss": 0.9579, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.7034548944337812, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 2.9654510556621886e-06, | |
| "loss": 0.8518, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.7044145873320538, | |
| "grad_norm": 0.11962890625, | |
| "learning_rate": 2.9558541266794625e-06, | |
| "loss": 0.9811, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.7053742802303263, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 2.9462571976967377e-06, | |
| "loss": 0.9039, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.7063339731285988, | |
| "grad_norm": 0.11083984375, | |
| "learning_rate": 2.9366602687140116e-06, | |
| "loss": 0.914, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.7072936660268714, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 2.9270633397312863e-06, | |
| "loss": 0.8289, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.708253358925144, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 2.9174664107485606e-06, | |
| "loss": 0.8639, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.7092130518234165, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 2.9078694817658354e-06, | |
| "loss": 0.9277, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.710172744721689, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 2.8982725527831097e-06, | |
| "loss": 0.8812, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7111324376199616, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 2.8886756238003845e-06, | |
| "loss": 0.9588, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.7120921305182342, | |
| "grad_norm": 0.1103515625, | |
| "learning_rate": 2.8790786948176584e-06, | |
| "loss": 0.9299, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.7130518234165067, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 2.869481765834933e-06, | |
| "loss": 0.9102, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.7140115163147792, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 2.8598848368522074e-06, | |
| "loss": 0.9518, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.7149712092130518, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 2.8502879078694818e-06, | |
| "loss": 0.8438, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.7159309021113244, | |
| "grad_norm": 0.11865234375, | |
| "learning_rate": 2.8406909788867565e-06, | |
| "loss": 0.9486, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.716890595009597, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 2.831094049904031e-06, | |
| "loss": 0.9467, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.7178502879078695, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 2.8214971209213056e-06, | |
| "loss": 0.897, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.718809980806142, | |
| "grad_norm": 0.14453125, | |
| "learning_rate": 2.8119001919385795e-06, | |
| "loss": 0.9883, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.7197696737044146, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 2.8023032629558543e-06, | |
| "loss": 0.8568, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7207293666026872, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 2.7927063339731286e-06, | |
| "loss": 0.9299, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.7216890595009597, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 2.7831094049904033e-06, | |
| "loss": 0.9514, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.7226487523992322, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 2.7735124760076777e-06, | |
| "loss": 0.8542, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.7236084452975048, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 2.7639155470249524e-06, | |
| "loss": 0.9055, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.7245681381957774, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 2.7543186180422267e-06, | |
| "loss": 0.975, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.72552783109405, | |
| "grad_norm": 0.11572265625, | |
| "learning_rate": 2.7447216890595015e-06, | |
| "loss": 0.9118, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.7264875239923224, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 2.7351247600767754e-06, | |
| "loss": 0.8825, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.727447216890595, | |
| "grad_norm": 0.16015625, | |
| "learning_rate": 2.72552783109405e-06, | |
| "loss": 0.8799, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.7284069097888676, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 2.7159309021113245e-06, | |
| "loss": 0.8458, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.7293666026871402, | |
| "grad_norm": 0.10888671875, | |
| "learning_rate": 2.7063339731285992e-06, | |
| "loss": 0.9211, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7303262955854126, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 2.6967370441458735e-06, | |
| "loss": 0.9039, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.7312859884836852, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 2.6871401151631483e-06, | |
| "loss": 0.8405, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.7322456813819578, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 2.6775431861804226e-06, | |
| "loss": 0.9307, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.7332053742802304, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 2.6679462571976974e-06, | |
| "loss": 0.8627, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.7341650671785028, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 2.6583493282149713e-06, | |
| "loss": 0.9403, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.7351247600767754, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 2.648752399232246e-06, | |
| "loss": 0.8884, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.736084452975048, | |
| "grad_norm": 0.203125, | |
| "learning_rate": 2.6391554702495203e-06, | |
| "loss": 1.0263, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.7370441458733206, | |
| "grad_norm": 0.169921875, | |
| "learning_rate": 2.629558541266795e-06, | |
| "loss": 0.9245, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.738003838771593, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 2.6199616122840694e-06, | |
| "loss": 0.91, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.7389635316698656, | |
| "grad_norm": 0.1259765625, | |
| "learning_rate": 2.6103646833013433e-06, | |
| "loss": 0.9534, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7399232245681382, | |
| "grad_norm": 0.12353515625, | |
| "learning_rate": 2.600767754318618e-06, | |
| "loss": 0.8804, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.7408829174664108, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 2.5911708253358924e-06, | |
| "loss": 0.9345, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.7418426103646834, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 2.581573896353167e-06, | |
| "loss": 0.8712, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.7428023032629558, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 2.5719769673704415e-06, | |
| "loss": 0.9098, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.7437619961612284, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 2.5623800383877162e-06, | |
| "loss": 0.9252, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.744721689059501, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 2.5527831094049906e-06, | |
| "loss": 0.8881, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.7456813819577736, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 2.5431861804222653e-06, | |
| "loss": 0.9383, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.746641074856046, | |
| "grad_norm": 0.11474609375, | |
| "learning_rate": 2.5335892514395392e-06, | |
| "loss": 0.8981, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.7476007677543186, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 2.523992322456814e-06, | |
| "loss": 0.9791, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.7485604606525912, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 2.5143953934740883e-06, | |
| "loss": 0.8725, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.7495201535508638, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 2.504798464491363e-06, | |
| "loss": 0.9092, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.7504798464491362, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 2.4952015355086374e-06, | |
| "loss": 0.8934, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.7514395393474088, | |
| "grad_norm": 0.12158203125, | |
| "learning_rate": 2.4856046065259117e-06, | |
| "loss": 0.9178, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.7523992322456814, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 2.4760076775431864e-06, | |
| "loss": 0.8888, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.753358925143954, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 2.4664107485604608e-06, | |
| "loss": 0.9234, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.7543186180422264, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 2.456813819577735e-06, | |
| "loss": 0.8478, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.755278310940499, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 2.44721689059501e-06, | |
| "loss": 0.9564, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.7562380038387716, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 2.437619961612284e-06, | |
| "loss": 0.8473, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.7571976967370442, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 2.4280230326295585e-06, | |
| "loss": 0.9126, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.7581573896353166, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 2.4184261036468333e-06, | |
| "loss": 0.9022, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.7591170825335892, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 2.4088291746641076e-06, | |
| "loss": 0.8685, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.7600767754318618, | |
| "grad_norm": 0.1640625, | |
| "learning_rate": 2.3992322456813823e-06, | |
| "loss": 1.0382, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.7610364683301344, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 2.3896353166986567e-06, | |
| "loss": 0.888, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.761996161228407, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 2.380038387715931e-06, | |
| "loss": 0.8546, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.7629558541266794, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 2.3704414587332057e-06, | |
| "loss": 0.8766, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.763915547024952, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 2.36084452975048e-06, | |
| "loss": 0.8559, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.7648752399232246, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 2.3512476007677544e-06, | |
| "loss": 0.8919, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.7658349328214972, | |
| "grad_norm": 0.1572265625, | |
| "learning_rate": 2.341650671785029e-06, | |
| "loss": 1.005, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.7667946257197696, | |
| "grad_norm": 0.1318359375, | |
| "learning_rate": 2.3320537428023035e-06, | |
| "loss": 0.8963, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.7677543186180422, | |
| "grad_norm": 0.109375, | |
| "learning_rate": 2.322456813819578e-06, | |
| "loss": 0.9342, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.7687140115163148, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 2.3128598848368525e-06, | |
| "loss": 0.904, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.7696737044145874, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 2.303262955854127e-06, | |
| "loss": 0.8932, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.7706333973128598, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 2.2936660268714016e-06, | |
| "loss": 0.8518, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.7715930902111324, | |
| "grad_norm": 0.09765625, | |
| "learning_rate": 2.284069097888676e-06, | |
| "loss": 0.8464, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.772552783109405, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 2.2744721689059503e-06, | |
| "loss": 0.8479, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.7735124760076776, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 2.264875239923225e-06, | |
| "loss": 0.8904, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.77447216890595, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 2.255278310940499e-06, | |
| "loss": 0.8472, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.7754318618042226, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 2.2456813819577737e-06, | |
| "loss": 0.9097, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.7763915547024952, | |
| "grad_norm": 0.11328125, | |
| "learning_rate": 2.236084452975048e-06, | |
| "loss": 0.9732, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.7773512476007678, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 2.2264875239923228e-06, | |
| "loss": 0.8784, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.7783109404990403, | |
| "grad_norm": 0.11572265625, | |
| "learning_rate": 2.216890595009597e-06, | |
| "loss": 0.9363, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.7792706333973128, | |
| "grad_norm": 0.12890625, | |
| "learning_rate": 2.2072936660268714e-06, | |
| "loss": 0.9103, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.7802303262955854, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 2.197696737044146e-06, | |
| "loss": 0.9041, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.781190019193858, | |
| "grad_norm": 0.1484375, | |
| "learning_rate": 2.1880998080614205e-06, | |
| "loss": 1.0047, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.7821497120921305, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 2.178502879078695e-06, | |
| "loss": 0.8674, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.783109404990403, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 2.1689059500959696e-06, | |
| "loss": 0.945, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.7840690978886756, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 2.159309021113244e-06, | |
| "loss": 0.9308, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.7850287907869482, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 2.149712092130518e-06, | |
| "loss": 0.8592, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.7859884836852208, | |
| "grad_norm": 0.10693359375, | |
| "learning_rate": 2.140115163147793e-06, | |
| "loss": 0.8601, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.7869481765834933, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 2.1305182341650673e-06, | |
| "loss": 0.908, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.7879078694817658, | |
| "grad_norm": 0.10546875, | |
| "learning_rate": 2.120921305182342e-06, | |
| "loss": 0.8875, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.7888675623800384, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 2.1113243761996164e-06, | |
| "loss": 0.9093, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.789827255278311, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 2.1017274472168907e-06, | |
| "loss": 0.8605, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.7907869481765835, | |
| "grad_norm": 0.08349609375, | |
| "learning_rate": 2.0921305182341654e-06, | |
| "loss": 0.839, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.791746641074856, | |
| "grad_norm": 0.11181640625, | |
| "learning_rate": 2.0825335892514398e-06, | |
| "loss": 0.9268, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.7927063339731286, | |
| "grad_norm": 0.0830078125, | |
| "learning_rate": 2.072936660268714e-06, | |
| "loss": 0.8654, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.7936660268714012, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 2.063339731285989e-06, | |
| "loss": 0.8853, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.7946257197696737, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 2.053742802303263e-06, | |
| "loss": 0.9269, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.7955854126679462, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 2.044145873320538e-06, | |
| "loss": 0.8849, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.7965451055662188, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 2.0345489443378122e-06, | |
| "loss": 0.8988, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.7975047984644914, | |
| "grad_norm": 0.1484375, | |
| "learning_rate": 2.0249520153550866e-06, | |
| "loss": 0.9887, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.7984644913627639, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 2.015355086372361e-06, | |
| "loss": 0.818, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.7994241842610365, | |
| "grad_norm": 0.1259765625, | |
| "learning_rate": 2.0057581573896352e-06, | |
| "loss": 0.9224, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.800383877159309, | |
| "grad_norm": 0.1318359375, | |
| "learning_rate": 1.99616122840691e-06, | |
| "loss": 0.9942, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.8013435700575816, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 1.9865642994241843e-06, | |
| "loss": 0.8478, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.8023032629558541, | |
| "grad_norm": 0.1728515625, | |
| "learning_rate": 1.9769673704414586e-06, | |
| "loss": 0.8404, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.8032629558541267, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 1.9673704414587334e-06, | |
| "loss": 0.8551, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.8042226487523992, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 1.9577735124760077e-06, | |
| "loss": 0.9121, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.8051823416506718, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 1.9481765834932825e-06, | |
| "loss": 0.9308, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.8061420345489443, | |
| "grad_norm": 0.11865234375, | |
| "learning_rate": 1.9385796545105568e-06, | |
| "loss": 0.9405, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8071017274472169, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 1.928982725527831e-06, | |
| "loss": 0.8425, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.8080614203454894, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 1.919385796545106e-06, | |
| "loss": 0.9141, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.809021113243762, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 1.90978886756238e-06, | |
| "loss": 0.8844, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.8099808061420346, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 1.9001919385796547e-06, | |
| "loss": 0.8686, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.8109404990403071, | |
| "grad_norm": 0.11181640625, | |
| "learning_rate": 1.890595009596929e-06, | |
| "loss": 0.9215, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.8119001919385797, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 1.8809980806142036e-06, | |
| "loss": 0.873, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.8128598848368522, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 1.8714011516314781e-06, | |
| "loss": 0.8606, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.8138195777351248, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 1.8618042226487527e-06, | |
| "loss": 1.0169, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.8147792706333973, | |
| "grad_norm": 0.111328125, | |
| "learning_rate": 1.852207293666027e-06, | |
| "loss": 0.9263, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.8157389635316699, | |
| "grad_norm": 0.0947265625, | |
| "learning_rate": 1.8426103646833015e-06, | |
| "loss": 0.9282, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8166986564299424, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 1.833013435700576e-06, | |
| "loss": 0.8512, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.817658349328215, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 1.8234165067178506e-06, | |
| "loss": 0.9415, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.8186180422264875, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 1.813819577735125e-06, | |
| "loss": 0.8646, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.8195777351247601, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 1.8042226487523995e-06, | |
| "loss": 0.9663, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.8205374280230326, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 1.794625719769674e-06, | |
| "loss": 0.855, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.8214971209213052, | |
| "grad_norm": 0.10595703125, | |
| "learning_rate": 1.7850287907869481e-06, | |
| "loss": 0.8448, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.8224568138195777, | |
| "grad_norm": 0.111328125, | |
| "learning_rate": 1.7754318618042227e-06, | |
| "loss": 0.9227, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.8234165067178503, | |
| "grad_norm": 0.095703125, | |
| "learning_rate": 1.7658349328214972e-06, | |
| "loss": 0.8926, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.8243761996161229, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 1.7562380038387715e-06, | |
| "loss": 0.8505, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.8253358925143954, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 1.746641074856046e-06, | |
| "loss": 0.9039, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8262955854126679, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 1.7370441458733206e-06, | |
| "loss": 0.9247, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.8272552783109405, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 1.7274472168905951e-06, | |
| "loss": 0.8928, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.8282149712092131, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 1.7178502879078695e-06, | |
| "loss": 0.9062, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.8291746641074856, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 1.708253358925144e-06, | |
| "loss": 0.8303, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.8301343570057581, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 1.6986564299424186e-06, | |
| "loss": 0.8619, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.8310940499040307, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 1.689059500959693e-06, | |
| "loss": 0.8517, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.8320537428023033, | |
| "grad_norm": 0.1181640625, | |
| "learning_rate": 1.6794625719769674e-06, | |
| "loss": 1.0451, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.8330134357005758, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 1.669865642994242e-06, | |
| "loss": 0.8912, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.8339731285988484, | |
| "grad_norm": 0.11572265625, | |
| "learning_rate": 1.6602687140115165e-06, | |
| "loss": 0.9978, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.8349328214971209, | |
| "grad_norm": 0.11279296875, | |
| "learning_rate": 1.650671785028791e-06, | |
| "loss": 0.948, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.8358925143953935, | |
| "grad_norm": 0.10693359375, | |
| "learning_rate": 1.6410748560460654e-06, | |
| "loss": 0.9118, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.836852207293666, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 1.63147792706334e-06, | |
| "loss": 0.9054, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.8378119001919386, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 1.6218809980806144e-06, | |
| "loss": 0.8954, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.8387715930902111, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 1.612284069097889e-06, | |
| "loss": 0.8979, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.8397312859884837, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 1.6026871401151633e-06, | |
| "loss": 0.9078, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.8406909788867563, | |
| "grad_norm": 0.0888671875, | |
| "learning_rate": 1.5930902111324378e-06, | |
| "loss": 0.8511, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.8416506717850288, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 1.5834932821497124e-06, | |
| "loss": 0.8885, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.8426103646833013, | |
| "grad_norm": 0.12451171875, | |
| "learning_rate": 1.5738963531669867e-06, | |
| "loss": 0.9479, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.8435700575815739, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 1.5642994241842612e-06, | |
| "loss": 0.8318, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.8445297504798465, | |
| "grad_norm": 0.08837890625, | |
| "learning_rate": 1.5547024952015358e-06, | |
| "loss": 0.9085, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.845489443378119, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 1.54510556621881e-06, | |
| "loss": 0.8787, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.8464491362763915, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 1.5355086372360844e-06, | |
| "loss": 0.875, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.8474088291746641, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 1.525911708253359e-06, | |
| "loss": 0.8904, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.8483685220729367, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 1.5163147792706335e-06, | |
| "loss": 0.9297, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.8493282149712092, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 1.5067178502879078e-06, | |
| "loss": 0.8951, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.8502879078694817, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 1.4971209213051824e-06, | |
| "loss": 0.8508, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.8512476007677543, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 1.487523992322457e-06, | |
| "loss": 0.924, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.8522072936660269, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 1.4779270633397312e-06, | |
| "loss": 0.9006, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.8531669865642995, | |
| "grad_norm": 0.08203125, | |
| "learning_rate": 1.4683301343570058e-06, | |
| "loss": 0.9105, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.8541266794625719, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 1.4587332053742803e-06, | |
| "loss": 0.9097, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.8550863723608445, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 1.4491362763915549e-06, | |
| "loss": 0.8719, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.8560460652591171, | |
| "grad_norm": 0.10888671875, | |
| "learning_rate": 1.4395393474088292e-06, | |
| "loss": 0.9129, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.8570057581573897, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 1.4299424184261037e-06, | |
| "loss": 0.818, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.8579654510556622, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 1.4203454894433783e-06, | |
| "loss": 0.8259, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.8589251439539347, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 1.4107485604606528e-06, | |
| "loss": 0.9523, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.8598848368522073, | |
| "grad_norm": 0.09912109375, | |
| "learning_rate": 1.4011516314779271e-06, | |
| "loss": 0.8779, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.8608445297504799, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 1.3915547024952017e-06, | |
| "loss": 0.8471, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.8618042226487524, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 1.3819577735124762e-06, | |
| "loss": 0.8541, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.8627639155470249, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 1.3723608445297507e-06, | |
| "loss": 0.8406, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.8637236084452975, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 1.362763915547025e-06, | |
| "loss": 0.9321, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.8646833013435701, | |
| "grad_norm": 0.10009765625, | |
| "learning_rate": 1.3531669865642996e-06, | |
| "loss": 0.8372, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.8656429942418427, | |
| "grad_norm": 0.1201171875, | |
| "learning_rate": 1.3435700575815741e-06, | |
| "loss": 0.9387, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.8666026871401151, | |
| "grad_norm": 0.09033203125, | |
| "learning_rate": 1.3339731285988487e-06, | |
| "loss": 0.8453, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.8675623800383877, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 1.324376199616123e-06, | |
| "loss": 0.8814, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.8685220729366603, | |
| "grad_norm": 0.142578125, | |
| "learning_rate": 1.3147792706333976e-06, | |
| "loss": 0.8979, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.8694817658349329, | |
| "grad_norm": 0.09130859375, | |
| "learning_rate": 1.3051823416506717e-06, | |
| "loss": 0.9195, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.8704414587332053, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 1.2955854126679462e-06, | |
| "loss": 0.8919, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.8714011516314779, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 1.2859884836852207e-06, | |
| "loss": 0.8825, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.8723608445297505, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 1.2763915547024953e-06, | |
| "loss": 0.7967, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.8733205374280231, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 1.2667946257197696e-06, | |
| "loss": 0.8685, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.8742802303262955, | |
| "grad_norm": 0.10986328125, | |
| "learning_rate": 1.2571976967370441e-06, | |
| "loss": 0.8319, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.8752399232245681, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 1.2476007677543187e-06, | |
| "loss": 1.0421, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.8761996161228407, | |
| "grad_norm": 0.125, | |
| "learning_rate": 1.2380038387715932e-06, | |
| "loss": 1.02, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.8771593090211133, | |
| "grad_norm": 0.09228515625, | |
| "learning_rate": 1.2284069097888675e-06, | |
| "loss": 0.8858, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.8781190019193857, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 1.218809980806142e-06, | |
| "loss": 0.903, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.8790786948176583, | |
| "grad_norm": 0.1298828125, | |
| "learning_rate": 1.2092130518234166e-06, | |
| "loss": 0.9122, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.8800383877159309, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 1.1996161228406912e-06, | |
| "loss": 0.8541, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.8809980806142035, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 1.1900191938579655e-06, | |
| "loss": 0.8816, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.8819577735124761, | |
| "grad_norm": 0.1064453125, | |
| "learning_rate": 1.18042226487524e-06, | |
| "loss": 0.8834, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.8829174664107485, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 1.1708253358925146e-06, | |
| "loss": 0.9132, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.8838771593090211, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 1.161228406909789e-06, | |
| "loss": 0.9044, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.8848368522072937, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 1.1516314779270634e-06, | |
| "loss": 0.8791, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.8857965451055663, | |
| "grad_norm": 0.150390625, | |
| "learning_rate": 1.142034548944338e-06, | |
| "loss": 0.8169, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.8867562380038387, | |
| "grad_norm": 0.08935546875, | |
| "learning_rate": 1.1324376199616125e-06, | |
| "loss": 0.8696, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.8877159309021113, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 1.1228406909788868e-06, | |
| "loss": 0.908, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.8886756238003839, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 1.1132437619961614e-06, | |
| "loss": 0.8748, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.8896353166986565, | |
| "grad_norm": 0.08251953125, | |
| "learning_rate": 1.1036468330134357e-06, | |
| "loss": 0.8716, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.8905950095969289, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 1.0940499040307102e-06, | |
| "loss": 0.8613, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.8915547024952015, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 1.0844529750479848e-06, | |
| "loss": 0.8703, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.8925143953934741, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 1.074856046065259e-06, | |
| "loss": 0.8743, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.8934740882917467, | |
| "grad_norm": 0.10400390625, | |
| "learning_rate": 1.0652591170825336e-06, | |
| "loss": 0.8926, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.8944337811900192, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 1.0556621880998082e-06, | |
| "loss": 0.8598, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.8953934740882917, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 1.0460652591170827e-06, | |
| "loss": 0.9332, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.8963531669865643, | |
| "grad_norm": 0.10693359375, | |
| "learning_rate": 1.036468330134357e-06, | |
| "loss": 0.9341, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.8973128598848369, | |
| "grad_norm": 0.12890625, | |
| "learning_rate": 1.0268714011516316e-06, | |
| "loss": 0.9819, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.8982725527831094, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 1.0172744721689061e-06, | |
| "loss": 0.8789, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.8992322456813819, | |
| "grad_norm": 0.1513671875, | |
| "learning_rate": 1.0076775431861805e-06, | |
| "loss": 1.0342, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.9001919385796545, | |
| "grad_norm": 0.10498046875, | |
| "learning_rate": 9.98080614203455e-07, | |
| "loss": 0.8745, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.9011516314779271, | |
| "grad_norm": 0.1123046875, | |
| "learning_rate": 9.884836852207293e-07, | |
| "loss": 0.8975, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.9021113243761996, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 9.788867562380039e-07, | |
| "loss": 0.9254, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9030710172744721, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 9.692898272552784e-07, | |
| "loss": 0.8642, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.9040307101727447, | |
| "grad_norm": 0.1591796875, | |
| "learning_rate": 9.59692898272553e-07, | |
| "loss": 1.0205, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.9049904030710173, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 9.500959692898274e-07, | |
| "loss": 0.8844, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.9059500959692899, | |
| "grad_norm": 0.107421875, | |
| "learning_rate": 9.404990403071018e-07, | |
| "loss": 0.8562, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.9069097888675623, | |
| "grad_norm": 0.09375, | |
| "learning_rate": 9.309021113243763e-07, | |
| "loss": 0.8775, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.9078694817658349, | |
| "grad_norm": 0.0869140625, | |
| "learning_rate": 9.213051823416508e-07, | |
| "loss": 0.8285, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.9088291746641075, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 9.117082533589253e-07, | |
| "loss": 0.8177, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.9097888675623801, | |
| "grad_norm": 0.11279296875, | |
| "learning_rate": 9.021113243761997e-07, | |
| "loss": 0.939, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.9107485604606526, | |
| "grad_norm": 0.08447265625, | |
| "learning_rate": 8.925143953934741e-07, | |
| "loss": 0.8413, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.9117082533589251, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 8.829174664107486e-07, | |
| "loss": 0.9704, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9126679462571977, | |
| "grad_norm": 0.158203125, | |
| "learning_rate": 8.73320537428023e-07, | |
| "loss": 0.9006, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.9136276391554703, | |
| "grad_norm": 0.11279296875, | |
| "learning_rate": 8.637236084452976e-07, | |
| "loss": 0.8903, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.9145873320537428, | |
| "grad_norm": 0.1298828125, | |
| "learning_rate": 8.54126679462572e-07, | |
| "loss": 0.9654, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.9155470249520153, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 8.445297504798465e-07, | |
| "loss": 0.9326, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.9165067178502879, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 8.34932821497121e-07, | |
| "loss": 0.9402, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.9174664107485605, | |
| "grad_norm": 0.11376953125, | |
| "learning_rate": 8.253358925143955e-07, | |
| "loss": 0.9127, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.918426103646833, | |
| "grad_norm": 0.10986328125, | |
| "learning_rate": 8.1573896353167e-07, | |
| "loss": 0.8601, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.9193857965451055, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 8.061420345489445e-07, | |
| "loss": 0.8914, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.9203454894433781, | |
| "grad_norm": 0.271484375, | |
| "learning_rate": 7.965451055662189e-07, | |
| "loss": 0.8644, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.9213051823416507, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 7.869481765834934e-07, | |
| "loss": 0.876, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9222648752399232, | |
| "grad_norm": 0.11328125, | |
| "learning_rate": 7.773512476007679e-07, | |
| "loss": 0.9974, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.9232245681381958, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 7.677543186180422e-07, | |
| "loss": 0.9056, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.9241842610364683, | |
| "grad_norm": 0.1611328125, | |
| "learning_rate": 7.581573896353168e-07, | |
| "loss": 1.0843, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.9251439539347409, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 7.485604606525912e-07, | |
| "loss": 0.9126, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.9261036468330134, | |
| "grad_norm": 0.1103515625, | |
| "learning_rate": 7.389635316698656e-07, | |
| "loss": 0.8867, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.927063339731286, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 7.293666026871402e-07, | |
| "loss": 0.881, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.9280230326295585, | |
| "grad_norm": 0.10205078125, | |
| "learning_rate": 7.197696737044146e-07, | |
| "loss": 0.8719, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.9289827255278311, | |
| "grad_norm": 0.0927734375, | |
| "learning_rate": 7.101727447216891e-07, | |
| "loss": 0.8987, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.9299424184261037, | |
| "grad_norm": 0.1015625, | |
| "learning_rate": 7.005758157389636e-07, | |
| "loss": 0.905, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.9309021113243762, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 6.909788867562381e-07, | |
| "loss": 0.838, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.9318618042226487, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 6.813819577735125e-07, | |
| "loss": 0.939, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.9328214971209213, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 6.717850287907871e-07, | |
| "loss": 0.9673, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.9337811900191939, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 6.621880998080615e-07, | |
| "loss": 0.8974, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.9347408829174664, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 6.525911708253358e-07, | |
| "loss": 0.8756, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.935700575815739, | |
| "grad_norm": 0.1103515625, | |
| "learning_rate": 6.429942418426104e-07, | |
| "loss": 0.9782, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.9366602687140115, | |
| "grad_norm": 0.10107421875, | |
| "learning_rate": 6.333973128598848e-07, | |
| "loss": 0.8836, | |
| "step": 976 | |
| }, | |
    {
      "epoch": 0.9376199616122841,
      "grad_norm": 0.1083984375,
      "learning_rate": 6.238003838771593e-07,
      "loss": 0.9716,
      "step": 977
    },
    {
      "epoch": 0.9385796545105566,
      "grad_norm": 0.08984375,
      "learning_rate": 6.142034548944338e-07,
      "loss": 0.861,
      "step": 978
    },
    {
      "epoch": 0.9395393474088292,
      "grad_norm": 0.12353515625,
      "learning_rate": 6.046065259117083e-07,
      "loss": 0.9611,
      "step": 979
    },
    {
      "epoch": 0.9404990403071017,
      "grad_norm": 0.10205078125,
      "learning_rate": 5.950095969289827e-07,
      "loss": 0.9402,
      "step": 980
    },
    {
      "epoch": 0.9414587332053743,
      "grad_norm": 0.08447265625,
      "learning_rate": 5.854126679462573e-07,
      "loss": 0.8861,
      "step": 981
    },
    {
      "epoch": 0.9424184261036468,
      "grad_norm": 0.1025390625,
      "learning_rate": 5.758157389635317e-07,
      "loss": 0.9267,
      "step": 982
    },
    {
      "epoch": 0.9433781190019194,
      "grad_norm": 0.1064453125,
      "learning_rate": 5.662188099808063e-07,
      "loss": 0.9603,
      "step": 983
    },
    {
      "epoch": 0.944337811900192,
      "grad_norm": 0.095703125,
      "learning_rate": 5.566218809980807e-07,
      "loss": 0.8911,
      "step": 984
    },
    {
      "epoch": 0.9452975047984645,
      "grad_norm": 0.08837890625,
      "learning_rate": 5.470249520153551e-07,
      "loss": 0.8627,
      "step": 985
    },
    {
      "epoch": 0.946257197696737,
      "grad_norm": 0.10107421875,
      "learning_rate": 5.374280230326296e-07,
      "loss": 0.9447,
      "step": 986
    },
    {
      "epoch": 0.9472168905950096,
      "grad_norm": 0.10205078125,
      "learning_rate": 5.278310940499041e-07,
      "loss": 0.9111,
      "step": 987
    },
    {
      "epoch": 0.9481765834932822,
      "grad_norm": 0.09716796875,
      "learning_rate": 5.182341650671785e-07,
      "loss": 0.9145,
      "step": 988
    },
    {
      "epoch": 0.9491362763915547,
      "grad_norm": 0.1005859375,
      "learning_rate": 5.086372360844531e-07,
      "loss": 0.9212,
      "step": 989
    },
    {
      "epoch": 0.9500959692898272,
      "grad_norm": 0.08349609375,
      "learning_rate": 4.990403071017275e-07,
      "loss": 0.9028,
      "step": 990
    },
    {
      "epoch": 0.9510556621880998,
      "grad_norm": 0.09375,
      "learning_rate": 4.894433781190019e-07,
      "loss": 0.9005,
      "step": 991
    },
    {
      "epoch": 0.9520153550863724,
      "grad_norm": 0.1044921875,
      "learning_rate": 4.798464491362765e-07,
      "loss": 0.915,
      "step": 992
    },
    {
      "epoch": 0.9529750479846449,
      "grad_norm": 0.095703125,
      "learning_rate": 4.702495201535509e-07,
      "loss": 0.8614,
      "step": 993
    },
    {
      "epoch": 0.9539347408829175,
      "grad_norm": 0.1025390625,
      "learning_rate": 4.606525911708254e-07,
      "loss": 0.8747,
      "step": 994
    },
    {
      "epoch": 0.95489443378119,
      "grad_norm": 0.0888671875,
      "learning_rate": 4.5105566218809987e-07,
      "loss": 0.8996,
      "step": 995
    },
    {
      "epoch": 0.9558541266794626,
      "grad_norm": 0.11572265625,
      "learning_rate": 4.414587332053743e-07,
      "loss": 0.8711,
      "step": 996
    },
    {
      "epoch": 0.9568138195777351,
      "grad_norm": 0.09423828125,
      "learning_rate": 4.318618042226488e-07,
      "loss": 0.8874,
      "step": 997
    },
    {
      "epoch": 0.9577735124760077,
      "grad_norm": 0.08740234375,
      "learning_rate": 4.2226487523992327e-07,
      "loss": 0.8792,
      "step": 998
    },
    {
      "epoch": 0.9587332053742802,
      "grad_norm": 0.12158203125,
      "learning_rate": 4.1266794625719776e-07,
      "loss": 0.9438,
      "step": 999
    },
    {
      "epoch": 0.9596928982725528,
      "grad_norm": 0.095703125,
      "learning_rate": 4.0307101727447224e-07,
      "loss": 0.8969,
      "step": 1000
    },
    {
      "epoch": 0.9606525911708254,
      "grad_norm": 0.09765625,
      "learning_rate": 3.934740882917467e-07,
      "loss": 0.9036,
      "step": 1001
    },
    {
      "epoch": 0.9616122840690979,
      "grad_norm": 0.095703125,
      "learning_rate": 3.838771593090211e-07,
      "loss": 0.8347,
      "step": 1002
    },
    {
      "epoch": 0.9625719769673704,
      "grad_norm": 0.11669921875,
      "learning_rate": 3.742802303262956e-07,
      "loss": 0.9452,
      "step": 1003
    },
    {
      "epoch": 0.963531669865643,
      "grad_norm": 0.099609375,
      "learning_rate": 3.646833013435701e-07,
      "loss": 0.9136,
      "step": 1004
    },
    {
      "epoch": 0.9644913627639156,
      "grad_norm": 0.0986328125,
      "learning_rate": 3.5508637236084457e-07,
      "loss": 0.8792,
      "step": 1005
    },
    {
      "epoch": 0.9654510556621881,
      "grad_norm": 0.111328125,
      "learning_rate": 3.4548944337811905e-07,
      "loss": 0.916,
      "step": 1006
    },
    {
      "epoch": 0.9664107485604606,
      "grad_norm": 0.10546875,
      "learning_rate": 3.3589251439539354e-07,
      "loss": 0.8992,
      "step": 1007
    },
    {
      "epoch": 0.9673704414587332,
      "grad_norm": 0.09765625,
      "learning_rate": 3.262955854126679e-07,
      "loss": 0.9242,
      "step": 1008
    },
    {
      "epoch": 0.9683301343570058,
      "grad_norm": 0.09912109375,
      "learning_rate": 3.166986564299424e-07,
      "loss": 0.91,
      "step": 1009
    },
    {
      "epoch": 0.9692898272552783,
      "grad_norm": 0.1005859375,
      "learning_rate": 3.071017274472169e-07,
      "loss": 0.873,
      "step": 1010
    },
    {
      "epoch": 0.9702495201535508,
      "grad_norm": 0.1015625,
      "learning_rate": 2.9750479846449137e-07,
      "loss": 0.9169,
      "step": 1011
    },
    {
      "epoch": 0.9712092130518234,
      "grad_norm": 0.11279296875,
      "learning_rate": 2.8790786948176586e-07,
      "loss": 0.9147,
      "step": 1012
    },
    {
      "epoch": 0.972168905950096,
      "grad_norm": 0.1220703125,
      "learning_rate": 2.7831094049904034e-07,
      "loss": 0.925,
      "step": 1013
    },
    {
      "epoch": 0.9731285988483686,
      "grad_norm": 0.0908203125,
      "learning_rate": 2.687140115163148e-07,
      "loss": 0.9201,
      "step": 1014
    },
    {
      "epoch": 0.974088291746641,
      "grad_norm": 0.09130859375,
      "learning_rate": 2.5911708253358926e-07,
      "loss": 0.8828,
      "step": 1015
    },
    {
      "epoch": 0.9750479846449136,
      "grad_norm": 0.119140625,
      "learning_rate": 2.4952015355086375e-07,
      "loss": 0.8624,
      "step": 1016
    },
    {
      "epoch": 0.9760076775431862,
      "grad_norm": 0.0859375,
      "learning_rate": 2.3992322456813823e-07,
      "loss": 0.8561,
      "step": 1017
    },
    {
      "epoch": 0.9769673704414588,
      "grad_norm": 0.0869140625,
      "learning_rate": 2.303262955854127e-07,
      "loss": 0.8079,
      "step": 1018
    },
    {
      "epoch": 0.9779270633397313,
      "grad_norm": 0.1240234375,
      "learning_rate": 2.2072936660268715e-07,
      "loss": 0.9034,
      "step": 1019
    },
    {
      "epoch": 0.9788867562380038,
      "grad_norm": 0.09130859375,
      "learning_rate": 2.1113243761996164e-07,
      "loss": 0.8095,
      "step": 1020
    },
    {
      "epoch": 0.9798464491362764,
      "grad_norm": 0.09814453125,
      "learning_rate": 2.0153550863723612e-07,
      "loss": 0.9089,
      "step": 1021
    },
    {
      "epoch": 0.980806142034549,
      "grad_norm": 0.0849609375,
      "learning_rate": 1.9193857965451055e-07,
      "loss": 0.8832,
      "step": 1022
    },
    {
      "epoch": 0.9817658349328215,
      "grad_norm": 0.11962890625,
      "learning_rate": 1.8234165067178504e-07,
      "loss": 0.9369,
      "step": 1023
    },
    {
      "epoch": 0.982725527831094,
      "grad_norm": 0.1748046875,
      "learning_rate": 1.7274472168905953e-07,
      "loss": 1.0612,
      "step": 1024
    },
    {
      "epoch": 0.9836852207293666,
      "grad_norm": 0.1015625,
      "learning_rate": 1.6314779270633396e-07,
      "loss": 0.9416,
      "step": 1025
    },
    {
      "epoch": 0.9846449136276392,
      "grad_norm": 0.1162109375,
      "learning_rate": 1.5355086372360844e-07,
      "loss": 0.9809,
      "step": 1026
    },
    {
      "epoch": 0.9856046065259118,
      "grad_norm": 0.10791015625,
      "learning_rate": 1.4395393474088293e-07,
      "loss": 0.842,
      "step": 1027
    },
    {
      "epoch": 0.9865642994241842,
      "grad_norm": 0.09130859375,
      "learning_rate": 1.343570057581574e-07,
      "loss": 0.9277,
      "step": 1028
    },
    {
      "epoch": 0.9875239923224568,
      "grad_norm": 0.1103515625,
      "learning_rate": 1.2476007677543187e-07,
      "loss": 0.8711,
      "step": 1029
    },
    {
      "epoch": 0.9884836852207294,
      "grad_norm": 0.08740234375,
      "learning_rate": 1.1516314779270635e-07,
      "loss": 0.8977,
      "step": 1030
    },
    {
      "epoch": 0.989443378119002,
      "grad_norm": 0.09912109375,
      "learning_rate": 1.0556621880998082e-07,
      "loss": 0.8632,
      "step": 1031
    },
    {
      "epoch": 0.9904030710172744,
      "grad_norm": 0.10107421875,
      "learning_rate": 9.596928982725528e-08,
      "loss": 0.9557,
      "step": 1032
    },
    {
      "epoch": 0.991362763915547,
      "grad_norm": 0.09765625,
      "learning_rate": 8.637236084452976e-08,
      "loss": 0.8898,
      "step": 1033
    },
    {
      "epoch": 0.9923224568138196,
      "grad_norm": 0.146484375,
      "learning_rate": 7.677543186180422e-08,
      "loss": 0.9666,
      "step": 1034
    },
    {
      "epoch": 0.9932821497120922,
      "grad_norm": 0.1015625,
      "learning_rate": 6.71785028790787e-08,
      "loss": 0.8335,
      "step": 1035
    },
    {
      "epoch": 0.9942418426103646,
      "grad_norm": 0.095703125,
      "learning_rate": 5.758157389635317e-08,
      "loss": 0.9066,
      "step": 1036
    },
    {
      "epoch": 0.9952015355086372,
      "grad_norm": 0.08740234375,
      "learning_rate": 4.798464491362764e-08,
      "loss": 0.8566,
      "step": 1037
    },
    {
      "epoch": 0.9961612284069098,
      "grad_norm": 0.09033203125,
      "learning_rate": 3.838771593090211e-08,
      "loss": 0.8087,
      "step": 1038
    },
    {
      "epoch": 0.9971209213051824,
      "grad_norm": 0.111328125,
      "learning_rate": 2.8790786948176586e-08,
      "loss": 0.8638,
      "step": 1039
    },
    {
      "epoch": 0.9980806142034548,
      "grad_norm": 0.09912109375,
      "learning_rate": 1.9193857965451055e-08,
      "loss": 0.9796,
      "step": 1040
    },
    {
      "epoch": 0.9990403071017274,
      "grad_norm": 0.11767578125,
      "learning_rate": 9.596928982725528e-09,
      "loss": 0.8987,
      "step": 1041
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.09228515625,
      "learning_rate": 0.0,
      "loss": 0.897,
      "step": 1042
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 1042,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.362875479576019e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}