{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 0,
  "global_step": 116,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008620689655172414,
      "grad_norm": 0.490234375,
      "learning_rate": 9.913793103448277e-06,
      "loss": 1.8897,
      "step": 1
    },
    {
      "epoch": 0.017241379310344827,
      "grad_norm": 0.44921875,
      "learning_rate": 9.827586206896553e-06,
      "loss": 1.7847,
      "step": 2
    },
    {
      "epoch": 0.02586206896551724,
      "grad_norm": 0.470703125,
      "learning_rate": 9.741379310344829e-06,
      "loss": 1.8435,
      "step": 3
    },
    {
      "epoch": 0.034482758620689655,
      "grad_norm": 0.435546875,
      "learning_rate": 9.655172413793105e-06,
      "loss": 1.8,
      "step": 4
    },
    {
      "epoch": 0.04310344827586207,
      "grad_norm": 0.41796875,
      "learning_rate": 9.56896551724138e-06,
      "loss": 1.8393,
      "step": 5
    },
    {
      "epoch": 0.05172413793103448,
      "grad_norm": 0.466796875,
      "learning_rate": 9.482758620689655e-06,
      "loss": 1.8275,
      "step": 6
    },
    {
      "epoch": 0.0603448275862069,
      "grad_norm": 0.365234375,
      "learning_rate": 9.396551724137931e-06,
      "loss": 1.7559,
      "step": 7
    },
    {
      "epoch": 0.06896551724137931,
      "grad_norm": 0.3203125,
      "learning_rate": 9.310344827586207e-06,
      "loss": 1.7784,
      "step": 8
    },
    {
      "epoch": 0.07758620689655173,
      "grad_norm": 0.3046875,
      "learning_rate": 9.224137931034484e-06,
      "loss": 1.7637,
      "step": 9
    },
    {
      "epoch": 0.08620689655172414,
      "grad_norm": 0.28515625,
      "learning_rate": 9.13793103448276e-06,
      "loss": 1.7164,
      "step": 10
    },
    {
      "epoch": 0.09482758620689655,
      "grad_norm": 0.294921875,
      "learning_rate": 9.051724137931036e-06,
      "loss": 1.7796,
      "step": 11
    },
    {
      "epoch": 0.10344827586206896,
      "grad_norm": 0.265625,
      "learning_rate": 8.965517241379312e-06,
      "loss": 1.698,
      "step": 12
    },
    {
      "epoch": 0.11206896551724138,
      "grad_norm": 0.2412109375,
      "learning_rate": 8.879310344827588e-06,
      "loss": 1.7032,
      "step": 13
    },
    {
      "epoch": 0.1206896551724138,
      "grad_norm": 0.2392578125,
      "learning_rate": 8.793103448275862e-06,
      "loss": 1.7151,
      "step": 14
    },
    {
      "epoch": 0.12931034482758622,
      "grad_norm": 0.265625,
      "learning_rate": 8.706896551724138e-06,
      "loss": 1.6807,
      "step": 15
    },
    {
      "epoch": 0.13793103448275862,
      "grad_norm": 0.2373046875,
      "learning_rate": 8.620689655172414e-06,
      "loss": 1.7027,
      "step": 16
    },
    {
      "epoch": 0.14655172413793102,
      "grad_norm": 0.244140625,
      "learning_rate": 8.53448275862069e-06,
      "loss": 1.6944,
      "step": 17
    },
    {
      "epoch": 0.15517241379310345,
      "grad_norm": 0.2392578125,
      "learning_rate": 8.448275862068966e-06,
      "loss": 1.7346,
      "step": 18
    },
    {
      "epoch": 0.16379310344827586,
      "grad_norm": 0.22265625,
      "learning_rate": 8.362068965517242e-06,
      "loss": 1.5981,
      "step": 19
    },
    {
      "epoch": 0.1724137931034483,
      "grad_norm": 0.2451171875,
      "learning_rate": 8.275862068965518e-06,
      "loss": 1.6794,
      "step": 20
    },
    {
      "epoch": 0.1810344827586207,
      "grad_norm": 0.212890625,
      "learning_rate": 8.189655172413794e-06,
      "loss": 1.6385,
      "step": 21
    },
    {
      "epoch": 0.1896551724137931,
      "grad_norm": 0.205078125,
      "learning_rate": 8.103448275862069e-06,
      "loss": 1.6853,
      "step": 22
    },
    {
      "epoch": 0.19827586206896552,
      "grad_norm": 0.2021484375,
      "learning_rate": 8.017241379310345e-06,
      "loss": 1.6833,
      "step": 23
    },
    {
      "epoch": 0.20689655172413793,
      "grad_norm": 0.1875,
      "learning_rate": 7.93103448275862e-06,
      "loss": 1.5735,
      "step": 24
    },
    {
      "epoch": 0.21551724137931033,
      "grad_norm": 0.1806640625,
      "learning_rate": 7.844827586206897e-06,
      "loss": 1.6481,
      "step": 25
    },
    {
      "epoch": 0.22413793103448276,
      "grad_norm": 0.1796875,
      "learning_rate": 7.758620689655173e-06,
      "loss": 1.6415,
      "step": 26
    },
    {
      "epoch": 0.23275862068965517,
      "grad_norm": 0.2109375,
      "learning_rate": 7.672413793103449e-06,
      "loss": 1.697,
      "step": 27
    },
    {
      "epoch": 0.2413793103448276,
      "grad_norm": 0.16796875,
      "learning_rate": 7.586206896551724e-06,
      "loss": 1.6084,
      "step": 28
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.1884765625,
      "learning_rate": 7.500000000000001e-06,
      "loss": 1.6449,
      "step": 29
    },
    {
      "epoch": 0.25862068965517243,
      "grad_norm": 0.216796875,
      "learning_rate": 7.413793103448277e-06,
      "loss": 1.6571,
      "step": 30
    },
    {
      "epoch": 0.2672413793103448,
      "grad_norm": 0.193359375,
      "learning_rate": 7.327586206896552e-06,
      "loss": 1.5427,
      "step": 31
    },
    {
      "epoch": 0.27586206896551724,
      "grad_norm": 0.1708984375,
      "learning_rate": 7.241379310344828e-06,
      "loss": 1.498,
      "step": 32
    },
    {
      "epoch": 0.28448275862068967,
      "grad_norm": 0.1865234375,
      "learning_rate": 7.155172413793104e-06,
      "loss": 1.6642,
      "step": 33
    },
    {
      "epoch": 0.29310344827586204,
      "grad_norm": 0.212890625,
      "learning_rate": 7.0689655172413796e-06,
      "loss": 1.6553,
      "step": 34
    },
    {
      "epoch": 0.3017241379310345,
      "grad_norm": 0.1796875,
      "learning_rate": 6.982758620689656e-06,
      "loss": 1.5789,
      "step": 35
    },
    {
      "epoch": 0.3103448275862069,
      "grad_norm": 0.2041015625,
      "learning_rate": 6.896551724137932e-06,
      "loss": 1.568,
      "step": 36
    },
    {
      "epoch": 0.31896551724137934,
      "grad_norm": 0.189453125,
      "learning_rate": 6.810344827586207e-06,
      "loss": 1.5269,
      "step": 37
    },
    {
      "epoch": 0.3275862068965517,
      "grad_norm": 0.2001953125,
      "learning_rate": 6.724137931034484e-06,
      "loss": 1.6063,
      "step": 38
    },
    {
      "epoch": 0.33620689655172414,
      "grad_norm": 0.181640625,
      "learning_rate": 6.63793103448276e-06,
      "loss": 1.5043,
      "step": 39
    },
    {
      "epoch": 0.3448275862068966,
      "grad_norm": 0.1904296875,
      "learning_rate": 6.551724137931035e-06,
      "loss": 1.6623,
      "step": 40
    },
    {
      "epoch": 0.35344827586206895,
      "grad_norm": 0.1875,
      "learning_rate": 6.465517241379311e-06,
      "loss": 1.5848,
      "step": 41
    },
    {
      "epoch": 0.3620689655172414,
      "grad_norm": 0.17578125,
      "learning_rate": 6.379310344827587e-06,
      "loss": 1.5198,
      "step": 42
    },
    {
      "epoch": 0.3706896551724138,
      "grad_norm": 0.1982421875,
      "learning_rate": 6.293103448275862e-06,
      "loss": 1.5934,
      "step": 43
    },
    {
      "epoch": 0.3793103448275862,
      "grad_norm": 0.23828125,
      "learning_rate": 6.206896551724138e-06,
      "loss": 1.5823,
      "step": 44
    },
    {
      "epoch": 0.3879310344827586,
      "grad_norm": 0.1533203125,
      "learning_rate": 6.1206896551724135e-06,
      "loss": 1.5819,
      "step": 45
    },
    {
      "epoch": 0.39655172413793105,
      "grad_norm": 0.162109375,
      "learning_rate": 6.03448275862069e-06,
      "loss": 1.5034,
      "step": 46
    },
    {
      "epoch": 0.4051724137931034,
      "grad_norm": 0.1943359375,
      "learning_rate": 5.9482758620689665e-06,
      "loss": 1.5815,
      "step": 47
    },
    {
      "epoch": 0.41379310344827586,
      "grad_norm": 0.1796875,
      "learning_rate": 5.862068965517242e-06,
      "loss": 1.5191,
      "step": 48
    },
    {
      "epoch": 0.4224137931034483,
      "grad_norm": 0.1708984375,
      "learning_rate": 5.775862068965518e-06,
      "loss": 1.5469,
      "step": 49
    },
    {
      "epoch": 0.43103448275862066,
      "grad_norm": 0.158203125,
      "learning_rate": 5.689655172413794e-06,
      "loss": 1.5112,
      "step": 50
    },
    {
      "epoch": 0.4396551724137931,
      "grad_norm": 0.1748046875,
      "learning_rate": 5.603448275862069e-06,
      "loss": 1.5272,
      "step": 51
    },
    {
      "epoch": 0.4482758620689655,
      "grad_norm": 0.14453125,
      "learning_rate": 5.517241379310345e-06,
      "loss": 1.4435,
      "step": 52
    },
    {
      "epoch": 0.45689655172413796,
      "grad_norm": 0.185546875,
      "learning_rate": 5.431034482758621e-06,
      "loss": 1.5291,
      "step": 53
    },
    {
      "epoch": 0.46551724137931033,
      "grad_norm": 0.1923828125,
      "learning_rate": 5.344827586206896e-06,
      "loss": 1.4499,
      "step": 54
    },
    {
      "epoch": 0.47413793103448276,
      "grad_norm": 0.2294921875,
      "learning_rate": 5.258620689655173e-06,
      "loss": 1.5481,
      "step": 55
    },
    {
      "epoch": 0.4827586206896552,
      "grad_norm": 0.16796875,
      "learning_rate": 5.172413793103449e-06,
      "loss": 1.4589,
      "step": 56
    },
    {
      "epoch": 0.49137931034482757,
      "grad_norm": 0.166015625,
      "learning_rate": 5.086206896551724e-06,
      "loss": 1.5175,
      "step": 57
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.1630859375,
      "learning_rate": 5e-06,
      "loss": 1.5369,
      "step": 58
    },
    {
      "epoch": 0.5086206896551724,
      "grad_norm": 0.1826171875,
      "learning_rate": 4.9137931034482765e-06,
      "loss": 1.5117,
      "step": 59
    },
    {
      "epoch": 0.5172413793103449,
      "grad_norm": 0.1376953125,
      "learning_rate": 4.8275862068965525e-06,
      "loss": 1.5455,
      "step": 60
    },
    {
      "epoch": 0.5258620689655172,
      "grad_norm": 0.1650390625,
      "learning_rate": 4.741379310344828e-06,
      "loss": 1.5353,
      "step": 61
    },
    {
      "epoch": 0.5344827586206896,
      "grad_norm": 0.1591796875,
      "learning_rate": 4.655172413793104e-06,
      "loss": 1.4975,
      "step": 62
    },
    {
      "epoch": 0.5431034482758621,
      "grad_norm": 0.15234375,
      "learning_rate": 4.56896551724138e-06,
      "loss": 1.5309,
      "step": 63
    },
    {
      "epoch": 0.5517241379310345,
      "grad_norm": 0.14453125,
      "learning_rate": 4.482758620689656e-06,
      "loss": 1.5189,
      "step": 64
    },
    {
      "epoch": 0.5603448275862069,
      "grad_norm": 0.134765625,
      "learning_rate": 4.396551724137931e-06,
      "loss": 1.4481,
      "step": 65
    },
    {
      "epoch": 0.5689655172413793,
      "grad_norm": 0.1484375,
      "learning_rate": 4.310344827586207e-06,
      "loss": 1.5394,
      "step": 66
    },
    {
      "epoch": 0.5775862068965517,
      "grad_norm": 0.1572265625,
      "learning_rate": 4.224137931034483e-06,
      "loss": 1.5147,
      "step": 67
    },
    {
      "epoch": 0.5862068965517241,
      "grad_norm": 0.14453125,
      "learning_rate": 4.137931034482759e-06,
      "loss": 1.5097,
      "step": 68
    },
    {
      "epoch": 0.5948275862068966,
      "grad_norm": 0.1826171875,
      "learning_rate": 4.051724137931034e-06,
      "loss": 1.5006,
      "step": 69
    },
    {
      "epoch": 0.603448275862069,
      "grad_norm": 0.166015625,
      "learning_rate": 3.96551724137931e-06,
      "loss": 1.5042,
      "step": 70
    },
    {
      "epoch": 0.6120689655172413,
      "grad_norm": 0.1591796875,
      "learning_rate": 3.8793103448275865e-06,
      "loss": 1.5101,
      "step": 71
    },
    {
      "epoch": 0.6206896551724138,
      "grad_norm": 0.16796875,
      "learning_rate": 3.793103448275862e-06,
      "loss": 1.4706,
      "step": 72
    },
    {
      "epoch": 0.6293103448275862,
      "grad_norm": 0.1669921875,
      "learning_rate": 3.7068965517241385e-06,
      "loss": 1.513,
      "step": 73
    },
    {
      "epoch": 0.6379310344827587,
      "grad_norm": 0.1650390625,
      "learning_rate": 3.620689655172414e-06,
      "loss": 1.4574,
      "step": 74
    },
    {
      "epoch": 0.646551724137931,
      "grad_norm": 0.18359375,
      "learning_rate": 3.5344827586206898e-06,
      "loss": 1.4928,
      "step": 75
    },
    {
      "epoch": 0.6551724137931034,
      "grad_norm": 0.1572265625,
      "learning_rate": 3.448275862068966e-06,
      "loss": 1.4709,
      "step": 76
    },
    {
      "epoch": 0.6637931034482759,
      "grad_norm": 0.12890625,
      "learning_rate": 3.362068965517242e-06,
      "loss": 1.428,
      "step": 77
    },
    {
      "epoch": 0.6724137931034483,
      "grad_norm": 0.1669921875,
      "learning_rate": 3.2758620689655175e-06,
      "loss": 1.5212,
      "step": 78
    },
    {
      "epoch": 0.6810344827586207,
      "grad_norm": 0.1416015625,
      "learning_rate": 3.1896551724137935e-06,
      "loss": 1.492,
      "step": 79
    },
    {
      "epoch": 0.6896551724137931,
      "grad_norm": 0.1572265625,
      "learning_rate": 3.103448275862069e-06,
      "loss": 1.3807,
      "step": 80
    },
    {
      "epoch": 0.6982758620689655,
      "grad_norm": 0.1416015625,
      "learning_rate": 3.017241379310345e-06,
      "loss": 1.4559,
      "step": 81
    },
    {
      "epoch": 0.7068965517241379,
      "grad_norm": 0.1484375,
      "learning_rate": 2.931034482758621e-06,
      "loss": 1.5057,
      "step": 82
    },
    {
      "epoch": 0.7155172413793104,
      "grad_norm": 0.185546875,
      "learning_rate": 2.844827586206897e-06,
      "loss": 1.4579,
      "step": 83
    },
    {
      "epoch": 0.7241379310344828,
      "grad_norm": 0.1455078125,
      "learning_rate": 2.7586206896551725e-06,
      "loss": 1.4931,
      "step": 84
    },
    {
      "epoch": 0.7327586206896551,
      "grad_norm": 0.1552734375,
      "learning_rate": 2.672413793103448e-06,
      "loss": 1.4055,
      "step": 85
    },
    {
      "epoch": 0.7413793103448276,
      "grad_norm": 0.1484375,
      "learning_rate": 2.5862068965517246e-06,
      "loss": 1.4233,
      "step": 86
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.2041015625,
      "learning_rate": 2.5e-06,
      "loss": 1.4571,
      "step": 87
    },
    {
      "epoch": 0.7586206896551724,
      "grad_norm": 0.1748046875,
      "learning_rate": 2.4137931034482762e-06,
      "loss": 1.503,
      "step": 88
    },
    {
      "epoch": 0.7672413793103449,
      "grad_norm": 0.18359375,
      "learning_rate": 2.327586206896552e-06,
      "loss": 1.4796,
      "step": 89
    },
    {
      "epoch": 0.7758620689655172,
      "grad_norm": 0.1591796875,
      "learning_rate": 2.241379310344828e-06,
      "loss": 1.4198,
      "step": 90
    },
    {
      "epoch": 0.7844827586206896,
      "grad_norm": 0.1845703125,
      "learning_rate": 2.1551724137931035e-06,
      "loss": 1.4005,
      "step": 91
    },
    {
      "epoch": 0.7931034482758621,
      "grad_norm": 0.1328125,
      "learning_rate": 2.0689655172413796e-06,
      "loss": 1.4767,
      "step": 92
    },
    {
      "epoch": 0.8017241379310345,
      "grad_norm": 0.185546875,
      "learning_rate": 1.982758620689655e-06,
      "loss": 1.5157,
      "step": 93
    },
    {
      "epoch": 0.8103448275862069,
      "grad_norm": 0.2138671875,
      "learning_rate": 1.896551724137931e-06,
      "loss": 1.4623,
      "step": 94
    },
    {
      "epoch": 0.8189655172413793,
      "grad_norm": 0.1826171875,
      "learning_rate": 1.810344827586207e-06,
      "loss": 1.4235,
      "step": 95
    },
    {
      "epoch": 0.8275862068965517,
      "grad_norm": 0.173828125,
      "learning_rate": 1.724137931034483e-06,
      "loss": 1.5049,
      "step": 96
    },
    {
      "epoch": 0.8362068965517241,
      "grad_norm": 0.13671875,
      "learning_rate": 1.6379310344827587e-06,
      "loss": 1.4492,
      "step": 97
    },
    {
      "epoch": 0.8448275862068966,
      "grad_norm": 0.1943359375,
      "learning_rate": 1.5517241379310346e-06,
      "loss": 1.4447,
      "step": 98
    },
    {
      "epoch": 0.853448275862069,
      "grad_norm": 0.1455078125,
      "learning_rate": 1.4655172413793104e-06,
      "loss": 1.4843,
      "step": 99
    },
    {
      "epoch": 0.8620689655172413,
      "grad_norm": 0.1767578125,
      "learning_rate": 1.3793103448275862e-06,
      "loss": 1.3772,
      "step": 100
    },
    {
      "epoch": 0.8706896551724138,
      "grad_norm": 0.2109375,
      "learning_rate": 1.2931034482758623e-06,
      "loss": 1.5097,
      "step": 101
    },
    {
      "epoch": 0.8793103448275862,
      "grad_norm": 0.1689453125,
      "learning_rate": 1.2068965517241381e-06,
      "loss": 1.4115,
      "step": 102
    },
    {
      "epoch": 0.8879310344827587,
      "grad_norm": 0.13671875,
      "learning_rate": 1.120689655172414e-06,
      "loss": 1.4794,
      "step": 103
    },
    {
      "epoch": 0.896551724137931,
      "grad_norm": 0.17578125,
      "learning_rate": 1.0344827586206898e-06,
      "loss": 1.4197,
      "step": 104
    },
    {
      "epoch": 0.9051724137931034,
      "grad_norm": 0.2080078125,
      "learning_rate": 9.482758620689655e-07,
      "loss": 1.4999,
      "step": 105
    },
    {
      "epoch": 0.9137931034482759,
      "grad_norm": 0.1337890625,
      "learning_rate": 8.620689655172415e-07,
      "loss": 1.4207,
      "step": 106
    },
    {
      "epoch": 0.9224137931034483,
      "grad_norm": 0.1416015625,
      "learning_rate": 7.758620689655173e-07,
      "loss": 1.396,
      "step": 107
    },
    {
      "epoch": 0.9310344827586207,
      "grad_norm": 0.162109375,
      "learning_rate": 6.896551724137931e-07,
      "loss": 1.4244,
      "step": 108
    },
    {
      "epoch": 0.9396551724137931,
      "grad_norm": 0.185546875,
      "learning_rate": 6.034482758620691e-07,
      "loss": 1.4731,
      "step": 109
    },
    {
      "epoch": 0.9482758620689655,
      "grad_norm": 0.1396484375,
      "learning_rate": 5.172413793103449e-07,
      "loss": 1.5113,
      "step": 110
    },
    {
      "epoch": 0.9568965517241379,
      "grad_norm": 0.1630859375,
      "learning_rate": 4.3103448275862073e-07,
      "loss": 1.468,
      "step": 111
    },
    {
      "epoch": 0.9655172413793104,
      "grad_norm": 0.162109375,
      "learning_rate": 3.4482758620689656e-07,
      "loss": 1.4651,
      "step": 112
    },
    {
      "epoch": 0.9741379310344828,
      "grad_norm": 0.2001953125,
      "learning_rate": 2.5862068965517245e-07,
      "loss": 1.4632,
      "step": 113
    },
    {
      "epoch": 0.9827586206896551,
      "grad_norm": 0.1337890625,
      "learning_rate": 1.7241379310344828e-07,
      "loss": 1.4311,
      "step": 114
    },
    {
      "epoch": 0.9913793103448276,
      "grad_norm": 0.134765625,
      "learning_rate": 8.620689655172414e-08,
      "loss": 1.423,
      "step": 115
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1494140625,
      "learning_rate": 0.0,
      "loss": 1.4911,
      "step": 116
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 116,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3.1862449256372634e+17,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}