| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.994773519163763, | |
| "eval_steps": 500, | |
| "global_step": 573, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.005226480836236934, | |
| "grad_norm": 6.817761533477436, | |
| "learning_rate": 1.7241379310344828e-07, | |
| "loss": 1.0919, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010452961672473868, | |
| "grad_norm": 7.115516920325354, | |
| "learning_rate": 3.4482758620689656e-07, | |
| "loss": 1.1216, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.0156794425087108, | |
| "grad_norm": 6.6059432881920035, | |
| "learning_rate": 5.172413793103449e-07, | |
| "loss": 1.0823, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.020905923344947737, | |
| "grad_norm": 7.040897553026814, | |
| "learning_rate": 6.896551724137931e-07, | |
| "loss": 1.1163, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.02613240418118467, | |
| "grad_norm": 6.283884644524741, | |
| "learning_rate": 8.620689655172415e-07, | |
| "loss": 1.0475, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0313588850174216, | |
| "grad_norm": 6.519407744623921, | |
| "learning_rate": 1.0344827586206898e-06, | |
| "loss": 1.0632, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.036585365853658534, | |
| "grad_norm": 6.169268594264987, | |
| "learning_rate": 1.2068965517241381e-06, | |
| "loss": 1.0867, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.041811846689895474, | |
| "grad_norm": 6.0230600012209905, | |
| "learning_rate": 1.3793103448275862e-06, | |
| "loss": 1.0679, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.047038327526132406, | |
| "grad_norm": 4.974697996657908, | |
| "learning_rate": 1.5517241379310346e-06, | |
| "loss": 1.0599, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.05226480836236934, | |
| "grad_norm": 4.610162697639871, | |
| "learning_rate": 1.724137931034483e-06, | |
| "loss": 1.0377, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05749128919860627, | |
| "grad_norm": 3.231983464184593, | |
| "learning_rate": 1.896551724137931e-06, | |
| "loss": 0.9836, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.0627177700348432, | |
| "grad_norm": 2.8094756280140682, | |
| "learning_rate": 2.0689655172413796e-06, | |
| "loss": 1.0003, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.06794425087108014, | |
| "grad_norm": 2.5493675085491594, | |
| "learning_rate": 2.241379310344828e-06, | |
| "loss": 0.9805, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.07317073170731707, | |
| "grad_norm": 2.5421388777679423, | |
| "learning_rate": 2.4137931034482762e-06, | |
| "loss": 0.9785, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.078397212543554, | |
| "grad_norm": 2.4038506323025564, | |
| "learning_rate": 2.5862068965517246e-06, | |
| "loss": 0.9579, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.08362369337979095, | |
| "grad_norm": 3.118376309649565, | |
| "learning_rate": 2.7586206896551725e-06, | |
| "loss": 0.9266, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.08885017421602788, | |
| "grad_norm": 3.256072028222514, | |
| "learning_rate": 2.931034482758621e-06, | |
| "loss": 0.9496, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.09407665505226481, | |
| "grad_norm": 3.0097390475425785, | |
| "learning_rate": 3.103448275862069e-06, | |
| "loss": 0.9417, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.09930313588850175, | |
| "grad_norm": 2.5804981571566685, | |
| "learning_rate": 3.2758620689655175e-06, | |
| "loss": 0.9133, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.10452961672473868, | |
| "grad_norm": 2.1253546888275614, | |
| "learning_rate": 3.448275862068966e-06, | |
| "loss": 0.9181, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.10975609756097561, | |
| "grad_norm": 1.4630576625319751, | |
| "learning_rate": 3.620689655172414e-06, | |
| "loss": 0.8708, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.11498257839721254, | |
| "grad_norm": 1.8224890188617031, | |
| "learning_rate": 3.793103448275862e-06, | |
| "loss": 0.8532, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.12020905923344948, | |
| "grad_norm": 1.947817512922658, | |
| "learning_rate": 3.96551724137931e-06, | |
| "loss": 0.867, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.1254355400696864, | |
| "grad_norm": 1.6741792992922881, | |
| "learning_rate": 4.137931034482759e-06, | |
| "loss": 0.8596, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.13066202090592335, | |
| "grad_norm": 1.1380185267106577, | |
| "learning_rate": 4.310344827586207e-06, | |
| "loss": 0.8119, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.13588850174216027, | |
| "grad_norm": 1.2049716071539038, | |
| "learning_rate": 4.482758620689656e-06, | |
| "loss": 0.8374, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.14111498257839722, | |
| "grad_norm": 1.1289607052380697, | |
| "learning_rate": 4.655172413793104e-06, | |
| "loss": 0.7917, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.14634146341463414, | |
| "grad_norm": 1.2662899307258315, | |
| "learning_rate": 4.8275862068965525e-06, | |
| "loss": 0.8187, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.15156794425087108, | |
| "grad_norm": 1.3017674903802239, | |
| "learning_rate": 5e-06, | |
| "loss": 0.8106, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.156794425087108, | |
| "grad_norm": 0.9437556124885579, | |
| "learning_rate": 5.172413793103449e-06, | |
| "loss": 0.7807, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.16202090592334495, | |
| "grad_norm": 0.9854643253704647, | |
| "learning_rate": 5.344827586206896e-06, | |
| "loss": 0.7987, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.1672473867595819, | |
| "grad_norm": 1.0216232146030035, | |
| "learning_rate": 5.517241379310345e-06, | |
| "loss": 0.7978, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.17247386759581881, | |
| "grad_norm": 0.8155263699639064, | |
| "learning_rate": 5.689655172413794e-06, | |
| "loss": 0.7872, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.17770034843205576, | |
| "grad_norm": 0.8283995090284338, | |
| "learning_rate": 5.862068965517242e-06, | |
| "loss": 0.7704, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.18292682926829268, | |
| "grad_norm": 0.7185821947364432, | |
| "learning_rate": 6.03448275862069e-06, | |
| "loss": 0.7581, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.18815331010452963, | |
| "grad_norm": 0.770352836030317, | |
| "learning_rate": 6.206896551724138e-06, | |
| "loss": 0.7787, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.19337979094076654, | |
| "grad_norm": 0.7388881984398195, | |
| "learning_rate": 6.379310344827587e-06, | |
| "loss": 0.745, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.1986062717770035, | |
| "grad_norm": 0.8358555735535642, | |
| "learning_rate": 6.551724137931035e-06, | |
| "loss": 0.7604, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.2038327526132404, | |
| "grad_norm": 0.793851198978272, | |
| "learning_rate": 6.724137931034484e-06, | |
| "loss": 0.7544, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.20905923344947736, | |
| "grad_norm": 0.7772514888903087, | |
| "learning_rate": 6.896551724137932e-06, | |
| "loss": 0.732, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.21428571428571427, | |
| "grad_norm": 0.8148652288090457, | |
| "learning_rate": 7.0689655172413796e-06, | |
| "loss": 0.7297, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.21951219512195122, | |
| "grad_norm": 0.8289759706310655, | |
| "learning_rate": 7.241379310344828e-06, | |
| "loss": 0.7383, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.22473867595818817, | |
| "grad_norm": 0.7140088762027519, | |
| "learning_rate": 7.413793103448277e-06, | |
| "loss": 0.7461, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.22996515679442509, | |
| "grad_norm": 0.6669906555709666, | |
| "learning_rate": 7.586206896551724e-06, | |
| "loss": 0.7343, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.23519163763066203, | |
| "grad_norm": 0.7425162700101843, | |
| "learning_rate": 7.758620689655173e-06, | |
| "loss": 0.7211, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.24041811846689895, | |
| "grad_norm": 0.7667317947005017, | |
| "learning_rate": 7.93103448275862e-06, | |
| "loss": 0.7307, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.2456445993031359, | |
| "grad_norm": 0.6701254662379068, | |
| "learning_rate": 8.103448275862069e-06, | |
| "loss": 0.7244, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.2508710801393728, | |
| "grad_norm": 0.6974840533976149, | |
| "learning_rate": 8.275862068965518e-06, | |
| "loss": 0.7222, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.25609756097560976, | |
| "grad_norm": 0.7601606441382056, | |
| "learning_rate": 8.448275862068966e-06, | |
| "loss": 0.7325, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.2613240418118467, | |
| "grad_norm": 0.6932741056532161, | |
| "learning_rate": 8.620689655172414e-06, | |
| "loss": 0.7166, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.2665505226480836, | |
| "grad_norm": 0.7185062343660426, | |
| "learning_rate": 8.793103448275862e-06, | |
| "loss": 0.7367, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.27177700348432055, | |
| "grad_norm": 0.6728422963531511, | |
| "learning_rate": 8.965517241379312e-06, | |
| "loss": 0.7104, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.2770034843205575, | |
| "grad_norm": 0.661725549134852, | |
| "learning_rate": 9.13793103448276e-06, | |
| "loss": 0.7183, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.28222996515679444, | |
| "grad_norm": 0.6362203257614645, | |
| "learning_rate": 9.310344827586207e-06, | |
| "loss": 0.7153, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.2874564459930314, | |
| "grad_norm": 0.5890508129443038, | |
| "learning_rate": 9.482758620689655e-06, | |
| "loss": 0.7093, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.2926829268292683, | |
| "grad_norm": 0.6116919862768194, | |
| "learning_rate": 9.655172413793105e-06, | |
| "loss": 0.7176, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.2979094076655052, | |
| "grad_norm": 0.7050152514142426, | |
| "learning_rate": 9.827586206896553e-06, | |
| "loss": 0.7211, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.30313588850174217, | |
| "grad_norm": 0.6296132679526024, | |
| "learning_rate": 1e-05, | |
| "loss": 0.6993, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.3083623693379791, | |
| "grad_norm": 0.6453523097142025, | |
| "learning_rate": 9.999906969801156e-06, | |
| "loss": 0.6963, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.313588850174216, | |
| "grad_norm": 0.6955516834757508, | |
| "learning_rate": 9.999627882666474e-06, | |
| "loss": 0.7151, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.31881533101045295, | |
| "grad_norm": 0.5580219589427374, | |
| "learning_rate": 9.999162748981362e-06, | |
| "loss": 0.7049, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.3240418118466899, | |
| "grad_norm": 0.5910429225441691, | |
| "learning_rate": 9.998511586054415e-06, | |
| "loss": 0.7013, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.32926829268292684, | |
| "grad_norm": 0.8120038822728922, | |
| "learning_rate": 9.997674418116759e-06, | |
| "loss": 0.717, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.3344947735191638, | |
| "grad_norm": 0.7517280345835676, | |
| "learning_rate": 9.996651276321153e-06, | |
| "loss": 0.6952, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.3397212543554007, | |
| "grad_norm": 0.6469274840068534, | |
| "learning_rate": 9.995442198740832e-06, | |
| "loss": 0.701, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.34494773519163763, | |
| "grad_norm": 0.9251247112850067, | |
| "learning_rate": 9.994047230368087e-06, | |
| "loss": 0.7124, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.3501742160278746, | |
| "grad_norm": 0.662124581217118, | |
| "learning_rate": 9.99246642311259e-06, | |
| "loss": 0.7041, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.3554006968641115, | |
| "grad_norm": 0.6490157119284249, | |
| "learning_rate": 9.99069983579947e-06, | |
| "loss": 0.6885, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.3606271777003484, | |
| "grad_norm": 0.8298745583053626, | |
| "learning_rate": 9.988747534167112e-06, | |
| "loss": 0.7179, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.36585365853658536, | |
| "grad_norm": 0.561322661520838, | |
| "learning_rate": 9.98660959086472e-06, | |
| "loss": 0.6955, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.3710801393728223, | |
| "grad_norm": 0.7057604500980034, | |
| "learning_rate": 9.98428608544961e-06, | |
| "loss": 0.6903, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.37630662020905925, | |
| "grad_norm": 0.6658460439147413, | |
| "learning_rate": 9.981777104384251e-06, | |
| "loss": 0.7082, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.38153310104529614, | |
| "grad_norm": 0.5943504077793226, | |
| "learning_rate": 9.979082741033047e-06, | |
| "loss": 0.6768, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.3867595818815331, | |
| "grad_norm": 0.6864915702650408, | |
| "learning_rate": 9.976203095658859e-06, | |
| "loss": 0.6761, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.39198606271777003, | |
| "grad_norm": 0.6390276002240977, | |
| "learning_rate": 9.97313827541928e-06, | |
| "loss": 0.6934, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.397212543554007, | |
| "grad_norm": 0.6038585831748531, | |
| "learning_rate": 9.969888394362648e-06, | |
| "loss": 0.7041, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.4024390243902439, | |
| "grad_norm": 0.6334231152676686, | |
| "learning_rate": 9.96645357342379e-06, | |
| "loss": 0.6945, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.4076655052264808, | |
| "grad_norm": 0.5462598060569209, | |
| "learning_rate": 9.96283394041954e-06, | |
| "loss": 0.696, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.41289198606271776, | |
| "grad_norm": 0.6338524439755173, | |
| "learning_rate": 9.959029630043969e-06, | |
| "loss": 0.691, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.4181184668989547, | |
| "grad_norm": 0.6411928123304964, | |
| "learning_rate": 9.955040783863373e-06, | |
| "loss": 0.692, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.42334494773519166, | |
| "grad_norm": 0.8030581042650948, | |
| "learning_rate": 9.950867550311019e-06, | |
| "loss": 0.7196, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.42857142857142855, | |
| "grad_norm": 0.6024953623866571, | |
| "learning_rate": 9.946510084681602e-06, | |
| "loss": 0.6726, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.4337979094076655, | |
| "grad_norm": 0.6074570740511613, | |
| "learning_rate": 9.941968549125481e-06, | |
| "loss": 0.6897, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.43902439024390244, | |
| "grad_norm": 0.7048846718482608, | |
| "learning_rate": 9.937243112642639e-06, | |
| "loss": 0.6918, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 0.4442508710801394, | |
| "grad_norm": 0.5918640923441442, | |
| "learning_rate": 9.932333951076395e-06, | |
| "loss": 0.6766, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.44947735191637633, | |
| "grad_norm": 0.5926297060359819, | |
| "learning_rate": 9.927241247106856e-06, | |
| "loss": 0.695, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.4547038327526132, | |
| "grad_norm": 0.553208274597187, | |
| "learning_rate": 9.921965190244129e-06, | |
| "loss": 0.6534, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.45993031358885017, | |
| "grad_norm": 0.7161403077582934, | |
| "learning_rate": 9.916505976821262e-06, | |
| "loss": 0.6964, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.4651567944250871, | |
| "grad_norm": 0.6949109145902641, | |
| "learning_rate": 9.910863809986942e-06, | |
| "loss": 0.6948, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.47038327526132406, | |
| "grad_norm": 0.669998554662786, | |
| "learning_rate": 9.905038899697924e-06, | |
| "loss": 0.6748, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.47560975609756095, | |
| "grad_norm": 0.812799215586504, | |
| "learning_rate": 9.899031462711237e-06, | |
| "loss": 0.698, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.4808362369337979, | |
| "grad_norm": 0.7765125728252844, | |
| "learning_rate": 9.892841722576103e-06, | |
| "loss": 0.6839, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.48606271777003485, | |
| "grad_norm": 0.7279630881961654, | |
| "learning_rate": 9.886469909625624e-06, | |
| "loss": 0.6865, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.4912891986062718, | |
| "grad_norm": 0.8884193887396585, | |
| "learning_rate": 9.879916260968212e-06, | |
| "loss": 0.6693, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.4965156794425087, | |
| "grad_norm": 0.607572845135145, | |
| "learning_rate": 9.87318102047876e-06, | |
| "loss": 0.6812, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.5017421602787456, | |
| "grad_norm": 0.7203962118826617, | |
| "learning_rate": 9.866264438789573e-06, | |
| "loss": 0.686, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 0.5069686411149826, | |
| "grad_norm": 0.6785442509634554, | |
| "learning_rate": 9.85916677328104e-06, | |
| "loss": 0.7037, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 0.5121951219512195, | |
| "grad_norm": 0.5917891054628419, | |
| "learning_rate": 9.851888288072053e-06, | |
| "loss": 0.7037, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.5174216027874564, | |
| "grad_norm": 0.7601539373800206, | |
| "learning_rate": 9.844429254010184e-06, | |
| "loss": 0.6969, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.5226480836236934, | |
| "grad_norm": 0.6020652320578571, | |
| "learning_rate": 9.836789948661602e-06, | |
| "loss": 0.6879, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.5278745644599303, | |
| "grad_norm": 0.6764576509816881, | |
| "learning_rate": 9.828970656300743e-06, | |
| "loss": 0.6898, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.5331010452961672, | |
| "grad_norm": 0.6666874805819255, | |
| "learning_rate": 9.82097166789974e-06, | |
| "loss": 0.6778, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.5383275261324042, | |
| "grad_norm": 0.7360464789092225, | |
| "learning_rate": 9.81279328111758e-06, | |
| "loss": 0.6882, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.5435540069686411, | |
| "grad_norm": 0.701626438763124, | |
| "learning_rate": 9.804435800289047e-06, | |
| "loss": 0.6893, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.5487804878048781, | |
| "grad_norm": 0.7646586589346112, | |
| "learning_rate": 9.795899536413383e-06, | |
| "loss": 0.663, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.554006968641115, | |
| "grad_norm": 0.7827135637222683, | |
| "learning_rate": 9.787184807142713e-06, | |
| "loss": 0.6782, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.5592334494773519, | |
| "grad_norm": 0.7963355955603488, | |
| "learning_rate": 9.778291936770241e-06, | |
| "loss": 0.6948, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.5644599303135889, | |
| "grad_norm": 0.9185384976633354, | |
| "learning_rate": 9.769221256218165e-06, | |
| "loss": 0.6855, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 0.5696864111498258, | |
| "grad_norm": 0.6715876776695767, | |
| "learning_rate": 9.759973103025369e-06, | |
| "loss": 0.6692, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 0.5749128919860628, | |
| "grad_norm": 1.2389220927650733, | |
| "learning_rate": 9.750547821334868e-06, | |
| "loss": 0.6881, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.5801393728222997, | |
| "grad_norm": 0.7818470562308032, | |
| "learning_rate": 9.740945761880993e-06, | |
| "loss": 0.6483, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.5853658536585366, | |
| "grad_norm": 0.8194633115862917, | |
| "learning_rate": 9.731167281976346e-06, | |
| "loss": 0.6573, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.5905923344947736, | |
| "grad_norm": 0.8554081467672775, | |
| "learning_rate": 9.721212745498493e-06, | |
| "loss": 0.6831, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.5958188153310104, | |
| "grad_norm": 0.6301357489652583, | |
| "learning_rate": 9.711082522876445e-06, | |
| "loss": 0.6883, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.6010452961672473, | |
| "grad_norm": 0.7320145359964577, | |
| "learning_rate": 9.700776991076846e-06, | |
| "loss": 0.6826, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.6062717770034843, | |
| "grad_norm": 0.7164618151118382, | |
| "learning_rate": 9.690296533589967e-06, | |
| "loss": 0.6669, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.6114982578397212, | |
| "grad_norm": 0.6788237892173536, | |
| "learning_rate": 9.679641540415428e-06, | |
| "loss": 0.6659, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.6167247386759582, | |
| "grad_norm": 0.6158473714288532, | |
| "learning_rate": 9.66881240804768e-06, | |
| "loss": 0.6597, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.6219512195121951, | |
| "grad_norm": 0.6555526405519565, | |
| "learning_rate": 9.657809539461256e-06, | |
| "loss": 0.6831, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.627177700348432, | |
| "grad_norm": 0.6887980438356484, | |
| "learning_rate": 9.64663334409578e-06, | |
| "loss": 0.666, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.632404181184669, | |
| "grad_norm": 0.5979509900819844, | |
| "learning_rate": 9.635284237840721e-06, | |
| "loss": 0.6553, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 0.6376306620209059, | |
| "grad_norm": 0.7294362846964269, | |
| "learning_rate": 9.623762643019927e-06, | |
| "loss": 0.6771, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.6428571428571429, | |
| "grad_norm": 0.7545085300162823, | |
| "learning_rate": 9.612068988375898e-06, | |
| "loss": 0.6715, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.6480836236933798, | |
| "grad_norm": 0.6749790083440244, | |
| "learning_rate": 9.60020370905384e-06, | |
| "loss": 0.6514, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.6533101045296167, | |
| "grad_norm": 0.804530126401735, | |
| "learning_rate": 9.588167246585474e-06, | |
| "loss": 0.6459, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.6585365853658537, | |
| "grad_norm": 0.6831220443498838, | |
| "learning_rate": 9.575960048872595e-06, | |
| "loss": 0.6896, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.6637630662020906, | |
| "grad_norm": 0.5810171537948514, | |
| "learning_rate": 9.563582570170418e-06, | |
| "loss": 0.6613, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.6689895470383276, | |
| "grad_norm": 0.7304494902535995, | |
| "learning_rate": 9.551035271070665e-06, | |
| "loss": 0.6516, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.6742160278745645, | |
| "grad_norm": 0.6119862390653155, | |
| "learning_rate": 9.538318618484426e-06, | |
| "loss": 0.6618, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.6794425087108014, | |
| "grad_norm": 0.6430165539813878, | |
| "learning_rate": 9.52543308562479e-06, | |
| "loss": 0.6864, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.6846689895470384, | |
| "grad_norm": 0.6194884070250605, | |
| "learning_rate": 9.512379151989229e-06, | |
| "loss": 0.6784, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.6898954703832753, | |
| "grad_norm": 0.5516355539801995, | |
| "learning_rate": 9.499157303341761e-06, | |
| "loss": 0.6653, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 0.6951219512195121, | |
| "grad_norm": 0.622640994125776, | |
| "learning_rate": 9.485768031694872e-06, | |
| "loss": 0.6731, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 0.7003484320557491, | |
| "grad_norm": 0.547286279646913, | |
| "learning_rate": 9.4722118352912e-06, | |
| "loss": 0.6663, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.705574912891986, | |
| "grad_norm": 0.6363064057591127, | |
| "learning_rate": 9.458489218585003e-06, | |
| "loss": 0.655, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.710801393728223, | |
| "grad_norm": 0.6134952050030124, | |
| "learning_rate": 9.44460069222339e-06, | |
| "loss": 0.6749, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.7160278745644599, | |
| "grad_norm": 0.5937836538107625, | |
| "learning_rate": 9.430546773027302e-06, | |
| "loss": 0.6686, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.7212543554006968, | |
| "grad_norm": 0.6644117521702412, | |
| "learning_rate": 9.416327983972304e-06, | |
| "loss": 0.6594, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.7264808362369338, | |
| "grad_norm": 0.6220644455779103, | |
| "learning_rate": 9.401944854169103e-06, | |
| "loss": 0.6749, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.7317073170731707, | |
| "grad_norm": 0.6547771063435059, | |
| "learning_rate": 9.38739791884387e-06, | |
| "loss": 0.6579, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.7369337979094077, | |
| "grad_norm": 0.561284464609663, | |
| "learning_rate": 9.372687719318316e-06, | |
| "loss": 0.681, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.7421602787456446, | |
| "grad_norm": 0.6038846576238117, | |
| "learning_rate": 9.35781480298956e-06, | |
| "loss": 0.6666, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.7473867595818815, | |
| "grad_norm": 0.572240770754845, | |
| "learning_rate": 9.342779723309746e-06, | |
| "loss": 0.6238, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.7526132404181185, | |
| "grad_norm": 0.5890147611510671, | |
| "learning_rate": 9.327583039765453e-06, | |
| "loss": 0.6637, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 0.7578397212543554, | |
| "grad_norm": 0.6636633864116531, | |
| "learning_rate": 9.31222531785688e-06, | |
| "loss": 0.6523, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.7630662020905923, | |
| "grad_norm": 0.6188433137259545, | |
| "learning_rate": 9.296707129076794e-06, | |
| "loss": 0.6626, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.7682926829268293, | |
| "grad_norm": 0.6163886375278169, | |
| "learning_rate": 9.281029050889274e-06, | |
| "loss": 0.6786, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.7735191637630662, | |
| "grad_norm": 0.7346550992330867, | |
| "learning_rate": 9.26519166670821e-06, | |
| "loss": 0.6861, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.7787456445993032, | |
| "grad_norm": 0.5852860888717738, | |
| "learning_rate": 9.2491955658756e-06, | |
| "loss": 0.6591, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.7839721254355401, | |
| "grad_norm": 0.7028963677577197, | |
| "learning_rate": 9.233041343639623e-06, | |
| "loss": 0.6881, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.789198606271777, | |
| "grad_norm": 0.8189320800786853, | |
| "learning_rate": 9.216729601132481e-06, | |
| "loss": 0.6702, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.794425087108014, | |
| "grad_norm": 0.5963261479645, | |
| "learning_rate": 9.200260945348034e-06, | |
| "loss": 0.6652, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.7996515679442509, | |
| "grad_norm": 0.6745799083758508, | |
| "learning_rate": 9.183635989119211e-06, | |
| "loss": 0.6569, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.8048780487804879, | |
| "grad_norm": 0.7674431433443214, | |
| "learning_rate": 9.166855351095205e-06, | |
| "loss": 0.6629, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.8101045296167247, | |
| "grad_norm": 0.5864935078587289, | |
| "learning_rate": 9.149919655718453e-06, | |
| "loss": 0.6615, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.8153310104529616, | |
| "grad_norm": 0.6569771197412858, | |
| "learning_rate": 9.132829533201397e-06, | |
| "loss": 0.6321, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 0.8205574912891986, | |
| "grad_norm": 0.6855458963431387, | |
| "learning_rate": 9.115585619503039e-06, | |
| "loss": 0.6471, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 0.8257839721254355, | |
| "grad_norm": 0.5765451182091219, | |
| "learning_rate": 9.098188556305262e-06, | |
| "loss": 0.6748, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.8310104529616724, | |
| "grad_norm": 0.6567732753183342, | |
| "learning_rate": 9.080638990988971e-06, | |
| "loss": 0.6564, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.8362369337979094, | |
| "grad_norm": 0.636552981471604, | |
| "learning_rate": 9.062937576609983e-06, | |
| "loss": 0.654, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.8414634146341463, | |
| "grad_norm": 0.5529034964515249, | |
| "learning_rate": 9.045084971874738e-06, | |
| "loss": 0.6558, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.8466898954703833, | |
| "grad_norm": 0.5893696671833625, | |
| "learning_rate": 9.027081841115784e-06, | |
| "loss": 0.6663, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.8519163763066202, | |
| "grad_norm": 0.5667170304864082, | |
| "learning_rate": 9.008928854267054e-06, | |
| "loss": 0.6443, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.8571428571428571, | |
| "grad_norm": 0.5725785658325226, | |
| "learning_rate": 8.99062668683894e-06, | |
| "loss": 0.6536, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.8623693379790941, | |
| "grad_norm": 0.6060232996236128, | |
| "learning_rate": 8.97217601989315e-06, | |
| "loss": 0.6579, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.867595818815331, | |
| "grad_norm": 0.5509869618020639, | |
| "learning_rate": 8.95357754001737e-06, | |
| "loss": 0.6713, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.872822299651568, | |
| "grad_norm": 0.6324223806192756, | |
| "learning_rate": 8.934831939299715e-06, | |
| "loss": 0.6696, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.8780487804878049, | |
| "grad_norm": 0.6405745054270299, | |
| "learning_rate": 8.91593991530297e-06, | |
| "loss": 0.6584, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 0.8832752613240418, | |
| "grad_norm": 0.6307439054892403, | |
| "learning_rate": 8.896902171038629e-06, | |
| "loss": 0.6529, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 0.8885017421602788, | |
| "grad_norm": 0.6611063399387049, | |
| "learning_rate": 8.877719414940751e-06, | |
| "loss": 0.6816, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.8937282229965157, | |
| "grad_norm": 0.5327557983597507, | |
| "learning_rate": 8.85839236083958e-06, | |
| "loss": 0.6591, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.8989547038327527, | |
| "grad_norm": 0.6306768954606544, | |
| "learning_rate": 8.838921727934992e-06, | |
| "loss": 0.6725, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.9041811846689896, | |
| "grad_norm": 0.5906681591023982, | |
| "learning_rate": 8.819308240769726e-06, | |
| "loss": 0.663, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.9094076655052264, | |
| "grad_norm": 0.6242722153083872, | |
| "learning_rate": 8.799552629202424e-06, | |
| "loss": 0.6593, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.9146341463414634, | |
| "grad_norm": 0.6206172349107745, | |
| "learning_rate": 8.779655628380479e-06, | |
| "loss": 0.6366, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.9198606271777003, | |
| "grad_norm": 0.6446754833762357, | |
| "learning_rate": 8.759617978712667e-06, | |
| "loss": 0.6588, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.9250871080139372, | |
| "grad_norm": 0.6524808843585924, | |
| "learning_rate": 8.7394404258416e-06, | |
| "loss": 0.6708, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.9303135888501742, | |
| "grad_norm": 0.5695275564016344, | |
| "learning_rate": 8.71912372061598e-06, | |
| "loss": 0.6581, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.9355400696864111, | |
| "grad_norm": 0.5524125516832253, | |
| "learning_rate": 8.69866861906266e-06, | |
| "loss": 0.675, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.9407665505226481, | |
| "grad_norm": 0.6301898047784469, | |
| "learning_rate": 8.678075882358506e-06, | |
| "loss": 0.6315, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.945993031358885, | |
| "grad_norm": 0.5726932230656162, | |
| "learning_rate": 8.657346276802071e-06, | |
| "loss": 0.6407, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 0.9512195121951219, | |
| "grad_norm": 0.6267676856772942, | |
| "learning_rate": 8.636480573785089e-06, | |
| "loss": 0.6665, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.9564459930313589, | |
| "grad_norm": 0.6244142776173546, | |
| "learning_rate": 8.615479549763756e-06, | |
| "loss": 0.6618, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.9616724738675958, | |
| "grad_norm": 0.6177405231356075, | |
| "learning_rate": 8.594343986229854e-06, | |
| "loss": 0.6773, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.9668989547038328, | |
| "grad_norm": 0.5988900835082392, | |
| "learning_rate": 8.57307466968165e-06, | |
| "loss": 0.6429, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.9721254355400697, | |
| "grad_norm": 0.6447854583228403, | |
| "learning_rate": 8.551672391594646e-06, | |
| "loss": 0.6735, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.9773519163763066, | |
| "grad_norm": 0.5761133455065623, | |
| "learning_rate": 8.530137948392113e-06, | |
| "loss": 0.6614, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.9825783972125436, | |
| "grad_norm": 0.5943974371608253, | |
| "learning_rate": 8.508472141415468e-06, | |
| "loss": 0.6539, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.9878048780487805, | |
| "grad_norm": 0.6718313091561632, | |
| "learning_rate": 8.48667577689444e-06, | |
| "loss": 0.6346, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.9930313588850174, | |
| "grad_norm": 0.5856014973725608, | |
| "learning_rate": 8.46474966591708e-06, | |
| "loss": 0.6626, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.9982578397212544, | |
| "grad_norm": 0.6113432602736558, | |
| "learning_rate": 8.442694624399576e-06, | |
| "loss": 0.6604, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 1.0034843205574913, | |
| "grad_norm": 1.1692152399954596, | |
| "learning_rate": 8.420511473055887e-06, | |
| "loss": 1.0489, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 1.0087108013937283, | |
| "grad_norm": 0.6145614135481637, | |
| "learning_rate": 8.398201037367202e-06, | |
| "loss": 0.6419, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 1.0139372822299653, | |
| "grad_norm": 0.6268987577515908, | |
| "learning_rate": 8.37576414755123e-06, | |
| "loss": 0.6137, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 1.019163763066202, | |
| "grad_norm": 0.6416953015407134, | |
| "learning_rate": 8.3532016385313e-06, | |
| "loss": 0.6243, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 1.024390243902439, | |
| "grad_norm": 0.5704666555598586, | |
| "learning_rate": 8.330514349905295e-06, | |
| "loss": 0.6368, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 1.029616724738676, | |
| "grad_norm": 0.503382880864932, | |
| "learning_rate": 8.307703125914397e-06, | |
| "loss": 0.5435, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 1.0348432055749128, | |
| "grad_norm": 0.5773976279224134, | |
| "learning_rate": 8.284768815411693e-06, | |
| "loss": 0.6031, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 1.0400696864111498, | |
| "grad_norm": 0.6463491533346394, | |
| "learning_rate": 8.261712271830564e-06, | |
| "loss": 0.5943, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 1.0452961672473868, | |
| "grad_norm": 0.6023840946052278, | |
| "learning_rate": 8.23853435315295e-06, | |
| "loss": 0.5491, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.0505226480836236, | |
| "grad_norm": 0.5738792581365361, | |
| "learning_rate": 8.215235921877403e-06, | |
| "loss": 0.5604, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 1.0557491289198606, | |
| "grad_norm": 0.6156147642415088, | |
| "learning_rate": 8.191817844986998e-06, | |
| "loss": 0.6434, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 1.0609756097560976, | |
| "grad_norm": 0.5569699792461741, | |
| "learning_rate": 8.168280993917078e-06, | |
| "loss": 0.5594, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 1.0662020905923344, | |
| "grad_norm": 0.5657310358177506, | |
| "learning_rate": 8.144626244522812e-06, | |
| "loss": 0.5686, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 1.0714285714285714, | |
| "grad_norm": 0.6672649446625757, | |
| "learning_rate": 8.120854477046621e-06, | |
| "loss": 0.6803, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 1.0766550522648084, | |
| "grad_norm": 0.5612168930959153, | |
| "learning_rate": 8.096966576085406e-06, | |
| "loss": 0.5824, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 1.0818815331010452, | |
| "grad_norm": 0.6579205469061564, | |
| "learning_rate": 8.072963430557636e-06, | |
| "loss": 0.6028, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 1.0871080139372822, | |
| "grad_norm": 0.5598751666256221, | |
| "learning_rate": 8.048845933670274e-06, | |
| "loss": 0.5777, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 1.0923344947735192, | |
| "grad_norm": 0.5843330235385993, | |
| "learning_rate": 8.024614982885527e-06, | |
| "loss": 0.5577, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 1.0975609756097562, | |
| "grad_norm": 0.601858208353747, | |
| "learning_rate": 8.00027147988747e-06, | |
| "loss": 0.6143, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.102787456445993, | |
| "grad_norm": 0.627628607597956, | |
| "learning_rate": 7.975816330548466e-06, | |
| "loss": 0.596, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 1.10801393728223, | |
| "grad_norm": 0.6241333668783621, | |
| "learning_rate": 7.951250444895485e-06, | |
| "loss": 0.6494, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 1.113240418118467, | |
| "grad_norm": 0.5382934099983673, | |
| "learning_rate": 7.92657473707621e-06, | |
| "loss": 0.5217, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 1.1184668989547037, | |
| "grad_norm": 0.6953324804344473, | |
| "learning_rate": 7.901790125325049e-06, | |
| "loss": 0.6941, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 1.1236933797909407, | |
| "grad_norm": 0.5453365037414081, | |
| "learning_rate": 7.876897531928943e-06, | |
| "loss": 0.6076, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 1.1289198606271778, | |
| "grad_norm": 0.5736100460893016, | |
| "learning_rate": 7.851897883193057e-06, | |
| "loss": 0.5852, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 1.1341463414634148, | |
| "grad_norm": 0.5648285470025594, | |
| "learning_rate": 7.82679210940631e-06, | |
| "loss": 0.6033, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 1.1393728222996515, | |
| "grad_norm": 0.5867845846947239, | |
| "learning_rate": 7.801581144806752e-06, | |
| "loss": 0.5587, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 1.1445993031358885, | |
| "grad_norm": 0.5513023482330085, | |
| "learning_rate": 7.7762659275468e-06, | |
| "loss": 0.618, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 1.1498257839721253, | |
| "grad_norm": 0.6153474010859067, | |
| "learning_rate": 7.750847399658336e-06, | |
| "loss": 0.6282, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 1.1550522648083623, | |
| "grad_norm": 0.4877966585426567, | |
| "learning_rate": 7.725326507017644e-06, | |
| "loss": 0.5451, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 1.1602787456445993, | |
| "grad_norm": 0.5463512600607151, | |
| "learning_rate": 7.699704199310204e-06, | |
| "loss": 0.5972, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 1.1655052264808363, | |
| "grad_norm": 0.6352304968816307, | |
| "learning_rate": 7.673981429995372e-06, | |
| "loss": 0.6114, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 1.170731707317073, | |
| "grad_norm": 0.510832677269982, | |
| "learning_rate": 7.648159156270884e-06, | |
| "loss": 0.6268, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 1.17595818815331, | |
| "grad_norm": 0.5814622275101878, | |
| "learning_rate": 7.622238339037248e-06, | |
| "loss": 0.6141, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.181184668989547, | |
| "grad_norm": 0.5818228914475463, | |
| "learning_rate": 7.596219942861971e-06, | |
| "loss": 0.5655, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 1.1864111498257839, | |
| "grad_norm": 0.5842183364885295, | |
| "learning_rate": 7.570104935943685e-06, | |
| "loss": 0.5983, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 1.1916376306620209, | |
| "grad_norm": 0.5752307733655048, | |
| "learning_rate": 7.5438942900761035e-06, | |
| "loss": 0.6072, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 1.1968641114982579, | |
| "grad_norm": 0.585743260967966, | |
| "learning_rate": 7.517588980611864e-06, | |
| "loss": 0.6306, | |
| "step": 229 | |
| }, | |
| { | |
| "epoch": 1.202090592334495, | |
| "grad_norm": 0.48400699770777905, | |
| "learning_rate": 7.491189986426236e-06, | |
| "loss": 0.523, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 1.2073170731707317, | |
| "grad_norm": 0.6000420590770226, | |
| "learning_rate": 7.464698289880689e-06, | |
| "loss": 0.6248, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 1.2125435540069687, | |
| "grad_norm": 0.566825613388456, | |
| "learning_rate": 7.438114876786344e-06, | |
| "loss": 0.5892, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 1.2177700348432055, | |
| "grad_norm": 0.5942135634459118, | |
| "learning_rate": 7.411440736367281e-06, | |
| "loss": 0.604, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 1.2229965156794425, | |
| "grad_norm": 0.5308639194617243, | |
| "learning_rate": 7.384676861223738e-06, | |
| "loss": 0.5938, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 1.2282229965156795, | |
| "grad_norm": 0.5233297325602637, | |
| "learning_rate": 7.3578242472951635e-06, | |
| "loss": 0.5606, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 1.2334494773519165, | |
| "grad_norm": 0.5506498491313724, | |
| "learning_rate": 7.330883893823164e-06, | |
| "loss": 0.6321, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 1.2386759581881532, | |
| "grad_norm": 0.5032393653634389, | |
| "learning_rate": 7.303856803314313e-06, | |
| "loss": 0.5772, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 1.2439024390243902, | |
| "grad_norm": 0.5516215897044785, | |
| "learning_rate": 7.276743981502856e-06, | |
| "loss": 0.6249, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 1.2491289198606272, | |
| "grad_norm": 0.5922013934962067, | |
| "learning_rate": 7.249546437313273e-06, | |
| "loss": 0.5742, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 1.254355400696864, | |
| "grad_norm": 0.563594800466747, | |
| "learning_rate": 7.22226518282274e-06, | |
| "loss": 0.5931, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 1.259581881533101, | |
| "grad_norm": 0.5865049138488002, | |
| "learning_rate": 7.194901233223471e-06, | |
| "loss": 0.6026, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 1.264808362369338, | |
| "grad_norm": 0.6049741584214127, | |
| "learning_rate": 7.167455606784936e-06, | |
| "loss": 0.5973, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 1.270034843205575, | |
| "grad_norm": 0.600516789714151, | |
| "learning_rate": 7.139929324815965e-06, | |
| "loss": 0.639, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 1.2752613240418118, | |
| "grad_norm": 0.5799862799629144, | |
| "learning_rate": 7.112323411626756e-06, | |
| "loss": 0.5975, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 1.2804878048780488, | |
| "grad_norm": 0.608405822867294, | |
| "learning_rate": 7.084638894490744e-06, | |
| "loss": 0.5932, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 1.2857142857142856, | |
| "grad_norm": 0.6086512791994181, | |
| "learning_rate": 7.056876803606384e-06, | |
| "loss": 0.6211, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 1.2909407665505226, | |
| "grad_norm": 0.6114934942027832, | |
| "learning_rate": 7.029038172058809e-06, | |
| "loss": 0.5816, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 1.2961672473867596, | |
| "grad_norm": 0.5531245874896416, | |
| "learning_rate": 7.00112403578139e-06, | |
| "loss": 0.5793, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 1.3013937282229966, | |
| "grad_norm": 0.590859726592878, | |
| "learning_rate": 6.9731354335171885e-06, | |
| "loss": 0.5969, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 1.3066202090592334, | |
| "grad_norm": 0.6199140175680848, | |
| "learning_rate": 6.945073406780296e-06, | |
| "loss": 0.6263, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.3118466898954704, | |
| "grad_norm": 0.49378151455087976, | |
| "learning_rate": 6.916938999817085e-06, | |
| "loss": 0.6258, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 1.3170731707317074, | |
| "grad_norm": 0.5481220680272908, | |
| "learning_rate": 6.888733259567343e-06, | |
| "loss": 0.5697, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 1.3222996515679442, | |
| "grad_norm": 0.5601237104950061, | |
| "learning_rate": 6.860457235625322e-06, | |
| "loss": 0.6192, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 1.3275261324041812, | |
| "grad_norm": 0.5562695792344781, | |
| "learning_rate": 6.832111980200672e-06, | |
| "loss": 0.5595, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 1.3327526132404182, | |
| "grad_norm": 0.6375068604321416, | |
| "learning_rate": 6.803698548079294e-06, | |
| "loss": 0.6001, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 1.3379790940766552, | |
| "grad_norm": 0.5443785529911263, | |
| "learning_rate": 6.775217996584082e-06, | |
| "loss": 0.5915, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 1.343205574912892, | |
| "grad_norm": 0.5408122608427677, | |
| "learning_rate": 6.746671385535586e-06, | |
| "loss": 0.5953, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 1.348432055749129, | |
| "grad_norm": 0.5539163863396951, | |
| "learning_rate": 6.7180597772125665e-06, | |
| "loss": 0.6388, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 1.3536585365853657, | |
| "grad_norm": 0.5362462583433603, | |
| "learning_rate": 6.689384236312465e-06, | |
| "loss": 0.5838, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 1.3588850174216027, | |
| "grad_norm": 0.541689780691623, | |
| "learning_rate": 6.660645829911794e-06, | |
| "loss": 0.6318, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 1.3641114982578397, | |
| "grad_norm": 0.5106593953968394, | |
| "learning_rate": 6.631845627426418e-06, | |
| "loss": 0.5425, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 1.3693379790940767, | |
| "grad_norm": 0.49403851854670217, | |
| "learning_rate": 6.602984700571758e-06, | |
| "loss": 0.5305, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 1.3745644599303135, | |
| "grad_norm": 0.5967203227127262, | |
| "learning_rate": 6.574064123322925e-06, | |
| "loss": 0.6405, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 1.3797909407665505, | |
| "grad_norm": 0.5601246715123179, | |
| "learning_rate": 6.545084971874738e-06, | |
| "loss": 0.5802, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 1.3850174216027875, | |
| "grad_norm": 0.5342203081001657, | |
| "learning_rate": 6.516048324601685e-06, | |
| "loss": 0.6166, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 1.3902439024390243, | |
| "grad_norm": 0.5175415102386944, | |
| "learning_rate": 6.486955262017794e-06, | |
| "loss": 0.6049, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 1.3954703832752613, | |
| "grad_norm": 0.603808078724887, | |
| "learning_rate": 6.457806866736424e-06, | |
| "loss": 0.615, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 1.4006968641114983, | |
| "grad_norm": 0.5271092125681837, | |
| "learning_rate": 6.42860422342998e-06, | |
| "loss": 0.522, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 1.4059233449477353, | |
| "grad_norm": 0.5871002546738227, | |
| "learning_rate": 6.399348418789545e-06, | |
| "loss": 0.6314, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 1.411149825783972, | |
| "grad_norm": 0.5188920938550411, | |
| "learning_rate": 6.37004054148445e-06, | |
| "loss": 0.5822, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 1.416376306620209, | |
| "grad_norm": 0.5152138837098115, | |
| "learning_rate": 6.3406816821217554e-06, | |
| "loss": 0.6093, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 1.4216027874564459, | |
| "grad_norm": 0.513754045434498, | |
| "learning_rate": 6.311272933205672e-06, | |
| "loss": 0.5832, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 1.4268292682926829, | |
| "grad_norm": 0.4912944860658243, | |
| "learning_rate": 6.281815389096903e-06, | |
| "loss": 0.529, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 1.4320557491289199, | |
| "grad_norm": 0.5427353767757749, | |
| "learning_rate": 6.2523101459719204e-06, | |
| "loss": 0.6526, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 1.4372822299651569, | |
| "grad_norm": 0.5535697011306765, | |
| "learning_rate": 6.222758301782183e-06, | |
| "loss": 0.6282, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.4425087108013936, | |
| "grad_norm": 0.5162479310938117, | |
| "learning_rate": 6.193160956213262e-06, | |
| "loss": 0.5903, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 1.4477351916376306, | |
| "grad_norm": 0.5604814223022786, | |
| "learning_rate": 6.163519210643939e-06, | |
| "loss": 0.6083, | |
| "step": 277 | |
| }, | |
| { | |
| "epoch": 1.4529616724738676, | |
| "grad_norm": 0.5139952218594641, | |
| "learning_rate": 6.133834168105206e-06, | |
| "loss": 0.5271, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 1.4581881533101044, | |
| "grad_norm": 0.6229450021349342, | |
| "learning_rate": 6.104106933239227e-06, | |
| "loss": 0.5636, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 1.4634146341463414, | |
| "grad_norm": 0.5408488715069601, | |
| "learning_rate": 6.07433861225823e-06, | |
| "loss": 0.6205, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 1.4686411149825784, | |
| "grad_norm": 0.49674309270840106, | |
| "learning_rate": 6.044530312903343e-06, | |
| "loss": 0.5794, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 1.4738675958188154, | |
| "grad_norm": 0.582431202829567, | |
| "learning_rate": 6.014683144403375e-06, | |
| "loss": 0.6167, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 1.4790940766550522, | |
| "grad_norm": 0.573524016791654, | |
| "learning_rate": 5.9847982174335314e-06, | |
| "loss": 0.6008, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 1.4843205574912892, | |
| "grad_norm": 0.5130183259716176, | |
| "learning_rate": 5.954876644074092e-06, | |
| "loss": 0.6057, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 1.489547038327526, | |
| "grad_norm": 0.547570250155865, | |
| "learning_rate": 5.924919537769025e-06, | |
| "loss": 0.5872, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 1.494773519163763, | |
| "grad_norm": 0.5786698298674792, | |
| "learning_rate": 5.894928013284551e-06, | |
| "loss": 0.549, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 0.5765527388977582, | |
| "learning_rate": 5.8649031866676685e-06, | |
| "loss": 0.6325, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 1.505226480836237, | |
| "grad_norm": 0.4800435309675829, | |
| "learning_rate": 5.834846175204612e-06, | |
| "loss": 0.5278, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 1.510452961672474, | |
| "grad_norm": 0.6218236849239587, | |
| "learning_rate": 5.804758097379281e-06, | |
| "loss": 0.6634, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 1.5156794425087108, | |
| "grad_norm": 0.5073235422648865, | |
| "learning_rate": 5.774640072831622e-06, | |
| "loss": 0.5773, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 1.5209059233449478, | |
| "grad_norm": 0.5687256206289278, | |
| "learning_rate": 5.74449322231596e-06, | |
| "loss": 0.6216, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 1.5261324041811846, | |
| "grad_norm": 0.495289281164082, | |
| "learning_rate": 5.714318667659295e-06, | |
| "loss": 0.5697, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 1.5313588850174216, | |
| "grad_norm": 0.5071599473157437, | |
| "learning_rate": 5.684117531719552e-06, | |
| "loss": 0.5902, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 1.5365853658536586, | |
| "grad_norm": 0.5465492422582399, | |
| "learning_rate": 5.653890938343806e-06, | |
| "loss": 0.6084, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 1.5418118466898956, | |
| "grad_norm": 0.5794183886322944, | |
| "learning_rate": 5.623640012326455e-06, | |
| "loss": 0.6074, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 1.5470383275261324, | |
| "grad_norm": 0.5077288815232027, | |
| "learning_rate": 5.593365879367361e-06, | |
| "loss": 0.559, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 1.5522648083623694, | |
| "grad_norm": 0.535785650486145, | |
| "learning_rate": 5.56306966602997e-06, | |
| "loss": 0.6259, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 1.5574912891986061, | |
| "grad_norm": 0.5574339114144872, | |
| "learning_rate": 5.532752499699381e-06, | |
| "loss": 0.6274, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 1.5627177700348431, | |
| "grad_norm": 0.4864171169075875, | |
| "learning_rate": 5.502415508540401e-06, | |
| "loss": 0.5689, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 1.5679442508710801, | |
| "grad_norm": 0.5395355580960868, | |
| "learning_rate": 5.472059821455554e-06, | |
| "loss": 0.615, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.5731707317073171, | |
| "grad_norm": 0.5367333644494985, | |
| "learning_rate": 5.441686568043086e-06, | |
| "loss": 0.6209, | |
| "step": 301 | |
| }, | |
| { | |
| "epoch": 1.5783972125435541, | |
| "grad_norm": 0.5629949580706008, | |
| "learning_rate": 5.411296878554918e-06, | |
| "loss": 0.5778, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 1.583623693379791, | |
| "grad_norm": 0.5279025042856722, | |
| "learning_rate": 5.380891883854591e-06, | |
| "loss": 0.5891, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 1.588850174216028, | |
| "grad_norm": 0.529056626502548, | |
| "learning_rate": 5.3504727153751865e-06, | |
| "loss": 0.5781, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 1.5940766550522647, | |
| "grad_norm": 0.5143855575664987, | |
| "learning_rate": 5.320040505077222e-06, | |
| "loss": 0.6108, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 1.5993031358885017, | |
| "grad_norm": 0.5496043322415672, | |
| "learning_rate": 5.289596385406527e-06, | |
| "loss": 0.5925, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 1.6045296167247387, | |
| "grad_norm": 0.45788921944124966, | |
| "learning_rate": 5.259141489252104e-06, | |
| "loss": 0.5211, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 1.6097560975609757, | |
| "grad_norm": 0.5610785336406, | |
| "learning_rate": 5.228676949903974e-06, | |
| "loss": 0.6835, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 1.6149825783972127, | |
| "grad_norm": 0.5041137193985311, | |
| "learning_rate": 5.198203901010993e-06, | |
| "loss": 0.5614, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 1.6202090592334495, | |
| "grad_norm": 0.6140523018892134, | |
| "learning_rate": 5.167723476538683e-06, | |
| "loss": 0.6063, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 1.6254355400696863, | |
| "grad_norm": 0.47853290908855395, | |
| "learning_rate": 5.137236810727025e-06, | |
| "loss": 0.5358, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 1.6306620209059233, | |
| "grad_norm": 0.5819289571367962, | |
| "learning_rate": 5.106745038048251e-06, | |
| "loss": 0.6031, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 1.6358885017421603, | |
| "grad_norm": 0.5268059390545785, | |
| "learning_rate": 5.07624929316463e-06, | |
| "loss": 0.5926, | |
| "step": 313 | |
| }, | |
| { | |
| "epoch": 1.6411149825783973, | |
| "grad_norm": 0.5158701213233892, | |
| "learning_rate": 5.045750710886248e-06, | |
| "loss": 0.5909, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 1.6463414634146343, | |
| "grad_norm": 0.5151166538253583, | |
| "learning_rate": 5.015250426128772e-06, | |
| "loss": 0.5394, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 1.651567944250871, | |
| "grad_norm": 0.5538710708429964, | |
| "learning_rate": 4.984749573871228e-06, | |
| "loss": 0.6349, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 1.656794425087108, | |
| "grad_norm": 0.5631924824167399, | |
| "learning_rate": 4.954249289113753e-06, | |
| "loss": 0.6271, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 1.6620209059233448, | |
| "grad_norm": 0.5557831986406585, | |
| "learning_rate": 4.923750706835371e-06, | |
| "loss": 0.6067, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 1.6672473867595818, | |
| "grad_norm": 0.5129556814943586, | |
| "learning_rate": 4.8932549619517514e-06, | |
| "loss": 0.5688, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 1.6724738675958188, | |
| "grad_norm": 0.510154514617586, | |
| "learning_rate": 4.862763189272976e-06, | |
| "loss": 0.5993, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 1.6777003484320558, | |
| "grad_norm": 0.5144230379042817, | |
| "learning_rate": 4.832276523461317e-06, | |
| "loss": 0.5251, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 1.6829268292682928, | |
| "grad_norm": 0.6787456643186844, | |
| "learning_rate": 4.801796098989009e-06, | |
| "loss": 0.6171, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.6881533101045296, | |
| "grad_norm": 0.5288392604393932, | |
| "learning_rate": 4.771323050096028e-06, | |
| "loss": 0.5868, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 1.6933797909407664, | |
| "grad_norm": 0.5337562030536104, | |
| "learning_rate": 4.740858510747897e-06, | |
| "loss": 0.6094, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 1.6986062717770034, | |
| "grad_norm": 0.5526357082902859, | |
| "learning_rate": 4.710403614593475e-06, | |
| "loss": 0.551, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 1.7038327526132404, | |
| "grad_norm": 0.5667609879832234, | |
| "learning_rate": 4.679959494922779e-06, | |
| "loss": 0.6207, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 1.7090592334494774, | |
| "grad_norm": 0.5052465715351889, | |
| "learning_rate": 4.649527284624814e-06, | |
| "loss": 0.5813, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 1.7142857142857144, | |
| "grad_norm": 0.6012385103563326, | |
| "learning_rate": 4.619108116145411e-06, | |
| "loss": 0.5997, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 1.7195121951219512, | |
| "grad_norm": 0.49875874383318625, | |
| "learning_rate": 4.588703121445084e-06, | |
| "loss": 0.6162, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 1.7247386759581882, | |
| "grad_norm": 0.5203036540095274, | |
| "learning_rate": 4.558313431956914e-06, | |
| "loss": 0.5616, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 1.729965156794425, | |
| "grad_norm": 0.6015714400165652, | |
| "learning_rate": 4.527940178544446e-06, | |
| "loss": 0.6088, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 1.735191637630662, | |
| "grad_norm": 0.4998018729629211, | |
| "learning_rate": 4.4975844914596015e-06, | |
| "loss": 0.5808, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 1.740418118466899, | |
| "grad_norm": 0.5755597332027752, | |
| "learning_rate": 4.467247500300621e-06, | |
| "loss": 0.6187, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 1.745644599303136, | |
| "grad_norm": 0.4930485861382567, | |
| "learning_rate": 4.436930333970033e-06, | |
| "loss": 0.5532, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 1.750871080139373, | |
| "grad_norm": 0.5496423220513306, | |
| "learning_rate": 4.40663412063264e-06, | |
| "loss": 0.6331, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 1.7560975609756098, | |
| "grad_norm": 0.5442491839702545, | |
| "learning_rate": 4.376359987673547e-06, | |
| "loss": 0.5759, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 1.7613240418118465, | |
| "grad_norm": 0.5248586938914367, | |
| "learning_rate": 4.346109061656196e-06, | |
| "loss": 0.5859, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 1.7665505226480835, | |
| "grad_norm": 0.5323054479309529, | |
| "learning_rate": 4.31588246828045e-06, | |
| "loss": 0.6084, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 1.7717770034843205, | |
| "grad_norm": 0.47627566350608475, | |
| "learning_rate": 4.285681332340708e-06, | |
| "loss": 0.5706, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 1.7770034843205575, | |
| "grad_norm": 0.5358815728520783, | |
| "learning_rate": 4.255506777684041e-06, | |
| "loss": 0.5904, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 1.7822299651567945, | |
| "grad_norm": 0.48425524407967563, | |
| "learning_rate": 4.225359927168379e-06, | |
| "loss": 0.5667, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 1.7874564459930313, | |
| "grad_norm": 0.5596432588171234, | |
| "learning_rate": 4.195241902620721e-06, | |
| "loss": 0.6049, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 1.7926829268292683, | |
| "grad_norm": 0.5440361119871323, | |
| "learning_rate": 4.165153824795391e-06, | |
| "loss": 0.6078, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 1.797909407665505, | |
| "grad_norm": 0.4826979180979176, | |
| "learning_rate": 4.135096813332333e-06, | |
| "loss": 0.588, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 1.8031358885017421, | |
| "grad_norm": 0.5397256499018573, | |
| "learning_rate": 4.105071986715449e-06, | |
| "loss": 0.6015, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 1.8083623693379791, | |
| "grad_norm": 0.4510163396115861, | |
| "learning_rate": 4.075080462230977e-06, | |
| "loss": 0.5697, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 1.8135888501742161, | |
| "grad_norm": 0.5218309859466103, | |
| "learning_rate": 4.04512335592591e-06, | |
| "loss": 0.5698, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 1.8188153310104531, | |
| "grad_norm": 0.5100313135625644, | |
| "learning_rate": 4.015201782566471e-06, | |
| "loss": 0.5787, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 1.82404181184669, | |
| "grad_norm": 0.498790862195705, | |
| "learning_rate": 3.985316855596627e-06, | |
| "loss": 0.6, | |
| "step": 349 | |
| }, | |
| { | |
| "epoch": 1.8292682926829267, | |
| "grad_norm": 0.4754481818294577, | |
| "learning_rate": 3.955469687096657e-06, | |
| "loss": 0.5759, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 1.8344947735191637, | |
| "grad_norm": 0.5366460340127677, | |
| "learning_rate": 3.9256613877417715e-06, | |
| "loss": 0.6505, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 1.8397212543554007, | |
| "grad_norm": 0.48550128940070797, | |
| "learning_rate": 3.895893066760774e-06, | |
| "loss": 0.5581, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 1.8449477351916377, | |
| "grad_norm": 0.4645102288396099, | |
| "learning_rate": 3.866165831894796e-06, | |
| "loss": 0.6126, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 1.8501742160278747, | |
| "grad_norm": 0.5483359900678575, | |
| "learning_rate": 3.836480789356063e-06, | |
| "loss": 0.6353, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 1.8554006968641115, | |
| "grad_norm": 0.4726139182789402, | |
| "learning_rate": 3.806839043786738e-06, | |
| "loss": 0.5625, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 1.8606271777003485, | |
| "grad_norm": 0.46997478052113384, | |
| "learning_rate": 3.777241698217819e-06, | |
| "loss": 0.5839, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 1.8658536585365852, | |
| "grad_norm": 0.487480595451344, | |
| "learning_rate": 3.747689854028081e-06, | |
| "loss": 0.596, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 1.8710801393728222, | |
| "grad_norm": 0.4691428215776054, | |
| "learning_rate": 3.7181846109031007e-06, | |
| "loss": 0.5786, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 1.8763066202090593, | |
| "grad_norm": 0.49771800498221574, | |
| "learning_rate": 3.6887270667943285e-06, | |
| "loss": 0.5763, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 1.8815331010452963, | |
| "grad_norm": 0.48008730260324617, | |
| "learning_rate": 3.6593183178782454e-06, | |
| "loss": 0.6022, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.8867595818815333, | |
| "grad_norm": 0.49688177136494294, | |
| "learning_rate": 3.6299594585155513e-06, | |
| "loss": 0.5949, | |
| "step": 361 | |
| }, | |
| { | |
| "epoch": 1.89198606271777, | |
| "grad_norm": 0.48272168297238965, | |
| "learning_rate": 3.600651581210457e-06, | |
| "loss": 0.5868, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 1.8972125435540068, | |
| "grad_norm": 0.4282471284231044, | |
| "learning_rate": 3.5713957765700224e-06, | |
| "loss": 0.5522, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 1.9024390243902438, | |
| "grad_norm": 0.49857423331197087, | |
| "learning_rate": 3.542193133263576e-06, | |
| "loss": 0.6013, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 1.9076655052264808, | |
| "grad_norm": 0.5051671065894443, | |
| "learning_rate": 3.5130447379822076e-06, | |
| "loss": 0.5448, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 1.9128919860627178, | |
| "grad_norm": 0.49126598359477613, | |
| "learning_rate": 3.483951675398316e-06, | |
| "loss": 0.6007, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 1.9181184668989548, | |
| "grad_norm": 0.5076571957591817, | |
| "learning_rate": 3.4549150281252635e-06, | |
| "loss": 0.5685, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 1.9233449477351916, | |
| "grad_norm": 0.512416353580941, | |
| "learning_rate": 3.425935876677077e-06, | |
| "loss": 0.5857, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.9285714285714286, | |
| "grad_norm": 0.5368818226681046, | |
| "learning_rate": 3.397015299428242e-06, | |
| "loss": 0.5809, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 1.9337979094076654, | |
| "grad_norm": 0.5120830184486495, | |
| "learning_rate": 3.3681543725735843e-06, | |
| "loss": 0.6067, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.9390243902439024, | |
| "grad_norm": 0.4785710907485341, | |
| "learning_rate": 3.339354170088207e-06, | |
| "loss": 0.5398, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 1.9442508710801394, | |
| "grad_norm": 0.45732087968552365, | |
| "learning_rate": 3.3106157636875356e-06, | |
| "loss": 0.5702, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 1.9494773519163764, | |
| "grad_norm": 0.49177652612113, | |
| "learning_rate": 3.2819402227874364e-06, | |
| "loss": 0.5925, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 1.9547038327526134, | |
| "grad_norm": 0.522898733840705, | |
| "learning_rate": 3.253328614464414e-06, | |
| "loss": 0.6521, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 1.9599303135888502, | |
| "grad_norm": 0.4962984913773303, | |
| "learning_rate": 3.2247820034159182e-06, | |
| "loss": 0.5637, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 1.965156794425087, | |
| "grad_norm": 0.48961258731018903, | |
| "learning_rate": 3.196301451920708e-06, | |
| "loss": 0.6078, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 1.970383275261324, | |
| "grad_norm": 0.516946862548738, | |
| "learning_rate": 3.16788801979933e-06, | |
| "loss": 0.614, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 1.975609756097561, | |
| "grad_norm": 0.49422567410473595, | |
| "learning_rate": 3.1395427643746802e-06, | |
| "loss": 0.5407, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 1.980836236933798, | |
| "grad_norm": 0.5096634642555786, | |
| "learning_rate": 3.111266740432658e-06, | |
| "loss": 0.5792, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 1.986062717770035, | |
| "grad_norm": 0.5002493838245908, | |
| "learning_rate": 3.0830610001829173e-06, | |
| "loss": 0.5552, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.9912891986062717, | |
| "grad_norm": 0.4634141103131062, | |
| "learning_rate": 3.0549265932197055e-06, | |
| "loss": 0.5474, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 1.9965156794425087, | |
| "grad_norm": 0.49968954452588915, | |
| "learning_rate": 3.026864566482813e-06, | |
| "loss": 0.5926, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 2.0017421602787455, | |
| "grad_norm": 0.9566301326202498, | |
| "learning_rate": 2.99887596421861e-06, | |
| "loss": 0.9583, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 2.0069686411149825, | |
| "grad_norm": 0.4819191529898956, | |
| "learning_rate": 2.9709618279411922e-06, | |
| "loss": 0.4765, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 2.0121951219512195, | |
| "grad_norm": 0.5177350268902499, | |
| "learning_rate": 2.9431231963936176e-06, | |
| "loss": 0.5513, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 2.0174216027874565, | |
| "grad_norm": 0.49571084166916257, | |
| "learning_rate": 2.9153611055092586e-06, | |
| "loss": 0.5568, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 2.0226480836236935, | |
| "grad_norm": 0.46758191028533336, | |
| "learning_rate": 2.8876765883732447e-06, | |
| "loss": 0.5378, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 2.0278745644599305, | |
| "grad_norm": 0.5062906288655116, | |
| "learning_rate": 2.860070675184036e-06, | |
| "loss": 0.5742, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 2.033101045296167, | |
| "grad_norm": 0.48782738802655556, | |
| "learning_rate": 2.8325443932150646e-06, | |
| "loss": 0.5343, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 2.038327526132404, | |
| "grad_norm": 0.5414387332438313, | |
| "learning_rate": 2.805098766776529e-06, | |
| "loss": 0.5849, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 2.043554006968641, | |
| "grad_norm": 0.4642400730103952, | |
| "learning_rate": 2.7777348171772604e-06, | |
| "loss": 0.5105, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 2.048780487804878, | |
| "grad_norm": 0.5304251807046266, | |
| "learning_rate": 2.750453562686729e-06, | |
| "loss": 0.5143, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 2.054006968641115, | |
| "grad_norm": 0.5576744024363436, | |
| "learning_rate": 2.7232560184971437e-06, | |
| "loss": 0.5839, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 2.059233449477352, | |
| "grad_norm": 0.45314772448223345, | |
| "learning_rate": 2.6961431966856866e-06, | |
| "loss": 0.5233, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 2.0644599303135887, | |
| "grad_norm": 0.4577070909297848, | |
| "learning_rate": 2.669116106176838e-06, | |
| "loss": 0.5494, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 2.0696864111498257, | |
| "grad_norm": 0.4395553331677893, | |
| "learning_rate": 2.6421757527048373e-06, | |
| "loss": 0.5097, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 2.0749128919860627, | |
| "grad_norm": 0.5287787777736306, | |
| "learning_rate": 2.615323138776264e-06, | |
| "loss": 0.5719, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 2.0801393728222997, | |
| "grad_norm": 0.5093454684767568, | |
| "learning_rate": 2.588559263632719e-06, | |
| "loss": 0.5265, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 2.0853658536585367, | |
| "grad_norm": 0.5080566318736497, | |
| "learning_rate": 2.5618851232136576e-06, | |
| "loss": 0.5772, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 2.0905923344947737, | |
| "grad_norm": 0.4499734335969841, | |
| "learning_rate": 2.5353017101193124e-06, | |
| "loss": 0.4555, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 2.0958188153310107, | |
| "grad_norm": 0.5291566057629548, | |
| "learning_rate": 2.508810013573767e-06, | |
| "loss": 0.5643, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 2.1010452961672472, | |
| "grad_norm": 0.46824151574429873, | |
| "learning_rate": 2.4824110193881384e-06, | |
| "loss": 0.5065, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 2.1062717770034842, | |
| "grad_norm": 0.4617359511491741, | |
| "learning_rate": 2.4561057099238973e-06, | |
| "loss": 0.563, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 2.1114982578397212, | |
| "grad_norm": 0.4697636992836946, | |
| "learning_rate": 2.4298950640563155e-06, | |
| "loss": 0.5056, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 2.1167247386759582, | |
| "grad_norm": 0.48460609918415953, | |
| "learning_rate": 2.4037800571380297e-06, | |
| "loss": 0.5517, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 2.1219512195121952, | |
| "grad_norm": 0.465322807158819, | |
| "learning_rate": 2.377761660962754e-06, | |
| "loss": 0.5764, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 2.1271777003484322, | |
| "grad_norm": 0.49593267777356703, | |
| "learning_rate": 2.3518408437291155e-06, | |
| "loss": 0.5357, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 2.132404181184669, | |
| "grad_norm": 0.48746378777099386, | |
| "learning_rate": 2.3260185700046295e-06, | |
| "loss": 0.5321, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 2.137630662020906, | |
| "grad_norm": 0.46071947103740635, | |
| "learning_rate": 2.3002958006897985e-06, | |
| "loss": 0.5468, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 2.142857142857143, | |
| "grad_norm": 0.440111968230906, | |
| "learning_rate": 2.2746734929823596e-06, | |
| "loss": 0.531, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 2.14808362369338, | |
| "grad_norm": 0.4575672027883791, | |
| "learning_rate": 2.249152600341665e-06, | |
| "loss": 0.52, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 2.153310104529617, | |
| "grad_norm": 0.5283338120770418, | |
| "learning_rate": 2.2237340724532007e-06, | |
| "loss": 0.6015, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 2.158536585365854, | |
| "grad_norm": 0.4532501199512347, | |
| "learning_rate": 2.1984188551932513e-06, | |
| "loss": 0.4911, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 2.1637630662020904, | |
| "grad_norm": 0.5151201704194432, | |
| "learning_rate": 2.173207890593693e-06, | |
| "loss": 0.5519, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 2.1689895470383274, | |
| "grad_norm": 0.4813092159395286, | |
| "learning_rate": 2.148102116806944e-06, | |
| "loss": 0.531, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 2.1742160278745644, | |
| "grad_norm": 0.47141365400566, | |
| "learning_rate": 2.123102468071058e-06, | |
| "loss": 0.5249, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 2.1794425087108014, | |
| "grad_norm": 0.4935502426085609, | |
| "learning_rate": 2.0982098746749524e-06, | |
| "loss": 0.5261, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 2.1846689895470384, | |
| "grad_norm": 0.5305754761741215, | |
| "learning_rate": 2.0734252629237892e-06, | |
| "loss": 0.609, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 2.1898954703832754, | |
| "grad_norm": 0.482899473961335, | |
| "learning_rate": 2.048749555104516e-06, | |
| "loss": 0.5314, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 2.1951219512195124, | |
| "grad_norm": 0.44109993576183376, | |
| "learning_rate": 2.0241836694515338e-06, | |
| "loss": 0.54, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 2.200348432055749, | |
| "grad_norm": 0.4541137377252633, | |
| "learning_rate": 1.9997285201125328e-06, | |
| "loss": 0.5182, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 2.205574912891986, | |
| "grad_norm": 0.47179876294255096, | |
| "learning_rate": 1.975385017114473e-06, | |
| "loss": 0.5786, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 2.210801393728223, | |
| "grad_norm": 0.49973970600486517, | |
| "learning_rate": 1.9511540663297284e-06, | |
| "loss": 0.5183, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 2.21602787456446, | |
| "grad_norm": 0.46356554425956825, | |
| "learning_rate": 1.9270365694423654e-06, | |
| "loss": 0.5159, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 2.221254355400697, | |
| "grad_norm": 0.5241753937590147, | |
| "learning_rate": 1.903033423914596e-06, | |
| "loss": 0.5957, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 2.226480836236934, | |
| "grad_norm": 0.44148518090830996, | |
| "learning_rate": 1.8791455229533806e-06, | |
| "loss": 0.4488, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 2.231707317073171, | |
| "grad_norm": 0.5038639568195054, | |
| "learning_rate": 1.8553737554771883e-06, | |
| "loss": 0.6349, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 2.2369337979094075, | |
| "grad_norm": 0.49606752296755446, | |
| "learning_rate": 1.8317190060829242e-06, | |
| "loss": 0.5089, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 2.2421602787456445, | |
| "grad_norm": 0.45579655008006387, | |
| "learning_rate": 1.808182155013003e-06, | |
| "loss": 0.4899, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 2.2473867595818815, | |
| "grad_norm": 0.464962788957996, | |
| "learning_rate": 1.7847640781225982e-06, | |
| "loss": 0.5486, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 2.2526132404181185, | |
| "grad_norm": 0.4984600881906674, | |
| "learning_rate": 1.7614656468470508e-06, | |
| "loss": 0.6217, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 2.2578397212543555, | |
| "grad_norm": 0.4386510268154617, | |
| "learning_rate": 1.7382877281694355e-06, | |
| "loss": 0.5065, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 2.2630662020905925, | |
| "grad_norm": 0.4628957678602141, | |
| "learning_rate": 1.7152311845883096e-06, | |
| "loss": 0.5339, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 2.2682926829268295, | |
| "grad_norm": 0.4511882273309374, | |
| "learning_rate": 1.692296874085605e-06, | |
| "loss": 0.5833, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 2.273519163763066, | |
| "grad_norm": 0.4423489129104317, | |
| "learning_rate": 1.6694856500947081e-06, | |
| "loss": 0.5414, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 2.278745644599303, | |
| "grad_norm": 0.4278102250494057, | |
| "learning_rate": 1.6467983614686995e-06, | |
| "loss": 0.5291, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 2.28397212543554, | |
| "grad_norm": 0.4899503913317031, | |
| "learning_rate": 1.62423585244877e-06, | |
| "loss": 0.5207, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 2.289198606271777, | |
| "grad_norm": 0.45314192403359066, | |
| "learning_rate": 1.601798962632799e-06, | |
| "loss": 0.5233, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 2.294425087108014, | |
| "grad_norm": 0.4675516971595869, | |
| "learning_rate": 1.5794885269441152e-06, | |
| "loss": 0.5789, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 2.2996515679442506, | |
| "grad_norm": 0.4684838825648316, | |
| "learning_rate": 1.5573053756004253e-06, | |
| "loss": 0.5447, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 2.3048780487804876, | |
| "grad_norm": 0.4742515090348089, | |
| "learning_rate": 1.53525033408292e-06, | |
| "loss": 0.5522, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 2.3101045296167246, | |
| "grad_norm": 0.4255877001891176, | |
| "learning_rate": 1.5133242231055622e-06, | |
| "loss": 0.52, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 2.3153310104529616, | |
| "grad_norm": 0.4703877181502192, | |
| "learning_rate": 1.491527858584535e-06, | |
| "loss": 0.5629, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 2.3205574912891986, | |
| "grad_norm": 0.4564707024075428, | |
| "learning_rate": 1.4698620516078882e-06, | |
| "loss": 0.5899, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 2.3257839721254356, | |
| "grad_norm": 0.4393071035962851, | |
| "learning_rate": 1.4483276084053567e-06, | |
| "loss": 0.514, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 2.3310104529616726, | |
| "grad_norm": 0.4382243891353936, | |
| "learning_rate": 1.4269253303183516e-06, | |
| "loss": 0.5766, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 2.3362369337979096, | |
| "grad_norm": 0.46207515860487863, | |
| "learning_rate": 1.4056560137701469e-06, | |
| "loss": 0.492, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 2.341463414634146, | |
| "grad_norm": 0.4779708014243619, | |
| "learning_rate": 1.3845204502362442e-06, | |
| "loss": 0.5503, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 2.346689895470383, | |
| "grad_norm": 0.4446567240946519, | |
| "learning_rate": 1.3635194262149131e-06, | |
| "loss": 0.5173, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 2.35191637630662, | |
| "grad_norm": 0.5194569385386936, | |
| "learning_rate": 1.3426537231979309e-06, | |
| "loss": 0.5464, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 2.357142857142857, | |
| "grad_norm": 0.47302377318250843, | |
| "learning_rate": 1.3219241176414948e-06, | |
| "loss": 0.5341, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 2.362369337979094, | |
| "grad_norm": 0.4316213228653094, | |
| "learning_rate": 1.3013313809373396e-06, | |
| "loss": 0.5283, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 2.3675958188153308, | |
| "grad_norm": 0.4347857005792825, | |
| "learning_rate": 1.28087627938402e-06, | |
| "loss": 0.5103, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 2.3728222996515678, | |
| "grad_norm": 0.4599959414145429, | |
| "learning_rate": 1.2605595741584015e-06, | |
| "loss": 0.5989, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 2.3780487804878048, | |
| "grad_norm": 0.4477421618895101, | |
| "learning_rate": 1.2403820212873347e-06, | |
| "loss": 0.5247, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 2.3832752613240418, | |
| "grad_norm": 0.46194912531798293, | |
| "learning_rate": 1.2203443716195213e-06, | |
| "loss": 0.547, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 2.3885017421602788, | |
| "grad_norm": 0.48246695872510925, | |
| "learning_rate": 1.200447370797576e-06, | |
| "loss": 0.5319, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 2.3937282229965158, | |
| "grad_norm": 0.44364446337995106, | |
| "learning_rate": 1.1806917592302763e-06, | |
| "loss": 0.5079, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 2.3989547038327528, | |
| "grad_norm": 0.45366308339292766, | |
| "learning_rate": 1.16107827206501e-06, | |
| "loss": 0.528, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 2.40418118466899, | |
| "grad_norm": 0.44739930129966016, | |
| "learning_rate": 1.1416076391604197e-06, | |
| "loss": 0.5351, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 2.4094076655052263, | |
| "grad_norm": 0.4842777825049656, | |
| "learning_rate": 1.12228058505925e-06, | |
| "loss": 0.5459, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 2.4146341463414633, | |
| "grad_norm": 0.46656539985547907, | |
| "learning_rate": 1.1030978289613725e-06, | |
| "loss": 0.5377, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 2.4198606271777003, | |
| "grad_norm": 0.4434636263867975, | |
| "learning_rate": 1.0840600846970333e-06, | |
| "loss": 0.5494, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 2.4250871080139373, | |
| "grad_norm": 0.42991245492599656, | |
| "learning_rate": 1.0651680607002861e-06, | |
| "loss": 0.5372, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 2.4303135888501743, | |
| "grad_norm": 0.47167305733035525, | |
| "learning_rate": 1.0464224599826301e-06, | |
| "loss": 0.5127, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 2.435540069686411, | |
| "grad_norm": 0.460998662886707, | |
| "learning_rate": 1.0278239801068518e-06, | |
| "loss": 0.5137, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 2.440766550522648, | |
| "grad_norm": 0.5048182621907691, | |
| "learning_rate": 1.0093733131610621e-06, | |
| "loss": 0.6034, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 2.445993031358885, | |
| "grad_norm": 0.46718193125046437, | |
| "learning_rate": 9.91071145732948e-07, | |
| "loss": 0.5201, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 2.451219512195122, | |
| "grad_norm": 0.40697553295642236, | |
| "learning_rate": 9.729181588842184e-07, | |
| "loss": 0.4763, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 2.456445993031359, | |
| "grad_norm": 0.4282850894981929, | |
| "learning_rate": 9.549150281252633e-07, | |
| "loss": 0.5551, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 2.461672473867596, | |
| "grad_norm": 0.4425949078660112, | |
| "learning_rate": 9.370624233900183e-07, | |
| "loss": 0.509, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 2.466898954703833, | |
| "grad_norm": 0.43823474167181586, | |
| "learning_rate": 9.193610090110305e-07, | |
| "loss": 0.5605, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 2.47212543554007, | |
| "grad_norm": 0.45300520608343586, | |
| "learning_rate": 9.018114436947373e-07, | |
| "loss": 0.5394, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 2.4773519163763065, | |
| "grad_norm": 0.4830008293506701, | |
| "learning_rate": 8.844143804969624e-07, | |
| "loss": 0.5614, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 2.4825783972125435, | |
| "grad_norm": 0.4313889868995408, | |
| "learning_rate": 8.671704667986036e-07, | |
| "loss": 0.495, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 2.4878048780487805, | |
| "grad_norm": 0.4652585907754659, | |
| "learning_rate": 8.500803442815475e-07, | |
| "loss": 0.5337, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 2.4930313588850175, | |
| "grad_norm": 0.4434579349259814, | |
| "learning_rate": 8.331446489047956e-07, | |
| "loss": 0.5432, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 2.4982578397212545, | |
| "grad_norm": 0.4260838162732741, | |
| "learning_rate": 8.163640108807897e-07, | |
| "loss": 0.5526, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 2.503484320557491, | |
| "grad_norm": 0.41851893716167266, | |
| "learning_rate": 7.997390546519668e-07, | |
| "loss": 0.5274, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 2.508710801393728, | |
| "grad_norm": 0.43352778515121865, | |
| "learning_rate": 7.832703988675195e-07, | |
| "loss": 0.5498, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 2.513937282229965, | |
| "grad_norm": 0.45074130711852517, | |
| "learning_rate": 7.669586563603782e-07, | |
| "loss": 0.5531, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 2.519163763066202, | |
| "grad_norm": 0.45864866148701783, | |
| "learning_rate": 7.508044341244014e-07, | |
| "loss": 0.5914, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 2.524390243902439, | |
| "grad_norm": 0.43380289787943704, | |
| "learning_rate": 7.348083332917927e-07, | |
| "loss": 0.5302, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 2.529616724738676, | |
| "grad_norm": 0.44541179316397, | |
| "learning_rate": 7.189709491107272e-07, | |
| "loss": 0.5103, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 2.534843205574913, | |
| "grad_norm": 0.42812608402735486, | |
| "learning_rate": 7.032928709232062e-07, | |
| "loss": 0.5131, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 2.54006968641115, | |
| "grad_norm": 0.4641631244885751, | |
| "learning_rate": 6.87774682143122e-07, | |
| "loss": 0.5406, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 2.5452961672473866, | |
| "grad_norm": 0.46466095362861476, | |
| "learning_rate": 6.724169602345487e-07, | |
| "loss": 0.5524, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 2.5505226480836236, | |
| "grad_norm": 0.44159109445526523, | |
| "learning_rate": 6.572202766902569e-07, | |
| "loss": 0.5107, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 2.5557491289198606, | |
| "grad_norm": 0.43095228829120374, | |
| "learning_rate": 6.421851970104409e-07, | |
| "loss": 0.5465, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 2.5609756097560976, | |
| "grad_norm": 0.4398781503440029, | |
| "learning_rate": 6.273122806816845e-07, | |
| "loss": 0.5306, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 2.5662020905923346, | |
| "grad_norm": 0.455532997496077, | |
| "learning_rate": 6.126020811561323e-07, | |
| "loss": 0.5881, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 2.571428571428571, | |
| "grad_norm": 0.3948516059085693, | |
| "learning_rate": 5.980551458308981e-07, | |
| "loss": 0.4535, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 2.576655052264808, | |
| "grad_norm": 0.4009473999258072, | |
| "learning_rate": 5.836720160276971e-07, | |
| "loss": 0.5696, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 2.581881533101045, | |
| "grad_norm": 0.4065638987549273, | |
| "learning_rate": 5.694532269726977e-07, | |
| "loss": 0.5412, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 2.587108013937282, | |
| "grad_norm": 0.4539932581334291, | |
| "learning_rate": 5.553993077766124e-07, | |
| "loss": 0.5886, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 2.592334494773519, | |
| "grad_norm": 0.42936026661045573, | |
| "learning_rate": 5.415107814149978e-07, | |
| "loss": 0.519, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 2.597560975609756, | |
| "grad_norm": 0.4335795734646765, | |
| "learning_rate": 5.277881647088024e-07, | |
| "loss": 0.5866, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 2.602787456445993, | |
| "grad_norm": 0.44871188719474847, | |
| "learning_rate": 5.1423196830513e-07, | |
| "loss": 0.5824, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 2.60801393728223, | |
| "grad_norm": 0.4649294419801843, | |
| "learning_rate": 5.008426966582386e-07, | |
| "loss": 0.5086, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 2.6132404181184667, | |
| "grad_norm": 0.47884736570244324, | |
| "learning_rate": 4.87620848010772e-07, | |
| "loss": 0.5853, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.6184668989547037, | |
| "grad_norm": 0.4070247906117974, | |
| "learning_rate": 4.7456691437521243e-07, | |
| "loss": 0.4705, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 2.6236933797909407, | |
| "grad_norm": 0.44228619733522523, | |
| "learning_rate": 4.6168138151557516e-07, | |
| "loss": 0.6052, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 2.6289198606271778, | |
| "grad_norm": 0.4186718748315985, | |
| "learning_rate": 4.4896472892933693e-07, | |
| "loss": 0.5156, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 2.6341463414634148, | |
| "grad_norm": 0.41927468655653893, | |
| "learning_rate": 4.3641742982958203e-07, | |
| "loss": 0.5552, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 2.6393728222996513, | |
| "grad_norm": 0.3807275422714154, | |
| "learning_rate": 4.240399511274057e-07, | |
| "loss": 0.4874, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 2.6445993031358883, | |
| "grad_norm": 0.43043158611193255, | |
| "learning_rate": 4.118327534145278e-07, | |
| "loss": 0.5473, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 2.6498257839721253, | |
| "grad_norm": 0.4427511902610817, | |
| "learning_rate": 3.997962909461611e-07, | |
| "loss": 0.5476, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 2.6550522648083623, | |
| "grad_norm": 0.4287387407398484, | |
| "learning_rate": 3.8793101162410417e-07, | |
| "loss": 0.515, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 2.6602787456445993, | |
| "grad_norm": 0.4581450049667044, | |
| "learning_rate": 3.762373569800737e-07, | |
| "loss": 0.5736, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 2.6655052264808363, | |
| "grad_norm": 0.45437023578463565, | |
| "learning_rate": 3.6471576215927897e-07, | |
| "loss": 0.5301, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 2.6707317073170733, | |
| "grad_norm": 0.4205454157334176, | |
| "learning_rate": 3.5336665590422147e-07, | |
| "loss": 0.5274, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 2.6759581881533103, | |
| "grad_norm": 0.42921462391644427, | |
| "learning_rate": 3.4219046053874604e-07, | |
| "loss": 0.5453, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 2.681184668989547, | |
| "grad_norm": 0.4233533929984576, | |
| "learning_rate": 3.3118759195232273e-07, | |
| "loss": 0.5573, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 2.686411149825784, | |
| "grad_norm": 0.3919672353563574, | |
| "learning_rate": 3.2035845958457324e-07, | |
| "loss": 0.5181, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 2.691637630662021, | |
| "grad_norm": 0.47372027451981813, | |
| "learning_rate": 3.0970346641003346e-07, | |
| "loss": 0.5436, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 2.696864111498258, | |
| "grad_norm": 0.4396594930081907, | |
| "learning_rate": 2.9922300892315517e-07, | |
| "loss": 0.5475, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 2.702090592334495, | |
| "grad_norm": 0.4473565138284573, | |
| "learning_rate": 2.88917477123557e-07, | |
| "loss": 0.5559, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 2.7073170731707314, | |
| "grad_norm": 0.4024822510249766, | |
| "learning_rate": 2.787872545015069e-07, | |
| "loss": 0.5059, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 2.7125435540069684, | |
| "grad_norm": 0.4665623873342051, | |
| "learning_rate": 2.6883271802365606e-07, | |
| "loss": 0.5585, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 2.7177700348432055, | |
| "grad_norm": 0.42854221934756087, | |
| "learning_rate": 2.5905423811900755e-07, | |
| "loss": 0.5104, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 2.7229965156794425, | |
| "grad_norm": 0.4286908922250654, | |
| "learning_rate": 2.4945217866513274e-07, | |
| "loss": 0.5518, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 2.7282229965156795, | |
| "grad_norm": 0.44386460142096057, | |
| "learning_rate": 2.4002689697463224e-07, | |
| "loss": 0.5305, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 2.7334494773519165, | |
| "grad_norm": 0.43745356189843465, | |
| "learning_rate": 2.307787437818365e-07, | |
| "loss": 0.5579, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 2.7386759581881535, | |
| "grad_norm": 0.4192061612455892, | |
| "learning_rate": 2.2170806322976023e-07, | |
| "loss": 0.5133, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 2.7439024390243905, | |
| "grad_norm": 0.44288601777891073, | |
| "learning_rate": 2.1281519285728803e-07, | |
| "loss": 0.5837, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 2.749128919860627, | |
| "grad_norm": 0.4161039648633803, | |
| "learning_rate": 2.041004635866195e-07, | |
| "loss": 0.5105, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 2.754355400696864, | |
| "grad_norm": 0.4294055964727348, | |
| "learning_rate": 1.9556419971095363e-07, | |
| "loss": 0.5319, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 2.759581881533101, | |
| "grad_norm": 0.4358089354550434, | |
| "learning_rate": 1.8720671888242058e-07, | |
| "loss": 0.4988, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 2.764808362369338, | |
| "grad_norm": 0.43788412237975727, | |
| "learning_rate": 1.7902833210026228e-07, | |
| "loss": 0.546, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 2.770034843205575, | |
| "grad_norm": 0.45097033105165757, | |
| "learning_rate": 1.710293436992566e-07, | |
| "loss": 0.5696, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 2.7752613240418116, | |
| "grad_norm": 0.4138845338967673, | |
| "learning_rate": 1.6321005133839884e-07, | |
| "loss": 0.5316, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 2.7804878048780486, | |
| "grad_norm": 0.4202899606511126, | |
| "learning_rate": 1.5557074598981593e-07, | |
| "loss": 0.5301, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 2.7857142857142856, | |
| "grad_norm": 0.42666653234169716, | |
| "learning_rate": 1.4811171192794628e-07, | |
| "loss": 0.5697, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 2.7909407665505226, | |
| "grad_norm": 0.37877132072819447, | |
| "learning_rate": 1.4083322671896048e-07, | |
| "loss": 0.4895, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 2.7961672473867596, | |
| "grad_norm": 0.4422175332427632, | |
| "learning_rate": 1.337355612104274e-07, | |
| "loss": 0.5793, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 2.8013937282229966, | |
| "grad_norm": 0.4141363458191714, | |
| "learning_rate": 1.2681897952124046e-07, | |
| "loss": 0.5057, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 2.8066202090592336, | |
| "grad_norm": 0.4469656727146587, | |
| "learning_rate": 1.2008373903178828e-07, | |
| "loss": 0.5524, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 2.8118466898954706, | |
| "grad_norm": 0.42707047626049666, | |
| "learning_rate": 1.1353009037437523e-07, | |
| "loss": 0.4922, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 2.817073170731707, | |
| "grad_norm": 0.45141362991328465, | |
| "learning_rate": 1.0715827742389717e-07, | |
| "loss": 0.5724, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 2.822299651567944, | |
| "grad_norm": 0.45003668834699156, | |
| "learning_rate": 1.0096853728876365e-07, | |
| "loss": 0.4959, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 2.827526132404181, | |
| "grad_norm": 0.40703793301322555, | |
| "learning_rate": 9.496110030207673e-08, | |
| "loss": 0.5187, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 2.832752613240418, | |
| "grad_norm": 0.4349893646514051, | |
| "learning_rate": 8.913619001305995e-08, | |
| "loss": 0.5413, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 2.837979094076655, | |
| "grad_norm": 0.4413261937466953, | |
| "learning_rate": 8.34940231787379e-08, | |
| "loss": 0.5597, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 2.8432055749128917, | |
| "grad_norm": 0.4219769036035547, | |
| "learning_rate": 7.803480975587197e-08, | |
| "loss": 0.542, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 2.8484320557491287, | |
| "grad_norm": 0.4673046785235365, | |
| "learning_rate": 7.275875289314616e-08, | |
| "loss": 0.5495, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 2.8536585365853657, | |
| "grad_norm": 0.45105426206164123, | |
| "learning_rate": 6.766604892360751e-08, | |
| "loss": 0.5724, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 2.8588850174216027, | |
| "grad_norm": 0.43511636060314596, | |
| "learning_rate": 6.275688735736141e-08, | |
| "loss": 0.5257, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 2.8641114982578397, | |
| "grad_norm": 0.43349086279126203, | |
| "learning_rate": 5.803145087451945e-08, | |
| "loss": 0.5131, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 2.8693379790940767, | |
| "grad_norm": 0.4537611808145007, | |
| "learning_rate": 5.348991531839875e-08, | |
| "loss": 0.5607, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 2.8745644599303137, | |
| "grad_norm": 0.4439279551647372, | |
| "learning_rate": 4.913244968898279e-08, | |
| "loss": 0.5056, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 2.8797909407665507, | |
| "grad_norm": 0.4411930978992959, | |
| "learning_rate": 4.495921613662746e-08, | |
| "loss": 0.5415, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 2.8850174216027873, | |
| "grad_norm": 0.4436199780022762, | |
| "learning_rate": 4.097036995603321e-08, | |
| "loss": 0.5205, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 2.8902439024390243, | |
| "grad_norm": 0.42509245773578536, | |
| "learning_rate": 3.716605958046071e-08, | |
| "loss": 0.5259, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 2.8954703832752613, | |
| "grad_norm": 0.43131172360952724, | |
| "learning_rate": 3.354642657621032e-08, | |
| "loss": 0.604, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 2.9006968641114983, | |
| "grad_norm": 0.4133105200801449, | |
| "learning_rate": 3.011160563735349e-08, | |
| "loss": 0.4883, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 2.9059233449477353, | |
| "grad_norm": 0.4391744276562499, | |
| "learning_rate": 2.6861724580719562e-08, | |
| "loss": 0.5621, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 2.911149825783972, | |
| "grad_norm": 0.4391177361243526, | |
| "learning_rate": 2.3796904341141236e-08, | |
| "loss": 0.5334, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 2.916376306620209, | |
| "grad_norm": 0.4214891944016137, | |
| "learning_rate": 2.0917258966953735e-08, | |
| "loss": 0.553, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 2.921602787456446, | |
| "grad_norm": 0.4014499901153732, | |
| "learning_rate": 1.8222895615748748e-08, | |
| "loss": 0.5358, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 2.926829268292683, | |
| "grad_norm": 0.4454567928346748, | |
| "learning_rate": 1.5713914550389843e-08, | |
| "loss": 0.5405, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 2.93205574912892, | |
| "grad_norm": 0.4178833466790727, | |
| "learning_rate": 1.3390409135281001e-08, | |
| "loss": 0.5513, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 2.937282229965157, | |
| "grad_norm": 0.4457705249971793, | |
| "learning_rate": 1.125246583288886e-08, | |
| "loss": 0.5245, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 2.942508710801394, | |
| "grad_norm": 0.4645928048601542, | |
| "learning_rate": 9.300164200530815e-09, | |
| "loss": 0.5701, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 2.947735191637631, | |
| "grad_norm": 0.4563271003769357, | |
| "learning_rate": 7.533576887410188e-09, | |
| "loss": 0.4999, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 2.952961672473868, | |
| "grad_norm": 0.39820608811758146, | |
| "learning_rate": 5.95276963191449e-09, | |
| "loss": 0.5704, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 2.9581881533101044, | |
| "grad_norm": 0.4590404415892991, | |
| "learning_rate": 4.557801259169048e-09, | |
| "loss": 0.4995, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 2.9634146341463414, | |
| "grad_norm": 0.455741641646435, | |
| "learning_rate": 3.348723678847643e-09, | |
| "loss": 0.5569, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 2.9686411149825784, | |
| "grad_norm": 0.4914791262393272, | |
| "learning_rate": 2.3255818832423894e-09, | |
| "loss": 0.5232, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 2.9738675958188154, | |
| "grad_norm": 0.4310628707346215, | |
| "learning_rate": 1.4884139455861868e-09, | |
| "loss": 0.5572, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 2.979094076655052, | |
| "grad_norm": 0.43105949566502877, | |
| "learning_rate": 8.372510186388516e-10, | |
| "loss": 0.5603, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 2.984320557491289, | |
| "grad_norm": 0.3988129414746118, | |
| "learning_rate": 3.7211733352748856e-10, | |
| "loss": 0.4893, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 2.989547038327526, | |
| "grad_norm": 0.4326905501521538, | |
| "learning_rate": 9.303019884387976e-11, | |
| "loss": 0.5622, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 2.994773519163763, | |
| "grad_norm": 0.4266940308017901, | |
| "learning_rate": 0.0, | |
| "loss": 0.4927, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 2.994773519163763, | |
| "step": 573, | |
| "total_flos": 520358168002560.0, | |
| "train_loss": 0.6214590458553707, | |
| "train_runtime": 8980.1589, | |
| "train_samples_per_second": 6.133, | |
| "train_steps_per_second": 0.064 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 573, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 520358168002560.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
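
The dump above matches the layout the Hugging Face `Trainer` typically writes to `trainer_state.json`: `log_history` holds one record per optimizer step (epoch, learning rate, gradient norm, loss), while the closing record and the trailing fields carry the run-level summary (total FLOs, runtime, throughput). As a minimal sketch of how such a file can be inspected once it is saved as plain JSON — the file path here is an assumption, not part of this log:

```python
# Minimal sketch: read a saved trainer_state.json and summarize the loss curve.
# Assumes the state above is stored as plain JSON at the (hypothetical) path below.
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Per-step records carry a "loss" key; the closing summary record does not,
# so filtering on that key keeps only the per-step entries.
logs = [entry for entry in state["log_history"] if "loss" in entry]

steps = [entry["step"] for entry in logs]
losses = [entry["loss"] for entry in logs]

print(f"logged steps : {len(steps)} of max_steps={state['max_steps']}")
print(f"loss range   : {losses[0]:.4f} (step {steps[0]}) -> {losses[-1]:.4f} (step {steps[-1]})")
print(f"mean loss    : {sum(losses) / len(losses):.4f}")
```

Because `logging_steps` is 1 here, the printed mean of the logged losses should land close to the reported `train_loss` of 0.6215, which is the training loss averaged over all steps of the run.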