{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9942363112391931,
  "eval_steps": 500,
  "global_step": 115,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.008645533141210375,
      "grad_norm": 34.72848892211914,
      "learning_rate": 5.0000000000000004e-08,
      "loss": 2.477,
      "step": 1
    },
    {
      "epoch": 0.01729106628242075,
      "grad_norm": 33.733909606933594,
      "learning_rate": 1.0000000000000001e-07,
      "loss": 2.4134,
      "step": 2
    },
    {
      "epoch": 0.025936599423631124,
      "grad_norm": 34.543819427490234,
      "learning_rate": 1.5000000000000002e-07,
      "loss": 2.4467,
      "step": 3
    },
    {
      "epoch": 0.0345821325648415,
      "grad_norm": 35.37831115722656,
      "learning_rate": 2.0000000000000002e-07,
      "loss": 2.4801,
      "step": 4
    },
    {
      "epoch": 0.043227665706051875,
      "grad_norm": 33.97856140136719,
      "learning_rate": 2.5000000000000004e-07,
      "loss": 2.4422,
      "step": 5
    },
    {
      "epoch": 0.05187319884726225,
      "grad_norm": 34.11160659790039,
      "learning_rate": 3.0000000000000004e-07,
      "loss": 2.4003,
      "step": 6
    },
    {
      "epoch": 0.06051873198847262,
      "grad_norm": 34.086463928222656,
      "learning_rate": 3.5000000000000004e-07,
      "loss": 2.4211,
      "step": 7
    },
    {
      "epoch": 0.069164265129683,
      "grad_norm": 33.96665573120117,
      "learning_rate": 4.0000000000000003e-07,
      "loss": 2.4054,
      "step": 8
    },
    {
      "epoch": 0.07780979827089338,
      "grad_norm": 34.871307373046875,
      "learning_rate": 4.5000000000000003e-07,
      "loss": 2.4077,
      "step": 9
    },
    {
      "epoch": 0.08645533141210375,
      "grad_norm": 33.91160583496094,
      "learning_rate": 5.000000000000001e-07,
      "loss": 2.3598,
      "step": 10
    },
    {
      "epoch": 0.09510086455331412,
      "grad_norm": 33.40217971801758,
      "learning_rate": 5.5e-07,
      "loss": 2.3209,
      "step": 11
    },
    {
      "epoch": 0.1037463976945245,
      "grad_norm": 33.771121978759766,
      "learning_rate": 6.000000000000001e-07,
      "loss": 2.3178,
      "step": 12
    },
    {
      "epoch": 0.11239193083573487,
      "grad_norm": 33.577476501464844,
      "learning_rate": 6.5e-07,
      "loss": 2.3107,
      "step": 13
    },
    {
      "epoch": 0.12103746397694524,
      "grad_norm": 31.330514907836914,
      "learning_rate": 7.000000000000001e-07,
      "loss": 2.095,
      "step": 14
    },
    {
      "epoch": 0.12968299711815562,
      "grad_norm": 33.60646057128906,
      "learning_rate": 7.5e-07,
      "loss": 2.1663,
      "step": 15
    },
    {
      "epoch": 0.138328530259366,
      "grad_norm": 31.96607208251953,
      "learning_rate": 8.000000000000001e-07,
      "loss": 1.9986,
      "step": 16
    },
    {
      "epoch": 0.14697406340057637,
      "grad_norm": 32.33183288574219,
      "learning_rate": 8.500000000000001e-07,
      "loss": 1.9401,
      "step": 17
    },
    {
      "epoch": 0.15561959654178675,
      "grad_norm": 33.50197219848633,
      "learning_rate": 9.000000000000001e-07,
      "loss": 1.8529,
      "step": 18
    },
    {
      "epoch": 0.1642651296829971,
      "grad_norm": 33.55098342895508,
      "learning_rate": 9.500000000000001e-07,
      "loss": 1.7087,
      "step": 19
    },
    {
      "epoch": 0.1729106628242075,
      "grad_norm": 33.90129852294922,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.6324,
      "step": 20
    },
    {
      "epoch": 0.18155619596541786,
      "grad_norm": 33.738037109375,
      "learning_rate": 1.0500000000000001e-06,
      "loss": 1.4896,
      "step": 21
    },
    {
      "epoch": 0.19020172910662825,
      "grad_norm": 33.6258659362793,
      "learning_rate": 1.1e-06,
      "loss": 1.3664,
      "step": 22
    },
    {
      "epoch": 0.1988472622478386,
      "grad_norm": 30.822349548339844,
      "learning_rate": 1.1500000000000002e-06,
      "loss": 1.2193,
      "step": 23
    },
    {
      "epoch": 0.207492795389049,
      "grad_norm": 29.608501434326172,
      "learning_rate": 1.2000000000000002e-06,
      "loss": 1.074,
      "step": 24
    },
    {
      "epoch": 0.21613832853025935,
      "grad_norm": 27.651105880737305,
      "learning_rate": 1.25e-06,
      "loss": 0.938,
      "step": 25
    },
    {
      "epoch": 0.22478386167146974,
      "grad_norm": 29.479637145996094,
      "learning_rate": 1.3e-06,
      "loss": 0.7728,
      "step": 26
    },
    {
      "epoch": 0.2334293948126801,
      "grad_norm": 29.068634033203125,
      "learning_rate": 1.3500000000000002e-06,
      "loss": 0.6051,
      "step": 27
    },
    {
      "epoch": 0.2420749279538905,
      "grad_norm": 24.850099563598633,
      "learning_rate": 1.4000000000000001e-06,
      "loss": 0.4463,
      "step": 28
    },
    {
      "epoch": 0.2507204610951009,
      "grad_norm": 22.095216751098633,
      "learning_rate": 1.45e-06,
      "loss": 0.3489,
      "step": 29
    },
    {
      "epoch": 0.25936599423631124,
      "grad_norm": 19.491201400756836,
      "learning_rate": 1.5e-06,
      "loss": 0.261,
      "step": 30
    },
    {
      "epoch": 0.2680115273775216,
      "grad_norm": 14.492341041564941,
      "learning_rate": 1.5500000000000002e-06,
      "loss": 0.1707,
      "step": 31
    },
    {
      "epoch": 0.276657060518732,
      "grad_norm": 6.020577907562256,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.1151,
      "step": 32
    },
    {
      "epoch": 0.28530259365994237,
      "grad_norm": 2.90791916847229,
      "learning_rate": 1.6500000000000003e-06,
      "loss": 0.0976,
      "step": 33
    },
    {
      "epoch": 0.29394812680115273,
      "grad_norm": 2.637803554534912,
      "learning_rate": 1.7000000000000002e-06,
      "loss": 0.0911,
      "step": 34
    },
    {
      "epoch": 0.3025936599423631,
      "grad_norm": 1.804861068725586,
      "learning_rate": 1.75e-06,
      "loss": 0.0834,
      "step": 35
    },
    {
      "epoch": 0.3112391930835735,
      "grad_norm": 2.049024820327759,
      "learning_rate": 1.8000000000000001e-06,
      "loss": 0.0842,
      "step": 36
    },
    {
      "epoch": 0.31988472622478387,
      "grad_norm": 1.3263498544692993,
      "learning_rate": 1.85e-06,
      "loss": 0.0744,
      "step": 37
    },
    {
      "epoch": 0.3285302593659942,
      "grad_norm": 1.7187089920043945,
      "learning_rate": 1.9000000000000002e-06,
      "loss": 0.0783,
      "step": 38
    },
    {
      "epoch": 0.3371757925072046,
      "grad_norm": 1.3925131559371948,
      "learning_rate": 1.9500000000000004e-06,
      "loss": 0.073,
      "step": 39
    },
    {
      "epoch": 0.345821325648415,
      "grad_norm": 1.2181739807128906,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 0.0749,
      "step": 40
    },
    {
      "epoch": 0.35446685878962536,
      "grad_norm": 1.0519245862960815,
      "learning_rate": 2.05e-06,
      "loss": 0.0692,
      "step": 41
    },
    {
      "epoch": 0.3631123919308357,
      "grad_norm": 0.9188923835754395,
      "learning_rate": 2.1000000000000002e-06,
      "loss": 0.0726,
      "step": 42
    },
    {
      "epoch": 0.37175792507204614,
      "grad_norm": 0.8273228406906128,
      "learning_rate": 2.15e-06,
      "loss": 0.0651,
      "step": 43
    },
    {
      "epoch": 0.3804034582132565,
      "grad_norm": 0.9098994135856628,
      "learning_rate": 2.2e-06,
      "loss": 0.066,
      "step": 44
    },
    {
      "epoch": 0.38904899135446686,
      "grad_norm": 0.8456838726997375,
      "learning_rate": 2.25e-06,
      "loss": 0.0646,
      "step": 45
    },
    {
      "epoch": 0.3976945244956772,
      "grad_norm": 0.8240940570831299,
      "learning_rate": 2.3000000000000004e-06,
      "loss": 0.0604,
      "step": 46
    },
    {
      "epoch": 0.40634005763688763,
      "grad_norm": 1.111759901046753,
      "learning_rate": 2.35e-06,
      "loss": 0.0589,
      "step": 47
    },
    {
      "epoch": 0.414985590778098,
      "grad_norm": 0.9933035373687744,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 0.0613,
      "step": 48
    },
    {
      "epoch": 0.42363112391930835,
      "grad_norm": 0.7491716742515564,
      "learning_rate": 2.4500000000000003e-06,
      "loss": 0.0583,
      "step": 49
    },
    {
      "epoch": 0.4322766570605187,
      "grad_norm": 0.9089523553848267,
      "learning_rate": 2.5e-06,
      "loss": 0.0558,
      "step": 50
    },
    {
      "epoch": 0.4409221902017291,
      "grad_norm": 0.7088611721992493,
      "learning_rate": 2.55e-06,
      "loss": 0.0553,
      "step": 51
    },
    {
      "epoch": 0.4495677233429395,
      "grad_norm": 0.7892571091651917,
      "learning_rate": 2.6e-06,
      "loss": 0.064,
      "step": 52
    },
    {
      "epoch": 0.45821325648414984,
      "grad_norm": 0.9248467087745667,
      "learning_rate": 2.6500000000000005e-06,
      "loss": 0.0653,
      "step": 53
    },
    {
      "epoch": 0.4668587896253602,
      "grad_norm": 0.7224969863891602,
      "learning_rate": 2.7000000000000004e-06,
      "loss": 0.0611,
      "step": 54
    },
    {
      "epoch": 0.4755043227665706,
      "grad_norm": 0.8231533765792847,
      "learning_rate": 2.7500000000000004e-06,
      "loss": 0.0617,
      "step": 55
    },
    {
      "epoch": 0.484149855907781,
      "grad_norm": 0.7306967973709106,
      "learning_rate": 2.8000000000000003e-06,
      "loss": 0.0571,
      "step": 56
    },
    {
      "epoch": 0.49279538904899134,
      "grad_norm": 0.8415323495864868,
      "learning_rate": 2.85e-06,
      "loss": 0.0548,
      "step": 57
    },
    {
      "epoch": 0.5014409221902018,
      "grad_norm": 1.5560295581817627,
      "learning_rate": 2.9e-06,
      "loss": 0.0672,
      "step": 58
    },
    {
      "epoch": 0.5100864553314121,
      "grad_norm": 0.9170955419540405,
      "learning_rate": 2.95e-06,
      "loss": 0.068,
      "step": 59
    },
    {
      "epoch": 0.5187319884726225,
      "grad_norm": 0.6508005857467651,
      "learning_rate": 3e-06,
      "loss": 0.057,
      "step": 60
    },
    {
      "epoch": 0.5273775216138329,
      "grad_norm": 0.8307355642318726,
      "learning_rate": 3.05e-06,
      "loss": 0.0598,
      "step": 61
    },
    {
      "epoch": 0.5360230547550432,
      "grad_norm": 1.11078679561615,
      "learning_rate": 3.1000000000000004e-06,
      "loss": 0.0575,
      "step": 62
    },
    {
      "epoch": 0.5446685878962536,
      "grad_norm": 1.0765758752822876,
      "learning_rate": 3.1500000000000003e-06,
      "loss": 0.0615,
      "step": 63
    },
    {
      "epoch": 0.553314121037464,
      "grad_norm": 0.8381508588790894,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.0606,
      "step": 64
    },
    {
      "epoch": 0.5619596541786743,
      "grad_norm": 1.007628321647644,
      "learning_rate": 3.2500000000000002e-06,
      "loss": 0.0572,
      "step": 65
    },
    {
      "epoch": 0.5706051873198847,
      "grad_norm": 0.7254197597503662,
      "learning_rate": 3.3000000000000006e-06,
      "loss": 0.0543,
      "step": 66
    },
    {
      "epoch": 0.579250720461095,
      "grad_norm": 0.5906903147697449,
      "learning_rate": 3.3500000000000005e-06,
      "loss": 0.0557,
      "step": 67
    },
    {
      "epoch": 0.5878962536023055,
      "grad_norm": 0.6791537404060364,
      "learning_rate": 3.4000000000000005e-06,
      "loss": 0.0516,
      "step": 68
    },
    {
      "epoch": 0.5965417867435159,
      "grad_norm": 0.6390945315361023,
      "learning_rate": 3.45e-06,
      "loss": 0.0529,
      "step": 69
    },
    {
      "epoch": 0.6051873198847262,
      "grad_norm": 0.61552494764328,
      "learning_rate": 3.5e-06,
      "loss": 0.0554,
      "step": 70
    },
    {
      "epoch": 0.6138328530259366,
      "grad_norm": 0.6545206308364868,
      "learning_rate": 3.5500000000000003e-06,
      "loss": 0.0545,
      "step": 71
    },
    {
      "epoch": 0.622478386167147,
      "grad_norm": 0.9062793254852295,
      "learning_rate": 3.6000000000000003e-06,
      "loss": 0.0559,
      "step": 72
    },
    {
      "epoch": 0.6311239193083573,
      "grad_norm": 0.9642562866210938,
      "learning_rate": 3.65e-06,
      "loss": 0.0485,
      "step": 73
    },
    {
      "epoch": 0.6397694524495677,
      "grad_norm": 0.7286660075187683,
      "learning_rate": 3.7e-06,
      "loss": 0.0564,
      "step": 74
    },
    {
      "epoch": 0.6484149855907781,
      "grad_norm": 0.6356053948402405,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.053,
      "step": 75
    },
    {
      "epoch": 0.6570605187319885,
      "grad_norm": 0.706794261932373,
      "learning_rate": 3.8000000000000005e-06,
      "loss": 0.0531,
      "step": 76
    },
    {
      "epoch": 0.6657060518731989,
      "grad_norm": 0.6616448163986206,
      "learning_rate": 3.85e-06,
      "loss": 0.0504,
      "step": 77
    },
    {
      "epoch": 0.6743515850144092,
      "grad_norm": 0.7465748190879822,
      "learning_rate": 3.900000000000001e-06,
      "loss": 0.0495,
      "step": 78
    },
    {
      "epoch": 0.6829971181556196,
      "grad_norm": 0.8153467774391174,
      "learning_rate": 3.95e-06,
      "loss": 0.0495,
      "step": 79
    },
    {
      "epoch": 0.69164265129683,
      "grad_norm": 0.7728897333145142,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.0434,
      "step": 80
    },
    {
      "epoch": 0.7002881844380403,
      "grad_norm": 0.6173391938209534,
      "learning_rate": 4.05e-06,
      "loss": 0.0432,
      "step": 81
    },
    {
      "epoch": 0.7089337175792507,
      "grad_norm": 0.7128047943115234,
      "learning_rate": 4.1e-06,
      "loss": 0.0512,
      "step": 82
    },
    {
      "epoch": 0.7175792507204611,
      "grad_norm": 0.6098653674125671,
      "learning_rate": 4.15e-06,
      "loss": 0.0415,
      "step": 83
    },
    {
      "epoch": 0.7262247838616714,
      "grad_norm": 0.7464293241500854,
      "learning_rate": 4.2000000000000004e-06,
      "loss": 0.0453,
      "step": 84
    },
    {
      "epoch": 0.7348703170028819,
      "grad_norm": 0.8350300788879395,
      "learning_rate": 4.25e-06,
      "loss": 0.0463,
      "step": 85
    },
    {
      "epoch": 0.7435158501440923,
      "grad_norm": 0.7880110740661621,
      "learning_rate": 4.3e-06,
      "loss": 0.0426,
      "step": 86
    },
    {
      "epoch": 0.7521613832853026,
      "grad_norm": 0.77886962890625,
      "learning_rate": 4.350000000000001e-06,
      "loss": 0.0476,
      "step": 87
    },
    {
      "epoch": 0.760806916426513,
      "grad_norm": 1.0404386520385742,
      "learning_rate": 4.4e-06,
      "loss": 0.0501,
      "step": 88
    },
    {
      "epoch": 0.7694524495677233,
      "grad_norm": 0.5827208757400513,
      "learning_rate": 4.450000000000001e-06,
      "loss": 0.0396,
      "step": 89
    },
    {
      "epoch": 0.7780979827089337,
      "grad_norm": 0.5928618907928467,
      "learning_rate": 4.5e-06,
      "loss": 0.0438,
      "step": 90
    },
    {
      "epoch": 0.7867435158501441,
      "grad_norm": 0.5311946272850037,
      "learning_rate": 4.5500000000000005e-06,
      "loss": 0.0388,
      "step": 91
    },
    {
      "epoch": 0.7953890489913544,
      "grad_norm": 0.7609073519706726,
      "learning_rate": 4.600000000000001e-06,
      "loss": 0.0403,
      "step": 92
    },
    {
      "epoch": 0.8040345821325648,
      "grad_norm": 0.6055853962898254,
      "learning_rate": 4.65e-06,
      "loss": 0.0402,
      "step": 93
    },
    {
      "epoch": 0.8126801152737753,
      "grad_norm": 0.8020023703575134,
      "learning_rate": 4.7e-06,
      "loss": 0.0415,
      "step": 94
    },
    {
      "epoch": 0.8213256484149856,
      "grad_norm": 0.9083772301673889,
      "learning_rate": 4.75e-06,
      "loss": 0.0428,
      "step": 95
    },
    {
      "epoch": 0.829971181556196,
      "grad_norm": 0.6658433079719543,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.0381,
      "step": 96
    },
    {
      "epoch": 0.8386167146974063,
      "grad_norm": 0.925826907157898,
      "learning_rate": 4.85e-06,
      "loss": 0.0465,
      "step": 97
    },
    {
      "epoch": 0.8472622478386167,
      "grad_norm": 0.5956787467002869,
      "learning_rate": 4.9000000000000005e-06,
      "loss": 0.0429,
      "step": 98
    },
    {
      "epoch": 0.8559077809798271,
      "grad_norm": 0.8485273718833923,
      "learning_rate": 4.95e-06,
      "loss": 0.0465,
      "step": 99
    },
    {
      "epoch": 0.8645533141210374,
      "grad_norm": 0.7477124333381653,
      "learning_rate": 5e-06,
      "loss": 0.0407,
      "step": 100
    },
    {
      "epoch": 0.8731988472622478,
      "grad_norm": 0.624204695224762,
      "learning_rate": 4.999964559102694e-06,
      "loss": 0.0433,
      "step": 101
    },
    {
      "epoch": 0.8818443804034583,
      "grad_norm": 0.6230912804603577,
      "learning_rate": 4.999858237415621e-06,
      "loss": 0.0364,
      "step": 102
    },
    {
      "epoch": 0.8904899135446686,
      "grad_norm": 0.9286472201347351,
      "learning_rate": 4.999681037953289e-06,
      "loss": 0.0385,
      "step": 103
    },
    {
      "epoch": 0.899135446685879,
      "grad_norm": 1.0528490543365479,
      "learning_rate": 4.999432965739786e-06,
      "loss": 0.0406,
      "step": 104
    },
    {
      "epoch": 0.9077809798270894,
      "grad_norm": 1.1465263366699219,
      "learning_rate": 4.999114027808632e-06,
      "loss": 0.0526,
      "step": 105
    },
    {
      "epoch": 0.9164265129682997,
      "grad_norm": 0.7173194885253906,
      "learning_rate": 4.998724233202585e-06,
      "loss": 0.05,
      "step": 106
    },
    {
      "epoch": 0.9250720461095101,
      "grad_norm": 0.7150623798370361,
      "learning_rate": 4.998263592973382e-06,
      "loss": 0.0392,
      "step": 107
    },
    {
      "epoch": 0.9337175792507204,
      "grad_norm": 0.6749414801597595,
      "learning_rate": 4.9977321201814235e-06,
      "loss": 0.0381,
      "step": 108
    },
    {
      "epoch": 0.9423631123919308,
      "grad_norm": 0.7057478427886963,
      "learning_rate": 4.997129829895409e-06,
      "loss": 0.0427,
      "step": 109
    },
    {
      "epoch": 0.9510086455331412,
      "grad_norm": 0.8370860815048218,
      "learning_rate": 4.996456739191905e-06,
      "loss": 0.0353,
      "step": 110
    },
    {
      "epoch": 0.9596541786743515,
      "grad_norm": 0.8583172559738159,
      "learning_rate": 4.995712867154863e-06,
      "loss": 0.0366,
      "step": 111
    },
    {
      "epoch": 0.968299711815562,
      "grad_norm": 0.9564568400382996,
      "learning_rate": 4.994898234875075e-06,
      "loss": 0.0463,
      "step": 112
    },
    {
      "epoch": 0.9769452449567724,
      "grad_norm": 0.7943828105926514,
      "learning_rate": 4.9940128654495826e-06,
      "loss": 0.0366,
      "step": 113
    },
    {
      "epoch": 0.9855907780979827,
      "grad_norm": 0.9387117624282837,
      "learning_rate": 4.9930567839810125e-06,
      "loss": 0.0433,
      "step": 114
    },
    {
      "epoch": 0.9942363112391931,
      "grad_norm": 0.5982036590576172,
      "learning_rate": 4.992030017576876e-06,
      "loss": 0.0311,
      "step": 115
    }
  ],
  "logging_steps": 1,
  "max_steps": 690,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 115,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.904942201274368e+17,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}