| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 29.948867786705623, | |
| "global_step": 164000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 5e-05, | |
| "loss": 7.0207, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 4.96937775600196e-05, | |
| "loss": 5.1703, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 4.93875551200392e-05, | |
| "loss": 4.7048, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.9081332680058794e-05, | |
| "loss": 4.3894, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 4.87751102400784e-05, | |
| "loss": 4.1348, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 4.8468887800097995e-05, | |
| "loss": 3.9447, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 4.816266536011759e-05, | |
| "loss": 3.8286, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 4.785644292013719e-05, | |
| "loss": 3.7462, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "learning_rate": 4.7550220480156786e-05, | |
| "loss": 3.693, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 4.7243998040176384e-05, | |
| "loss": 3.6311, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 4.693777560019598e-05, | |
| "loss": 3.5843, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "learning_rate": 4.6631553160215585e-05, | |
| "loss": 3.4624, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 2.37, | |
| "learning_rate": 4.632533072023518e-05, | |
| "loss": 3.4432, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "learning_rate": 4.601910828025478e-05, | |
| "loss": 3.423, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 4.5712885840274376e-05, | |
| "loss": 3.4042, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "learning_rate": 4.540666340029397e-05, | |
| "loss": 3.3902, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 4.510044096031357e-05, | |
| "loss": 3.2973, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 4.479421852033317e-05, | |
| "loss": 3.2482, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 3.47, | |
| "learning_rate": 4.448799608035277e-05, | |
| "loss": 3.2427, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 3.65, | |
| "learning_rate": 4.418177364037237e-05, | |
| "loss": 3.2309, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 4.3875551200391966e-05, | |
| "loss": 3.2193, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 4.356932876041156e-05, | |
| "loss": 3.2065, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 4.2, | |
| "learning_rate": 4.326310632043116e-05, | |
| "loss": 3.0733, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 4.295688388045076e-05, | |
| "loss": 3.0868, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 4.2650661440470355e-05, | |
| "loss": 3.0884, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 4.75, | |
| "learning_rate": 4.234443900048996e-05, | |
| "loss": 3.0843, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 4.93, | |
| "learning_rate": 4.2038216560509556e-05, | |
| "loss": 3.0768, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 4.173199412052915e-05, | |
| "loss": 2.9834, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 5.3, | |
| "learning_rate": 4.142577168054875e-05, | |
| "loss": 2.9267, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 5.48, | |
| "learning_rate": 4.111954924056835e-05, | |
| "loss": 2.9463, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 4.081332680058795e-05, | |
| "loss": 2.9523, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 5.84, | |
| "learning_rate": 4.050710436060755e-05, | |
| "loss": 2.9561, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 6.03, | |
| "learning_rate": 4.0200881920627145e-05, | |
| "loss": 2.9237, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 3.989465948064674e-05, | |
| "loss": 2.7803, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 6.39, | |
| "learning_rate": 3.958843704066634e-05, | |
| "loss": 2.8068, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 3.9282214600685944e-05, | |
| "loss": 2.8223, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 3.897599216070554e-05, | |
| "loss": 2.8238, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 6.94, | |
| "learning_rate": 3.866976972072514e-05, | |
| "loss": 2.8341, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 7.12, | |
| "learning_rate": 3.8363547280744735e-05, | |
| "loss": 2.6976, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 7.3, | |
| "learning_rate": 3.805732484076434e-05, | |
| "loss": 2.6679, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 7.49, | |
| "learning_rate": 3.7751102400783936e-05, | |
| "loss": 2.691, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 7.67, | |
| "learning_rate": 3.744487996080353e-05, | |
| "loss": 2.7001, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 3.713865752082313e-05, | |
| "loss": 2.7189, | |
| "step": 43000 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 3.683243508084273e-05, | |
| "loss": 2.6829, | |
| "step": 44000 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 3.6526212640862325e-05, | |
| "loss": 2.5262, | |
| "step": 45000 | |
| }, | |
| { | |
| "epoch": 8.4, | |
| "learning_rate": 3.621999020088192e-05, | |
| "loss": 2.5547, | |
| "step": 46000 | |
| }, | |
| { | |
| "epoch": 8.58, | |
| "learning_rate": 3.5913767760901526e-05, | |
| "loss": 2.5926, | |
| "step": 47000 | |
| }, | |
| { | |
| "epoch": 8.77, | |
| "learning_rate": 3.560754532092112e-05, | |
| "loss": 2.5971, | |
| "step": 48000 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 3.530132288094072e-05, | |
| "loss": 2.6053, | |
| "step": 49000 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 3.499510044096032e-05, | |
| "loss": 2.4615, | |
| "step": 50000 | |
| }, | |
| { | |
| "epoch": 9.31, | |
| "learning_rate": 3.4688878000979914e-05, | |
| "loss": 2.4412, | |
| "step": 51000 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 3.438265556099951e-05, | |
| "loss": 2.4633, | |
| "step": 52000 | |
| }, | |
| { | |
| "epoch": 9.68, | |
| "learning_rate": 3.407643312101911e-05, | |
| "loss": 2.469, | |
| "step": 53000 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 3.377021068103871e-05, | |
| "loss": 2.5006, | |
| "step": 54000 | |
| }, | |
| { | |
| "epoch": 10.04, | |
| "learning_rate": 3.346398824105831e-05, | |
| "loss": 2.4556, | |
| "step": 55000 | |
| }, | |
| { | |
| "epoch": 10.23, | |
| "learning_rate": 3.315776580107791e-05, | |
| "loss": 2.3134, | |
| "step": 56000 | |
| }, | |
| { | |
| "epoch": 10.41, | |
| "learning_rate": 3.2851543361097504e-05, | |
| "loss": 2.3445, | |
| "step": 57000 | |
| }, | |
| { | |
| "epoch": 10.59, | |
| "learning_rate": 3.25453209211171e-05, | |
| "loss": 2.3709, | |
| "step": 58000 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "learning_rate": 3.22390984811367e-05, | |
| "loss": 2.3831, | |
| "step": 59000 | |
| }, | |
| { | |
| "epoch": 10.96, | |
| "learning_rate": 3.1932876041156296e-05, | |
| "loss": 2.4089, | |
| "step": 60000 | |
| }, | |
| { | |
| "epoch": 11.14, | |
| "learning_rate": 3.162665360117589e-05, | |
| "loss": 2.248, | |
| "step": 61000 | |
| }, | |
| { | |
| "epoch": 11.32, | |
| "learning_rate": 3.13204311611955e-05, | |
| "loss": 2.2344, | |
| "step": 62000 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "learning_rate": 3.1014208721215094e-05, | |
| "loss": 2.2553, | |
| "step": 63000 | |
| }, | |
| { | |
| "epoch": 11.69, | |
| "learning_rate": 3.070798628123469e-05, | |
| "loss": 2.2816, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 11.87, | |
| "learning_rate": 3.0401763841254288e-05, | |
| "loss": 2.3067, | |
| "step": 65000 | |
| }, | |
| { | |
| "epoch": 12.05, | |
| "learning_rate": 3.0095541401273885e-05, | |
| "loss": 2.248, | |
| "step": 66000 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "learning_rate": 2.9789318961293483e-05, | |
| "loss": 2.1257, | |
| "step": 67000 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "learning_rate": 2.948309652131308e-05, | |
| "loss": 2.1559, | |
| "step": 68000 | |
| }, | |
| { | |
| "epoch": 12.6, | |
| "learning_rate": 2.9176874081332684e-05, | |
| "loss": 2.1743, | |
| "step": 69000 | |
| }, | |
| { | |
| "epoch": 12.78, | |
| "learning_rate": 2.887065164135228e-05, | |
| "loss": 2.1997, | |
| "step": 70000 | |
| }, | |
| { | |
| "epoch": 12.97, | |
| "learning_rate": 2.8564429201371878e-05, | |
| "loss": 2.2321, | |
| "step": 71000 | |
| }, | |
| { | |
| "epoch": 13.15, | |
| "learning_rate": 2.8258206761391475e-05, | |
| "loss": 2.0603, | |
| "step": 72000 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 2.7951984321411072e-05, | |
| "loss": 2.0491, | |
| "step": 73000 | |
| }, | |
| { | |
| "epoch": 13.51, | |
| "learning_rate": 2.764576188143067e-05, | |
| "loss": 2.079, | |
| "step": 74000 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "learning_rate": 2.7339539441450267e-05, | |
| "loss": 2.1119, | |
| "step": 75000 | |
| }, | |
| { | |
| "epoch": 13.88, | |
| "learning_rate": 2.703331700146987e-05, | |
| "loss": 2.1355, | |
| "step": 76000 | |
| }, | |
| { | |
| "epoch": 14.06, | |
| "learning_rate": 2.6727094561489468e-05, | |
| "loss": 2.068, | |
| "step": 77000 | |
| }, | |
| { | |
| "epoch": 14.24, | |
| "learning_rate": 2.6420872121509065e-05, | |
| "loss": 1.96, | |
| "step": 78000 | |
| }, | |
| { | |
| "epoch": 14.43, | |
| "learning_rate": 2.6114649681528662e-05, | |
| "loss": 1.9827, | |
| "step": 79000 | |
| }, | |
| { | |
| "epoch": 14.61, | |
| "learning_rate": 2.5808427241548263e-05, | |
| "loss": 2.0163, | |
| "step": 80000 | |
| }, | |
| { | |
| "epoch": 14.79, | |
| "learning_rate": 2.550220480156786e-05, | |
| "loss": 2.0496, | |
| "step": 81000 | |
| }, | |
| { | |
| "epoch": 14.97, | |
| "learning_rate": 2.5195982361587457e-05, | |
| "loss": 2.0579, | |
| "step": 82000 | |
| }, | |
| { | |
| "epoch": 15.16, | |
| "learning_rate": 2.4889759921607057e-05, | |
| "loss": 1.8884, | |
| "step": 83000 | |
| }, | |
| { | |
| "epoch": 15.34, | |
| "learning_rate": 2.4583537481626655e-05, | |
| "loss": 1.8992, | |
| "step": 84000 | |
| }, | |
| { | |
| "epoch": 15.52, | |
| "learning_rate": 2.4277315041646255e-05, | |
| "loss": 1.9383, | |
| "step": 85000 | |
| }, | |
| { | |
| "epoch": 15.7, | |
| "learning_rate": 2.3971092601665852e-05, | |
| "loss": 1.9512, | |
| "step": 86000 | |
| }, | |
| { | |
| "epoch": 15.89, | |
| "learning_rate": 2.366487016168545e-05, | |
| "loss": 1.9814, | |
| "step": 87000 | |
| }, | |
| { | |
| "epoch": 16.07, | |
| "learning_rate": 2.335864772170505e-05, | |
| "loss": 1.9075, | |
| "step": 88000 | |
| }, | |
| { | |
| "epoch": 16.25, | |
| "learning_rate": 2.3052425281724647e-05, | |
| "loss": 1.8141, | |
| "step": 89000 | |
| }, | |
| { | |
| "epoch": 16.44, | |
| "learning_rate": 2.2746202841744244e-05, | |
| "loss": 1.8431, | |
| "step": 90000 | |
| }, | |
| { | |
| "epoch": 16.62, | |
| "learning_rate": 2.243998040176384e-05, | |
| "loss": 1.8723, | |
| "step": 91000 | |
| }, | |
| { | |
| "epoch": 16.8, | |
| "learning_rate": 2.2133757961783442e-05, | |
| "loss": 1.9034, | |
| "step": 92000 | |
| }, | |
| { | |
| "epoch": 16.98, | |
| "learning_rate": 2.182753552180304e-05, | |
| "loss": 1.9112, | |
| "step": 93000 | |
| }, | |
| { | |
| "epoch": 17.17, | |
| "learning_rate": 2.1521313081822636e-05, | |
| "loss": 1.7565, | |
| "step": 94000 | |
| }, | |
| { | |
| "epoch": 17.35, | |
| "learning_rate": 2.1215090641842237e-05, | |
| "loss": 1.7656, | |
| "step": 95000 | |
| }, | |
| { | |
| "epoch": 17.53, | |
| "learning_rate": 2.0908868201861834e-05, | |
| "loss": 1.7856, | |
| "step": 96000 | |
| }, | |
| { | |
| "epoch": 17.71, | |
| "learning_rate": 2.060264576188143e-05, | |
| "loss": 1.8185, | |
| "step": 97000 | |
| }, | |
| { | |
| "epoch": 17.9, | |
| "learning_rate": 2.029642332190103e-05, | |
| "loss": 1.8397, | |
| "step": 98000 | |
| }, | |
| { | |
| "epoch": 18.08, | |
| "learning_rate": 1.999020088192063e-05, | |
| "loss": 1.7726, | |
| "step": 99000 | |
| }, | |
| { | |
| "epoch": 18.26, | |
| "learning_rate": 1.9683978441940226e-05, | |
| "loss": 1.6883, | |
| "step": 100000 | |
| }, | |
| { | |
| "epoch": 18.44, | |
| "learning_rate": 1.9377756001959823e-05, | |
| "loss": 1.724, | |
| "step": 101000 | |
| }, | |
| { | |
| "epoch": 18.63, | |
| "learning_rate": 1.9071533561979424e-05, | |
| "loss": 1.751, | |
| "step": 102000 | |
| }, | |
| { | |
| "epoch": 18.81, | |
| "learning_rate": 1.876531112199902e-05, | |
| "loss": 1.7654, | |
| "step": 103000 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "learning_rate": 1.8459088682018618e-05, | |
| "loss": 1.7807, | |
| "step": 104000 | |
| }, | |
| { | |
| "epoch": 19.17, | |
| "learning_rate": 1.8152866242038215e-05, | |
| "loss": 1.6287, | |
| "step": 105000 | |
| }, | |
| { | |
| "epoch": 19.36, | |
| "learning_rate": 1.7846643802057816e-05, | |
| "loss": 1.6529, | |
| "step": 106000 | |
| }, | |
| { | |
| "epoch": 19.54, | |
| "learning_rate": 1.7540421362077413e-05, | |
| "loss": 1.6782, | |
| "step": 107000 | |
| }, | |
| { | |
| "epoch": 19.72, | |
| "learning_rate": 1.723419892209701e-05, | |
| "loss": 1.6989, | |
| "step": 108000 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "learning_rate": 1.692797648211661e-05, | |
| "loss": 1.7147, | |
| "step": 109000 | |
| }, | |
| { | |
| "epoch": 20.09, | |
| "learning_rate": 1.6621754042136208e-05, | |
| "loss": 1.6426, | |
| "step": 110000 | |
| }, | |
| { | |
| "epoch": 20.27, | |
| "learning_rate": 1.6315531602155805e-05, | |
| "loss": 1.5852, | |
| "step": 111000 | |
| }, | |
| { | |
| "epoch": 20.45, | |
| "learning_rate": 1.6009309162175405e-05, | |
| "loss": 1.6156, | |
| "step": 112000 | |
| }, | |
| { | |
| "epoch": 20.64, | |
| "learning_rate": 1.5703086722195003e-05, | |
| "loss": 1.6307, | |
| "step": 113000 | |
| }, | |
| { | |
| "epoch": 20.82, | |
| "learning_rate": 1.53968642822146e-05, | |
| "loss": 1.6493, | |
| "step": 114000 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 1.5090641842234199e-05, | |
| "loss": 1.6623, | |
| "step": 115000 | |
| }, | |
| { | |
| "epoch": 21.18, | |
| "learning_rate": 1.47844194022538e-05, | |
| "loss": 1.5248, | |
| "step": 116000 | |
| }, | |
| { | |
| "epoch": 21.37, | |
| "learning_rate": 1.4478196962273396e-05, | |
| "loss": 1.551, | |
| "step": 117000 | |
| }, | |
| { | |
| "epoch": 21.55, | |
| "learning_rate": 1.4171974522292993e-05, | |
| "loss": 1.5738, | |
| "step": 118000 | |
| }, | |
| { | |
| "epoch": 21.73, | |
| "learning_rate": 1.386575208231259e-05, | |
| "loss": 1.5854, | |
| "step": 119000 | |
| }, | |
| { | |
| "epoch": 21.91, | |
| "learning_rate": 1.3559529642332191e-05, | |
| "loss": 1.599, | |
| "step": 120000 | |
| }, | |
| { | |
| "epoch": 22.1, | |
| "learning_rate": 1.3253307202351788e-05, | |
| "loss": 1.5402, | |
| "step": 121000 | |
| }, | |
| { | |
| "epoch": 22.28, | |
| "learning_rate": 1.2947084762371387e-05, | |
| "loss": 1.486, | |
| "step": 122000 | |
| }, | |
| { | |
| "epoch": 22.46, | |
| "learning_rate": 1.2640862322390986e-05, | |
| "loss": 1.5188, | |
| "step": 123000 | |
| }, | |
| { | |
| "epoch": 22.64, | |
| "learning_rate": 1.2334639882410585e-05, | |
| "loss": 1.5442, | |
| "step": 124000 | |
| }, | |
| { | |
| "epoch": 22.83, | |
| "learning_rate": 1.2028417442430182e-05, | |
| "loss": 1.5483, | |
| "step": 125000 | |
| }, | |
| { | |
| "epoch": 23.01, | |
| "learning_rate": 1.1722195002449781e-05, | |
| "loss": 1.5501, | |
| "step": 126000 | |
| }, | |
| { | |
| "epoch": 23.19, | |
| "learning_rate": 1.1415972562469378e-05, | |
| "loss": 1.4471, | |
| "step": 127000 | |
| }, | |
| { | |
| "epoch": 23.37, | |
| "learning_rate": 1.1109750122488977e-05, | |
| "loss": 1.4578, | |
| "step": 128000 | |
| }, | |
| { | |
| "epoch": 23.56, | |
| "learning_rate": 1.0803527682508574e-05, | |
| "loss": 1.4712, | |
| "step": 129000 | |
| }, | |
| { | |
| "epoch": 23.74, | |
| "learning_rate": 1.0497305242528173e-05, | |
| "loss": 1.4918, | |
| "step": 130000 | |
| }, | |
| { | |
| "epoch": 23.92, | |
| "learning_rate": 1.0191082802547772e-05, | |
| "loss": 1.5129, | |
| "step": 131000 | |
| }, | |
| { | |
| "epoch": 24.11, | |
| "learning_rate": 9.884860362567369e-06, | |
| "loss": 1.4455, | |
| "step": 132000 | |
| }, | |
| { | |
| "epoch": 24.29, | |
| "learning_rate": 9.578637922586968e-06, | |
| "loss": 1.4141, | |
| "step": 133000 | |
| }, | |
| { | |
| "epoch": 24.47, | |
| "learning_rate": 9.272415482606565e-06, | |
| "loss": 1.4301, | |
| "step": 134000 | |
| }, | |
| { | |
| "epoch": 24.65, | |
| "learning_rate": 8.966193042626164e-06, | |
| "loss": 1.4454, | |
| "step": 135000 | |
| }, | |
| { | |
| "epoch": 24.84, | |
| "learning_rate": 8.659970602645761e-06, | |
| "loss": 1.4599, | |
| "step": 136000 | |
| }, | |
| { | |
| "epoch": 25.02, | |
| "learning_rate": 8.35374816266536e-06, | |
| "loss": 1.4573, | |
| "step": 137000 | |
| }, | |
| { | |
| "epoch": 25.2, | |
| "learning_rate": 8.047525722684959e-06, | |
| "loss": 1.3704, | |
| "step": 138000 | |
| }, | |
| { | |
| "epoch": 25.38, | |
| "learning_rate": 7.741303282704558e-06, | |
| "loss": 1.3845, | |
| "step": 139000 | |
| }, | |
| { | |
| "epoch": 25.57, | |
| "learning_rate": 7.4350808427241555e-06, | |
| "loss": 1.4078, | |
| "step": 140000 | |
| }, | |
| { | |
| "epoch": 25.75, | |
| "learning_rate": 7.128858402743753e-06, | |
| "loss": 1.4147, | |
| "step": 141000 | |
| }, | |
| { | |
| "epoch": 25.93, | |
| "learning_rate": 6.8226359627633515e-06, | |
| "loss": 1.4223, | |
| "step": 142000 | |
| }, | |
| { | |
| "epoch": 26.11, | |
| "learning_rate": 6.5164135227829495e-06, | |
| "loss": 1.3655, | |
| "step": 143000 | |
| }, | |
| { | |
| "epoch": 26.3, | |
| "learning_rate": 6.210191082802548e-06, | |
| "loss": 1.3528, | |
| "step": 144000 | |
| }, | |
| { | |
| "epoch": 26.48, | |
| "learning_rate": 5.903968642822146e-06, | |
| "loss": 1.3626, | |
| "step": 145000 | |
| }, | |
| { | |
| "epoch": 26.66, | |
| "learning_rate": 5.597746202841744e-06, | |
| "loss": 1.3719, | |
| "step": 146000 | |
| }, | |
| { | |
| "epoch": 26.84, | |
| "learning_rate": 5.291523762861342e-06, | |
| "loss": 1.3808, | |
| "step": 147000 | |
| }, | |
| { | |
| "epoch": 27.03, | |
| "learning_rate": 4.985301322880941e-06, | |
| "loss": 1.3752, | |
| "step": 148000 | |
| }, | |
| { | |
| "epoch": 27.21, | |
| "learning_rate": 4.679078882900539e-06, | |
| "loss": 1.3171, | |
| "step": 149000 | |
| }, | |
| { | |
| "epoch": 27.39, | |
| "learning_rate": 4.372856442920137e-06, | |
| "loss": 1.3321, | |
| "step": 150000 | |
| }, | |
| { | |
| "epoch": 27.57, | |
| "learning_rate": 4.066634002939736e-06, | |
| "loss": 1.3365, | |
| "step": 151000 | |
| }, | |
| { | |
| "epoch": 27.76, | |
| "learning_rate": 3.7604115629593337e-06, | |
| "loss": 1.3424, | |
| "step": 152000 | |
| }, | |
| { | |
| "epoch": 27.94, | |
| "learning_rate": 3.454189122978932e-06, | |
| "loss": 1.3471, | |
| "step": 153000 | |
| }, | |
| { | |
| "epoch": 28.12, | |
| "learning_rate": 3.14796668299853e-06, | |
| "loss": 1.3095, | |
| "step": 154000 | |
| }, | |
| { | |
| "epoch": 28.31, | |
| "learning_rate": 2.8417442430181285e-06, | |
| "loss": 1.3113, | |
| "step": 155000 | |
| }, | |
| { | |
| "epoch": 28.49, | |
| "learning_rate": 2.5355218030377265e-06, | |
| "loss": 1.3017, | |
| "step": 156000 | |
| }, | |
| { | |
| "epoch": 28.67, | |
| "learning_rate": 2.229299363057325e-06, | |
| "loss": 1.3147, | |
| "step": 157000 | |
| }, | |
| { | |
| "epoch": 28.85, | |
| "learning_rate": 1.9230769230769234e-06, | |
| "loss": 1.3135, | |
| "step": 158000 | |
| }, | |
| { | |
| "epoch": 29.04, | |
| "learning_rate": 1.6168544830965214e-06, | |
| "loss": 1.309, | |
| "step": 159000 | |
| }, | |
| { | |
| "epoch": 29.22, | |
| "learning_rate": 1.3106320431161196e-06, | |
| "loss": 1.2784, | |
| "step": 160000 | |
| }, | |
| { | |
| "epoch": 29.4, | |
| "learning_rate": 1.0044096031357178e-06, | |
| "loss": 1.2903, | |
| "step": 161000 | |
| }, | |
| { | |
| "epoch": 29.58, | |
| "learning_rate": 6.98187163155316e-07, | |
| "loss": 1.2919, | |
| "step": 162000 | |
| }, | |
| { | |
| "epoch": 29.77, | |
| "learning_rate": 3.9196472317491427e-07, | |
| "loss": 1.2802, | |
| "step": 163000 | |
| }, | |
| { | |
| "epoch": 29.95, | |
| "learning_rate": 8.57422831945125e-08, | |
| "loss": 1.2904, | |
| "step": 164000 | |
| } | |
| ], | |
| "max_steps": 164280, | |
| "num_train_epochs": 30, | |
| "total_flos": 3.45466660974336e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |