{
  "best_metric": 4.7635955810546875,
  "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/passive/lstm/4/checkpoints/checkpoint-76320",
  "epoch": 0.025000606015738065,
  "eval_steps": 10,
  "global_step": 76320,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 4.999998362119627e-05,
      "loss": 10.8196,
      "step": 1
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.999161405248948e-05,
      "loss": 7.5533,
      "step": 512
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.998322810497896e-05,
      "loss": 7.0635,
      "step": 1024
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.997484215746844e-05,
      "loss": 7.0042,
      "step": 1536
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.996645620995792e-05,
      "loss": 6.9533,
      "step": 2048
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.99580702624474e-05,
      "loss": 6.9322,
      "step": 2560
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994968431493688e-05,
      "loss": 6.7756,
      "step": 3072
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.994129836742636e-05,
      "loss": 6.6613,
      "step": 3584
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.993291241991584e-05,
      "loss": 6.5605,
      "step": 4096
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.992452647240532e-05,
      "loss": 6.4804,
      "step": 4608
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.99161405248948e-05,
      "loss": 6.4189,
      "step": 5120
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.990775457738428e-05,
      "loss": 6.3513,
      "step": 5632
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989936862987376e-05,
      "loss": 6.2866,
      "step": 6144
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.989098268236324e-05,
      "loss": 6.2208,
      "step": 6656
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.988259673485272e-05,
      "loss": 6.1649,
      "step": 7168
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.98742107873422e-05,
      "loss": 6.0949,
      "step": 7680
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.986582483983168e-05,
      "loss": 6.0579,
      "step": 8192
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.985743889232116e-05,
      "loss": 6.0075,
      "step": 8704
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984905294481064e-05,
      "loss": 5.9647,
      "step": 9216
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.984068337610385e-05,
      "loss": 5.9239,
      "step": 9728
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.983229742859333e-05,
      "loss": 5.8918,
      "step": 10240
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.982391148108281e-05,
      "loss": 5.8513,
      "step": 10752
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.981552553357229e-05,
      "loss": 5.8219,
      "step": 11264
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.980713958606178e-05,
      "loss": 5.7784,
      "step": 11776
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.979875363855125e-05,
      "loss": 5.7661,
      "step": 12288
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.979036769104073e-05,
      "loss": 5.7274,
      "step": 12800
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.978198174353021e-05,
      "loss": 5.7087,
      "step": 13312
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.9773612174823426e-05,
      "loss": 5.6689,
      "step": 13824
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.97652262273129e-05,
      "loss": 5.6523,
      "step": 14336
    },
    {
      "epoch": 0.0,
      "learning_rate": 4.975684027980238e-05,
      "loss": 5.6324,
      "step": 14848
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.974845433229186e-05,
      "loss": 5.6059,
      "step": 15360
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.974008476358507e-05,
      "loss": 5.5968,
      "step": 15872
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.973169881607455e-05,
      "loss": 5.5723,
      "step": 16384
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.972331286856403e-05,
      "loss": 5.5537,
      "step": 16896
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9714926921053515e-05,
      "loss": 5.5465,
      "step": 17408
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9706557352346724e-05,
      "loss": 5.5243,
      "step": 17920
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9698171404836204e-05,
      "loss": 5.5144,
      "step": 18432
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9689785457325684e-05,
      "loss": 5.4689,
      "step": 18944
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9681399509815164e-05,
      "loss": 5.4752,
      "step": 19456
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9673013562304644e-05,
      "loss": 5.4362,
      "step": 19968
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9664627614794124e-05,
      "loss": 5.4425,
      "step": 20480
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9656241667283604e-05,
      "loss": 5.4201,
      "step": 20992
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9647855719773084e-05,
      "loss": 5.4185,
      "step": 21504
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9639469772262564e-05,
      "loss": 5.389,
      "step": 22016
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9631083824752044e-05,
      "loss": 5.3955,
      "step": 22528
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.962271425604525e-05,
      "loss": 5.3851,
      "step": 23040
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.961432830853473e-05,
      "loss": 5.3771,
      "step": 23552
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.960594236102421e-05,
      "loss": 5.3689,
      "step": 24064
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.959755641351369e-05,
      "loss": 5.3395,
      "step": 24576
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.958917046600318e-05,
      "loss": 5.3322,
      "step": 25088
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.958080089729639e-05,
      "loss": 5.3311,
      "step": 25600
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.957241494978587e-05,
      "loss": 5.3187,
      "step": 26112
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.956402900227535e-05,
      "loss": 5.3127,
      "step": 26624
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.955564305476483e-05,
      "loss": 5.29,
      "step": 27136
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.954728986486177e-05,
      "loss": 5.2844,
      "step": 27648
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.953890391735125e-05,
      "loss": 5.267,
      "step": 28160
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9530517969840727e-05,
      "loss": 5.2882,
      "step": 28672
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9522132022330207e-05,
      "loss": 5.2425,
      "step": 29184
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9513746074819686e-05,
      "loss": 5.2583,
      "step": 29696
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9505360127309166e-05,
      "loss": 5.2469,
      "step": 30208
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9496974179798646e-05,
      "loss": 5.2273,
      "step": 30720
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.948858823228813e-05,
      "loss": 5.2259,
      "step": 31232
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.948020228477761e-05,
      "loss": 5.2129,
      "step": 31744
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9471816337267086e-05,
      "loss": 5.2021,
      "step": 32256
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9463430389756566e-05,
      "loss": 5.1949,
      "step": 32768
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9455044442246046e-05,
      "loss": 5.2091,
      "step": 33280
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9446658494735526e-05,
      "loss": 5.1846,
      "step": 33792
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9438272547225006e-05,
      "loss": 5.1752,
      "step": 34304
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9429886599714486e-05,
      "loss": 5.1647,
      "step": 34816
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9421500652203966e-05,
      "loss": 5.1633,
      "step": 35328
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9413131083497175e-05,
      "loss": 5.1684,
      "step": 35840
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9404745135986655e-05,
      "loss": 5.1636,
      "step": 36352
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9396359188476135e-05,
      "loss": 5.1401,
      "step": 36864
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.938798961976935e-05,
      "loss": 5.1545,
      "step": 37376
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.937960367225883e-05,
      "loss": 5.1482,
      "step": 37888
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.937121772474831e-05,
      "loss": 5.1205,
      "step": 38400
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.936283177723779e-05,
      "loss": 5.1193,
      "step": 38912
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.935444582972727e-05,
      "loss": 5.1089,
      "step": 39424
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.934607626102048e-05,
      "loss": 5.1017,
      "step": 39936
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.933769031350996e-05,
      "loss": 5.0958,
      "step": 40448
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.932930436599944e-05,
      "loss": 5.0998,
      "step": 40960
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.932091841848892e-05,
      "loss": 5.097,
      "step": 41472
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.931254884978213e-05,
      "loss": 5.098,
      "step": 41984
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.930416290227161e-05,
      "loss": 5.0739,
      "step": 42496
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9295793333564825e-05,
      "loss": 5.0562,
      "step": 43008
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9287407386054305e-05,
      "loss": 5.0612,
      "step": 43520
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9279021438543785e-05,
      "loss": 5.0632,
      "step": 44032
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9270635491033264e-05,
      "loss": 5.0611,
      "step": 44544
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9262249543522744e-05,
      "loss": 5.0452,
      "step": 45056
    },
    {
      "epoch": 0.01,
      "learning_rate": 4.9253863596012224e-05,
      "loss": 5.0429,
      "step": 45568
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9245477648501704e-05,
      "loss": 5.0427,
      "step": 46080
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9237091700991184e-05,
      "loss": 5.0344,
      "step": 46592
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.922872213228439e-05,
      "loss": 5.0245,
      "step": 47104
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.922033618477387e-05,
      "loss": 5.0217,
      "step": 47616
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.921195023726335e-05,
      "loss": 5.0082,
      "step": 48128
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.920356428975283e-05,
      "loss": 5.0206,
      "step": 48640
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.919517834224231e-05,
      "loss": 5.0085,
      "step": 49152
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.918680877353552e-05,
      "loss": 4.9863,
      "step": 49664
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.917842282602501e-05,
      "loss": 4.9927,
      "step": 50176
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.917003687851449e-05,
      "loss": 4.9899,
      "step": 50688
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.916165093100397e-05,
      "loss": 4.9978,
      "step": 51200
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.915326498349345e-05,
      "loss": 4.9792,
      "step": 51712
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.914487903598292e-05,
      "loss": 4.9753,
      "step": 52224
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.91364930884724e-05,
      "loss": 4.9611,
      "step": 52736
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.912810714096188e-05,
      "loss": 4.9671,
      "step": 53248
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.911973757225509e-05,
      "loss": 4.9459,
      "step": 53760
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.911136800354831e-05,
      "loss": 4.9572,
      "step": 54272
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.910298205603779e-05,
      "loss": 4.9442,
      "step": 54784
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9094612487330996e-05,
      "loss": 4.9469,
      "step": 55296
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9086226539820476e-05,
      "loss": 4.9352,
      "step": 55808
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.907784059230996e-05,
      "loss": 4.9385,
      "step": 56320
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.906945464479944e-05,
      "loss": 4.9251,
      "step": 56832
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.906106869728892e-05,
      "loss": 4.93,
      "step": 57344
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9052682749778396e-05,
      "loss": 4.9215,
      "step": 57856
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9044296802267876e-05,
      "loss": 4.9146,
      "step": 58368
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9035910854757356e-05,
      "loss": 4.9269,
      "step": 58880
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9027541286050565e-05,
      "loss": 4.9104,
      "step": 59392
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9019155338540045e-05,
      "loss": 4.9092,
      "step": 59904
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9010769391029525e-05,
      "loss": 4.898,
      "step": 60416
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.9002383443519005e-05,
      "loss": 4.9027,
      "step": 60928
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8993997496008485e-05,
      "loss": 4.8985,
      "step": 61440
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.89856279273017e-05,
      "loss": 4.8913,
      "step": 61952
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.897724197979118e-05,
      "loss": 4.8959,
      "step": 62464
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.896885603228066e-05,
      "loss": 4.8833,
      "step": 62976
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.896048646357387e-05,
      "loss": 4.8825,
      "step": 63488
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.895210051606335e-05,
      "loss": 4.863,
      "step": 64000
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.894371456855283e-05,
      "loss": 4.8677,
      "step": 64512
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.893532862104231e-05,
      "loss": 4.8685,
      "step": 65024
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.892694267353179e-05,
      "loss": 4.8599,
      "step": 65536
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.891855672602127e-05,
      "loss": 4.8503,
      "step": 66048
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.891017077851075e-05,
      "loss": 4.8745,
      "step": 66560
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.890178483100023e-05,
      "loss": 4.8557,
      "step": 67072
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.889339888348971e-05,
      "loss": 4.8489,
      "step": 67584
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.888502931478292e-05,
      "loss": 4.8487,
      "step": 68096
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.88766433672724e-05,
      "loss": 4.8422,
      "step": 68608
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.886825741976188e-05,
      "loss": 4.8447,
      "step": 69120
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8859871472251365e-05,
      "loss": 4.8411,
      "step": 69632
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8851485524740845e-05,
      "loss": 4.8362,
      "step": 70144
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8843099577230325e-05,
      "loss": 4.8387,
      "step": 70656
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8834713629719805e-05,
      "loss": 4.8376,
      "step": 71168
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.882632768220928e-05,
      "loss": 4.8255,
      "step": 71680
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8817958113502494e-05,
      "loss": 4.8153,
      "step": 72192
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8809572165991974e-05,
      "loss": 4.8277,
      "step": 72704
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.8801186218481454e-05,
      "loss": 4.8137,
      "step": 73216
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.879280027097093e-05,
      "loss": 4.8173,
      "step": 73728
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.878443070226414e-05,
      "loss": 4.8072,
      "step": 74240
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.877604475475362e-05,
      "loss": 4.8057,
      "step": 74752
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.876767518604683e-05,
      "loss": 4.8111,
      "step": 75264
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.875928923853632e-05,
      "loss": 4.7981,
      "step": 75776
    },
    {
      "epoch": 0.02,
      "learning_rate": 4.87509032910258e-05,
      "loss": 4.7943,
      "step": 76288
    },
    {
      "epoch": 0.03,
      "eval_loss": 4.7635955810546875,
      "eval_runtime": 297.4563,
      "eval_samples_per_second": 1282.847,
      "eval_steps_per_second": 40.09,
      "step": 76320
    }
  ],
  "logging_steps": 512,
  "max_steps": 3052726,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 10,
  "total_flos": 3.153557465701272e+16,
  "trial_name": null,
  "trial_params": null
}