| { | |
| "best_metric": 4.762204647064209, | |
| "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/binding-reconstruction/lstm/2/checkpoints/checkpoint-76320", | |
| "epoch": 0.025000606015738065, | |
| "eval_steps": 10, | |
| "global_step": 76320, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999998362119627e-05, | |
| "loss": 10.8201, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.999161405248948e-05, | |
| "loss": 7.5498, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.998322810497896e-05, | |
| "loss": 7.0599, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.997484215746844e-05, | |
| "loss": 6.9942, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.996645620995792e-05, | |
| "loss": 6.9508, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.99580702624474e-05, | |
| "loss": 6.9339, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.994968431493688e-05, | |
| "loss": 6.7931, | |
| "step": 3072 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.994129836742636e-05, | |
| "loss": 6.6876, | |
| "step": 3584 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.993291241991584e-05, | |
| "loss": 6.5759, | |
| "step": 4096 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.992452647240532e-05, | |
| "loss": 6.4872, | |
| "step": 4608 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.99161405248948e-05, | |
| "loss": 6.4115, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.990775457738428e-05, | |
| "loss": 6.3432, | |
| "step": 5632 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.989936862987376e-05, | |
| "loss": 6.2727, | |
| "step": 6144 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.989098268236324e-05, | |
| "loss": 6.2001, | |
| "step": 6656 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.988259673485272e-05, | |
| "loss": 6.1494, | |
| "step": 7168 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.98742107873422e-05, | |
| "loss": 6.088, | |
| "step": 7680 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.986582483983168e-05, | |
| "loss": 6.0543, | |
| "step": 8192 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.985743889232116e-05, | |
| "loss": 6.0135, | |
| "step": 8704 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.984905294481064e-05, | |
| "loss": 5.964, | |
| "step": 9216 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.984066699730012e-05, | |
| "loss": 5.9333, | |
| "step": 9728 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9832281049789595e-05, | |
| "loss": 5.8943, | |
| "step": 10240 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.982391148108281e-05, | |
| "loss": 5.8629, | |
| "step": 10752 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.981552553357229e-05, | |
| "loss": 5.8341, | |
| "step": 11264 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.980713958606178e-05, | |
| "loss": 5.799, | |
| "step": 11776 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.979875363855125e-05, | |
| "loss": 5.7818, | |
| "step": 12288 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9790384069844466e-05, | |
| "loss": 5.7427, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9781998122333946e-05, | |
| "loss": 5.724, | |
| "step": 13312 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9773628553627155e-05, | |
| "loss": 5.6992, | |
| "step": 13824 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9765242606116635e-05, | |
| "loss": 5.6708, | |
| "step": 14336 | |
| }, | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 4.9756856658606115e-05, | |
| "loss": 5.6427, | |
| "step": 14848 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9748470711095595e-05, | |
| "loss": 5.628, | |
| "step": 15360 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.974008476358507e-05, | |
| "loss": 5.6165, | |
| "step": 15872 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.973169881607455e-05, | |
| "loss": 5.5902, | |
| "step": 16384 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.972331286856403e-05, | |
| "loss": 5.5703, | |
| "step": 16896 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9714943299857244e-05, | |
| "loss": 5.5623, | |
| "step": 17408 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9706557352346724e-05, | |
| "loss": 5.5499, | |
| "step": 17920 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9698171404836204e-05, | |
| "loss": 5.5244, | |
| "step": 18432 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9689785457325684e-05, | |
| "loss": 5.4912, | |
| "step": 18944 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.96814158886189e-05, | |
| "loss": 5.4869, | |
| "step": 19456 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.967302994110837e-05, | |
| "loss": 5.4558, | |
| "step": 19968 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.966464399359785e-05, | |
| "loss": 5.4597, | |
| "step": 20480 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.965625804608733e-05, | |
| "loss": 5.4433, | |
| "step": 20992 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.964787209857681e-05, | |
| "loss": 5.4402, | |
| "step": 21504 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.963950252987002e-05, | |
| "loss": 5.4115, | |
| "step": 22016 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.96311165823595e-05, | |
| "loss": 5.4087, | |
| "step": 22528 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.962273063484898e-05, | |
| "loss": 5.3948, | |
| "step": 23040 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.961434468733847e-05, | |
| "loss": 5.3876, | |
| "step": 23552 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.960595873982795e-05, | |
| "loss": 5.3831, | |
| "step": 24064 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.959757279231743e-05, | |
| "loss": 5.3484, | |
| "step": 24576 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.958920322361064e-05, | |
| "loss": 5.3462, | |
| "step": 25088 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.958081727610012e-05, | |
| "loss": 5.3518, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.95724313285896e-05, | |
| "loss": 5.342, | |
| "step": 26112 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.956404538107908e-05, | |
| "loss": 5.316, | |
| "step": 26624 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.955565943356856e-05, | |
| "loss": 5.302, | |
| "step": 27136 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.954727348605804e-05, | |
| "loss": 5.295, | |
| "step": 27648 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.953888753854752e-05, | |
| "loss": 5.2869, | |
| "step": 28160 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9530501591037e-05, | |
| "loss": 5.2978, | |
| "step": 28672 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9522132022330207e-05, | |
| "loss": 5.2579, | |
| "step": 29184 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9513746074819686e-05, | |
| "loss": 5.2679, | |
| "step": 29696 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9505360127309166e-05, | |
| "loss": 5.2568, | |
| "step": 30208 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9496974179798646e-05, | |
| "loss": 5.2416, | |
| "step": 30720 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.948860461109186e-05, | |
| "loss": 5.2421, | |
| "step": 31232 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.948021866358134e-05, | |
| "loss": 5.2162, | |
| "step": 31744 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.947183271607082e-05, | |
| "loss": 5.2111, | |
| "step": 32256 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.94634467685603e-05, | |
| "loss": 5.2091, | |
| "step": 32768 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.945506082104978e-05, | |
| "loss": 5.2213, | |
| "step": 33280 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9446674873539255e-05, | |
| "loss": 5.1899, | |
| "step": 33792 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.943830530483247e-05, | |
| "loss": 5.1855, | |
| "step": 34304 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.942991935732195e-05, | |
| "loss": 5.1659, | |
| "step": 34816 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.942154978861516e-05, | |
| "loss": 5.1728, | |
| "step": 35328 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.941316384110464e-05, | |
| "loss": 5.1737, | |
| "step": 35840 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.940477789359412e-05, | |
| "loss": 5.173, | |
| "step": 36352 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.93963919460836e-05, | |
| "loss": 5.1481, | |
| "step": 36864 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.938800599857309e-05, | |
| "loss": 5.1614, | |
| "step": 37376 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.937962005106256e-05, | |
| "loss": 5.1616, | |
| "step": 37888 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.937123410355204e-05, | |
| "loss": 5.137, | |
| "step": 38400 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.936284815604152e-05, | |
| "loss": 5.1237, | |
| "step": 38912 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9354462208531e-05, | |
| "loss": 5.1148, | |
| "step": 39424 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.934607626102048e-05, | |
| "loss": 5.1051, | |
| "step": 39936 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.933769031350996e-05, | |
| "loss": 5.115, | |
| "step": 40448 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.932932074480317e-05, | |
| "loss": 5.094, | |
| "step": 40960 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.932093479729265e-05, | |
| "loss": 5.1093, | |
| "step": 41472 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.931254884978213e-05, | |
| "loss": 5.1004, | |
| "step": 41984 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.930416290227161e-05, | |
| "loss": 5.0834, | |
| "step": 42496 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9295793333564825e-05, | |
| "loss": 5.0687, | |
| "step": 43008 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9287407386054305e-05, | |
| "loss": 5.0701, | |
| "step": 43520 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9279021438543785e-05, | |
| "loss": 5.0704, | |
| "step": 44032 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9270635491033264e-05, | |
| "loss": 5.0734, | |
| "step": 44544 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9262265922326474e-05, | |
| "loss": 5.0538, | |
| "step": 45056 | |
| }, | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 4.9253879974815954e-05, | |
| "loss": 5.0525, | |
| "step": 45568 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9245494027305433e-05, | |
| "loss": 5.0475, | |
| "step": 46080 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9237108079794913e-05, | |
| "loss": 5.0327, | |
| "step": 46592 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.922872213228439e-05, | |
| "loss": 5.0302, | |
| "step": 47104 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.922033618477387e-05, | |
| "loss": 5.0289, | |
| "step": 47616 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.921195023726335e-05, | |
| "loss": 5.0215, | |
| "step": 48128 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.920358066855656e-05, | |
| "loss": 5.0198, | |
| "step": 48640 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.919519472104604e-05, | |
| "loss": 5.0081, | |
| "step": 49152 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.918680877353552e-05, | |
| "loss": 5.0, | |
| "step": 49664 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.917842282602501e-05, | |
| "loss": 5.0063, | |
| "step": 50176 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.917005325731822e-05, | |
| "loss": 5.0027, | |
| "step": 50688 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.91616673098077e-05, | |
| "loss": 4.9949, | |
| "step": 51200 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.915328136229718e-05, | |
| "loss": 4.9887, | |
| "step": 51712 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.914489541478666e-05, | |
| "loss": 4.9799, | |
| "step": 52224 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.913650946727614e-05, | |
| "loss": 4.9733, | |
| "step": 52736 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.912812351976562e-05, | |
| "loss": 4.9706, | |
| "step": 53248 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.911973757225509e-05, | |
| "loss": 4.9495, | |
| "step": 53760 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.911136800354831e-05, | |
| "loss": 4.956, | |
| "step": 54272 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.910298205603779e-05, | |
| "loss": 4.9538, | |
| "step": 54784 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.909459610852727e-05, | |
| "loss": 4.9565, | |
| "step": 55296 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.908621016101675e-05, | |
| "loss": 4.9432, | |
| "step": 55808 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.907784059230996e-05, | |
| "loss": 4.9485, | |
| "step": 56320 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.906945464479944e-05, | |
| "loss": 4.9257, | |
| "step": 56832 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.906106869728892e-05, | |
| "loss": 4.9447, | |
| "step": 57344 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9052682749778396e-05, | |
| "loss": 4.9158, | |
| "step": 57856 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9044296802267876e-05, | |
| "loss": 4.9247, | |
| "step": 58368 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9035910854757356e-05, | |
| "loss": 4.9254, | |
| "step": 58880 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9027524907246836e-05, | |
| "loss": 4.9212, | |
| "step": 59392 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9019155338540045e-05, | |
| "loss": 4.9159, | |
| "step": 59904 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9010769391029525e-05, | |
| "loss": 4.905, | |
| "step": 60416 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.9002383443519005e-05, | |
| "loss": 4.9099, | |
| "step": 60928 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8993997496008485e-05, | |
| "loss": 4.9127, | |
| "step": 61440 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8985611548497965e-05, | |
| "loss": 4.8943, | |
| "step": 61952 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8977225600987445e-05, | |
| "loss": 4.8905, | |
| "step": 62464 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8968839653476924e-05, | |
| "loss": 4.8884, | |
| "step": 62976 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.896045370596641e-05, | |
| "loss": 4.8997, | |
| "step": 63488 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.895206775845589e-05, | |
| "loss": 4.8673, | |
| "step": 64000 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.894368181094537e-05, | |
| "loss": 4.8726, | |
| "step": 64512 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.893529586343485e-05, | |
| "loss": 4.8656, | |
| "step": 65024 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.892690991592433e-05, | |
| "loss": 4.8726, | |
| "step": 65536 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.891854034721754e-05, | |
| "loss": 4.8593, | |
| "step": 66048 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.891015439970702e-05, | |
| "loss": 4.8761, | |
| "step": 66560 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.890178483100023e-05, | |
| "loss": 4.8647, | |
| "step": 67072 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.889339888348971e-05, | |
| "loss": 4.8569, | |
| "step": 67584 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.888501293597919e-05, | |
| "loss": 4.8479, | |
| "step": 68096 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.887662698846867e-05, | |
| "loss": 4.8545, | |
| "step": 68608 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.886824104095815e-05, | |
| "loss": 4.8516, | |
| "step": 69120 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.885985509344763e-05, | |
| "loss": 4.8519, | |
| "step": 69632 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8851485524740845e-05, | |
| "loss": 4.8393, | |
| "step": 70144 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8843099577230325e-05, | |
| "loss": 4.8479, | |
| "step": 70656 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.8834713629719805e-05, | |
| "loss": 4.8377, | |
| "step": 71168 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.882632768220928e-05, | |
| "loss": 4.8364, | |
| "step": 71680 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.881794173469876e-05, | |
| "loss": 4.8176, | |
| "step": 72192 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.880955578718824e-05, | |
| "loss": 4.8354, | |
| "step": 72704 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.880116983967772e-05, | |
| "loss": 4.8176, | |
| "step": 73216 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.879280027097093e-05, | |
| "loss": 4.801, | |
| "step": 73728 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.878441432346041e-05, | |
| "loss": 4.825, | |
| "step": 74240 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.877602837594989e-05, | |
| "loss": 4.813, | |
| "step": 74752 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.87676588072431e-05, | |
| "loss": 4.8115, | |
| "step": 75264 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.875927285973258e-05, | |
| "loss": 4.8095, | |
| "step": 75776 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "learning_rate": 4.875088691222206e-05, | |
| "loss": 4.7955, | |
| "step": 76288 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_loss": 4.762204647064209, | |
| "eval_runtime": 290.7203, | |
| "eval_samples_per_second": 1312.571, | |
| "eval_steps_per_second": 41.019, | |
| "step": 76320 | |
| } | |
| ], | |
| "logging_steps": 512, | |
| "max_steps": 3052726, | |
| "num_train_epochs": 9223372036854775807, | |
| "save_steps": 10, | |
| "total_flos": 3.145048926888413e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |