| { |
| "best_metric": 3.869717836380005, |
| "best_model_checkpoint": "/mmfs1/gscratch/stf/abhinavp/corpus-filtering/outputs/binding-domain/transformer/0/checkpoints/checkpoint-457920", |
| "epoch": 1.0250006060157382, |
| "eval_steps": 10, |
| "global_step": 457920, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.999998362119627e-05, |
| "loss": 10.9402, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.999161405248948e-05, |
| "loss": 6.8272, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.998322810497896e-05, |
| "loss": 6.1901, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.997484215746844e-05, |
| "loss": 5.9741, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.996645620995792e-05, |
| "loss": 5.8089, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.99580702624474e-05, |
| "loss": 5.7087, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.994968431493688e-05, |
| "loss": 5.5936, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.994129836742636e-05, |
| "loss": 5.5251, |
| "step": 3584 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.993291241991584e-05, |
| "loss": 5.4409, |
| "step": 4096 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.992452647240532e-05, |
| "loss": 5.3928, |
| "step": 4608 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.99161405248948e-05, |
| "loss": 5.3412, |
| "step": 5120 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.990775457738428e-05, |
| "loss": 5.3021, |
| "step": 5632 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.989936862987376e-05, |
| "loss": 5.2523, |
| "step": 6144 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.989099906116697e-05, |
| "loss": 5.2013, |
| "step": 6656 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.988261311365645e-05, |
| "loss": 5.1676, |
| "step": 7168 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.987422716614593e-05, |
| "loss": 5.1251, |
| "step": 7680 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.986584121863541e-05, |
| "loss": 5.097, |
| "step": 8192 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.985745527112489e-05, |
| "loss": 5.075, |
| "step": 8704 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.984906932361437e-05, |
| "loss": 5.0453, |
| "step": 9216 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.984068337610385e-05, |
| "loss": 5.0127, |
| "step": 9728 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.983229742859333e-05, |
| "loss": 5.0017, |
| "step": 10240 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9823927859886547e-05, |
| "loss": 4.9631, |
| "step": 10752 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9815541912376026e-05, |
| "loss": 4.9463, |
| "step": 11264 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9807155964865506e-05, |
| "loss": 4.9236, |
| "step": 11776 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9798770017354986e-05, |
| "loss": 4.9129, |
| "step": 12288 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9790400448648195e-05, |
| "loss": 4.8836, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9782014501137675e-05, |
| "loss": 4.8671, |
| "step": 13312 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9773628553627155e-05, |
| "loss": 4.8459, |
| "step": 13824 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9765242606116635e-05, |
| "loss": 4.8271, |
| "step": 14336 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.9756873037409844e-05, |
| "loss": 4.8133, |
| "step": 14848 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9748487089899324e-05, |
| "loss": 4.7946, |
| "step": 15360 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9740101142388804e-05, |
| "loss": 4.7908, |
| "step": 15872 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9731715194878284e-05, |
| "loss": 4.7707, |
| "step": 16384 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9723329247367764e-05, |
| "loss": 4.7631, |
| "step": 16896 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.971495967866098e-05, |
| "loss": 4.7571, |
| "step": 17408 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.970657373115046e-05, |
| "loss": 4.7389, |
| "step": 17920 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.969820416244367e-05, |
| "loss": 4.7307, |
| "step": 18432 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.968981821493315e-05, |
| "loss": 4.7035, |
| "step": 18944 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.968143226742263e-05, |
| "loss": 4.6986, |
| "step": 19456 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.967304631991211e-05, |
| "loss": 4.6811, |
| "step": 19968 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.966466037240159e-05, |
| "loss": 4.6804, |
| "step": 20480 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.965627442489107e-05, |
| "loss": 4.6683, |
| "step": 20992 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.964788847738054e-05, |
| "loss": 4.6592, |
| "step": 21504 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.963950252987002e-05, |
| "loss": 4.6375, |
| "step": 22016 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.96311165823595e-05, |
| "loss": 4.6483, |
| "step": 22528 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.962274701365272e-05, |
| "loss": 4.6339, |
| "step": 23040 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.96143610661422e-05, |
| "loss": 4.6276, |
| "step": 23552 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.960597511863168e-05, |
| "loss": 4.621, |
| "step": 24064 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.959758917112116e-05, |
| "loss": 4.5898, |
| "step": 24576 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.95892359812181e-05, |
| "loss": 4.5885, |
| "step": 25088 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.958085003370758e-05, |
| "loss": 4.5932, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.957246408619706e-05, |
| "loss": 4.5853, |
| "step": 26112 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.956407813868654e-05, |
| "loss": 4.5673, |
| "step": 26624 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9555692191176016e-05, |
| "loss": 4.5487, |
| "step": 27136 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.954732262246923e-05, |
| "loss": 4.5516, |
| "step": 27648 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.953893667495871e-05, |
| "loss": 4.5366, |
| "step": 28160 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.953055072744819e-05, |
| "loss": 4.564, |
| "step": 28672 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.952216477993767e-05, |
| "loss": 4.5197, |
| "step": 29184 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.951377883242715e-05, |
| "loss": 4.5349, |
| "step": 29696 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.950539288491663e-05, |
| "loss": 4.5282, |
| "step": 30208 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.949700693740611e-05, |
| "loss": 4.5056, |
| "step": 30720 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.948862098989559e-05, |
| "loss": 4.5105, |
| "step": 31232 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.94802514211888e-05, |
| "loss": 4.4995, |
| "step": 31744 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.947186547367828e-05, |
| "loss": 4.483, |
| "step": 32256 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.946347952616776e-05, |
| "loss": 4.4835, |
| "step": 32768 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.945509357865724e-05, |
| "loss": 4.502, |
| "step": 33280 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.944672400995045e-05, |
| "loss": 4.4789, |
| "step": 33792 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.943833806243993e-05, |
| "loss": 4.4688, |
| "step": 34304 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.942995211492941e-05, |
| "loss": 4.4589, |
| "step": 34816 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9421582546222625e-05, |
| "loss": 4.4607, |
| "step": 35328 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9413196598712105e-05, |
| "loss": 4.4728, |
| "step": 35840 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9404810651201585e-05, |
| "loss": 4.4682, |
| "step": 36352 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9396424703691065e-05, |
| "loss": 4.4569, |
| "step": 36864 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9388038756180545e-05, |
| "loss": 4.4626, |
| "step": 37376 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9379652808670025e-05, |
| "loss": 4.4649, |
| "step": 37888 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9371266861159505e-05, |
| "loss": 4.4491, |
| "step": 38400 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9362897292452714e-05, |
| "loss": 4.4373, |
| "step": 38912 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9354511344942194e-05, |
| "loss": 4.4282, |
| "step": 39424 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9346125397431674e-05, |
| "loss": 4.4274, |
| "step": 39936 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9337739449921154e-05, |
| "loss": 4.4201, |
| "step": 40448 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9329353502410634e-05, |
| "loss": 4.4157, |
| "step": 40960 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9320967554900114e-05, |
| "loss": 4.418, |
| "step": 41472 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9312581607389594e-05, |
| "loss": 4.4189, |
| "step": 41984 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.9304195659879074e-05, |
| "loss": 4.4145, |
| "step": 42496 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.929582609117229e-05, |
| "loss": 4.3895, |
| "step": 43008 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.928744014366177e-05, |
| "loss": 4.3879, |
| "step": 43520 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.927905419615125e-05, |
| "loss": 4.3972, |
| "step": 44032 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.927066824864073e-05, |
| "loss": 4.4005, |
| "step": 44544 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.926229867993394e-05, |
| "loss": 4.3816, |
| "step": 45056 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.925392911122715e-05, |
| "loss": 4.3833, |
| "step": 45568 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.924554316371663e-05, |
| "loss": 4.388, |
| "step": 46080 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.923715721620611e-05, |
| "loss": 4.3728, |
| "step": 46592 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.922878764749932e-05, |
| "loss": 4.369, |
| "step": 47104 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.92204016999888e-05, |
| "loss": 4.3723, |
| "step": 47616 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.921201575247828e-05, |
| "loss": 4.3629, |
| "step": 48128 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.920362980496776e-05, |
| "loss": 4.3675, |
| "step": 48640 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.919524385745724e-05, |
| "loss": 4.3589, |
| "step": 49152 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.918685790994672e-05, |
| "loss": 4.3442, |
| "step": 49664 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.91784719624362e-05, |
| "loss": 4.3522, |
| "step": 50176 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9170086014925676e-05, |
| "loss": 4.353, |
| "step": 50688 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9161700067415156e-05, |
| "loss": 4.3479, |
| "step": 51200 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9153314119904636e-05, |
| "loss": 4.3472, |
| "step": 51712 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9144928172394116e-05, |
| "loss": 4.3415, |
| "step": 52224 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9136542224883596e-05, |
| "loss": 4.3384, |
| "step": 52736 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9128172656176805e-05, |
| "loss": 4.3383, |
| "step": 53248 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.9119786708666285e-05, |
| "loss": 4.3137, |
| "step": 53760 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.91114171399595e-05, |
| "loss": 4.3176, |
| "step": 54272 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.910303119244898e-05, |
| "loss": 4.3223, |
| "step": 54784 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.909464524493846e-05, |
| "loss": 4.3199, |
| "step": 55296 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.908625929742794e-05, |
| "loss": 4.3105, |
| "step": 55808 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.907787334991742e-05, |
| "loss": 4.3065, |
| "step": 56320 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.90694874024069e-05, |
| "loss": 4.3039, |
| "step": 56832 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.906110145489638e-05, |
| "loss": 4.3084, |
| "step": 57344 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.905273188618959e-05, |
| "loss": 4.303, |
| "step": 57856 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.904434593867907e-05, |
| "loss": 4.3037, |
| "step": 58368 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.903595999116855e-05, |
| "loss": 4.2976, |
| "step": 58880 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.902757404365803e-05, |
| "loss": 4.3028, |
| "step": 59392 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.901918809614751e-05, |
| "loss": 4.2989, |
| "step": 59904 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.901080214863699e-05, |
| "loss": 4.2949, |
| "step": 60416 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.900241620112647e-05, |
| "loss": 4.2981, |
| "step": 60928 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8994046632419686e-05, |
| "loss": 4.2965, |
| "step": 61440 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8985660684909166e-05, |
| "loss": 4.285, |
| "step": 61952 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8977274737398646e-05, |
| "loss": 4.2916, |
| "step": 62464 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8968888789888125e-05, |
| "loss": 4.2792, |
| "step": 62976 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8960519221181335e-05, |
| "loss": 4.2861, |
| "step": 63488 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8952133273670815e-05, |
| "loss": 4.264, |
| "step": 64000 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8943747326160294e-05, |
| "loss": 4.2766, |
| "step": 64512 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8935361378649774e-05, |
| "loss": 4.2625, |
| "step": 65024 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8926991809942984e-05, |
| "loss": 4.2671, |
| "step": 65536 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8918605862432463e-05, |
| "loss": 4.2521, |
| "step": 66048 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8910219914921943e-05, |
| "loss": 4.2818, |
| "step": 66560 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8901833967411423e-05, |
| "loss": 4.2687, |
| "step": 67072 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.88934480199009e-05, |
| "loss": 4.2551, |
| "step": 67584 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.888506207239038e-05, |
| "loss": 4.2565, |
| "step": 68096 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.887667612487986e-05, |
| "loss": 4.2617, |
| "step": 68608 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.886829017736934e-05, |
| "loss": 4.2646, |
| "step": 69120 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.885992060866256e-05, |
| "loss": 4.2605, |
| "step": 69632 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.885155103995577e-05, |
| "loss": 4.2494, |
| "step": 70144 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.884316509244525e-05, |
| "loss": 4.2561, |
| "step": 70656 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.883477914493473e-05, |
| "loss": 4.2434, |
| "step": 71168 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.882639319742421e-05, |
| "loss": 4.2566, |
| "step": 71680 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.881800724991369e-05, |
| "loss": 4.2391, |
| "step": 72192 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.880962130240316e-05, |
| "loss": 4.2418, |
| "step": 72704 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.880123535489264e-05, |
| "loss": 4.2418, |
| "step": 73216 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.879286578618586e-05, |
| "loss": 4.2315, |
| "step": 73728 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.878447983867534e-05, |
| "loss": 4.2366, |
| "step": 74240 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.877609389116482e-05, |
| "loss": 4.2352, |
| "step": 74752 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.87677079436543e-05, |
| "loss": 4.2335, |
| "step": 75264 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.875932199614378e-05, |
| "loss": 4.2347, |
| "step": 75776 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.8750952427436986e-05, |
| "loss": 4.2251, |
| "step": 76288 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 4.1916117668151855, |
| "eval_runtime": 306.2873, |
| "eval_samples_per_second": 1245.86, |
| "eval_steps_per_second": 38.934, |
| "step": 76320 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8742566479926466e-05, |
| "loss": 4.21, |
| "step": 76800 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8734180532415946e-05, |
| "loss": 4.2084, |
| "step": 77312 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8725794584905426e-05, |
| "loss": 4.2256, |
| "step": 77824 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8717408637394906e-05, |
| "loss": 4.2102, |
| "step": 78336 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8709039068688115e-05, |
| "loss": 4.2279, |
| "step": 78848 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8700653121177595e-05, |
| "loss": 4.2004, |
| "step": 79360 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8692267173667075e-05, |
| "loss": 4.2057, |
| "step": 79872 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8683881226156555e-05, |
| "loss": 4.1917, |
| "step": 80384 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.867551165744977e-05, |
| "loss": 4.2061, |
| "step": 80896 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.866712570993925e-05, |
| "loss": 4.1993, |
| "step": 81408 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.865873976242873e-05, |
| "loss": 4.2053, |
| "step": 81920 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.865035381491821e-05, |
| "loss": 4.201, |
| "step": 82432 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.864196786740769e-05, |
| "loss": 4.187, |
| "step": 82944 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.863358191989717e-05, |
| "loss": 4.1871, |
| "step": 83456 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.862519597238665e-05, |
| "loss": 4.1836, |
| "step": 83968 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.861681002487613e-05, |
| "loss": 4.178, |
| "step": 84480 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.860844045616934e-05, |
| "loss": 4.1853, |
| "step": 84992 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.860005450865882e-05, |
| "loss": 4.1823, |
| "step": 85504 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.85916685611483e-05, |
| "loss": 4.1787, |
| "step": 86016 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.858328261363778e-05, |
| "loss": 4.1977, |
| "step": 86528 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8574913044930995e-05, |
| "loss": 4.1695, |
| "step": 87040 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8566527097420475e-05, |
| "loss": 4.1811, |
| "step": 87552 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8558141149909955e-05, |
| "loss": 4.177, |
| "step": 88064 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8549755202399435e-05, |
| "loss": 4.1852, |
| "step": 88576 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8541385633692644e-05, |
| "loss": 4.1614, |
| "step": 89088 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8532999686182124e-05, |
| "loss": 4.1699, |
| "step": 89600 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8524613738671604e-05, |
| "loss": 4.1634, |
| "step": 90112 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.8516227791161084e-05, |
| "loss": 4.1678, |
| "step": 90624 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.850785822245429e-05, |
| "loss": 4.152, |
| "step": 91136 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.849947227494377e-05, |
| "loss": 4.1549, |
| "step": 91648 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.849108632743325e-05, |
| "loss": 4.1647, |
| "step": 92160 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.848270037992273e-05, |
| "loss": 4.1587, |
| "step": 92672 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.847431443241221e-05, |
| "loss": 4.1582, |
| "step": 93184 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.846592848490169e-05, |
| "loss": 4.1631, |
| "step": 93696 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.845755891619491e-05, |
| "loss": 4.1567, |
| "step": 94208 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.844917296868439e-05, |
| "loss": 4.156, |
| "step": 94720 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.844078702117387e-05, |
| "loss": 4.1441, |
| "step": 95232 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.843240107366335e-05, |
| "loss": 4.1506, |
| "step": 95744 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.842403150495656e-05, |
| "loss": 4.1361, |
| "step": 96256 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.841564555744604e-05, |
| "loss": 4.1446, |
| "step": 96768 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.840725960993552e-05, |
| "loss": 4.1445, |
| "step": 97280 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8398873662425e-05, |
| "loss": 4.1443, |
| "step": 97792 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.839050409371821e-05, |
| "loss": 4.131, |
| "step": 98304 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8382118146207687e-05, |
| "loss": 4.1418, |
| "step": 98816 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8373732198697167e-05, |
| "loss": 4.1444, |
| "step": 99328 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8365346251186646e-05, |
| "loss": 4.1378, |
| "step": 99840 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.835697668247986e-05, |
| "loss": 4.1415, |
| "step": 100352 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.834859073496934e-05, |
| "loss": 4.1123, |
| "step": 100864 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.834020478745882e-05, |
| "loss": 4.1183, |
| "step": 101376 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8331818839948295e-05, |
| "loss": 4.1369, |
| "step": 101888 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.832344927124151e-05, |
| "loss": 4.1288, |
| "step": 102400 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.831506332373099e-05, |
| "loss": 4.1241, |
| "step": 102912 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.830667737622047e-05, |
| "loss": 4.1072, |
| "step": 103424 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8298291428709944e-05, |
| "loss": 4.1122, |
| "step": 103936 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.828992186000316e-05, |
| "loss": 4.1054, |
| "step": 104448 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.828153591249264e-05, |
| "loss": 4.1295, |
| "step": 104960 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.827314996498212e-05, |
| "loss": 4.1043, |
| "step": 105472 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.82647640174716e-05, |
| "loss": 4.1174, |
| "step": 105984 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8256394448764816e-05, |
| "loss": 4.1219, |
| "step": 106496 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8248008501254296e-05, |
| "loss": 4.1011, |
| "step": 107008 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.823962255374377e-05, |
| "loss": 4.1084, |
| "step": 107520 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.823123660623325e-05, |
| "loss": 4.0997, |
| "step": 108032 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8222867037526465e-05, |
| "loss": 4.0961, |
| "step": 108544 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8214481090015945e-05, |
| "loss": 4.0947, |
| "step": 109056 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.820609514250542e-05, |
| "loss": 4.1132, |
| "step": 109568 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.81977091949949e-05, |
| "loss": 4.1023, |
| "step": 110080 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.818932324748438e-05, |
| "loss": 4.0926, |
| "step": 110592 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8180953678777594e-05, |
| "loss": 4.0899, |
| "step": 111104 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8172567731267074e-05, |
| "loss": 4.0915, |
| "step": 111616 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8164181783756554e-05, |
| "loss": 4.1056, |
| "step": 112128 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8155795836246034e-05, |
| "loss": 4.1072, |
| "step": 112640 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.814742626753924e-05, |
| "loss": 4.1014, |
| "step": 113152 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.813904032002872e-05, |
| "loss": 4.11, |
| "step": 113664 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.81306543725182e-05, |
| "loss": 4.1085, |
| "step": 114176 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.812226842500768e-05, |
| "loss": 4.104, |
| "step": 114688 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.811389885630089e-05, |
| "loss": 4.0933, |
| "step": 115200 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.810551290879037e-05, |
| "loss": 4.0905, |
| "step": 115712 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.809712696127985e-05, |
| "loss": 4.0904, |
| "step": 116224 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.808874101376933e-05, |
| "loss": 4.0884, |
| "step": 116736 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.808037144506255e-05, |
| "loss": 4.0851, |
| "step": 117248 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.807198549755203e-05, |
| "loss": 4.0917, |
| "step": 117760 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.806359955004151e-05, |
| "loss": 4.0961, |
| "step": 118272 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.805521360253099e-05, |
| "loss": 4.0878, |
| "step": 118784 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8046844033824197e-05, |
| "loss": 4.074, |
| "step": 119296 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8038458086313677e-05, |
| "loss": 4.0735, |
| "step": 119808 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8030072138803156e-05, |
| "loss": 4.0808, |
| "step": 120320 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8021702570096366e-05, |
| "loss": 4.0884, |
| "step": 120832 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8013316622585846e-05, |
| "loss": 4.0734, |
| "step": 121344 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.8004930675075325e-05, |
| "loss": 4.0766, |
| "step": 121856 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7996544727564805e-05, |
| "loss": 4.0848, |
| "step": 122368 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7988175158858015e-05, |
| "loss": 4.0733, |
| "step": 122880 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7979789211347494e-05, |
| "loss": 4.072, |
| "step": 123392 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.797140326383698e-05, |
| "loss": 4.0731, |
| "step": 123904 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.796301731632646e-05, |
| "loss": 4.0731, |
| "step": 124416 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.795464774761967e-05, |
| "loss": 4.0757, |
| "step": 124928 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.794626180010915e-05, |
| "loss": 4.0743, |
| "step": 125440 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.793787585259863e-05, |
| "loss": 4.0586, |
| "step": 125952 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.792948990508811e-05, |
| "loss": 4.0699, |
| "step": 126464 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.792110395757759e-05, |
| "loss": 4.0665, |
| "step": 126976 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.791271801006707e-05, |
| "loss": 4.075, |
| "step": 127488 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.790433206255655e-05, |
| "loss": 4.0663, |
| "step": 128000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.789594611504603e-05, |
| "loss": 4.0667, |
| "step": 128512 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.788757654633924e-05, |
| "loss": 4.0638, |
| "step": 129024 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.787919059882872e-05, |
| "loss": 4.0699, |
| "step": 129536 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7870821030121935e-05, |
| "loss": 4.0456, |
| "step": 130048 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7862435082611415e-05, |
| "loss": 4.0516, |
| "step": 130560 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7854049135100895e-05, |
| "loss": 4.0574, |
| "step": 131072 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7845663187590375e-05, |
| "loss": 4.0536, |
| "step": 131584 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7837277240079855e-05, |
| "loss": 4.0487, |
| "step": 132096 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7828891292569335e-05, |
| "loss": 4.0478, |
| "step": 132608 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7820505345058815e-05, |
| "loss": 4.0469, |
| "step": 133120 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7812119397548295e-05, |
| "loss": 4.0532, |
| "step": 133632 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7803749828841504e-05, |
| "loss": 4.0483, |
| "step": 134144 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7795363881330984e-05, |
| "loss": 4.0492, |
| "step": 134656 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7786977933820464e-05, |
| "loss": 4.0518, |
| "step": 135168 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7778591986309944e-05, |
| "loss": 4.0556, |
| "step": 135680 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7770206038799423e-05, |
| "loss": 4.0519, |
| "step": 136192 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.776183647009263e-05, |
| "loss": 4.0495, |
| "step": 136704 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.775345052258212e-05, |
| "loss": 4.0555, |
| "step": 137216 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.77450645750716e-05, |
| "loss": 4.0551, |
| "step": 137728 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.773667862756108e-05, |
| "loss": 4.0423, |
| "step": 138240 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.772829268005055e-05, |
| "loss": 4.0532, |
| "step": 138752 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.771990673254003e-05, |
| "loss": 4.0402, |
| "step": 139264 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.771152078502951e-05, |
| "loss": 4.0528, |
| "step": 139776 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.770313483751899e-05, |
| "loss": 4.0277, |
| "step": 140288 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.76947652688122e-05, |
| "loss": 4.045, |
| "step": 140800 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.768639570010542e-05, |
| "loss": 4.0278, |
| "step": 141312 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.76780097525949e-05, |
| "loss": 4.0333, |
| "step": 141824 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.766962380508437e-05, |
| "loss": 4.0237, |
| "step": 142336 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.766123785757386e-05, |
| "loss": 4.0533, |
| "step": 142848 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.765285191006334e-05, |
| "loss": 4.0424, |
| "step": 143360 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.764446596255282e-05, |
| "loss": 4.0345, |
| "step": 143872 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.76360800150423e-05, |
| "loss": 4.0288, |
| "step": 144384 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.762769406753178e-05, |
| "loss": 4.0392, |
| "step": 144896 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7619324498824986e-05, |
| "loss": 4.0425, |
| "step": 145408 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7610938551314466e-05, |
| "loss": 4.0401, |
| "step": 145920 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7602552603803946e-05, |
| "loss": 4.0335, |
| "step": 146432 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7594166656293426e-05, |
| "loss": 4.0358, |
| "step": 146944 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7585797087586635e-05, |
| "loss": 4.0281, |
| "step": 147456 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7577411140076115e-05, |
| "loss": 4.0437, |
| "step": 147968 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7569025192565595e-05, |
| "loss": 4.0239, |
| "step": 148480 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.7560639245055075e-05, |
| "loss": 4.0291, |
| "step": 148992 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.755226967634829e-05, |
| "loss": 4.0274, |
| "step": 149504 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.754388372883777e-05, |
| "loss": 4.0224, |
| "step": 150016 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.753551416013098e-05, |
| "loss": 4.0317, |
| "step": 150528 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.752712821262046e-05, |
| "loss": 4.0243, |
| "step": 151040 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.751874226510994e-05, |
| "loss": 4.029, |
| "step": 151552 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.751035631759942e-05, |
| "loss": 4.025, |
| "step": 152064 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.75019703700889e-05, |
| "loss": 4.0212, |
| "step": 152576 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_loss": 4.0242791175842285, |
| "eval_runtime": 309.0459, |
| "eval_samples_per_second": 1234.739, |
| "eval_steps_per_second": 38.587, |
| "step": 152640 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.749358442257838e-05, |
| "loss": 4.0084, |
| "step": 153088 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.748519847506786e-05, |
| "loss": 4.0066, |
| "step": 153600 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.747681252755734e-05, |
| "loss": 4.0256, |
| "step": 154112 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.746842658004682e-05, |
| "loss": 4.0107, |
| "step": 154624 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.74600406325363e-05, |
| "loss": 4.0352, |
| "step": 155136 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.745165468502578e-05, |
| "loss": 4.0031, |
| "step": 155648 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.744326873751526e-05, |
| "loss": 4.0124, |
| "step": 156160 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.743488279000474e-05, |
| "loss": 3.9956, |
| "step": 156672 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.7426513221297955e-05, |
| "loss": 4.0151, |
| "step": 157184 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.7418127273787435e-05, |
| "loss": 4.0088, |
| "step": 157696 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.740974132627691e-05, |
| "loss": 4.0119, |
| "step": 158208 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.740135537876639e-05, |
| "loss": 4.0097, |
| "step": 158720 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.7392985810059604e-05, |
| "loss": 3.9977, |
| "step": 159232 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.7384599862549084e-05, |
| "loss": 4.0036, |
| "step": 159744 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.737621391503856e-05, |
| "loss": 3.9992, |
| "step": 160256 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.736782796752804e-05, |
| "loss": 3.9955, |
| "step": 160768 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.735944202001752e-05, |
| "loss": 3.9959, |
| "step": 161280 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.7351056072507e-05, |
| "loss": 4.0005, |
| "step": 161792 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.734267012499648e-05, |
| "loss": 3.9953, |
| "step": 162304 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.733428417748596e-05, |
| "loss": 4.0182, |
| "step": 162816 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.732591460877917e-05, |
| "loss": 3.9932, |
| "step": 163328 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.731752866126865e-05, |
| "loss": 4.0009, |
| "step": 163840 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.730914271375813e-05, |
| "loss": 3.9991, |
| "step": 164352 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.730075676624761e-05, |
| "loss": 4.0074, |
| "step": 164864 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.729238719754082e-05, |
| "loss": 3.987, |
| "step": 165376 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.72840012500303e-05, |
| "loss": 3.9994, |
| "step": 165888 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.727561530251978e-05, |
| "loss": 3.9897, |
| "step": 166400 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.726722935500926e-05, |
| "loss": 3.9941, |
| "step": 166912 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.725884340749874e-05, |
| "loss": 3.9794, |
| "step": 167424 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.725045745998822e-05, |
| "loss": 3.9842, |
| "step": 167936 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.72420715124777e-05, |
| "loss": 3.9931, |
| "step": 168448 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.723370194377091e-05, |
| "loss": 3.9923, |
| "step": 168960 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.72253159962604e-05, |
| "loss": 3.9931, |
| "step": 169472 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.721693004874988e-05, |
| "loss": 3.9956, |
| "step": 169984 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.720854410123936e-05, |
| "loss": 3.9879, |
| "step": 170496 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.720015815372884e-05, |
| "loss": 3.9903, |
| "step": 171008 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7191788585022046e-05, |
| "loss": 3.9772, |
| "step": 171520 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7183402637511526e-05, |
| "loss": 3.9911, |
| "step": 172032 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7175016690001006e-05, |
| "loss": 3.9725, |
| "step": 172544 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7166630742490486e-05, |
| "loss": 3.982, |
| "step": 173056 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7158244794979966e-05, |
| "loss": 3.9796, |
| "step": 173568 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7149858847469446e-05, |
| "loss": 3.9881, |
| "step": 174080 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.714147289995892e-05, |
| "loss": 3.9789, |
| "step": 174592 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7133103331252135e-05, |
| "loss": 3.9773, |
| "step": 175104 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7124717383741615e-05, |
| "loss": 3.9873, |
| "step": 175616 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7116331436231095e-05, |
| "loss": 3.9818, |
| "step": 176128 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7107945488720575e-05, |
| "loss": 3.9814, |
| "step": 176640 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.709957592001379e-05, |
| "loss": 3.9655, |
| "step": 177152 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.709118997250327e-05, |
| "loss": 3.9608, |
| "step": 177664 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7082804024992744e-05, |
| "loss": 3.9779, |
| "step": 178176 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.7074418077482224e-05, |
| "loss": 3.9725, |
| "step": 178688 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.706604850877544e-05, |
| "loss": 3.9722, |
| "step": 179200 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.705766256126492e-05, |
| "loss": 3.9544, |
| "step": 179712 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.704927661375439e-05, |
| "loss": 3.9598, |
| "step": 180224 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.704089066624387e-05, |
| "loss": 3.9588, |
| "step": 180736 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.703252109753709e-05, |
| "loss": 3.9715, |
| "step": 181248 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.702413515002657e-05, |
| "loss": 3.9574, |
| "step": 181760 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.701574920251605e-05, |
| "loss": 3.9701, |
| "step": 182272 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.700736325500553e-05, |
| "loss": 3.969, |
| "step": 182784 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6998993686298745e-05, |
| "loss": 3.9565, |
| "step": 183296 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.699060773878822e-05, |
| "loss": 3.9598, |
| "step": 183808 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.69822217912777e-05, |
| "loss": 3.9561, |
| "step": 184320 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.697383584376718e-05, |
| "loss": 3.9469, |
| "step": 184832 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.696544989625666e-05, |
| "loss": 3.9573, |
| "step": 185344 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.695708032754987e-05, |
| "loss": 3.9623, |
| "step": 185856 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.694869438003935e-05, |
| "loss": 3.9673, |
| "step": 186368 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.694030843252883e-05, |
| "loss": 3.9444, |
| "step": 186880 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6931922485018307e-05, |
| "loss": 3.9496, |
| "step": 187392 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6923536537507787e-05, |
| "loss": 3.9479, |
| "step": 187904 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6915166968801e-05, |
| "loss": 3.9707, |
| "step": 188416 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.690678102129048e-05, |
| "loss": 3.9582, |
| "step": 188928 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.689839507377996e-05, |
| "loss": 3.9606, |
| "step": 189440 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.689000912626944e-05, |
| "loss": 3.9725, |
| "step": 189952 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.688162317875892e-05, |
| "loss": 3.9699, |
| "step": 190464 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.68732372312484e-05, |
| "loss": 3.9659, |
| "step": 190976 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.686485128373788e-05, |
| "loss": 3.9543, |
| "step": 191488 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.685646533622736e-05, |
| "loss": 3.9579, |
| "step": 192000 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.68481121463243e-05, |
| "loss": 3.9537, |
| "step": 192512 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.683972619881378e-05, |
| "loss": 3.9516, |
| "step": 193024 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.683134025130326e-05, |
| "loss": 3.9535, |
| "step": 193536 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.682295430379274e-05, |
| "loss": 3.9539, |
| "step": 194048 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.681456835628222e-05, |
| "loss": 3.9615, |
| "step": 194560 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.680618240877171e-05, |
| "loss": 3.9522, |
| "step": 195072 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6797812840064916e-05, |
| "loss": 3.9454, |
| "step": 195584 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6789426892554396e-05, |
| "loss": 3.9385, |
| "step": 196096 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6781040945043876e-05, |
| "loss": 3.9513, |
| "step": 196608 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6772654997533356e-05, |
| "loss": 3.9561, |
| "step": 197120 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6764285428826565e-05, |
| "loss": 3.9441, |
| "step": 197632 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.6755899481316045e-05, |
| "loss": 3.9433, |
| "step": 198144 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6747513533805525e-05, |
| "loss": 3.9555, |
| "step": 198656 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6739127586295005e-05, |
| "loss": 3.9365, |
| "step": 199168 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6730758017588214e-05, |
| "loss": 3.945, |
| "step": 199680 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6722372070077694e-05, |
| "loss": 3.9438, |
| "step": 200192 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6713986122567174e-05, |
| "loss": 3.9486, |
| "step": 200704 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.670561655386039e-05, |
| "loss": 3.9493, |
| "step": 201216 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.669723060634987e-05, |
| "loss": 3.9419, |
| "step": 201728 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.668884465883935e-05, |
| "loss": 3.9331, |
| "step": 202240 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.668045871132883e-05, |
| "loss": 3.9458, |
| "step": 202752 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.667207276381831e-05, |
| "loss": 3.9374, |
| "step": 203264 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.666368681630779e-05, |
| "loss": 3.9522, |
| "step": 203776 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.665530086879727e-05, |
| "loss": 3.9428, |
| "step": 204288 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.664691492128675e-05, |
| "loss": 3.9445, |
| "step": 204800 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.663854535257996e-05, |
| "loss": 3.936, |
| "step": 205312 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.663015940506944e-05, |
| "loss": 3.9487, |
| "step": 205824 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.662177345755892e-05, |
| "loss": 3.9215, |
| "step": 206336 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.66133875100484e-05, |
| "loss": 3.9291, |
| "step": 206848 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6605017941341614e-05, |
| "loss": 3.9343, |
| "step": 207360 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6596631993831094e-05, |
| "loss": 3.9331, |
| "step": 207872 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6588246046320574e-05, |
| "loss": 3.9246, |
| "step": 208384 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6579860098810054e-05, |
| "loss": 3.9283, |
| "step": 208896 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.657149053010326e-05, |
| "loss": 3.9258, |
| "step": 209408 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.656310458259274e-05, |
| "loss": 3.935, |
| "step": 209920 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.655471863508222e-05, |
| "loss": 3.9295, |
| "step": 210432 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.65463326875717e-05, |
| "loss": 3.926, |
| "step": 210944 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.653796311886491e-05, |
| "loss": 3.937, |
| "step": 211456 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.652957717135439e-05, |
| "loss": 3.939, |
| "step": 211968 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.652119122384387e-05, |
| "loss": 3.9377, |
| "step": 212480 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.651280527633335e-05, |
| "loss": 3.9274, |
| "step": 212992 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.650443570762657e-05, |
| "loss": 3.9392, |
| "step": 213504 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.649604976011605e-05, |
| "loss": 3.9351, |
| "step": 214016 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.648766381260553e-05, |
| "loss": 3.9269, |
| "step": 214528 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6479277865095e-05, |
| "loss": 3.9398, |
| "step": 215040 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.647090829638822e-05, |
| "loss": 3.9207, |
| "step": 215552 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.64625223488777e-05, |
| "loss": 3.9363, |
| "step": 216064 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.645413640136718e-05, |
| "loss": 3.9146, |
| "step": 216576 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.644575045385665e-05, |
| "loss": 3.9296, |
| "step": 217088 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6437380885149866e-05, |
| "loss": 3.9133, |
| "step": 217600 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6428994937639346e-05, |
| "loss": 3.9187, |
| "step": 218112 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.642060899012882e-05, |
| "loss": 3.9129, |
| "step": 218624 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6412223042618306e-05, |
| "loss": 3.9371, |
| "step": 219136 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.640385347391152e-05, |
| "loss": 3.9272, |
| "step": 219648 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6395467526401e-05, |
| "loss": 3.9257, |
| "step": 220160 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6387081578890475e-05, |
| "loss": 3.917, |
| "step": 220672 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6378695631379955e-05, |
| "loss": 3.9251, |
| "step": 221184 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.637032606267317e-05, |
| "loss": 3.9353, |
| "step": 221696 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.636194011516265e-05, |
| "loss": 3.926, |
| "step": 222208 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6353554167652124e-05, |
| "loss": 3.9213, |
| "step": 222720 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6345168220141604e-05, |
| "loss": 3.9314, |
| "step": 223232 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.633679865143482e-05, |
| "loss": 3.9211, |
| "step": 223744 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.632841270392429e-05, |
| "loss": 3.9263, |
| "step": 224256 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.632002675641377e-05, |
| "loss": 3.916, |
| "step": 224768 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.631164080890326e-05, |
| "loss": 3.9222, |
| "step": 225280 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.6303271240196475e-05, |
| "loss": 3.9173, |
| "step": 225792 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.629488529268595e-05, |
| "loss": 3.9163, |
| "step": 226304 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.628649934517543e-05, |
| "loss": 3.9179, |
| "step": 226816 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.627811339766491e-05, |
| "loss": 3.9233, |
| "step": 227328 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.626972745015439e-05, |
| "loss": 3.9161, |
| "step": 227840 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.62613578814476e-05, |
| "loss": 3.9174, |
| "step": 228352 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.625297193393708e-05, |
| "loss": 3.9155, |
| "step": 228864 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 3.9504966735839844, |
| "eval_runtime": 309.784, |
| "eval_samples_per_second": 1231.797, |
| "eval_steps_per_second": 38.495, |
| "step": 228960 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.624458598642656e-05, |
| "loss": 3.9086, |
| "step": 229376 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.623620003891604e-05, |
| "loss": 3.9037, |
| "step": 229888 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6227830470209246e-05, |
| "loss": 3.919, |
| "step": 230400 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6219444522698726e-05, |
| "loss": 3.9051, |
| "step": 230912 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.621105857518821e-05, |
| "loss": 3.9327, |
| "step": 231424 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.620267262767769e-05, |
| "loss": 3.8971, |
| "step": 231936 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.61943030589709e-05, |
| "loss": 3.9087, |
| "step": 232448 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.618591711146038e-05, |
| "loss": 3.8918, |
| "step": 232960 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.617753116394986e-05, |
| "loss": 3.9105, |
| "step": 233472 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.616914521643934e-05, |
| "loss": 3.9094, |
| "step": 233984 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.616077564773255e-05, |
| "loss": 3.9049, |
| "step": 234496 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.615238970022203e-05, |
| "loss": 3.9078, |
| "step": 235008 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.614402013151524e-05, |
| "loss": 3.8962, |
| "step": 235520 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.613563418400472e-05, |
| "loss": 3.9046, |
| "step": 236032 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.61272482364942e-05, |
| "loss": 3.8991, |
| "step": 236544 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.611886228898368e-05, |
| "loss": 3.8985, |
| "step": 237056 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.611047634147316e-05, |
| "loss": 3.8961, |
| "step": 237568 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.610209039396265e-05, |
| "loss": 3.8987, |
| "step": 238080 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.609370444645213e-05, |
| "loss": 3.8987, |
| "step": 238592 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6085318498941607e-05, |
| "loss": 3.9156, |
| "step": 239104 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6076948930234816e-05, |
| "loss": 3.8944, |
| "step": 239616 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6068562982724296e-05, |
| "loss": 3.9007, |
| "step": 240128 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6060177035213776e-05, |
| "loss": 3.8994, |
| "step": 240640 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6051791087703256e-05, |
| "loss": 3.9095, |
| "step": 241152 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6043421518996465e-05, |
| "loss": 3.8879, |
| "step": 241664 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6035035571485945e-05, |
| "loss": 3.9029, |
| "step": 242176 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6026649623975425e-05, |
| "loss": 3.8922, |
| "step": 242688 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6018263676464904e-05, |
| "loss": 3.8932, |
| "step": 243200 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.6009894107758114e-05, |
| "loss": 3.8818, |
| "step": 243712 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.60015081602476e-05, |
| "loss": 3.8937, |
| "step": 244224 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.599312221273708e-05, |
| "loss": 3.8941, |
| "step": 244736 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.598473626522656e-05, |
| "loss": 3.898, |
| "step": 245248 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.597636669651977e-05, |
| "loss": 3.896, |
| "step": 245760 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.596798074900925e-05, |
| "loss": 3.9022, |
| "step": 246272 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.595959480149873e-05, |
| "loss": 3.8902, |
| "step": 246784 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.595120885398821e-05, |
| "loss": 3.8973, |
| "step": 247296 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.594283928528142e-05, |
| "loss": 3.8847, |
| "step": 247808 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.59344533377709e-05, |
| "loss": 3.8939, |
| "step": 248320 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.592606739026038e-05, |
| "loss": 3.8779, |
| "step": 248832 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.591768144274986e-05, |
| "loss": 3.8889, |
| "step": 249344 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.590931187404307e-05, |
| "loss": 3.8875, |
| "step": 249856 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5900925926532554e-05, |
| "loss": 3.8966, |
| "step": 250368 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5892539979022034e-05, |
| "loss": 3.8866, |
| "step": 250880 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5884154031511514e-05, |
| "loss": 3.8867, |
| "step": 251392 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.587578446280472e-05, |
| "loss": 3.8917, |
| "step": 251904 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.58673985152942e-05, |
| "loss": 3.8901, |
| "step": 252416 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.585901256778368e-05, |
| "loss": 3.8905, |
| "step": 252928 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.585062662027316e-05, |
| "loss": 3.8777, |
| "step": 253440 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.584225705156637e-05, |
| "loss": 3.8638, |
| "step": 253952 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.583387110405585e-05, |
| "loss": 3.8885, |
| "step": 254464 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.582548515654533e-05, |
| "loss": 3.8809, |
| "step": 254976 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.581709920903481e-05, |
| "loss": 3.8788, |
| "step": 255488 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.580872964032802e-05, |
| "loss": 3.8702, |
| "step": 256000 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.580034369281751e-05, |
| "loss": 3.8684, |
| "step": 256512 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.579195774530699e-05, |
| "loss": 3.8649, |
| "step": 257024 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.57835881766002e-05, |
| "loss": 3.8829, |
| "step": 257536 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.577520222908968e-05, |
| "loss": 3.8694, |
| "step": 258048 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.576681628157916e-05, |
| "loss": 3.8769, |
| "step": 258560 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5758430334068637e-05, |
| "loss": 3.8845, |
| "step": 259072 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5750044386558117e-05, |
| "loss": 3.8687, |
| "step": 259584 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5741658439047596e-05, |
| "loss": 3.8712, |
| "step": 260096 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5733272491537076e-05, |
| "loss": 3.8741, |
| "step": 260608 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.572488654402655e-05, |
| "loss": 3.855, |
| "step": 261120 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5716516975319765e-05, |
| "loss": 3.8699, |
| "step": 261632 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5708131027809245e-05, |
| "loss": 3.8787, |
| "step": 262144 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5699745080298725e-05, |
| "loss": 3.8767, |
| "step": 262656 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5691359132788205e-05, |
| "loss": 3.8608, |
| "step": 263168 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.568298956408142e-05, |
| "loss": 3.8645, |
| "step": 263680 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.56746036165709e-05, |
| "loss": 3.8614, |
| "step": 264192 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5666217669060374e-05, |
| "loss": 3.8786, |
| "step": 264704 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5657831721549854e-05, |
| "loss": 3.8745, |
| "step": 265216 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.564946215284307e-05, |
| "loss": 3.8772, |
| "step": 265728 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.564107620533255e-05, |
| "loss": 3.8854, |
| "step": 266240 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.563269025782202e-05, |
| "loss": 3.8852, |
| "step": 266752 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.56243043103115e-05, |
| "loss": 3.8852, |
| "step": 267264 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.561593474160472e-05, |
| "loss": 3.8654, |
| "step": 267776 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.56075487940942e-05, |
| "loss": 3.8749, |
| "step": 268288 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.559916284658368e-05, |
| "loss": 3.8737, |
| "step": 268800 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.559077689907316e-05, |
| "loss": 3.8668, |
| "step": 269312 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5582407330366375e-05, |
| "loss": 3.8742, |
| "step": 269824 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.557402138285585e-05, |
| "loss": 3.869, |
| "step": 270336 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.556563543534533e-05, |
| "loss": 3.8763, |
| "step": 270848 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.555724948783481e-05, |
| "loss": 3.8705, |
| "step": 271360 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5548879919128024e-05, |
| "loss": 3.8544, |
| "step": 271872 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.55404939716175e-05, |
| "loss": 3.8583, |
| "step": 272384 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.553210802410698e-05, |
| "loss": 3.8683, |
| "step": 272896 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.552372207659646e-05, |
| "loss": 3.8729, |
| "step": 273408 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.5515352507889666e-05, |
| "loss": 3.8637, |
| "step": 273920 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.550696656037915e-05, |
| "loss": 3.8598, |
| "step": 274432 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.549858061286863e-05, |
| "loss": 3.8716, |
| "step": 274944 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.549019466535811e-05, |
| "loss": 3.8572, |
| "step": 275456 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.548182509665132e-05, |
| "loss": 3.8592, |
| "step": 275968 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.54734391491408e-05, |
| "loss": 3.8647, |
| "step": 276480 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.546505320163028e-05, |
| "loss": 3.8701, |
| "step": 276992 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.54566836329235e-05, |
| "loss": 3.8711, |
| "step": 277504 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.544829768541297e-05, |
| "loss": 3.8615, |
| "step": 278016 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.543991173790245e-05, |
| "loss": 3.8492, |
| "step": 278528 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.543152579039193e-05, |
| "loss": 3.8687, |
| "step": 279040 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.542313984288141e-05, |
| "loss": 3.855, |
| "step": 279552 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.541475389537089e-05, |
| "loss": 3.8746, |
| "step": 280064 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.540636794786037e-05, |
| "loss": 3.8662, |
| "step": 280576 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.539798200034985e-05, |
| "loss": 3.8622, |
| "step": 281088 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5389612431643066e-05, |
| "loss": 3.8578, |
| "step": 281600 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5381226484132546e-05, |
| "loss": 3.8698, |
| "step": 282112 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5372856915425755e-05, |
| "loss": 3.8451, |
| "step": 282624 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5364470967915235e-05, |
| "loss": 3.8527, |
| "step": 283136 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5356085020404715e-05, |
| "loss": 3.8511, |
| "step": 283648 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5347699072894195e-05, |
| "loss": 3.8566, |
| "step": 284160 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5339313125383675e-05, |
| "loss": 3.8466, |
| "step": 284672 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5330927177873155e-05, |
| "loss": 3.8527, |
| "step": 285184 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5322541230362635e-05, |
| "loss": 3.8519, |
| "step": 285696 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5314155282852115e-05, |
| "loss": 3.8502, |
| "step": 286208 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5305785714145324e-05, |
| "loss": 3.8561, |
| "step": 286720 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5297399766634804e-05, |
| "loss": 3.8462, |
| "step": 287232 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5289013819124284e-05, |
| "loss": 3.8638, |
| "step": 287744 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.528062787161377e-05, |
| "loss": 3.8607, |
| "step": 288256 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.527225830290698e-05, |
| "loss": 3.8612, |
| "step": 288768 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.526387235539646e-05, |
| "loss": 3.8518, |
| "step": 289280 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.525548640788594e-05, |
| "loss": 3.8616, |
| "step": 289792 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.524710046037542e-05, |
| "loss": 3.8612, |
| "step": 290304 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.523873089166863e-05, |
| "loss": 3.8489, |
| "step": 290816 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.523034494415811e-05, |
| "loss": 3.8627, |
| "step": 291328 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.522195899664759e-05, |
| "loss": 3.8463, |
| "step": 291840 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.521357304913707e-05, |
| "loss": 3.8591, |
| "step": 292352 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.520520348043028e-05, |
| "loss": 3.8422, |
| "step": 292864 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.519681753291976e-05, |
| "loss": 3.8539, |
| "step": 293376 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.518843158540924e-05, |
| "loss": 3.8374, |
| "step": 293888 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5180045637898725e-05, |
| "loss": 3.8452, |
| "step": 294400 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5171676069191934e-05, |
| "loss": 3.8414, |
| "step": 294912 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5163290121681414e-05, |
| "loss": 3.8577, |
| "step": 295424 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5154904174170894e-05, |
| "loss": 3.849, |
| "step": 295936 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5146518226660373e-05, |
| "loss": 3.8557, |
| "step": 296448 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.513814865795358e-05, |
| "loss": 3.8431, |
| "step": 296960 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.512976271044306e-05, |
| "loss": 3.8517, |
| "step": 297472 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.512137676293254e-05, |
| "loss": 3.8594, |
| "step": 297984 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.511299081542202e-05, |
| "loss": 3.8531, |
| "step": 298496 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.510462124671523e-05, |
| "loss": 3.8491, |
| "step": 299008 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.509623529920471e-05, |
| "loss": 3.8558, |
| "step": 299520 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.508784935169419e-05, |
| "loss": 3.8485, |
| "step": 300032 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.507946340418368e-05, |
| "loss": 3.8589, |
| "step": 300544 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.507109383547689e-05, |
| "loss": 3.839, |
| "step": 301056 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.506270788796637e-05, |
| "loss": 3.8517, |
| "step": 301568 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.505432194045585e-05, |
| "loss": 3.8434, |
| "step": 302080 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.504593599294533e-05, |
| "loss": 3.8433, |
| "step": 302592 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5037566424238536e-05, |
| "loss": 3.85, |
| "step": 303104 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5029180476728016e-05, |
| "loss": 3.8475, |
| "step": 303616 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5020794529217496e-05, |
| "loss": 3.846, |
| "step": 304128 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5012408581706976e-05, |
| "loss": 3.8419, |
| "step": 304640 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.5004039013000185e-05, |
| "loss": 3.8486, |
| "step": 305152 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_loss": 3.909996747970581, |
| "eval_runtime": 309.6063, |
| "eval_samples_per_second": 1232.504, |
| "eval_steps_per_second": 38.517, |
| "step": 305280 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4995653065489665e-05, |
| "loss": 3.8344, |
| "step": 305664 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4987267117979145e-05, |
| "loss": 3.8315, |
| "step": 306176 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.497888117046863e-05, |
| "loss": 3.8476, |
| "step": 306688 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4970495222958105e-05, |
| "loss": 3.8364, |
| "step": 307200 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4962109275447585e-05, |
| "loss": 3.8607, |
| "step": 307712 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4953723327937065e-05, |
| "loss": 3.834, |
| "step": 308224 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4945337380426545e-05, |
| "loss": 3.834, |
| "step": 308736 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4936951432916025e-05, |
| "loss": 3.8216, |
| "step": 309248 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4928581864209234e-05, |
| "loss": 3.8393, |
| "step": 309760 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4920195916698714e-05, |
| "loss": 3.8424, |
| "step": 310272 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4911809969188194e-05, |
| "loss": 3.8336, |
| "step": 310784 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4903424021677674e-05, |
| "loss": 3.8359, |
| "step": 311296 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.489507083177462e-05, |
| "loss": 3.8275, |
| "step": 311808 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.48866848842641e-05, |
| "loss": 3.8348, |
| "step": 312320 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.487829893675358e-05, |
| "loss": 3.8314, |
| "step": 312832 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.486991298924306e-05, |
| "loss": 3.8272, |
| "step": 313344 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.486152704173254e-05, |
| "loss": 3.8262, |
| "step": 313856 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.485314109422202e-05, |
| "loss": 3.8337, |
| "step": 314368 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.48447551467115e-05, |
| "loss": 3.8284, |
| "step": 314880 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.483636919920098e-05, |
| "loss": 3.8432, |
| "step": 315392 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4828016009297924e-05, |
| "loss": 3.831, |
| "step": 315904 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.48196300617874e-05, |
| "loss": 3.835, |
| "step": 316416 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.481124411427688e-05, |
| "loss": 3.8323, |
| "step": 316928 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.480285816676636e-05, |
| "loss": 3.8401, |
| "step": 317440 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.4794472219255837e-05, |
| "loss": 3.8206, |
| "step": 317952 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.478608627174532e-05, |
| "loss": 3.8341, |
| "step": 318464 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.47777003242348e-05, |
| "loss": 3.8274, |
| "step": 318976 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.476933075552801e-05, |
| "loss": 3.8242, |
| "step": 319488 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.476094480801749e-05, |
| "loss": 3.816, |
| "step": 320000 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.475255886050697e-05, |
| "loss": 3.8235, |
| "step": 320512 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.474417291299645e-05, |
| "loss": 3.8315, |
| "step": 321024 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.473580334428966e-05, |
| "loss": 3.8311, |
| "step": 321536 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.472741739677914e-05, |
| "loss": 3.8321, |
| "step": 322048 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.471903144926862e-05, |
| "loss": 3.8332, |
| "step": 322560 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.47106455017581e-05, |
| "loss": 3.8249, |
| "step": 323072 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.470227593305131e-05, |
| "loss": 3.8305, |
| "step": 323584 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.469388998554079e-05, |
| "loss": 3.8234, |
| "step": 324096 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.468550403803028e-05, |
| "loss": 3.8262, |
| "step": 324608 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.467711809051976e-05, |
| "loss": 3.8154, |
| "step": 325120 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4668748521812966e-05, |
| "loss": 3.8246, |
| "step": 325632 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4660362574302446e-05, |
| "loss": 3.8213, |
| "step": 326144 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4651976626791926e-05, |
| "loss": 3.8294, |
| "step": 326656 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4643590679281406e-05, |
| "loss": 3.8224, |
| "step": 327168 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4635221110574615e-05, |
| "loss": 3.8217, |
| "step": 327680 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4626835163064095e-05, |
| "loss": 3.8323, |
| "step": 328192 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4618449215553575e-05, |
| "loss": 3.8233, |
| "step": 328704 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4610063268043055e-05, |
| "loss": 3.8232, |
| "step": 329216 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4601693699336264e-05, |
| "loss": 3.8145, |
| "step": 329728 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4593307751825744e-05, |
| "loss": 3.7998, |
| "step": 330240 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4584921804315224e-05, |
| "loss": 3.8252, |
| "step": 330752 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.457653585680471e-05, |
| "loss": 3.8183, |
| "step": 331264 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.456816628809792e-05, |
| "loss": 3.8173, |
| "step": 331776 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.455979671939113e-05, |
| "loss": 3.8017, |
| "step": 332288 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.455141077188061e-05, |
| "loss": 3.8081, |
| "step": 332800 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.454302482437009e-05, |
| "loss": 3.7999, |
| "step": 333312 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.453463887685957e-05, |
| "loss": 3.8214, |
| "step": 333824 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.452625292934905e-05, |
| "loss": 3.8077, |
| "step": 334336 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.451786698183853e-05, |
| "loss": 3.8109, |
| "step": 334848 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.450948103432801e-05, |
| "loss": 3.8225, |
| "step": 335360 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.450111146562122e-05, |
| "loss": 3.806, |
| "step": 335872 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4492741896914433e-05, |
| "loss": 3.8098, |
| "step": 336384 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4484355949403913e-05, |
| "loss": 3.8149, |
| "step": 336896 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4475970001893393e-05, |
| "loss": 3.7934, |
| "step": 337408 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.446758405438287e-05, |
| "loss": 3.81, |
| "step": 337920 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.445919810687235e-05, |
| "loss": 3.8118, |
| "step": 338432 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.445081215936183e-05, |
| "loss": 3.8177, |
| "step": 338944 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.444242621185131e-05, |
| "loss": 3.7981, |
| "step": 339456 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.443404026434079e-05, |
| "loss": 3.8046, |
| "step": 339968 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4425670695634e-05, |
| "loss": 3.7966, |
| "step": 340480 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.441728474812348e-05, |
| "loss": 3.8208, |
| "step": 340992 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.440889880061296e-05, |
| "loss": 3.8129, |
| "step": 341504 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.440051285310244e-05, |
| "loss": 3.811, |
| "step": 342016 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.439214328439565e-05, |
| "loss": 3.8261, |
| "step": 342528 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.438375733688513e-05, |
| "loss": 3.8186, |
| "step": 343040 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.437537138937462e-05, |
| "loss": 3.8328, |
| "step": 343552 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.43669854418641e-05, |
| "loss": 3.8067, |
| "step": 344064 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.435861587315731e-05, |
| "loss": 3.8113, |
| "step": 344576 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.435022992564679e-05, |
| "loss": 3.8124, |
| "step": 345088 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.434184397813627e-05, |
| "loss": 3.8034, |
| "step": 345600 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4333474409429476e-05, |
| "loss": 3.8161, |
| "step": 346112 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4325088461918956e-05, |
| "loss": 3.8086, |
| "step": 346624 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4316702514408436e-05, |
| "loss": 3.8165, |
| "step": 347136 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4308316566897916e-05, |
| "loss": 3.8082, |
| "step": 347648 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4299930619387396e-05, |
| "loss": 3.7966, |
| "step": 348160 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4291544671876876e-05, |
| "loss": 3.8091, |
| "step": 348672 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.4283158724366356e-05, |
| "loss": 3.7997, |
| "step": 349184 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.427478915565957e-05, |
| "loss": 3.815, |
| "step": 349696 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.426640320814905e-05, |
| "loss": 3.806, |
| "step": 350208 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.425801726063853e-05, |
| "loss": 3.7992, |
| "step": 350720 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4249631313128005e-05, |
| "loss": 3.8116, |
| "step": 351232 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4241245365617485e-05, |
| "loss": 3.8046, |
| "step": 351744 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4232859418106965e-05, |
| "loss": 3.7964, |
| "step": 352256 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4224473470596445e-05, |
| "loss": 3.8041, |
| "step": 352768 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4216087523085925e-05, |
| "loss": 3.8144, |
| "step": 353280 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4207717954379134e-05, |
| "loss": 3.8102, |
| "step": 353792 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4199332006868614e-05, |
| "loss": 3.8082, |
| "step": 354304 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4190946059358094e-05, |
| "loss": 3.7894, |
| "step": 354816 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4182560111847573e-05, |
| "loss": 3.8077, |
| "step": 355328 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4174174164337053e-05, |
| "loss": 3.7985, |
| "step": 355840 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.416578821682654e-05, |
| "loss": 3.8131, |
| "step": 356352 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.415740226931602e-05, |
| "loss": 3.8111, |
| "step": 356864 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.41490163218055e-05, |
| "loss": 3.8047, |
| "step": 357376 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.414064675309871e-05, |
| "loss": 3.7996, |
| "step": 357888 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.413227718439192e-05, |
| "loss": 3.8143, |
| "step": 358400 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.41238912368814e-05, |
| "loss": 3.7832, |
| "step": 358912 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.411550528937088e-05, |
| "loss": 3.7967, |
| "step": 359424 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.410711934186036e-05, |
| "loss": 3.791, |
| "step": 359936 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.409873339434984e-05, |
| "loss": 3.8063, |
| "step": 360448 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.409034744683932e-05, |
| "loss": 3.7847, |
| "step": 360960 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.40819614993288e-05, |
| "loss": 3.7934, |
| "step": 361472 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.407357555181828e-05, |
| "loss": 3.7969, |
| "step": 361984 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4065205983111494e-05, |
| "loss": 3.7948, |
| "step": 362496 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4056820035600974e-05, |
| "loss": 3.7973, |
| "step": 363008 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4048434088090454e-05, |
| "loss": 3.794, |
| "step": 363520 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.4040048140579934e-05, |
| "loss": 3.8062, |
| "step": 364032 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.403167857187314e-05, |
| "loss": 3.8045, |
| "step": 364544 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.402329262436262e-05, |
| "loss": 3.8028, |
| "step": 365056 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.401492305565583e-05, |
| "loss": 3.7931, |
| "step": 365568 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.400653710814531e-05, |
| "loss": 3.8062, |
| "step": 366080 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.399815116063479e-05, |
| "loss": 3.8005, |
| "step": 366592 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.398976521312427e-05, |
| "loss": 3.7912, |
| "step": 367104 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.398137926561375e-05, |
| "loss": 3.8083, |
| "step": 367616 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.397299331810323e-05, |
| "loss": 3.7895, |
| "step": 368128 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.396462374939645e-05, |
| "loss": 3.8009, |
| "step": 368640 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.395623780188593e-05, |
| "loss": 3.7878, |
| "step": 369152 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.394785185437541e-05, |
| "loss": 3.7979, |
| "step": 369664 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.393946590686489e-05, |
| "loss": 3.785, |
| "step": 370176 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.393107995935437e-05, |
| "loss": 3.7868, |
| "step": 370688 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.392269401184384e-05, |
| "loss": 3.7902, |
| "step": 371200 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3914324443137056e-05, |
| "loss": 3.8016, |
| "step": 371712 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3905938495626536e-05, |
| "loss": 3.7955, |
| "step": 372224 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3897552548116016e-05, |
| "loss": 3.7984, |
| "step": 372736 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.388916660060549e-05, |
| "loss": 3.7854, |
| "step": 373248 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.388078065309497e-05, |
| "loss": 3.7989, |
| "step": 373760 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.387239470558445e-05, |
| "loss": 3.8038, |
| "step": 374272 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.386400875807393e-05, |
| "loss": 3.7987, |
| "step": 374784 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3855639189367145e-05, |
| "loss": 3.7943, |
| "step": 375296 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3847253241856625e-05, |
| "loss": 3.8005, |
| "step": 375808 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3838867294346105e-05, |
| "loss": 3.7931, |
| "step": 376320 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3830481346835585e-05, |
| "loss": 3.802, |
| "step": 376832 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3822111778128794e-05, |
| "loss": 3.7871, |
| "step": 377344 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3813725830618274e-05, |
| "loss": 3.796, |
| "step": 377856 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3805339883107754e-05, |
| "loss": 3.7909, |
| "step": 378368 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.3796953935597234e-05, |
| "loss": 3.7906, |
| "step": 378880 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.378858436689044e-05, |
| "loss": 3.7946, |
| "step": 379392 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.378019841937992e-05, |
| "loss": 3.7928, |
| "step": 379904 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.377182885067313e-05, |
| "loss": 3.7946, |
| "step": 380416 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.376344290316262e-05, |
| "loss": 3.7831, |
| "step": 380928 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.37550569556521e-05, |
| "loss": 3.798, |
| "step": 381440 |
| }, |
| { |
| "epoch": 0.03, |
| "eval_loss": 3.885251045227051, |
| "eval_runtime": 305.6198, |
| "eval_samples_per_second": 1248.581, |
| "eval_steps_per_second": 39.019, |
| "step": 381600 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.374667100814158e-05, |
| "loss": 3.7864, |
| "step": 381952 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.373828506063106e-05, |
| "loss": 3.7763, |
| "step": 382464 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.372989911312054e-05, |
| "loss": 3.7951, |
| "step": 382976 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.372151316561002e-05, |
| "loss": 3.7843, |
| "step": 383488 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.371314359690323e-05, |
| "loss": 3.8045, |
| "step": 384000 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.370475764939271e-05, |
| "loss": 3.7843, |
| "step": 384512 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.369638808068592e-05, |
| "loss": 3.7743, |
| "step": 385024 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.36880021331754e-05, |
| "loss": 3.7755, |
| "step": 385536 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.367961618566488e-05, |
| "loss": 3.7812, |
| "step": 386048 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.367123023815436e-05, |
| "loss": 3.7906, |
| "step": 386560 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.366284429064384e-05, |
| "loss": 3.781, |
| "step": 387072 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.365445834313332e-05, |
| "loss": 3.7858, |
| "step": 387584 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.364608877442653e-05, |
| "loss": 3.7768, |
| "step": 388096 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.363770282691601e-05, |
| "loss": 3.7785, |
| "step": 388608 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.362931687940549e-05, |
| "loss": 3.7832, |
| "step": 389120 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.362093093189497e-05, |
| "loss": 3.774, |
| "step": 389632 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.361254498438445e-05, |
| "loss": 3.7733, |
| "step": 390144 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.360415903687393e-05, |
| "loss": 3.7815, |
| "step": 390656 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.359577308936341e-05, |
| "loss": 3.7798, |
| "step": 391168 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.358738714185289e-05, |
| "loss": 3.791, |
| "step": 391680 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.357903395194983e-05, |
| "loss": 3.7823, |
| "step": 392192 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.357064800443931e-05, |
| "loss": 3.7838, |
| "step": 392704 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.356226205692879e-05, |
| "loss": 3.7806, |
| "step": 393216 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.355387610941827e-05, |
| "loss": 3.7854, |
| "step": 393728 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.354549016190776e-05, |
| "loss": 3.7689, |
| "step": 394240 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.353710421439724e-05, |
| "loss": 3.7875, |
| "step": 394752 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.352871826688672e-05, |
| "loss": 3.7719, |
| "step": 395264 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.35203323193762e-05, |
| "loss": 3.7735, |
| "step": 395776 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.3511962750669406e-05, |
| "loss": 3.7684, |
| "step": 396288 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 4.3503576803158886e-05, |
| "loss": 3.7716, |
| "step": 396800 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3495190855648366e-05, |
| "loss": 3.7814, |
| "step": 397312 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3486821286941575e-05, |
| "loss": 3.7766, |
| "step": 397824 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3478435339431055e-05, |
| "loss": 3.7855, |
| "step": 398336 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3470049391920535e-05, |
| "loss": 3.7858, |
| "step": 398848 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3461663444410015e-05, |
| "loss": 3.7734, |
| "step": 399360 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3453277496899495e-05, |
| "loss": 3.7783, |
| "step": 399872 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.344490792819271e-05, |
| "loss": 3.7748, |
| "step": 400384 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.343652198068219e-05, |
| "loss": 3.7733, |
| "step": 400896 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.342813603317167e-05, |
| "loss": 3.7689, |
| "step": 401408 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.341975008566115e-05, |
| "loss": 3.7722, |
| "step": 401920 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3411364138150624e-05, |
| "loss": 3.7757, |
| "step": 402432 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3402978190640104e-05, |
| "loss": 3.7818, |
| "step": 402944 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.339460862193332e-05, |
| "loss": 3.7692, |
| "step": 403456 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.338622267442279e-05, |
| "loss": 3.773, |
| "step": 403968 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.337783672691227e-05, |
| "loss": 3.7781, |
| "step": 404480 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.336945077940175e-05, |
| "loss": 3.7756, |
| "step": 404992 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.336108121069497e-05, |
| "loss": 3.7741, |
| "step": 405504 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.335269526318445e-05, |
| "loss": 3.7674, |
| "step": 406016 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.334430931567393e-05, |
| "loss": 3.7456, |
| "step": 406528 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.333592336816341e-05, |
| "loss": 3.7805, |
| "step": 407040 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.332753742065289e-05, |
| "loss": 3.7693, |
| "step": 407552 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.331915147314237e-05, |
| "loss": 3.7691, |
| "step": 408064 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.331078190443558e-05, |
| "loss": 3.7593, |
| "step": 408576 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.330239595692506e-05, |
| "loss": 3.7538, |
| "step": 409088 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.329401000941454e-05, |
| "loss": 3.7527, |
| "step": 409600 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.328562406190402e-05, |
| "loss": 3.7685, |
| "step": 410112 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.32772381143935e-05, |
| "loss": 3.7606, |
| "step": 410624 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.326885216688298e-05, |
| "loss": 3.7652, |
| "step": 411136 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3260482598176186e-05, |
| "loss": 3.7703, |
| "step": 411648 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3252096650665666e-05, |
| "loss": 3.7593, |
| "step": 412160 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3243710703155146e-05, |
| "loss": 3.7619, |
| "step": 412672 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3235324755644626e-05, |
| "loss": 3.7681, |
| "step": 413184 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.322693880813411e-05, |
| "loss": 3.7481, |
| "step": 413696 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.321855286062359e-05, |
| "loss": 3.7591, |
| "step": 414208 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.321016691311307e-05, |
| "loss": 3.7631, |
| "step": 414720 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.320179734440628e-05, |
| "loss": 3.77, |
| "step": 415232 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.319341139689576e-05, |
| "loss": 3.7478, |
| "step": 415744 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.318502544938524e-05, |
| "loss": 3.7583, |
| "step": 416256 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.317663950187472e-05, |
| "loss": 3.7494, |
| "step": 416768 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.31682535543642e-05, |
| "loss": 3.7698, |
| "step": 417280 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.315986760685368e-05, |
| "loss": 3.766, |
| "step": 417792 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3151481659343155e-05, |
| "loss": 3.7631, |
| "step": 418304 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3143095711832635e-05, |
| "loss": 3.7767, |
| "step": 418816 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.313472614312585e-05, |
| "loss": 3.7714, |
| "step": 419328 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.312634019561533e-05, |
| "loss": 3.7852, |
| "step": 419840 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.311795424810481e-05, |
| "loss": 3.7621, |
| "step": 420352 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.310956830059429e-05, |
| "loss": 3.7646, |
| "step": 420864 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3101198731887506e-05, |
| "loss": 3.7617, |
| "step": 421376 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3092829163180715e-05, |
| "loss": 3.7538, |
| "step": 421888 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3084443215670195e-05, |
| "loss": 3.7721, |
| "step": 422400 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3076057268159675e-05, |
| "loss": 3.7609, |
| "step": 422912 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3067671320649155e-05, |
| "loss": 3.7692, |
| "step": 423424 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.305928537313863e-05, |
| "loss": 3.7633, |
| "step": 423936 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.305089942562811e-05, |
| "loss": 3.7503, |
| "step": 424448 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3042529856921324e-05, |
| "loss": 3.7599, |
| "step": 424960 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3034143909410804e-05, |
| "loss": 3.7529, |
| "step": 425472 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3025757961900284e-05, |
| "loss": 3.7699, |
| "step": 425984 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3017372014389764e-05, |
| "loss": 3.763, |
| "step": 426496 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 4.3008986066879244e-05, |
| "loss": 3.7472, |
| "step": 427008 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.3000600119368724e-05, |
| "loss": 3.77, |
| "step": 427520 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2992214171858204e-05, |
| "loss": 3.7573, |
| "step": 428032 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2983828224347684e-05, |
| "loss": 3.7498, |
| "step": 428544 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.297545865564089e-05, |
| "loss": 3.7583, |
| "step": 429056 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.296707270813037e-05, |
| "loss": 3.7697, |
| "step": 429568 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.295870313942358e-05, |
| "loss": 3.7625, |
| "step": 430080 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.295031719191306e-05, |
| "loss": 3.766, |
| "step": 430592 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.294193124440254e-05, |
| "loss": 3.7467, |
| "step": 431104 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.293354529689202e-05, |
| "loss": 3.76, |
| "step": 431616 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.29251593493815e-05, |
| "loss": 3.7517, |
| "step": 432128 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.291677340187099e-05, |
| "loss": 3.7648, |
| "step": 432640 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.290838745436047e-05, |
| "loss": 3.7667, |
| "step": 433152 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.290000150684995e-05, |
| "loss": 3.755, |
| "step": 433664 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.289163193814316e-05, |
| "loss": 3.7594, |
| "step": 434176 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.288324599063264e-05, |
| "loss": 3.7659, |
| "step": 434688 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.287487642192585e-05, |
| "loss": 3.74, |
| "step": 435200 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.286649047441533e-05, |
| "loss": 3.7512, |
| "step": 435712 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.285810452690481e-05, |
| "loss": 3.7442, |
| "step": 436224 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.284971857939429e-05, |
| "loss": 3.7582, |
| "step": 436736 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.284133263188377e-05, |
| "loss": 3.7436, |
| "step": 437248 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2832963063176976e-05, |
| "loss": 3.7524, |
| "step": 437760 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2824577115666456e-05, |
| "loss": 3.7499, |
| "step": 438272 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.281619116815594e-05, |
| "loss": 3.7513, |
| "step": 438784 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.280780522064542e-05, |
| "loss": 3.7547, |
| "step": 439296 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.27994192731349e-05, |
| "loss": 3.7476, |
| "step": 439808 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.279103332562438e-05, |
| "loss": 3.7601, |
| "step": 440320 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.278266375691759e-05, |
| "loss": 3.7587, |
| "step": 440832 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.277427780940707e-05, |
| "loss": 3.7586, |
| "step": 441344 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.276589186189655e-05, |
| "loss": 3.7484, |
| "step": 441856 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.275750591438603e-05, |
| "loss": 3.7595, |
| "step": 442368 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.274913634567924e-05, |
| "loss": 3.7602, |
| "step": 442880 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.274075039816872e-05, |
| "loss": 3.746, |
| "step": 443392 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.27323644506582e-05, |
| "loss": 3.7673, |
| "step": 443904 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.272397850314768e-05, |
| "loss": 3.7419, |
| "step": 444416 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.271559255563716e-05, |
| "loss": 3.7593, |
| "step": 444928 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.270720660812664e-05, |
| "loss": 3.7412, |
| "step": 445440 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2698837039419856e-05, |
| "loss": 3.7564, |
| "step": 445952 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2690451091909336e-05, |
| "loss": 3.741, |
| "step": 446464 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2682065144398816e-05, |
| "loss": 3.7447, |
| "step": 446976 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.267367919688829e-05, |
| "loss": 3.7411, |
| "step": 447488 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2665309628181505e-05, |
| "loss": 3.7613, |
| "step": 448000 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2656923680670985e-05, |
| "loss": 3.7481, |
| "step": 448512 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2648537733160465e-05, |
| "loss": 3.7573, |
| "step": 449024 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.264015178564994e-05, |
| "loss": 3.743, |
| "step": 449536 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2631782216943154e-05, |
| "loss": 3.757, |
| "step": 450048 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2623396269432634e-05, |
| "loss": 3.7568, |
| "step": 450560 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2615010321922114e-05, |
| "loss": 3.755, |
| "step": 451072 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2606624374411594e-05, |
| "loss": 3.7541, |
| "step": 451584 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.2598238426901074e-05, |
| "loss": 3.7502, |
| "step": 452096 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.258986885819429e-05, |
| "loss": 3.7502, |
| "step": 452608 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.258148291068376e-05, |
| "loss": 3.761, |
| "step": 453120 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.257309696317324e-05, |
| "loss": 3.7457, |
| "step": 453632 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.256471101566272e-05, |
| "loss": 3.7535, |
| "step": 454144 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.25563250681522e-05, |
| "loss": 3.748, |
| "step": 454656 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.254793912064168e-05, |
| "loss": 3.7492, |
| "step": 455168 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.253955317313116e-05, |
| "loss": 3.7529, |
| "step": 455680 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.253116722562064e-05, |
| "loss": 3.7452, |
| "step": 456192 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.252279765691385e-05, |
| "loss": 3.7604, |
| "step": 456704 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.251441170940333e-05, |
| "loss": 3.7342, |
| "step": 457216 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 4.250602576189281e-05, |
| "loss": 3.7567, |
| "step": 457728 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_loss": 3.869717836380005, |
| "eval_runtime": 305.4028, |
| "eval_samples_per_second": 1249.468, |
| "eval_steps_per_second": 39.047, |
| "step": 457920 |
| } |
| ], |
| "logging_steps": 512, |
| "max_steps": 3052726, |
| "num_train_epochs": 9223372036854775807, |
| "save_steps": 10, |
| "total_flos": 3.1846510665575424e+17, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|