| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.014395220786698816, |
| "eval_steps": 1000, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.4395220786698816e-05, |
| "grad_norm": 0.18359375, |
| "learning_rate": 0.0001, |
| "loss": 0.2384, |
| "loss/crossentropy": 2.463143229484558, |
| "loss/fcd": 0.4892578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23836339265108109, |
| "step": 1 |
| }, |
| { |
| "epoch": 2.8790441573397632e-05, |
| "grad_norm": 0.1328125, |
| "learning_rate": 0.0001, |
| "loss": 0.2453, |
| "loss/crossentropy": 2.74690580368042, |
| "loss/fcd": 0.462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2453368902206421, |
| "step": 2 |
| }, |
| { |
| "epoch": 4.3185662360096445e-05, |
| "grad_norm": 0.15625, |
| "learning_rate": 0.0001, |
| "loss": 0.2292, |
| "loss/crossentropy": 2.3877265453338623, |
| "loss/fcd": 0.4130859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22919423878192902, |
| "step": 3 |
| }, |
| { |
| "epoch": 5.7580883146795265e-05, |
| "grad_norm": 0.130859375, |
| "learning_rate": 0.0001, |
| "loss": 0.2284, |
| "loss/crossentropy": 2.392206907272339, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22838981449604034, |
| "step": 4 |
| }, |
| { |
| "epoch": 7.197610393349408e-05, |
| "grad_norm": 0.138671875, |
| "learning_rate": 0.0001, |
| "loss": 0.2237, |
| "loss/crossentropy": 2.1798477172851562, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22366443276405334, |
| "step": 5 |
| }, |
| { |
| "epoch": 8.637132472019289e-05, |
| "grad_norm": 0.1357421875, |
| "learning_rate": 0.0001, |
| "loss": 0.2644, |
| "loss/crossentropy": 2.492342710494995, |
| "loss/fcd": 0.462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2643834352493286, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.00010076654550689171, |
| "grad_norm": 0.150390625, |
| "learning_rate": 0.0001, |
| "loss": 0.211, |
| "loss/crossentropy": 2.035392999649048, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21097075939178467, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.00011516176629359053, |
| "grad_norm": 0.13671875, |
| "learning_rate": 0.0001, |
| "loss": 0.2388, |
| "loss/crossentropy": 2.3071805238723755, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23878887295722961, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.00012955698708028935, |
| "grad_norm": 0.126953125, |
| "learning_rate": 0.0001, |
| "loss": 0.2061, |
| "loss/crossentropy": 2.1987677812576294, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20613879710435867, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.00014395220786698817, |
| "grad_norm": 0.1240234375, |
| "learning_rate": 0.0001, |
| "loss": 0.2075, |
| "loss/crossentropy": 1.9901325702667236, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20753345638513565, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.000158347428653687, |
| "grad_norm": 0.1298828125, |
| "learning_rate": 0.0001, |
| "loss": 0.2213, |
| "loss/crossentropy": 2.3090018033981323, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2212577611207962, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.00017274264944038578, |
| "grad_norm": 0.1513671875, |
| "learning_rate": 0.0001, |
| "loss": 0.2689, |
| "loss/crossentropy": 2.2487552165985107, |
| "loss/fcd": 0.5, |
| "loss/idx": 18.0, |
| "loss/logits": 0.26888714730739594, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.0001871378702270846, |
| "grad_norm": 0.12109375, |
| "learning_rate": 0.0001, |
| "loss": 0.2335, |
| "loss/crossentropy": 2.3826037645339966, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23347856849431992, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.00020153309101378342, |
| "grad_norm": 0.115234375, |
| "learning_rate": 0.0001, |
| "loss": 0.2299, |
| "loss/crossentropy": 2.524248242378235, |
| "loss/fcd": 0.4560546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22988282144069672, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00021592831180048224, |
| "grad_norm": 0.1279296875, |
| "learning_rate": 0.0001, |
| "loss": 0.2354, |
| "loss/crossentropy": 2.33734929561615, |
| "loss/fcd": 0.4482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23541489243507385, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.00023032353258718106, |
| "grad_norm": 0.12060546875, |
| "grad_norm_var": 0.0002919107675552368, |
| "learning_rate": 0.0001, |
| "loss": 0.2428, |
| "loss/crossentropy": 2.3426687717437744, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24281759560108185, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.0002447187533738799, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 0.00014951129754384358, |
| "learning_rate": 0.0001, |
| "loss": 0.2399, |
| "loss/crossentropy": 2.634019374847412, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23987850546836853, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0002591139741605787, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 0.0001506239175796509, |
| "learning_rate": 0.0001, |
| "loss": 0.2267, |
| "loss/crossentropy": 2.2048473358154297, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2267211154103279, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0002735091949472775, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 0.0001150439182917277, |
| "learning_rate": 0.0001, |
| "loss": 0.2111, |
| "loss/crossentropy": 2.421955704689026, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21106208860874176, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.00028790441573397634, |
| "grad_norm": 0.125, |
| "grad_norm_var": 0.00011625985304514567, |
| "learning_rate": 0.0001, |
| "loss": 0.2474, |
| "loss/crossentropy": 2.4863855838775635, |
| "loss/fcd": 0.4990234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24741190671920776, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.00030229963652067516, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 0.00011513630549112956, |
| "learning_rate": 0.0001, |
| "loss": 0.2185, |
| "loss/crossentropy": 2.2641090154647827, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21849986910820007, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.000316694857307374, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.0001184294621149699, |
| "learning_rate": 0.0001, |
| "loss": 0.2309, |
| "loss/crossentropy": 2.614189624786377, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23093532770872116, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.00033109007809407274, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 8.991460005442301e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2174, |
| "loss/crossentropy": 2.258315682411194, |
| "loss/fcd": 0.408203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21735627949237823, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.00034548529888077156, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 8.047322432200113e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2413, |
| "loss/crossentropy": 2.355400562286377, |
| "loss/fcd": 0.466796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2412610948085785, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.0003598805196674704, |
| "grad_norm": 0.130859375, |
| "grad_norm_var": 8.29686721165975e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2334, |
| "loss/crossentropy": 2.4980456829071045, |
| "loss/fcd": 0.4599609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23341741412878036, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.0003742757404541692, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 9.11712646484375e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2017, |
| "loss/crossentropy": 2.1927164793014526, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20174731314182281, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.000388670961240868, |
| "grad_norm": 0.173828125, |
| "grad_norm_var": 0.0002490639686584473, |
| "learning_rate": 0.0001, |
| "loss": 0.253, |
| "loss/crossentropy": 2.5806944370269775, |
| "loss/fcd": 0.4658203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2529568448662758, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.00040306618202756684, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 0.0002092510461807251, |
| "learning_rate": 0.0001, |
| "loss": 0.2384, |
| "loss/crossentropy": 2.292937397956848, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2384110689163208, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.00041746140281426566, |
| "grad_norm": 0.12451171875, |
| "grad_norm_var": 0.0002086321512858073, |
| "learning_rate": 0.0001, |
| "loss": 0.257, |
| "loss/crossentropy": 2.4048542976379395, |
| "loss/fcd": 0.482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.25698406249284744, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.0004318566236009645, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 0.00022185643513997395, |
| "learning_rate": 0.0001, |
| "loss": 0.2111, |
| "loss/crossentropy": 2.4948848485946655, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21111004799604416, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.0004462518443876633, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 0.00022597312927246093, |
| "learning_rate": 0.0001, |
| "loss": 0.2299, |
| "loss/crossentropy": 2.233025908470154, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22986605763435364, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.0004606470651743621, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.0002281347910563151, |
| "learning_rate": 0.0001, |
| "loss": 0.2272, |
| "loss/crossentropy": 2.448768973350525, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22723641991615295, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.00047504228596106094, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.0002345720926920573, |
| "learning_rate": 0.0001, |
| "loss": 0.1984, |
| "loss/crossentropy": 2.116120755672455, |
| "loss/fcd": 0.3935546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19836096465587616, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.0004894375067477598, |
| "grad_norm": 0.1259765625, |
| "grad_norm_var": 0.00023334821065266927, |
| "learning_rate": 0.0001, |
| "loss": 0.2416, |
| "loss/crossentropy": 2.3083192110061646, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2415921539068222, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.0005038327275344585, |
| "grad_norm": 0.1337890625, |
| "grad_norm_var": 0.00024124781290690104, |
| "learning_rate": 0.0001, |
| "loss": 0.2936, |
| "loss/crossentropy": 2.6550590991973877, |
| "loss/fcd": 0.529296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.29357363283634186, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.0005182279483211574, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.00026692946751912433, |
| "learning_rate": 0.0001, |
| "loss": 0.2041, |
| "loss/crossentropy": 2.341429352760315, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20406678318977356, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.0005326231691078562, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 0.00027064879735310874, |
| "learning_rate": 0.0001, |
| "loss": 0.225, |
| "loss/crossentropy": 2.350203037261963, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22499807178974152, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.000547018389894555, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 0.00027637084325154624, |
| "learning_rate": 0.0001, |
| "loss": 0.222, |
| "loss/crossentropy": 2.209356427192688, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22195565700531006, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.0005614136106812538, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 0.00027482410271962486, |
| "learning_rate": 0.0001, |
| "loss": 0.2432, |
| "loss/crossentropy": 2.6039966344833374, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24324779212474823, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.0005758088314679527, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 0.00027163426081339517, |
| "learning_rate": 0.0001, |
| "loss": 0.2133, |
| "loss/crossentropy": 2.3391385078430176, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21334318816661835, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.0005902040522546514, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 0.00028450886408487954, |
| "learning_rate": 0.0001, |
| "loss": 0.2074, |
| "loss/crossentropy": 2.5192357301712036, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2073945701122284, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0006045992730413503, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.0002897739410400391, |
| "learning_rate": 0.0001, |
| "loss": 0.1894, |
| "loss/crossentropy": 2.35784912109375, |
| "loss/fcd": 0.4248046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18937092274427414, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.0006189944938280491, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 7.068216800689697e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1925, |
| "loss/crossentropy": 2.0304250717163086, |
| "loss/fcd": 0.40625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19247674196958542, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.000633389714614748, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 7.129907608032227e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2238, |
| "loss/crossentropy": 2.257385492324829, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22384560853242874, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.0006477849354014467, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 6.504058837890625e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2174, |
| "loss/crossentropy": 2.47000515460968, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2174428552389145, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.0006621801561881455, |
| "grad_norm": 0.1318359375, |
| "grad_norm_var": 8.242527643839518e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2356, |
| "loss/crossentropy": 2.77071475982666, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23558437824249268, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.0006765753769748444, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 8.253951867421468e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2314, |
| "loss/crossentropy": 2.3579763174057007, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23137739300727844, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.0006909705977615431, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 8.366008599599202e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2147, |
| "loss/crossentropy": 2.4674328565597534, |
| "loss/fcd": 0.48046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2146531641483307, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.000705365818548242, |
| "grad_norm": 0.13671875, |
| "grad_norm_var": 0.00011021196842193603, |
| "learning_rate": 0.0001, |
| "loss": 0.2773, |
| "loss/crossentropy": 2.5875381231307983, |
| "loss/fcd": 0.501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.27730000019073486, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.0007197610393349408, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.00011401176452636718, |
| "learning_rate": 0.0001, |
| "loss": 0.1992, |
| "loss/crossentropy": 2.3770352602005005, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1992211416363716, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.0007341562601216396, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 9.05315081278483e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2208, |
| "loss/crossentropy": 2.503299593925476, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22077593207359314, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.0007485514809083384, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 8.09947649637858e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2448, |
| "loss/crossentropy": 2.5992391109466553, |
| "loss/fcd": 0.478515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2447950839996338, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.0007629467016950373, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 8.217493693033855e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2302, |
| "loss/crossentropy": 2.5341001749038696, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23020881414413452, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.000777341922481736, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 8.111695448557536e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.242, |
| "loss/crossentropy": 2.594543933868408, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24200908839702606, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.0007917371432684349, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 8.102655410766602e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2157, |
| "loss/crossentropy": 2.35564386844635, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21571539342403412, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.0008061323640551337, |
| "grad_norm": 0.140625, |
| "grad_norm_var": 0.00012067854404449463, |
| "learning_rate": 0.0001, |
| "loss": 0.2262, |
| "loss/crossentropy": 2.5845850706100464, |
| "loss/fcd": 0.505859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22615493834018707, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.0008205275848418326, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.00011030832926432292, |
| "learning_rate": 0.0001, |
| "loss": 0.2264, |
| "loss/crossentropy": 2.6225829124450684, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22639667242765427, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0008349228056285313, |
| "grad_norm": 0.12158203125, |
| "grad_norm_var": 0.00010507901509602865, |
| "learning_rate": 0.0001, |
| "loss": 0.2332, |
| "loss/crossentropy": 2.49368155002594, |
| "loss/fcd": 0.4404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23318731039762497, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.0008493180264152302, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.00010922352472941081, |
| "learning_rate": 0.0001, |
| "loss": 0.2161, |
| "loss/crossentropy": 2.365482449531555, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21612977981567383, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.000863713247201929, |
| "grad_norm": 0.12451171875, |
| "grad_norm_var": 0.00010903577009836832, |
| "learning_rate": 0.0001, |
| "loss": 0.2292, |
| "loss/crossentropy": 2.450873017311096, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22924820333719254, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0008781084679886277, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.0001178810993830363, |
| "learning_rate": 0.0001, |
| "loss": 0.2071, |
| "loss/crossentropy": 2.364640951156616, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20711997151374817, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.0008925036887753266, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 0.00012259483337402345, |
| "learning_rate": 0.0001, |
| "loss": 0.1989, |
| "loss/crossentropy": 2.430219888687134, |
| "loss/fcd": 0.4072265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19892004877328873, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.0009068989095620254, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 0.00012622574965159098, |
| "learning_rate": 0.0001, |
| "loss": 0.2166, |
| "loss/crossentropy": 2.412087559700012, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21661554276943207, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.0009212941303487242, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.00012544691562652588, |
| "learning_rate": 0.0001, |
| "loss": 0.2152, |
| "loss/crossentropy": 2.369842290878296, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21520362049341202, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.000935689351135423, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 0.00010499060153961182, |
| "learning_rate": 0.0001, |
| "loss": 0.2505, |
| "loss/crossentropy": 2.5731316804885864, |
| "loss/fcd": 0.484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2505309656262398, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.0009500845719221219, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 9.702742099761962e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2162, |
| "loss/crossentropy": 2.4219590425491333, |
| "loss/fcd": 0.4228515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2161625698208809, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.0009644797927088206, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 9.924471378326416e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1955, |
| "loss/crossentropy": 2.072207987308502, |
| "loss/fcd": 0.4052734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19551369547843933, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.0009788750134955195, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 0.00010709762573242187, |
| "learning_rate": 0.0001, |
| "loss": 0.2152, |
| "loss/crossentropy": 2.4199079275131226, |
| "loss/fcd": 0.404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2152082547545433, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.0009932702342822183, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.00010735094547271728, |
| "learning_rate": 0.0001, |
| "loss": 0.1939, |
| "loss/crossentropy": 2.073515832424164, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19392766803503036, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.001007665455068917, |
| "grad_norm": 0.12109375, |
| "grad_norm_var": 0.00010992586612701416, |
| "learning_rate": 0.0001, |
| "loss": 0.244, |
| "loss/crossentropy": 2.376970887184143, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2439984604716301, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.001022060675855616, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 0.00011152327060699463, |
| "learning_rate": 0.0001, |
| "loss": 0.2341, |
| "loss/crossentropy": 2.329576015472412, |
| "loss/fcd": 0.4716796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23405101150274277, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.0010364558966423148, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 6.649891535441081e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2061, |
| "loss/crossentropy": 2.240494966506958, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20607301592826843, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.0010508511174290136, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 6.442765394846599e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2261, |
| "loss/crossentropy": 2.58719003200531, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22607439756393433, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.0010652463382157123, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 6.287793318430583e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2209, |
| "loss/crossentropy": 2.458608031272888, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22093002498149872, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.001079641559002411, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 7.06632932027181e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.189, |
| "loss/crossentropy": 2.4321776628494263, |
| "loss/fcd": 0.40625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18898583948612213, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.00109403677978911, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 6.0458978017171226e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.241, |
| "loss/crossentropy": 2.5812788009643555, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24104547500610352, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.0011084320005758088, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 5.65489133199056e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.203, |
| "loss/crossentropy": 2.408494293689728, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20304062217473984, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.0011228272213625076, |
| "grad_norm": 0.1337890625, |
| "grad_norm_var": 7.34796126683553e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2287, |
| "loss/crossentropy": 2.239099144935608, |
| "loss/fcd": 0.46484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2286640703678131, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.0011372224421492064, |
| "grad_norm": 0.12353515625, |
| "grad_norm_var": 7.603565851847331e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2298, |
| "loss/crossentropy": 2.357472777366638, |
| "loss/fcd": 0.48046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22975638508796692, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.0011516176629359054, |
| "grad_norm": 0.12158203125, |
| "grad_norm_var": 7.807413736979166e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2185, |
| "loss/crossentropy": 2.4130557775497437, |
| "loss/fcd": 0.4765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21845312416553497, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.0011660128837226041, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 6.88701868057251e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2304, |
| "loss/crossentropy": 2.293164014816284, |
| "loss/fcd": 0.466796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2303522452712059, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.0011804081045093029, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 8.126795291900635e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1964, |
| "loss/crossentropy": 2.2688822746276855, |
| "loss/fcd": 0.3935546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19636806100606918, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.0011948033252960016, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 7.959604263305665e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2397, |
| "loss/crossentropy": 2.398077368736267, |
| "loss/fcd": 0.4404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23967822641134262, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.0012091985460827006, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 8.558332920074462e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2079, |
| "loss/crossentropy": 2.524065375328064, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20788107812404633, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.0012235937668693994, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 8.542438348134359e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2091, |
| "loss/crossentropy": 2.398527979850769, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20912020653486252, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.0012379889876560982, |
| "grad_norm": 0.12109375, |
| "grad_norm_var": 8.542438348134359e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2435, |
| "loss/crossentropy": 2.4105933904647827, |
| "loss/fcd": 0.48828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24351391196250916, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.001252384208442797, |
| "grad_norm": 0.12158203125, |
| "grad_norm_var": 8.678038914998373e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2189, |
| "loss/crossentropy": 2.26534903049469, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21887247264385223, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.001266779429229496, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 8.635421593983968e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1992, |
| "loss/crossentropy": 2.1426846981048584, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1991657018661499, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.0012811746500161947, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 8.641878763834635e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2104, |
| "loss/crossentropy": 2.2193171977996826, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21043668687343597, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.0012955698708028934, |
| "grad_norm": 0.14453125, |
| "grad_norm_var": 0.00013866325219472249, |
| "learning_rate": 0.0001, |
| "loss": 0.2259, |
| "loss/crossentropy": 2.4619998931884766, |
| "loss/fcd": 0.4755859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22587314993143082, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.0013099650915895922, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 0.00012292762597401936, |
| "learning_rate": 0.0001, |
| "loss": 0.2502, |
| "loss/crossentropy": 2.5881928205490112, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2501572445034981, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.001324360312376291, |
| "grad_norm": 0.123046875, |
| "grad_norm_var": 0.0001231988271077474, |
| "learning_rate": 0.0001, |
| "loss": 0.2096, |
| "loss/crossentropy": 2.28423535823822, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20956922322511673, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.00133875553316299, |
| "grad_norm": 0.119140625, |
| "grad_norm_var": 0.0001184612512588501, |
| "learning_rate": 0.0001, |
| "loss": 0.2081, |
| "loss/crossentropy": 2.0630246996879578, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.208104208111763, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.0013531507539496887, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 0.00010280509789784749, |
| "learning_rate": 0.0001, |
| "loss": 0.2285, |
| "loss/crossentropy": 2.4672012329101562, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22853697836399078, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.0013675459747363875, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.00010375579198201497, |
| "learning_rate": 0.0001, |
| "loss": 0.2217, |
| "loss/crossentropy": 2.432914614677429, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22167058289051056, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.0013819411955230862, |
| "grad_norm": 0.103515625, |
| "grad_norm_var": 0.00011195242404937744, |
| "learning_rate": 0.0001, |
| "loss": 0.198, |
| "loss/crossentropy": 2.522903800010681, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19795683026313782, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.0013963364163097852, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.00011272430419921875, |
| "learning_rate": 0.0001, |
| "loss": 0.2049, |
| "loss/crossentropy": 2.149677038192749, |
| "loss/fcd": 0.3984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20487764477729797, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.001410731637096484, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 0.00010592043399810791, |
| "learning_rate": 0.0001, |
| "loss": 0.2057, |
| "loss/crossentropy": 2.467462182044983, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20570345222949982, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.0014251268578831828, |
| "grad_norm": 0.12890625, |
| "grad_norm_var": 0.00011771519978841146, |
| "learning_rate": 0.0001, |
| "loss": 0.2105, |
| "loss/crossentropy": 2.353211760520935, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21045749634504318, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.0014395220786698815, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 0.00010607639948527019, |
| "learning_rate": 0.0001, |
| "loss": 0.2903, |
| "loss/crossentropy": 2.590612769126892, |
| "loss/fcd": 0.47265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.290309339761734, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.0014539172994565805, |
| "grad_norm": 0.1259765625, |
| "grad_norm_var": 0.00010594924290974935, |
| "learning_rate": 0.0001, |
| "loss": 0.2467, |
| "loss/crossentropy": 2.3608927726745605, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24666306376457214, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.0014683125202432793, |
| "grad_norm": 0.1298828125, |
| "grad_norm_var": 0.00011356671651204427, |
| "learning_rate": 0.0001, |
| "loss": 0.2413, |
| "loss/crossentropy": 2.1008136868476868, |
| "loss/fcd": 0.4521484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24129530787467957, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.001482707741029978, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.0001156767209370931, |
| "learning_rate": 0.0001, |
| "loss": 0.2219, |
| "loss/crossentropy": 2.36824232339859, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2218664586544037, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0014971029618166768, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.00011526346206665039, |
| "learning_rate": 0.0001, |
| "loss": 0.2459, |
| "loss/crossentropy": 2.3991124629974365, |
| "loss/fcd": 0.4423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24585890769958496, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.0015114981826033756, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 0.00013441145420074464, |
| "learning_rate": 0.0001, |
| "loss": 0.2028, |
| "loss/crossentropy": 2.5206661224365234, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20282022655010223, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.0015258934033900746, |
| "grad_norm": 0.123046875, |
| "grad_norm_var": 8.746683597564698e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2339, |
| "loss/crossentropy": 2.294739842414856, |
| "loss/fcd": 0.4619140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2339302897453308, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.0015402886241767733, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 8.897781372070312e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2028, |
| "loss/crossentropy": 2.430526852607727, |
| "loss/fcd": 0.404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20277925580739975, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.001554683844963472, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 9.490549564361572e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1685, |
| "loss/crossentropy": 1.9886462688446045, |
| "loss/fcd": 0.4814453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.16851283982396126, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.0015690790657501708, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 9.39329465230306e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2232, |
| "loss/crossentropy": 2.3031085729599, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2231953889131546, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.0015834742865368698, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 9.844700495402019e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2145, |
| "loss/crossentropy": 2.4420076608657837, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2144630402326584, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.0015978695073235686, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 0.00010046859582265218, |
| "learning_rate": 0.0001, |
| "loss": 0.1999, |
| "loss/crossentropy": 2.265585422515869, |
| "loss/fcd": 0.3876953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19986777007579803, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.0016122647281102674, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 9.224812189737956e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2391, |
| "loss/crossentropy": 2.5880415439605713, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23910276591777802, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.0016266599488969661, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 9.04242197672526e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2466, |
| "loss/crossentropy": 2.6048234701156616, |
| "loss/fcd": 0.47265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24656572192907333, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.0016410551696836651, |
| "grad_norm": 0.134765625, |
| "grad_norm_var": 0.00010449091593424479, |
| "learning_rate": 0.0001, |
| "loss": 0.2386, |
| "loss/crossentropy": 2.1900378465652466, |
| "loss/fcd": 0.484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23863784968852997, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0016554503904703639, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 9.802083174387614e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2123, |
| "loss/crossentropy": 2.547809600830078, |
| "loss/fcd": 0.4423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21230217069387436, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0016698456112570626, |
| "grad_norm": 0.255859375, |
| "grad_norm_var": 0.0013202657302220663, |
| "learning_rate": 0.0001, |
| "loss": 0.2161, |
| "loss/crossentropy": 2.586913585662842, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2160758599638939, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.0016842408320437614, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 0.0013230552275975546, |
| "learning_rate": 0.0001, |
| "loss": 0.2282, |
| "loss/crossentropy": 2.3776031732559204, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2281685397028923, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.0016986360528304604, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 0.0013238906860351563, |
| "learning_rate": 0.0001, |
| "loss": 0.2269, |
| "loss/crossentropy": 2.5417513847351074, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.226931631565094, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.0017130312736171592, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.0013291825850804647, |
| "learning_rate": 0.0001, |
| "loss": 0.2172, |
| "loss/crossentropy": 2.416541814804077, |
| "loss/fcd": 0.40625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2172057330608368, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.001727426494403858, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 0.0013376067082087198, |
| "learning_rate": 0.0001, |
| "loss": 0.2088, |
| "loss/crossentropy": 2.3803776502609253, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20880089700222015, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.0017418217151905567, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 0.0013056437174479166, |
| "learning_rate": 0.0001, |
| "loss": 0.2246, |
| "loss/crossentropy": 2.4869107007980347, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22459682077169418, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.0017562169359772554, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 0.0013244539499282838, |
| "learning_rate": 0.0001, |
| "loss": 0.2141, |
| "loss/crossentropy": 2.2889301776885986, |
| "loss/fcd": 0.4130859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21405386179685593, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.0017706121567639544, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.0013290554285049438, |
| "learning_rate": 0.0001, |
| "loss": 0.183, |
| "loss/crossentropy": 2.2636550664901733, |
| "loss/fcd": 0.3916015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1829545795917511, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.0017850073775506532, |
| "grad_norm": 0.123046875, |
| "grad_norm_var": 0.0013059258460998535, |
| "learning_rate": 0.0001, |
| "loss": 0.2259, |
| "loss/crossentropy": 2.3760812282562256, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2258809506893158, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.001799402598337352, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.001315462589263916, |
| "learning_rate": 0.0001, |
| "loss": 0.2041, |
| "loss/crossentropy": 2.595892906188965, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20406261831521988, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.0018137978191240507, |
| "grad_norm": 0.126953125, |
| "grad_norm_var": 0.001296854019165039, |
| "learning_rate": 0.0001, |
| "loss": 0.2131, |
| "loss/crossentropy": 2.3521647453308105, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21306610107421875, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0018281930399107497, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.0013095498085021972, |
| "learning_rate": 0.0001, |
| "loss": 0.2135, |
| "loss/crossentropy": 2.5395818948745728, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21354226768016815, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.0018425882606974485, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.0013218204180399577, |
| "learning_rate": 0.0001, |
| "loss": 0.1906, |
| "loss/crossentropy": 2.154847741127014, |
| "loss/fcd": 0.3994140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19056915491819382, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.0018569834814841472, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 0.0013209412495295207, |
| "learning_rate": 0.0001, |
| "loss": 0.2376, |
| "loss/crossentropy": 2.3668060302734375, |
| "loss/fcd": 0.4482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23755235970020294, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.001871378702270846, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.0013325204451878866, |
| "learning_rate": 0.0001, |
| "loss": 0.2072, |
| "loss/crossentropy": 2.4412275552749634, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20715615153312683, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.001885773923057545, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 0.0013236512740453085, |
| "learning_rate": 0.0001, |
| "loss": 0.2363, |
| "loss/crossentropy": 2.589287519454956, |
| "loss/fcd": 0.4853515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2362738400697708, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.0019001691438442438, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 5.292793114980062e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1757, |
| "loss/crossentropy": 2.1394956707954407, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.17568951100111008, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.0019145643646309425, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 5.675057570139567e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2141, |
| "loss/crossentropy": 2.5705530643463135, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21412815153598785, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.0019289595854176413, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 5.7474772135416666e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2091, |
| "loss/crossentropy": 2.2588201761245728, |
| "loss/fcd": 0.400390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20908734947443008, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.00194335480620434, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 5.976259708404541e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2167, |
| "loss/crossentropy": 2.432557463645935, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2166854664683342, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.001957750026991039, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 5.942881107330322e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2177, |
| "loss/crossentropy": 2.4058191776275635, |
| "loss/fcd": 0.4443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21774785220623016, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.001972145247777738, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 5.98907470703125e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1946, |
| "loss/crossentropy": 2.441463589668274, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19459661096334457, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.0019865404685644366, |
| "grad_norm": 0.12353515625, |
| "grad_norm_var": 6.546974182128907e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2507, |
| "loss/crossentropy": 2.5539783239364624, |
| "loss/fcd": 0.484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.25072282552719116, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0020009356893511353, |
| "grad_norm": 0.0986328125, |
| "grad_norm_var": 7.754862308502197e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2023, |
| "loss/crossentropy": 2.5158984661102295, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2023158147931099, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.002015330910137834, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 6.959338982899983e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1937, |
| "loss/crossentropy": 2.2275065183639526, |
| "loss/fcd": 0.4248046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19366320967674255, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.002029726130924533, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 7.063150405883789e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1863, |
| "loss/crossentropy": 2.422375202178955, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18625369668006897, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.002044121351711232, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 5.560616652170817e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.207, |
| "loss/crossentropy": 2.209444999694824, |
| "loss/fcd": 0.4248046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2070077657699585, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.002058516572497931, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 4.966954390207927e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2254, |
| "loss/crossentropy": 2.641687750816345, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2254098877310753, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.0020729117932846296, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 4.943211873372396e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2174, |
| "loss/crossentropy": 2.4751927852630615, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2174309641122818, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0020873070140713284, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 4.522005716959635e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2059, |
| "loss/crossentropy": 2.703999638557434, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20589765906333923, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.002101702234858027, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 4.261235396067301e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2243, |
| "loss/crossentropy": 2.3885515928268433, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22432449460029602, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.002116097455644726, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 3.983179728190104e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2524, |
| "loss/crossentropy": 2.471445918083191, |
| "loss/fcd": 0.470703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.25243769586086273, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.0021304926764314247, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 3.784398237864176e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2059, |
| "loss/crossentropy": 2.4856609106063843, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20586465299129486, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.0021448878972181234, |
| "grad_norm": 0.1240234375, |
| "grad_norm_var": 4.507601261138916e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.238, |
| "loss/crossentropy": 2.4825209379196167, |
| "loss/fcd": 0.4912109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23801030218601227, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.002159283118004822, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 4.329681396484375e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.218, |
| "loss/crossentropy": 2.373395562171936, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21804769337177277, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0021736783387915214, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 4.1857361793518066e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2016, |
| "loss/crossentropy": 2.242987275123596, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2015869840979576, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.00218807355957822, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 4.2071938514709474e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2289, |
| "loss/crossentropy": 2.6060279607772827, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2288510948419571, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.002202468780364919, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 4.068613052368164e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2122, |
| "loss/crossentropy": 2.4911882877349854, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2122008204460144, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.0022168640011516177, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 3.1900405883789065e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1964, |
| "loss/crossentropy": 2.2283207178115845, |
| "loss/fcd": 0.388671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19640249013900757, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0022312592219383164, |
| "grad_norm": 0.1240234375, |
| "grad_norm_var": 2.7974446614583332e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2518, |
| "loss/crossentropy": 2.6885886192321777, |
| "loss/fcd": 0.498046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.251840204000473, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.002245654442725015, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 2.698500951131185e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2217, |
| "loss/crossentropy": 2.3278268575668335, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22167562693357468, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.002260049663511714, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 2.698500951131185e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2134, |
| "loss/crossentropy": 2.2359228134155273, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21338575333356857, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.0022744448842984127, |
| "grad_norm": 0.1220703125, |
| "grad_norm_var": 3.1589468320210776e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1983, |
| "loss/crossentropy": 2.1452057361602783, |
| "loss/fcd": 0.40625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19825652241706848, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.002288840105085112, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 3.095865249633789e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2048, |
| "loss/crossentropy": 2.075889527797699, |
| "loss/fcd": 0.400390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20479386299848557, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.0023032353258718107, |
| "grad_norm": 0.123046875, |
| "grad_norm_var": 3.3283233642578124e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.206, |
| "loss/crossentropy": 2.2650269269943237, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20604287087917328, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0023176305466585095, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 3.167688846588135e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2225, |
| "loss/crossentropy": 2.385145902633667, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22251462936401367, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.0023320257674452082, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 3.179609775543213e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2162, |
| "loss/crossentropy": 2.3363062143325806, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21623297035694122, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.002346420988231907, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 3.199477990468343e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2196, |
| "loss/crossentropy": 2.258102059364319, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.219633050262928, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.0023608162090186058, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 2.7461846669514975e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2229, |
| "loss/crossentropy": 2.477385640144348, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22293243557214737, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.0023752114298053045, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 2.260108788808187e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.206, |
| "loss/crossentropy": 2.5965325832366943, |
| "loss/fcd": 0.4345703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20604980736970901, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.0023896066505920033, |
| "grad_norm": 0.12158203125, |
| "grad_norm_var": 2.48183806737264e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2456, |
| "loss/crossentropy": 2.391031265258789, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24561651051044464, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.002404001871378702, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 2.616246541341146e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2163, |
| "loss/crossentropy": 2.534990668296814, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21634604781866074, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.0024183970921654013, |
| "grad_norm": 0.130859375, |
| "grad_norm_var": 3.909667332967122e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2477, |
| "loss/crossentropy": 2.354380965232849, |
| "loss/fcd": 0.482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24768973886966705, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.0024327923129521, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 3.171662489573161e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2411, |
| "loss/crossentropy": 2.430347204208374, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2411317229270935, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.002447187533738799, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 3.145535786946615e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2188, |
| "loss/crossentropy": 2.312503755092621, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21878328174352646, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0024615827545254976, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 3.1276543935139975e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2309, |
| "loss/crossentropy": 2.5175788402557373, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23088021576404572, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.0024759779753121963, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 3.1276543935139975e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2165, |
| "loss/crossentropy": 2.484018087387085, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2165074348449707, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.002490373196098895, |
| "grad_norm": 0.1318359375, |
| "grad_norm_var": 4.671414693196615e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2481, |
| "loss/crossentropy": 2.2699760794639587, |
| "loss/fcd": 0.486328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24809680879116058, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.002504768416885594, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 5.0933162371317545e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1955, |
| "loss/crossentropy": 2.2288765907287598, |
| "loss/fcd": 0.3857421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19553960859775543, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.0025191636376722926, |
| "grad_norm": 0.1220703125, |
| "grad_norm_var": 5.243519941965739e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.213, |
| "loss/crossentropy": 2.4654963612556458, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2130081057548523, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.002533558858458992, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 4.954238732655843e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.21, |
| "loss/crossentropy": 2.2151373624801636, |
| "loss/fcd": 0.4072265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20995519310235977, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.0025479540792456906, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 4.892349243164062e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2092, |
| "loss/crossentropy": 2.4239630699157715, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20919281244277954, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.0025623493000323893, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 4.8951307932535806e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2638, |
| "loss/crossentropy": 2.718831419944763, |
| "loss/fcd": 0.4755859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2638430893421173, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.002576744520819088, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 5.977849165598551e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1942, |
| "loss/crossentropy": 2.4341124296188354, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1942092925310135, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.002591139741605787, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 5.9516231218973795e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.223, |
| "loss/crossentropy": 2.3784589767456055, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2230425328016281, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.0026055349623924856, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 6.085137526194254e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1995, |
| "loss/crossentropy": 2.1103312969207764, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1994745284318924, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.0026199301831791844, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 5.8869520823160805e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2228, |
| "loss/crossentropy": 2.173603892326355, |
| "loss/fcd": 0.4228515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22283250093460083, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.002634325403965883, |
| "grad_norm": 0.12353515625, |
| "grad_norm_var": 6.18139902750651e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2399, |
| "loss/crossentropy": 2.3933345079421997, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23994869738817215, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.002648720624752582, |
| "grad_norm": 0.12890625, |
| "grad_norm_var": 5.8142344156901043e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2447, |
| "loss/crossentropy": 2.5679067373275757, |
| "loss/fcd": 0.474609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24468251317739487, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.002663115845539281, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 5.8562556902567545e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1861, |
| "loss/crossentropy": 1.966173768043518, |
| "loss/fcd": 0.3759765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18611325323581696, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.00267751106632598, |
| "grad_norm": 0.1865234375, |
| "grad_norm_var": 0.0003708908955256144, |
| "learning_rate": 0.0001, |
| "loss": 0.3475, |
| "loss/crossentropy": 2.386851668357849, |
| "loss/fcd": 0.560546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.3474508970975876, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.0026919062871126787, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.0003666838010152181, |
| "learning_rate": 0.0001, |
| "loss": 0.2127, |
| "loss/crossentropy": 2.4003021717071533, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21274058520793915, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.0027063015078993774, |
| "grad_norm": 0.15234375, |
| "grad_norm_var": 0.00042354265848795574, |
| "learning_rate": 0.0001, |
| "loss": 0.2758, |
| "loss/crossentropy": 2.218628406524658, |
| "loss/fcd": 0.4873046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2757628411054611, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.002720696728686076, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.00042761067549387615, |
| "learning_rate": 0.0001, |
| "loss": 0.1907, |
| "loss/crossentropy": 2.1557281017303467, |
| "loss/fcd": 0.388671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1906721442937851, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.002735091949472775, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 0.000429573655128479, |
| "learning_rate": 0.0001, |
| "loss": 0.1882, |
| "loss/crossentropy": 2.047899842262268, |
| "loss/fcd": 0.3857421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1881674826145172, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.0027494871702594737, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 0.00045076608657836916, |
| "learning_rate": 0.0001, |
| "loss": 0.1987, |
| "loss/crossentropy": 2.2902016639709473, |
| "loss/fcd": 0.3974609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1987495943903923, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.0027638823910461725, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 0.0004603862762451172, |
| "learning_rate": 0.0001, |
| "loss": 0.1967, |
| "loss/crossentropy": 2.296987771987915, |
| "loss/fcd": 0.3837890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1967175006866455, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.0027782776118328717, |
| "grad_norm": 0.119140625, |
| "grad_norm_var": 0.00045976539452870685, |
| "learning_rate": 0.0001, |
| "loss": 0.2354, |
| "loss/crossentropy": 2.2293859124183655, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23544982075691223, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.0027926728326195705, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.0004739085833231608, |
| "learning_rate": 0.0001, |
| "loss": 0.2093, |
| "loss/crossentropy": 2.3077027797698975, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20933127403259277, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.0028070680534062692, |
| "grad_norm": 0.11767578125, |
| "grad_norm_var": 0.0004557291666666667, |
| "learning_rate": 0.0001, |
| "loss": 0.2349, |
| "loss/crossentropy": 2.5241353511810303, |
| "loss/fcd": 0.423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23492421209812164, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.002821463274192968, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.0004617283741633097, |
| "learning_rate": 0.0001, |
| "loss": 0.2089, |
| "loss/crossentropy": 2.2112027406692505, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20893365144729614, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.0028358584949796668, |
| "grad_norm": 0.095703125, |
| "grad_norm_var": 0.0004940946896870931, |
| "learning_rate": 0.0001, |
| "loss": 0.1738, |
| "loss/crossentropy": 2.3283063173294067, |
| "loss/fcd": 0.392578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1738404482603073, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.0028502537157663655, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.0004936844110488891, |
| "learning_rate": 0.0001, |
| "loss": 0.2259, |
| "loss/crossentropy": 2.4649304151535034, |
| "loss/fcd": 0.4697265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22589464485645294, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.0028646489365530643, |
| "grad_norm": 0.142578125, |
| "grad_norm_var": 0.0005282044410705566, |
| "learning_rate": 0.0001, |
| "loss": 0.2334, |
| "loss/crossentropy": 2.4893065690994263, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23335154354572296, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.002879044157339763, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.000536648432413737, |
| "learning_rate": 0.0001, |
| "loss": 0.2107, |
| "loss/crossentropy": 2.5291190147399902, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2106790393590927, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.002893439378126462, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.000536648432413737, |
| "learning_rate": 0.0001, |
| "loss": 0.2286, |
| "loss/crossentropy": 2.5203051567077637, |
| "loss/fcd": 0.4521484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2286214381456375, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.002907834598913161, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 0.00020819405714670816, |
| "learning_rate": 0.0001, |
| "loss": 0.2162, |
| "loss/crossentropy": 2.1828808784484863, |
| "loss/fcd": 0.40234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21623709797859192, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.00292222981969986, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.0002094109853108724, |
| "learning_rate": 0.0001, |
| "loss": 0.1716, |
| "loss/crossentropy": 1.858969271183014, |
| "loss/fcd": 0.5029296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.17157060280442238, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.0029366250404865585, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 0.0001033852497736613, |
| "learning_rate": 0.0001, |
| "loss": 0.2325, |
| "loss/crossentropy": 2.4954288005828857, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23250436782836914, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.0029510202612732573, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.00010505417982737223, |
| "learning_rate": 0.0001, |
| "loss": 0.2277, |
| "loss/crossentropy": 2.389811635017395, |
| "loss/fcd": 0.4482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22774703800678253, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.002965415482059956, |
| "grad_norm": 0.1328125, |
| "grad_norm_var": 0.00012969573338826498, |
| "learning_rate": 0.0001, |
| "loss": 0.1985, |
| "loss/crossentropy": 2.144119679927826, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19851599633693695, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.002979810702846655, |
| "grad_norm": 0.1220703125, |
| "grad_norm_var": 0.00012617011864980062, |
| "learning_rate": 0.0001, |
| "loss": 0.2454, |
| "loss/crossentropy": 2.509921073913574, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24539965391159058, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.0029942059236333536, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 0.0001226097345352173, |
| "learning_rate": 0.0001, |
| "loss": 0.238, |
| "loss/crossentropy": 2.336063265800476, |
| "loss/fcd": 0.478515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.237995944917202, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.0030086011444200524, |
| "grad_norm": 0.1435546875, |
| "grad_norm_var": 0.0001734723647435506, |
| "learning_rate": 0.0001, |
| "loss": 0.2493, |
| "loss/crossentropy": 2.3922590017318726, |
| "loss/fcd": 0.484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24932140111923218, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.003022996365206751, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 0.0001635064681371053, |
| "learning_rate": 0.0001, |
| "loss": 0.2434, |
| "loss/crossentropy": 2.597308397293091, |
| "loss/fcd": 0.470703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24338021874427795, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0030373915859934503, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 0.00016493797302246093, |
| "learning_rate": 0.0001, |
| "loss": 0.2082, |
| "loss/crossentropy": 2.3584909439086914, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20816650241613388, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.003051786806780149, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.0001660307248433431, |
| "learning_rate": 0.0001, |
| "loss": 0.2122, |
| "loss/crossentropy": 2.3587781190872192, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21221671998500824, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.003066182027566848, |
| "grad_norm": 0.1298828125, |
| "grad_norm_var": 0.0001430829366048177, |
| "learning_rate": 0.0001, |
| "loss": 0.2491, |
| "loss/crossentropy": 2.4296464920043945, |
| "loss/fcd": 0.46875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24906984716653824, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.0030805772483535466, |
| "grad_norm": 0.123046875, |
| "grad_norm_var": 0.00014130969842274984, |
| "learning_rate": 0.0001, |
| "loss": 0.2168, |
| "loss/crossentropy": 2.1808066368103027, |
| "loss/fcd": 0.4248046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21684125810861588, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.0030949724691402454, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.00011850098768870035, |
| "learning_rate": 0.0001, |
| "loss": 0.2052, |
| "loss/crossentropy": 2.3064663410186768, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20520812273025513, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.003109367689926944, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 0.00010748604933420817, |
| "learning_rate": 0.0001, |
| "loss": 0.2414, |
| "loss/crossentropy": 2.4660093784332275, |
| "loss/fcd": 0.4609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.241433747112751, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.003123762910713643, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.00010584890842437744, |
| "learning_rate": 0.0001, |
| "loss": 0.2161, |
| "loss/crossentropy": 2.2378053665161133, |
| "loss/fcd": 0.4130859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21608934551477432, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.0031381581315003417, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.00010330577691396078, |
| "learning_rate": 0.0001, |
| "loss": 0.2132, |
| "loss/crossentropy": 2.312962532043457, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21319198608398438, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.003152553352287041, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 9.870529174804688e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2193, |
| "loss/crossentropy": 2.3573015928268433, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21930547058582306, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.0031669485730737397, |
| "grad_norm": 0.119140625, |
| "grad_norm_var": 9.701152642567952e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2496, |
| "loss/crossentropy": 2.6434515714645386, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24958771467208862, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0031813437938604384, |
| "grad_norm": 0.1396484375, |
| "grad_norm_var": 0.0001229246457417806, |
| "learning_rate": 0.0001, |
| "loss": 0.2262, |
| "loss/crossentropy": 2.2807798981666565, |
| "loss/fcd": 0.50390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22618486732244492, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.003195739014647137, |
| "grad_norm": 0.12060546875, |
| "grad_norm_var": 0.00011207163333892822, |
| "learning_rate": 0.0001, |
| "loss": 0.2143, |
| "loss/crossentropy": 2.350602626800537, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2142939791083336, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.003210134235433836, |
| "grad_norm": 0.193359375, |
| "grad_norm_var": 0.00045262078444163, |
| "learning_rate": 0.0001, |
| "loss": 0.2248, |
| "loss/crossentropy": 2.7532432079315186, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22482239454984665, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.0032245294562205347, |
| "grad_norm": 0.1240234375, |
| "grad_norm_var": 0.000445746382077535, |
| "learning_rate": 0.0001, |
| "loss": 0.2287, |
| "loss/crossentropy": 2.397303342819214, |
| "loss/fcd": 0.4541015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22872482240200043, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.0032389246770072335, |
| "grad_norm": 0.119140625, |
| "grad_norm_var": 0.00042170584201812745, |
| "learning_rate": 0.0001, |
| "loss": 0.1917, |
| "loss/crossentropy": 2.161116361618042, |
| "loss/fcd": 0.390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1917443946003914, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.0032533198977939323, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.0004292130470275879, |
| "learning_rate": 0.0001, |
| "loss": 0.2043, |
| "loss/crossentropy": 2.131627917289734, |
| "loss/fcd": 0.392578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20434105396270752, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.003267715118580631, |
| "grad_norm": 0.12060546875, |
| "grad_norm_var": 0.0004218329985936483, |
| "learning_rate": 0.0001, |
| "loss": 0.218, |
| "loss/crossentropy": 2.5683807134628296, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21802888065576553, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.0032821103393673302, |
| "grad_norm": 0.134765625, |
| "grad_norm_var": 0.00041150649388631185, |
| "learning_rate": 0.0001, |
| "loss": 0.2244, |
| "loss/crossentropy": 2.4449127912521362, |
| "loss/fcd": 0.4658203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2243650108575821, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.003296505560154029, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 0.00041737457116444905, |
| "learning_rate": 0.0001, |
| "loss": 0.1937, |
| "loss/crossentropy": 2.1692421436309814, |
| "loss/fcd": 0.3828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19374938309192657, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.0033109007809407278, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.0004300077756245931, |
| "learning_rate": 0.0001, |
| "loss": 0.2099, |
| "loss/crossentropy": 2.1626864671707153, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20994187891483307, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.0033252960017274265, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 0.0004090269406636556, |
| "learning_rate": 0.0001, |
| "loss": 0.2224, |
| "loss/crossentropy": 2.4669238328933716, |
| "loss/fcd": 0.4404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2224324494600296, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0033396912225141253, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 0.00041254361470540363, |
| "learning_rate": 0.0001, |
| "loss": 0.241, |
| "loss/crossentropy": 2.534782886505127, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24097825586795807, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.003354086443300824, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 0.000413509209950765, |
| "learning_rate": 0.0001, |
| "loss": 0.229, |
| "loss/crossentropy": 2.4167356491088867, |
| "loss/fcd": 0.4248046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22896190732717514, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.003368481664087523, |
| "grad_norm": 0.1220703125, |
| "grad_norm_var": 0.0004103730122248332, |
| "learning_rate": 0.0001, |
| "loss": 0.2494, |
| "loss/crossentropy": 2.544241964817047, |
| "loss/fcd": 0.478515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2493698000907898, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.0033828768848742216, |
| "grad_norm": 0.12060546875, |
| "grad_norm_var": 0.0004053423802057902, |
| "learning_rate": 0.0001, |
| "loss": 0.2229, |
| "loss/crossentropy": 2.656595230102539, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22288895398378372, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.0033972721056609208, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.0004180183013280233, |
| "learning_rate": 0.0001, |
| "loss": 0.2, |
| "loss/crossentropy": 2.153246819972992, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19998866319656372, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.0034116673264476195, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.0004011462132136027, |
| "learning_rate": 0.0001, |
| "loss": 0.2159, |
| "loss/crossentropy": 2.3706564903259277, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21588444709777832, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.0034260625472343183, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.0004108498493830363, |
| "learning_rate": 0.0001, |
| "loss": 0.2155, |
| "loss/crossentropy": 2.377021312713623, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21553778648376465, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.003440457768021017, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 4.942814509073893e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1947, |
| "loss/crossentropy": 2.1807267665863037, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19467756152153015, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.003454852988807716, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 4.976590474446614e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2297, |
| "loss/crossentropy": 2.5010019540786743, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22967635095119476, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.0034692482095944146, |
| "grad_norm": 0.12109375, |
| "grad_norm_var": 5.098978678385417e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2257, |
| "loss/crossentropy": 2.1949596405029297, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2256992757320404, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.0034836434303811134, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 6.52382771174113e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1786, |
| "loss/crossentropy": 2.3066688776016235, |
| "loss/fcd": 0.404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.17856091260910034, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.003498038651167812, |
| "grad_norm": 0.1357421875, |
| "grad_norm_var": 9.119908014933268e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2979, |
| "loss/crossentropy": 2.833424210548401, |
| "loss/fcd": 0.53125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.29794102907180786, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.003512433871954511, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 6.642242272694906e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2084, |
| "loss/crossentropy": 2.4168113470077515, |
| "loss/fcd": 0.3994140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2084333300590515, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.00352682909274121, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 7.130304972330729e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2081, |
| "loss/crossentropy": 2.4122915267944336, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20814163982868195, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.003541224313527909, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 7.022221883138021e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2177, |
| "loss/crossentropy": 2.357482075691223, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2177310660481453, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.0035556195343146076, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 7.370313008626302e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.225, |
| "loss/crossentropy": 2.329651951789856, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22500982880592346, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.0035700147551013064, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 7.381041844685872e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.177, |
| "loss/crossentropy": 2.0500356554985046, |
| "loss/fcd": 0.380859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1770332083106041, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.003584409975888005, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 7.356703281402588e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1987, |
| "loss/crossentropy": 2.2625420093536377, |
| "loss/fcd": 0.3935546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19871972501277924, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.003598805196674704, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 6.967782974243164e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1945, |
| "loss/crossentropy": 2.5878301858901978, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19449464231729507, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0036132004174614027, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 6.86804453531901e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.218, |
| "loss/crossentropy": 2.4477245807647705, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21796388924121857, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.0036275956382481015, |
| "grad_norm": 0.12158203125, |
| "grad_norm_var": 7.302661736806234e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2601, |
| "loss/crossentropy": 2.5919313430786133, |
| "loss/fcd": 0.466796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2600754201412201, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.0036419908590348007, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 7.251004378000895e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1969, |
| "loss/crossentropy": 2.3274489641189575, |
| "loss/fcd": 0.3916015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1969192698597908, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.0036563860798214994, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 7.236798604329428e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2279, |
| "loss/crossentropy": 2.4737610816955566, |
| "loss/fcd": 0.4326171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.227908656001091, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.003670781300608198, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 7.198651631673177e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2286, |
| "loss/crossentropy": 2.442078709602356, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2285866141319275, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.003685176521394897, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 7.04119602839152e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2116, |
| "loss/crossentropy": 2.2948302030563354, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21162152290344238, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.0036995717421815957, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 7.044474283854166e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2117, |
| "loss/crossentropy": 2.3752611875534058, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21169160306453705, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.0037139669629682945, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 6.351073582967122e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1965, |
| "loss/crossentropy": 2.3940770626068115, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19652695208787918, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.0037283621837549932, |
| "grad_norm": 0.12451171875, |
| "grad_norm_var": 3.712077935536702e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2326, |
| "loss/crossentropy": 2.329423666000366, |
| "loss/fcd": 0.482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23256323486566544, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.003742757404541692, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 3.7511189778645836e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2077, |
| "loss/crossentropy": 2.2093913555145264, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20770975947380066, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0037571526253283908, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 3.47365935643514e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2247, |
| "loss/crossentropy": 2.3547682762145996, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2247237116098404, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.00377154784611509, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 3.922681013743083e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.188, |
| "loss/crossentropy": 2.2215335369110107, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18795417994260788, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.0037859430669017887, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 3.945032755533854e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2405, |
| "loss/crossentropy": 2.5075334310531616, |
| "loss/fcd": 0.462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24054966121912003, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.0038003382876884875, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 4.806419213612874e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1904, |
| "loss/crossentropy": 2.4045649766921997, |
| "loss/fcd": 0.380859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1903528869152069, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.0038147335084751863, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 4.969338575998942e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2226, |
| "loss/crossentropy": 2.2266829013824463, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2226012423634529, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.003829128729261885, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 4.833439985911051e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2003, |
| "loss/crossentropy": 2.052451729774475, |
| "loss/fcd": 0.4033203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20034398138523102, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.003843523950048584, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 4.928807417551676e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2289, |
| "loss/crossentropy": 2.7160192728042603, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22888437658548355, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.0038579191708352826, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 4.750887552897136e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2423, |
| "loss/crossentropy": 2.2038062810897827, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24225647747516632, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.0038723143916219813, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 4.7237674395243326e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2423, |
| "loss/crossentropy": 2.5651720762252808, |
| "loss/fcd": 0.4423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24227841198444366, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.00388670961240868, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 4.798571268717448e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2367, |
| "loss/crossentropy": 2.645506978034973, |
| "loss/fcd": 0.470703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23671862483024597, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.0039011048331953793, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 4.752079645792643e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2177, |
| "loss/crossentropy": 2.5453277826309204, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21773213893175125, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.003915500053982078, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 4.729827245076497e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1938, |
| "loss/crossentropy": 2.4203790426254272, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19378525018692017, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.003929895274768776, |
| "grad_norm": 0.12255859375, |
| "grad_norm_var": 5.0731499989827474e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2212, |
| "loss/crossentropy": 2.1389888525009155, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22124628722667694, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.003944290495555476, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 4.654626051584879e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2305, |
| "loss/crossentropy": 2.364627480506897, |
| "loss/fcd": 0.4599609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2305009961128235, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.003958685716342175, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 3.90013058980306e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2175, |
| "loss/crossentropy": 2.290530562400818, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21754977107048035, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.003973080937128873, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 3.883739312489828e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.223, |
| "loss/crossentropy": 2.2974144220352173, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22295525670051575, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.003987476157915572, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 4.0625532468159996e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2162, |
| "loss/crossentropy": 2.5710668563842773, |
| "loss/fcd": 0.4443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21617399901151657, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.004001871378702271, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 3.652175267537435e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2197, |
| "loss/crossentropy": 2.4304351806640625, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21974685788154602, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.00401626659948897, |
| "grad_norm": 0.126953125, |
| "grad_norm_var": 4.805624485015869e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2358, |
| "loss/crossentropy": 2.475973963737488, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2358318790793419, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.004030661820275668, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 3.956158955891927e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2156, |
| "loss/crossentropy": 2.5783761739730835, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21563701331615448, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.004045057041062367, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 3.956158955891927e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2139, |
| "loss/crossentropy": 2.36005961894989, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21386945247650146, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.004059452261849066, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 3.961622714996338e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2326, |
| "loss/crossentropy": 2.589225172996521, |
| "loss/fcd": 0.4345703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23257827758789062, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.004073847482635765, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 3.656446933746338e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2159, |
| "loss/crossentropy": 2.340222954750061, |
| "loss/fcd": 0.46484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21591536700725555, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.004088242703422464, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 3.337462743123372e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.238, |
| "loss/crossentropy": 2.484541654586792, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23801321536302567, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.0041026379242091624, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 3.315210342407227e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2338, |
| "loss/crossentropy": 2.4735066890716553, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23380715399980545, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.004117033144995862, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 3.8424134254455565e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1958, |
| "loss/crossentropy": 2.296001434326172, |
| "loss/fcd": 0.40625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19581247121095657, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.00413142836578256, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 4.054605960845947e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2233, |
| "loss/crossentropy": 2.469460368156433, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22334590554237366, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.004145823586569259, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 4.0776530901590984e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2563, |
| "loss/crossentropy": 2.3161216378211975, |
| "loss/fcd": 0.505859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2562841549515724, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.0041602188073559575, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 3.233651320139567e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2132, |
| "loss/crossentropy": 2.571072220802307, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2131756693124771, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.004174614028142657, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 3.245572249094645e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2147, |
| "loss/crossentropy": 2.3715583086013794, |
| "loss/fcd": 0.3994140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21474920213222504, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.004189009248929355, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 3.1503041585286457e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2157, |
| "loss/crossentropy": 2.379094123840332, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21565410494804382, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.004203404469716054, |
| "grad_norm": 0.154296875, |
| "grad_norm_var": 0.00014622112115224203, |
| "learning_rate": 0.0001, |
| "loss": 0.2908, |
| "loss/crossentropy": 2.696184992790222, |
| "loss/fcd": 0.548828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.29075586795806885, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.0042177996905027534, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 0.00014622112115224203, |
| "learning_rate": 0.0001, |
| "loss": 0.2109, |
| "loss/crossentropy": 2.4592641592025757, |
| "loss/fcd": 0.4365234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2109208032488823, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.004232194911289452, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 0.00014670689900716147, |
| "learning_rate": 0.0001, |
| "loss": 0.219, |
| "loss/crossentropy": 2.6254968643188477, |
| "loss/fcd": 0.4599609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21897459030151367, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.004246590132076151, |
| "grad_norm": 0.12451171875, |
| "grad_norm_var": 0.00014286736647288004, |
| "learning_rate": 0.0001, |
| "loss": 0.2263, |
| "loss/crossentropy": 2.7246745824813843, |
| "loss/fcd": 0.4658203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2262566015124321, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.004260985352862849, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.00014515618483225504, |
| "learning_rate": 0.0001, |
| "loss": 0.2029, |
| "loss/crossentropy": 2.3958386182785034, |
| "loss/fcd": 0.40625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20287074148654938, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.0042753805736495485, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 0.00014470418294270832, |
| "learning_rate": 0.0001, |
| "loss": 0.2364, |
| "loss/crossentropy": 2.457562804222107, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2363838478922844, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.004289775794436247, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 0.0001447826623916626, |
| "learning_rate": 0.0001, |
| "loss": 0.244, |
| "loss/crossentropy": 2.29829204082489, |
| "loss/fcd": 0.4521484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24399850517511368, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.004304171015222946, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.00014972686767578125, |
| "learning_rate": 0.0001, |
| "loss": 0.2147, |
| "loss/crossentropy": 2.6273841857910156, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21469515562057495, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.004318566236009644, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 0.00015286505222320557, |
| "learning_rate": 0.0001, |
| "loss": 0.2202, |
| "loss/crossentropy": 2.4213569164276123, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22024693340063095, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0043329614567963436, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.00015286505222320557, |
| "learning_rate": 0.0001, |
| "loss": 0.2201, |
| "loss/crossentropy": 2.4482584595680237, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2201283797621727, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.004347356677583043, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 0.0001453310251235962, |
| "learning_rate": 0.0001, |
| "loss": 0.1952, |
| "loss/crossentropy": 2.16507089138031, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1952093541622162, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.004361751898369741, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.000141298770904541, |
| "learning_rate": 0.0001, |
| "loss": 0.2006, |
| "loss/crossentropy": 2.2546703815460205, |
| "loss/fcd": 0.40234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2005770206451416, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.00437614711915644, |
| "grad_norm": 0.1572265625, |
| "grad_norm_var": 0.00025413731733957924, |
| "learning_rate": 0.0001, |
| "loss": 0.2731, |
| "loss/crossentropy": 2.345265507698059, |
| "loss/fcd": 0.470703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.27310631424188614, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.004390542339943139, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 0.0002516428629557292, |
| "learning_rate": 0.0001, |
| "loss": 0.2321, |
| "loss/crossentropy": 2.4603192806243896, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23207177966833115, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.004404937560729838, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 0.00025352537631988524, |
| "learning_rate": 0.0001, |
| "loss": 0.222, |
| "loss/crossentropy": 2.598379373550415, |
| "loss/fcd": 0.423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22200769931077957, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.004419332781516536, |
| "grad_norm": 0.1328125, |
| "grad_norm_var": 0.0002648353576660156, |
| "learning_rate": 0.0001, |
| "loss": 0.248, |
| "loss/crossentropy": 2.2982794046401978, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2480178400874138, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.004433728002303235, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.00017789900302886963, |
| "learning_rate": 0.0001, |
| "loss": 0.1998, |
| "loss/crossentropy": 2.2329931259155273, |
| "loss/fcd": 0.388671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1997941955924034, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.0044481232230899346, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 0.00017162561416625977, |
| "learning_rate": 0.0001, |
| "loss": 0.2278, |
| "loss/crossentropy": 2.4267385005950928, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22776535153388977, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.004462518443876633, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 0.00017028550306955972, |
| "learning_rate": 0.0001, |
| "loss": 0.2176, |
| "loss/crossentropy": 2.1391916275024414, |
| "loss/fcd": 0.46484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2176017314195633, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.004476913664663332, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 0.00016627212365468344, |
| "learning_rate": 0.0001, |
| "loss": 0.2485, |
| "loss/crossentropy": 2.617629051208496, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24850602447986603, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.00449130888545003, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.00017232795556386312, |
| "learning_rate": 0.0001, |
| "loss": 0.201, |
| "loss/crossentropy": 2.495308995246887, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20100131630897522, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.00450570410623673, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.00017122328281402588, |
| "learning_rate": 0.0001, |
| "loss": 0.2155, |
| "loss/crossentropy": 2.6817585229873657, |
| "loss/fcd": 0.46484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2155066430568695, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.004520099327023428, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 0.00017289221286773682, |
| "learning_rate": 0.0001, |
| "loss": 0.2089, |
| "loss/crossentropy": 2.4506349563598633, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20890694856643677, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.004534494547810127, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.00016514460245768228, |
| "learning_rate": 0.0001, |
| "loss": 0.2212, |
| "loss/crossentropy": 2.4268819093704224, |
| "loss/fcd": 0.4326171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2211536467075348, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.0045488897685968255, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 0.00016762415568033855, |
| "learning_rate": 0.0001, |
| "loss": 0.1915, |
| "loss/crossentropy": 2.082051396369934, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19149669259786606, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.004563284989383525, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.00016717910766601564, |
| "learning_rate": 0.0001, |
| "loss": 0.2085, |
| "loss/crossentropy": 2.178563714027405, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20848772674798965, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.004577680210170224, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.00016930003960927327, |
| "learning_rate": 0.0001, |
| "loss": 0.2261, |
| "loss/crossentropy": 2.4262903928756714, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22605551034212112, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.004592075430956922, |
| "grad_norm": 0.12255859375, |
| "grad_norm_var": 0.00016927321751912435, |
| "learning_rate": 0.0001, |
| "loss": 0.1989, |
| "loss/crossentropy": 2.4706810116767883, |
| "loss/fcd": 0.4443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19887082278728485, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.004606470651743621, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 5.278488000233968e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2139, |
| "loss/crossentropy": 2.34406316280365, |
| "loss/fcd": 0.4365234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21389687806367874, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.00462086587253032, |
| "grad_norm": 0.171875, |
| "grad_norm_var": 0.0002678145964940389, |
| "learning_rate": 0.0001, |
| "loss": 0.314, |
| "loss/crossentropy": 2.252693295478821, |
| "loss/fcd": 0.548828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.31398655474185944, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.004635261093317019, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 0.0002742727597554525, |
| "learning_rate": 0.0001, |
| "loss": 0.205, |
| "loss/crossentropy": 2.3450491428375244, |
| "loss/fcd": 0.4248046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20497491210699081, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.004649656314103717, |
| "grad_norm": 0.130859375, |
| "grad_norm_var": 0.0002702673276265462, |
| "learning_rate": 0.0001, |
| "loss": 0.2742, |
| "loss/crossentropy": 2.6299513578414917, |
| "loss/fcd": 0.5146484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.27415700256824493, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.0046640515348904165, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.0002730836470921834, |
| "learning_rate": 0.0001, |
| "loss": 0.2158, |
| "loss/crossentropy": 2.512497067451477, |
| "loss/fcd": 0.4248046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21584390848875046, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.004678446755677115, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.0002787023782730103, |
| "learning_rate": 0.0001, |
| "loss": 0.2, |
| "loss/crossentropy": 2.319981098175049, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20001471787691116, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.004692841976463814, |
| "grad_norm": 0.12890625, |
| "grad_norm_var": 0.00028857290744781493, |
| "learning_rate": 0.0001, |
| "loss": 0.2693, |
| "loss/crossentropy": 2.4298349618911743, |
| "loss/fcd": 0.48046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2693277597427368, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.004707237197250513, |
| "grad_norm": 0.1318359375, |
| "grad_norm_var": 0.0003031412760416667, |
| "learning_rate": 0.0001, |
| "loss": 0.249, |
| "loss/crossentropy": 2.555938482284546, |
| "loss/fcd": 0.4873046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2489527463912964, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.0047216324180372115, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 0.00028754870096842446, |
| "learning_rate": 0.0001, |
| "loss": 0.2233, |
| "loss/crossentropy": 2.3032290935516357, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2232954055070877, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.004736027638823911, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 0.0002883553504943848, |
| "learning_rate": 0.0001, |
| "loss": 0.2243, |
| "loss/crossentropy": 2.3655673265457153, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2242700606584549, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.004750422859610609, |
| "grad_norm": 0.12109375, |
| "grad_norm_var": 0.00028449594974517823, |
| "learning_rate": 0.0001, |
| "loss": 0.2429, |
| "loss/crossentropy": 2.330072522163391, |
| "loss/fcd": 0.4365234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24292638152837753, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.004764818080397308, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.0002801219622294108, |
| "learning_rate": 0.0001, |
| "loss": 0.2179, |
| "loss/crossentropy": 2.2494866847991943, |
| "loss/fcd": 0.4326171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21786177903413773, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.004779213301184007, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.00027185678482055664, |
| "learning_rate": 0.0001, |
| "loss": 0.2415, |
| "loss/crossentropy": 2.792868733406067, |
| "loss/fcd": 0.4716796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24153122305870056, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.004793608521970706, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.0002777258555094401, |
| "learning_rate": 0.0001, |
| "loss": 0.2367, |
| "loss/crossentropy": 2.573932647705078, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23671025037765503, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.004808003742757404, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 0.0002961436907450358, |
| "learning_rate": 0.0001, |
| "loss": 0.2002, |
| "loss/crossentropy": 2.5787216424942017, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20024622231721878, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.004822398963544103, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 0.00031576852003733315, |
| "learning_rate": 0.0001, |
| "loss": 0.2067, |
| "loss/crossentropy": 2.5130008459091187, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2067384421825409, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.0048367941843308025, |
| "grad_norm": 0.1962890625, |
| "grad_norm_var": 0.0006899476051330566, |
| "learning_rate": 0.0001, |
| "loss": 0.2185, |
| "loss/crossentropy": 2.2556002140045166, |
| "loss/fcd": 0.494140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2184857428073883, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.004851189405117501, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.0005320707956949869, |
| "learning_rate": 0.0001, |
| "loss": 0.2083, |
| "loss/crossentropy": 2.421205759048462, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20834489911794662, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.0048655846259042, |
| "grad_norm": 0.12109375, |
| "grad_norm_var": 0.0005181382099787394, |
| "learning_rate": 0.0001, |
| "loss": 0.2009, |
| "loss/crossentropy": 2.079905390739441, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20086795836687088, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.004879979846690898, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 0.0005108267068862915, |
| "learning_rate": 0.0001, |
| "loss": 0.2437, |
| "loss/crossentropy": 2.571584105491638, |
| "loss/fcd": 0.4423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2436518296599388, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.004894375067477598, |
| "grad_norm": 0.130859375, |
| "grad_norm_var": 0.0005070517460505167, |
| "learning_rate": 0.0001, |
| "loss": 0.252, |
| "loss/crossentropy": 2.3673810958862305, |
| "loss/fcd": 0.525390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2520231306552887, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.004908770288264296, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 0.0004946142435073853, |
| "learning_rate": 0.0001, |
| "loss": 0.1946, |
| "loss/crossentropy": 1.9378909468650818, |
| "loss/fcd": 0.384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19459272176027298, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.004923165509050995, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.0005005518595377604, |
| "learning_rate": 0.0001, |
| "loss": 0.2064, |
| "loss/crossentropy": 2.391346573829651, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20641817897558212, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.004937560729837694, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 0.0004995892445246379, |
| "learning_rate": 0.0001, |
| "loss": 0.213, |
| "loss/crossentropy": 2.4029276371002197, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2129564881324768, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.004951955950624393, |
| "grad_norm": 0.12353515625, |
| "grad_norm_var": 0.0005011399586995443, |
| "learning_rate": 0.0001, |
| "loss": 0.2122, |
| "loss/crossentropy": 2.3750810623168945, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21220777183771133, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.004966351171411092, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 0.0004993269840876262, |
| "learning_rate": 0.0001, |
| "loss": 0.2483, |
| "loss/crossentropy": 2.713660955429077, |
| "loss/fcd": 0.4853515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24831371009349823, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.00498074639219779, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.0005148798227310181, |
| "learning_rate": 0.0001, |
| "loss": 0.2107, |
| "loss/crossentropy": 2.550423502922058, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21066032350063324, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.004995141612984489, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 0.000519716739654541, |
| "learning_rate": 0.0001, |
| "loss": 0.2061, |
| "loss/crossentropy": 2.5207024812698364, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2061041295528412, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.005009536833771188, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 0.0005197912454605102, |
| "learning_rate": 0.0001, |
| "loss": 0.2402, |
| "loss/crossentropy": 2.3946497440338135, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24020669609308243, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.005023932054557887, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 0.000536501407623291, |
| "learning_rate": 0.0001, |
| "loss": 0.2128, |
| "loss/crossentropy": 2.516977548599243, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21278280019760132, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.005038327275344585, |
| "grad_norm": 0.1259765625, |
| "grad_norm_var": 0.0005188534657160441, |
| "learning_rate": 0.0001, |
| "loss": 0.2382, |
| "loss/crossentropy": 2.5282589197158813, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23819837719202042, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0050527224961312844, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 0.0005096713701883952, |
| "learning_rate": 0.0001, |
| "loss": 0.22, |
| "loss/crossentropy": 2.448602795600891, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21995113044977188, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.005067117716917984, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 8.844435214996337e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1952, |
| "loss/crossentropy": 2.4668463468551636, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19519731402397156, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.005081512937704682, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 8.725225925445556e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2368, |
| "loss/crossentropy": 2.315679907798767, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23678645491600037, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.005095908158491381, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 8.98192326227824e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2111, |
| "loss/crossentropy": 2.417713761329651, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21109846234321594, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.0051103033792780795, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 9.192526340484619e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1963, |
| "loss/crossentropy": 2.3545119762420654, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19629193097352982, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.005124698600064779, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 6.585121154785156e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1816, |
| "loss/crossentropy": 2.1606619358062744, |
| "loss/fcd": 0.3876953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18158919364213943, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.005139093820851477, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 6.665786107381184e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2114, |
| "loss/crossentropy": 2.429716110229492, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21144652366638184, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.005153489041638176, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 7.386902968088785e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2011, |
| "loss/crossentropy": 2.511311650276184, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20114467293024063, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.0051678842624248746, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 7.736583550771078e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1846, |
| "loss/crossentropy": 2.1977522373199463, |
| "loss/fcd": 0.373046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.184633307158947, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.005182279483211574, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 6.67800505956014e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2461, |
| "loss/crossentropy": 2.605985164642334, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24613827466964722, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.005196674703998273, |
| "grad_norm": 0.12353515625, |
| "grad_norm_var": 7.838805516560873e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.231, |
| "loss/crossentropy": 2.4244812726974487, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23104986548423767, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.005211069924784971, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 7.502933343251546e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2269, |
| "loss/crossentropy": 2.4840404987335205, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22690805047750473, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.0052254651455716705, |
| "grad_norm": 0.126953125, |
| "grad_norm_var": 9.071032206217447e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2776, |
| "loss/crossentropy": 2.631165862083435, |
| "loss/fcd": 0.5244140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.277616910636425, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.005239860366358369, |
| "grad_norm": 0.12353515625, |
| "grad_norm_var": 9.673039118448893e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2285, |
| "loss/crossentropy": 2.316849708557129, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.228460393846035, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.005254255587145068, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 9.242693583170573e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1957, |
| "loss/crossentropy": 2.315016031265259, |
| "loss/fcd": 0.38671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1956682875752449, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.005268650807931766, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 8.176167805989583e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2118, |
| "loss/crossentropy": 2.3352142572402954, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21177390962839127, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.0052830460287184655, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 7.939239343007406e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2289, |
| "loss/crossentropy": 2.511680841445923, |
| "loss/fcd": 0.466796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22891707718372345, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.005297441249505164, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 7.37150510152181e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2201, |
| "loss/crossentropy": 2.2285088300704956, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22013359516859055, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.005311836470291863, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 7.414718468983968e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1957, |
| "loss/crossentropy": 2.389556884765625, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19569466263055801, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.005326231691078562, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 7.047255833943684e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2015, |
| "loss/crossentropy": 2.2860642671585083, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20150135457515717, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.005340626911865261, |
| "grad_norm": 0.103515625, |
| "grad_norm_var": 7.21891721089681e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1878, |
| "loss/crossentropy": 2.1553120017051697, |
| "loss/fcd": 0.3740234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18780279159545898, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.00535502213265196, |
| "grad_norm": 0.126953125, |
| "grad_norm_var": 8.811056613922119e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2079, |
| "loss/crossentropy": 2.614238739013672, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20787174999713898, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.005369417353438658, |
| "grad_norm": 0.12255859375, |
| "grad_norm_var": 9.365081787109375e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2249, |
| "loss/crossentropy": 2.365216612815857, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2249324843287468, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.005383812574225357, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 8.481244246164958e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2203, |
| "loss/crossentropy": 2.6173166036605835, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2202518805861473, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.005398207795012056, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 7.554590702056885e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.205, |
| "loss/crossentropy": 2.2174978256225586, |
| "loss/fcd": 0.3916015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20496949553489685, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.005412603015798755, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 7.710357507069905e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1912, |
| "loss/crossentropy": 2.2786842584609985, |
| "loss/fcd": 0.3974609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19121932238340378, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.005426998236585453, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 7.359882195790609e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2442, |
| "loss/crossentropy": 2.5521395206451416, |
| "loss/fcd": 0.458984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24418669939041138, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.005441393457372152, |
| "grad_norm": 0.12060546875, |
| "grad_norm_var": 7.750888665517172e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2245, |
| "loss/crossentropy": 2.9219515323638916, |
| "loss/fcd": 0.482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22447162866592407, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.005455788678158852, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 6.41783078511556e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2119, |
| "loss/crossentropy": 2.393683671951294, |
| "loss/fcd": 0.408203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21186020970344543, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.00547018389894555, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 5.698104699452718e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2392, |
| "loss/crossentropy": 2.7257591485977173, |
| "loss/fcd": 0.4443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23916704207658768, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.005484579119732249, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 5.698104699452718e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2175, |
| "loss/crossentropy": 2.604699730873108, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21749083697795868, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.0054989743405189475, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 5.444586277008057e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2128, |
| "loss/crossentropy": 2.3415403366088867, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21281517297029495, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.005513369561305647, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 5.485117435455322e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2118, |
| "loss/crossentropy": 2.164521098136902, |
| "loss/fcd": 0.4013671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2117796689271927, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.005527764782092345, |
| "grad_norm": 0.11767578125, |
| "grad_norm_var": 5.648930867513021e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.235, |
| "loss/crossentropy": 2.243640184402466, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23500269651412964, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.005542160002879044, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 5.546808242797852e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2025, |
| "loss/crossentropy": 2.2612792253494263, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20246511697769165, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.005556555223665743, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 5.429188410441081e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.21, |
| "loss/crossentropy": 2.6286587715148926, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21000967174768448, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.005570950444452442, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 5.0572554270426434e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1951, |
| "loss/crossentropy": 2.381960868835449, |
| "loss/fcd": 0.38671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19514141231775284, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.005585345665239141, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 4.003743330637614e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2116, |
| "loss/crossentropy": 2.1127407550811768, |
| "loss/fcd": 0.4326171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21157918125391006, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.005599740886025839, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 3.414849440256754e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1979, |
| "loss/crossentropy": 2.2836742401123047, |
| "loss/fcd": 0.396484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1978917270898819, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.0056141361068125385, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 3.0163923899332682e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1913, |
| "loss/crossentropy": 2.141560196876526, |
| "loss/fcd": 0.3857421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19127248972654343, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.005628531327599237, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 4.756351312001546e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2001, |
| "loss/crossentropy": 2.0556570291519165, |
| "loss/fcd": 0.4052734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20014575868844986, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.005642926548385936, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 4.919270674387614e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2564, |
| "loss/crossentropy": 2.434049367904663, |
| "loss/fcd": 0.478515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2563505992293358, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.005657321769172634, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 4.583994547526042e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2032, |
| "loss/crossentropy": 2.31030809879303, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20315195620059967, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.0056717169899593335, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 4.297892252604167e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2028, |
| "loss/crossentropy": 2.201537609100342, |
| "loss/fcd": 0.423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20282629132270813, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.005686112210746033, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 4.315276940663656e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1968, |
| "loss/crossentropy": 2.2024729251861572, |
| "loss/fcd": 0.4013671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1968480423092842, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.005700507431532731, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 4.315276940663656e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2005, |
| "loss/crossentropy": 2.3388434648513794, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20053986459970474, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.00571490265231943, |
| "grad_norm": 0.1220703125, |
| "grad_norm_var": 4.148383935292562e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.241, |
| "loss/crossentropy": 2.9380890130996704, |
| "loss/fcd": 0.470703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24104679375886917, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.005729297873106129, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 4.267593224843343e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1981, |
| "loss/crossentropy": 2.0873407125473022, |
| "loss/fcd": 0.40234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1980888992547989, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.005743693093892828, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 4.21673059463501e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2054, |
| "loss/crossentropy": 2.398405909538269, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20542413741350174, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.005758088314679526, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 4.1285157203674315e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2162, |
| "loss/crossentropy": 2.4124823808670044, |
| "loss/fcd": 0.4130859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21624472737312317, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.005772483535466225, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 3.8185715675354e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2039, |
| "loss/crossentropy": 2.495412826538086, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20386559516191483, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.005786878756252924, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 3.6063790321350095e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2092, |
| "loss/crossentropy": 2.320030093193054, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20922860503196716, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.005801273977039623, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 3.229379653930664e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2263, |
| "loss/crossentropy": 2.5104581117630005, |
| "loss/fcd": 0.46484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22626767307519913, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.005815669197826322, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 3.532469272613525e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2136, |
| "loss/crossentropy": 2.5909669399261475, |
| "loss/fcd": 0.4365234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.213609017431736, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.00583006441861302, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 4.14202610651652e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1848, |
| "loss/crossentropy": 2.099331498146057, |
| "loss/fcd": 0.3916015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18482983112335205, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.00584445963939972, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 5.114773909250895e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1927, |
| "loss/crossentropy": 2.245216131210327, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1927170231938362, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.005858854860186418, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 3.54836384455363e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2359, |
| "loss/crossentropy": 2.3967188596725464, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2359299436211586, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.005873250080973117, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 3.3035874366760254e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2072, |
| "loss/crossentropy": 2.4940836429595947, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20715947449207306, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.005887645301759815, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 3.546774387359619e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2031, |
| "loss/crossentropy": 2.4436358213424683, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20313256978988647, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.005902040522546515, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 3.5599867502848306e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2205, |
| "loss/crossentropy": 2.3267935514450073, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22049501538276672, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.005916435743333213, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 3.591775894165039e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2032, |
| "loss/crossentropy": 2.262888252735138, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20318371057510376, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.005930830964119912, |
| "grad_norm": 0.09619140625, |
| "grad_norm_var": 4.8080086708068846e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2031, |
| "loss/crossentropy": 2.5199403762817383, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20311684161424637, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.005945226184906611, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 4.258155822753906e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1989, |
| "loss/crossentropy": 2.3285170793533325, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19889184832572937, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.00595962140569331, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 3.891686598459879e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2254, |
| "loss/crossentropy": 2.5566580295562744, |
| "loss/fcd": 0.466796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22541005164384842, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.005974016626480009, |
| "grad_norm": 0.12255859375, |
| "grad_norm_var": 5.155801773071289e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2264, |
| "loss/crossentropy": 2.537785768508911, |
| "loss/fcd": 0.4892578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22644919157028198, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.005988411847266707, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 5.3942203521728516e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2184, |
| "loss/crossentropy": 2.6664167642593384, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2184242233633995, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.006002807068053406, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 4.5719742774963376e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1944, |
| "loss/crossentropy": 2.231179356575012, |
| "loss/fcd": 0.40234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19440477341413498, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.006017202288840105, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 4.672110080718994e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2664, |
| "loss/crossentropy": 2.798780918121338, |
| "loss/fcd": 0.513671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.26635295152664185, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.006031597509626804, |
| "grad_norm": 0.1357421875, |
| "grad_norm_var": 9.435017903645834e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.227, |
| "loss/crossentropy": 2.5461186170578003, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22702706605196, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.006045992730413502, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 9.395281473795573e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2178, |
| "loss/crossentropy": 2.566969871520996, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21776312589645386, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.0060603879512002015, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 9.696384270985921e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1927, |
| "loss/crossentropy": 2.41417920589447, |
| "loss/fcd": 0.4130859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19272838532924652, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.006074783171986901, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 9.255409240722656e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2341, |
| "loss/crossentropy": 2.432627320289612, |
| "loss/fcd": 0.4423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2340926229953766, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.006089178392773599, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 9.301503499348958e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2064, |
| "loss/crossentropy": 2.22554087638855, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20639413595199585, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.006103573613560298, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 9.301503499348958e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2352, |
| "loss/crossentropy": 2.7009902000427246, |
| "loss/fcd": 0.4658203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23516181111335754, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.0061179688343469965, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 9.698768456776937e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2242, |
| "loss/crossentropy": 2.5257065296173096, |
| "loss/fcd": 0.50390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22422834485769272, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.006132364055133696, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.00010133981704711914, |
| "learning_rate": 0.0001, |
| "loss": 0.1991, |
| "loss/crossentropy": 2.39576256275177, |
| "loss/fcd": 0.3935546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19912777841091156, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.006146759275920394, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.00010203917821248373, |
| "learning_rate": 0.0001, |
| "loss": 0.194, |
| "loss/crossentropy": 2.337485671043396, |
| "loss/fcd": 0.4072265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19400090724229813, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.006161154496707093, |
| "grad_norm": 0.11767578125, |
| "grad_norm_var": 9.119908014933268e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2313, |
| "loss/crossentropy": 2.271997570991516, |
| "loss/fcd": 0.4619140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23129994422197342, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.0061755497174937925, |
| "grad_norm": 0.14453125, |
| "grad_norm_var": 0.0001476993163426717, |
| "learning_rate": 0.0001, |
| "loss": 0.2351, |
| "loss/crossentropy": 2.2284241318702698, |
| "loss/fcd": 0.4794921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23510510474443436, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.006189944938280491, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.00014778673648834227, |
| "learning_rate": 0.0001, |
| "loss": 0.2179, |
| "loss/crossentropy": 2.487444758415222, |
| "loss/fcd": 0.482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21794230490922928, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.00620434015906719, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 0.00014280378818511962, |
| "learning_rate": 0.0001, |
| "loss": 0.2053, |
| "loss/crossentropy": 2.277848958969116, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20534329116344452, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.006218735379853888, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 0.00014514923095703124, |
| "learning_rate": 0.0001, |
| "loss": 0.2112, |
| "loss/crossentropy": 2.4542627334594727, |
| "loss/fcd": 0.4052734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21123766899108887, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.0062331306006405875, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 0.0001399993896484375, |
| "learning_rate": 0.0001, |
| "loss": 0.2215, |
| "loss/crossentropy": 2.337994694709778, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22145532071590424, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.006247525821427286, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.00014856656392415364, |
| "learning_rate": 0.0001, |
| "loss": 0.1865, |
| "loss/crossentropy": 2.2629653215408325, |
| "loss/fcd": 0.3896484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18648526072502136, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.006261921042213985, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 0.00011246601740519205, |
| "learning_rate": 0.0001, |
| "loss": 0.2497, |
| "loss/crossentropy": 2.5573008060455322, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24965552240610123, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.006276316263000683, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 0.00011167128880818685, |
| "learning_rate": 0.0001, |
| "loss": 0.2199, |
| "loss/crossentropy": 2.3618111610412598, |
| "loss/fcd": 0.4365234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21985996514558792, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.006290711483787383, |
| "grad_norm": 0.12060546875, |
| "grad_norm_var": 0.0001062542200088501, |
| "learning_rate": 0.0001, |
| "loss": 0.1995, |
| "loss/crossentropy": 1.9345441460609436, |
| "loss/fcd": 0.40234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19948522001504898, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.006305106704574082, |
| "grad_norm": 0.09765625, |
| "grad_norm_var": 0.00012149413426717122, |
| "learning_rate": 0.0001, |
| "loss": 0.1829, |
| "loss/crossentropy": 2.2750844955444336, |
| "loss/fcd": 0.400390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18287546932697296, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.00631950192536078, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 0.00012334088484446207, |
| "learning_rate": 0.0001, |
| "loss": 0.206, |
| "loss/crossentropy": 2.178094267845154, |
| "loss/fcd": 0.400390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20597843825817108, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.006333897146147479, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 0.00012308756510416666, |
| "learning_rate": 0.0001, |
| "loss": 0.2026, |
| "loss/crossentropy": 2.2465450763702393, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20255093276500702, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.006348292366934178, |
| "grad_norm": 0.11767578125, |
| "grad_norm_var": 0.00012148221333821615, |
| "learning_rate": 0.0001, |
| "loss": 0.2194, |
| "loss/crossentropy": 2.3598278760910034, |
| "loss/fcd": 0.4599609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21944674849510193, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.006362687587720877, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 0.00011393229166666667, |
| "learning_rate": 0.0001, |
| "loss": 0.2421, |
| "loss/crossentropy": 2.523189663887024, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2420613244175911, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.006377082808507575, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.00011165837446848551, |
| "learning_rate": 0.0001, |
| "loss": 0.2116, |
| "loss/crossentropy": 2.4686609506607056, |
| "loss/fcd": 0.40625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2116122990846634, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.006391478029294274, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.00011196136474609376, |
| "learning_rate": 0.0001, |
| "loss": 0.2574, |
| "loss/crossentropy": 2.7661678791046143, |
| "loss/fcd": 0.5078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2574233114719391, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.006405873250080973, |
| "grad_norm": 0.1435546875, |
| "grad_norm_var": 0.00010795195897420248, |
| "learning_rate": 0.0001, |
| "loss": 0.2239, |
| "loss/crossentropy": 1.9732567071914673, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2238999307155609, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.006420268470867672, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.00010865529378255209, |
| "learning_rate": 0.0001, |
| "loss": 0.2209, |
| "loss/crossentropy": 2.3994816541671753, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22091981023550034, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.006434663691654371, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.00010942518711090087, |
| "learning_rate": 0.0001, |
| "loss": 0.2094, |
| "loss/crossentropy": 2.385701537132263, |
| "loss/fcd": 0.4072265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.209386445581913, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.0064490589124410694, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 9.9371870358785e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.212, |
| "loss/crossentropy": 2.204231023788452, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2120284140110016, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.006463454133227769, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 0.00010139147440592448, |
| "learning_rate": 0.0001, |
| "loss": 0.2106, |
| "loss/crossentropy": 2.4561452865600586, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21060140430927277, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.006477849354014467, |
| "grad_norm": 0.1337890625, |
| "grad_norm_var": 0.00011837383111317953, |
| "learning_rate": 0.0001, |
| "loss": 0.2272, |
| "loss/crossentropy": 2.2843399047851562, |
| "loss/fcd": 0.5146484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2271936535835266, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.006492244574801166, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.00012276868025461833, |
| "learning_rate": 0.0001, |
| "loss": 0.2133, |
| "loss/crossentropy": 2.564459443092346, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21329496800899506, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.0065066397955878645, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.00012448628743489583, |
| "learning_rate": 0.0001, |
| "loss": 0.2099, |
| "loss/crossentropy": 2.270860195159912, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20991922914981842, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.006521035016374564, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.00013029972712198893, |
| "learning_rate": 0.0001, |
| "loss": 0.2251, |
| "loss/crossentropy": 2.482293486595154, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22511228173971176, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.006535430237161262, |
| "grad_norm": 0.154296875, |
| "grad_norm_var": 0.00021419127782185872, |
| "learning_rate": 0.0001, |
| "loss": 0.2411, |
| "loss/crossentropy": 2.320971131324768, |
| "loss/fcd": 0.4951171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24106843769550323, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.006549825457947961, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.00021772285302480062, |
| "learning_rate": 0.0001, |
| "loss": 0.2047, |
| "loss/crossentropy": 2.3206406831741333, |
| "loss/fcd": 0.40234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20467744767665863, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.0065642206787346604, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 0.0002218236525853475, |
| "learning_rate": 0.0001, |
| "loss": 0.218, |
| "loss/crossentropy": 2.492337226867676, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2180488407611847, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.006578615899521359, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.00022468467553456625, |
| "learning_rate": 0.0001, |
| "loss": 0.2109, |
| "loss/crossentropy": 2.3202375173568726, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21094900369644165, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.006593011120308058, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.00023228228092193605, |
| "learning_rate": 0.0001, |
| "loss": 0.2025, |
| "loss/crossentropy": 2.452348470687866, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20247067511081696, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.006607406341094756, |
| "grad_norm": 0.138671875, |
| "grad_norm_var": 0.00026457707087198894, |
| "learning_rate": 0.0001, |
| "loss": 0.2292, |
| "loss/crossentropy": 2.8116979598999023, |
| "loss/fcd": 0.466796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2292005866765976, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.0066218015618814555, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.0002692292133967082, |
| "learning_rate": 0.0001, |
| "loss": 0.2044, |
| "loss/crossentropy": 2.337909698486328, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20442651212215424, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.006636196782668154, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.00022170444329579672, |
| "learning_rate": 0.0001, |
| "loss": 0.2165, |
| "loss/crossentropy": 2.452089309692383, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21646161377429962, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.006650592003454853, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.00022114813327789307, |
| "learning_rate": 0.0001, |
| "loss": 0.2284, |
| "loss/crossentropy": 2.596395969390869, |
| "loss/fcd": 0.470703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22836245596408844, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.006664987224241552, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.00022147099177042642, |
| "learning_rate": 0.0001, |
| "loss": 0.2309, |
| "loss/crossentropy": 2.553429961204529, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23092983663082123, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.0066793824450282506, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 0.00023492872714996337, |
| "learning_rate": 0.0001, |
| "loss": 0.1827, |
| "loss/crossentropy": 2.3164178133010864, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18271666765213013, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.00669377766581495, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 0.00023212035497029623, |
| "learning_rate": 0.0001, |
| "loss": 0.2093, |
| "loss/crossentropy": 2.3045096397399902, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20930374413728714, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.006708172886601648, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.00020383894443511962, |
| "learning_rate": 0.0001, |
| "loss": 0.2332, |
| "loss/crossentropy": 2.386527895927429, |
| "loss/fcd": 0.4970703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23323698341846466, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.006722568107388347, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 0.00020166635513305665, |
| "learning_rate": 0.0001, |
| "loss": 0.1988, |
| "loss/crossentropy": 2.151167392730713, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19876766949892044, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.006736963328175046, |
| "grad_norm": 0.1259765625, |
| "grad_norm_var": 0.00021171470483144123, |
| "learning_rate": 0.0001, |
| "loss": 0.2219, |
| "loss/crossentropy": 2.512352228164673, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22188346087932587, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.006751358548961745, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.00020895699659983317, |
| "learning_rate": 0.0001, |
| "loss": 0.202, |
| "loss/crossentropy": 2.4446065425872803, |
| "loss/fcd": 0.4326171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2020409256219864, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.006765753769748443, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 9.334782759348551e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2175, |
| "loss/crossentropy": 2.4017263650894165, |
| "loss/fcd": 0.4365234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21754636615514755, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.006780148990535142, |
| "grad_norm": 0.134765625, |
| "grad_norm_var": 0.00012315809726715088, |
| "learning_rate": 0.0001, |
| "loss": 0.2326, |
| "loss/crossentropy": 2.364670991897583, |
| "loss/fcd": 0.4658203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.232588529586792, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.0067945442113218416, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 0.00012839237848917643, |
| "learning_rate": 0.0001, |
| "loss": 0.2316, |
| "loss/crossentropy": 2.7174742221832275, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23159676045179367, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.00680893943210854, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.00013192395369211832, |
| "learning_rate": 0.0001, |
| "loss": 0.2198, |
| "loss/crossentropy": 2.472269654273987, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21980835497379303, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.006823334652895239, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 0.00012710789839426678, |
| "learning_rate": 0.0001, |
| "loss": 0.2208, |
| "loss/crossentropy": 2.5979279279708862, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22081798315048218, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.006837729873681937, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 8.223454157511394e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2017, |
| "loss/crossentropy": 2.337291121482849, |
| "loss/fcd": 0.5244140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20170452445745468, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.006852125094468637, |
| "grad_norm": 0.12353515625, |
| "grad_norm_var": 9.024540583292643e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2224, |
| "loss/crossentropy": 2.2137837409973145, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22239823639392853, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.006866520315255335, |
| "grad_norm": 0.1376953125, |
| "grad_norm_var": 0.00012429157892862957, |
| "learning_rate": 0.0001, |
| "loss": 0.2253, |
| "loss/crossentropy": 2.044808030128479, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22525641322135925, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.006880915536042034, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 0.00012467304865519205, |
| "learning_rate": 0.0001, |
| "loss": 0.2055, |
| "loss/crossentropy": 2.176842510700226, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20548538118600845, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.0068953107568287325, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 0.0001228402058283488, |
| "learning_rate": 0.0001, |
| "loss": 0.2052, |
| "loss/crossentropy": 2.43264901638031, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2051537036895752, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.006909705977615432, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 0.00010639429092407227, |
| "learning_rate": 0.0001, |
| "loss": 0.21, |
| "loss/crossentropy": 2.24726939201355, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.210049070417881, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.006924101198402131, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.0001061081886291504, |
| "learning_rate": 0.0001, |
| "loss": 0.204, |
| "loss/crossentropy": 2.71012020111084, |
| "loss/fcd": 0.4716796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20403584837913513, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.006938496419188829, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.00010386208693186442, |
| "learning_rate": 0.0001, |
| "loss": 0.2221, |
| "loss/crossentropy": 2.3664560317993164, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22213804721832275, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.006952891639975528, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.00010653237501780192, |
| "learning_rate": 0.0001, |
| "loss": 0.2118, |
| "loss/crossentropy": 2.541406989097595, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21182993054389954, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.006967286860762227, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 9.980897108713786e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2098, |
| "loss/crossentropy": 2.0675625801086426, |
| "loss/fcd": 0.3955078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20982014387845993, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.006981682081548926, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 9.15755828221639e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2191, |
| "loss/crossentropy": 2.1868069767951965, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21909870952367783, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.006996077302335624, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 9.856919447580973e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2026, |
| "loss/crossentropy": 2.26907217502594, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20264852046966553, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.0070104725231223235, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 7.203022638956706e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2109, |
| "loss/crossentropy": 2.4840633869171143, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21092981100082397, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.007024867743909022, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 6.31640354792277e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2276, |
| "loss/crossentropy": 2.5656063556671143, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22764952480793, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.007039262964695721, |
| "grad_norm": 0.1455078125, |
| "grad_norm_var": 0.0001110623280207316, |
| "learning_rate": 0.0001, |
| "loss": 0.2581, |
| "loss/crossentropy": 2.414512276649475, |
| "loss/fcd": 0.5146484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.258076474070549, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.00705365818548242, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 0.00011261304219563802, |
| "learning_rate": 0.0001, |
| "loss": 0.1995, |
| "loss/crossentropy": 2.2527265548706055, |
| "loss/fcd": 0.3984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19945065677165985, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0070680534062691185, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.00011458297570546469, |
| "learning_rate": 0.0001, |
| "loss": 0.214, |
| "loss/crossentropy": 2.3830225467681885, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21395207196474075, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.007082448627055818, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.00012197395165761312, |
| "learning_rate": 0.0001, |
| "loss": 0.2245, |
| "loss/crossentropy": 2.578980803489685, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2245059311389923, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.007096843847842516, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 8.859535058339437e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2001, |
| "loss/crossentropy": 2.0505954027175903, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2001277357339859, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.007111239068629215, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 8.837381998697917e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2057, |
| "loss/crossentropy": 2.2900065183639526, |
| "loss/fcd": 0.4326171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2057407721877098, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.007125634289415914, |
| "grad_norm": 0.09765625, |
| "grad_norm_var": 0.00010617574055989583, |
| "learning_rate": 0.0001, |
| "loss": 0.2047, |
| "loss/crossentropy": 2.6084879636764526, |
| "loss/fcd": 0.4052734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2047056257724762, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.007140029510202613, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.0001064211130142212, |
| "learning_rate": 0.0001, |
| "loss": 0.1886, |
| "loss/crossentropy": 2.2180538177490234, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18857233971357346, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.007154424730989311, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.0001118302345275879, |
| "learning_rate": 0.0001, |
| "loss": 0.2011, |
| "loss/crossentropy": 2.3378570079803467, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20111830532550812, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.00716881995177601, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 0.00011839866638183594, |
| "learning_rate": 0.0001, |
| "loss": 0.2105, |
| "loss/crossentropy": 2.4460572004318237, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21054691076278687, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.0071832151725627095, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.00012238721052805582, |
| "learning_rate": 0.0001, |
| "loss": 0.189, |
| "loss/crossentropy": 2.358466863632202, |
| "loss/fcd": 0.3779296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18903843313455582, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.007197610393349408, |
| "grad_norm": 0.0986328125, |
| "grad_norm_var": 0.00012767215569814045, |
| "learning_rate": 0.0001, |
| "loss": 0.23, |
| "loss/crossentropy": 2.604634642601013, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23002738505601883, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.007212005614136107, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 0.0001282016436258952, |
| "learning_rate": 0.0001, |
| "loss": 0.1989, |
| "loss/crossentropy": 2.3237578868865967, |
| "loss/fcd": 0.408203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19885492324829102, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.007226400834922805, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 0.00013103087743123373, |
| "learning_rate": 0.0001, |
| "loss": 0.2136, |
| "loss/crossentropy": 2.434670090675354, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21359950304031372, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.007240796055709505, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.0001281966765721639, |
| "learning_rate": 0.0001, |
| "loss": 0.2027, |
| "loss/crossentropy": 2.3419206142425537, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20269384235143661, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.007255191276496203, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 0.0001273860534032186, |
| "learning_rate": 0.0001, |
| "loss": 0.2168, |
| "loss/crossentropy": 2.418344020843506, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21677344292402267, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.007269586497282902, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 3.9155284563700356e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2208, |
| "loss/crossentropy": 2.5992894172668457, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22083494067192078, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.007283981718069601, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 3.770192464192708e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2111, |
| "loss/crossentropy": 2.3879592418670654, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21107713878154755, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.0072983769388563, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 3.7729740142822266e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2234, |
| "loss/crossentropy": 2.7517272233963013, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22336142510175705, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.007312772159642999, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 3.7729740142822266e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1838, |
| "loss/crossentropy": 2.1463602781295776, |
| "loss/fcd": 0.4033203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1838330551981926, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.007327167380429697, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 3.542006015777588e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2068, |
| "loss/crossentropy": 2.4218236207962036, |
| "loss/fcd": 0.4130859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2068256437778473, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.007341562601216396, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 4.722177982330322e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2242, |
| "loss/crossentropy": 2.253819227218628, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22418855130672455, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.007355957822003095, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 4.3102105458577474e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2045, |
| "loss/crossentropy": 2.1473891735076904, |
| "loss/fcd": 0.40234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2044747918844223, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.007370353042789794, |
| "grad_norm": 0.1220703125, |
| "grad_norm_var": 5.827645460764567e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2399, |
| "loss/crossentropy": 2.4559924602508545, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23986588418483734, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.007384748263576492, |
| "grad_norm": 0.1240234375, |
| "grad_norm_var": 7.387797037760417e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2335, |
| "loss/crossentropy": 2.3832513093948364, |
| "loss/fcd": 0.482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2334604561328888, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.0073991434843631914, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 6.859997908274333e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2134, |
| "loss/crossentropy": 2.4991053342819214, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21339743584394455, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.007413538705149891, |
| "grad_norm": 0.1376953125, |
| "grad_norm_var": 0.00011509160200754802, |
| "learning_rate": 0.0001, |
| "loss": 0.2837, |
| "loss/crossentropy": 2.707633852958679, |
| "loss/fcd": 0.525390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.28367944806814194, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.007427933925936589, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.00010480483373006184, |
| "learning_rate": 0.0001, |
| "loss": 0.2266, |
| "loss/crossentropy": 2.4334722757339478, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22656814754009247, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.007442329146723288, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 0.00010337432225545247, |
| "learning_rate": 0.0001, |
| "loss": 0.2487, |
| "loss/crossentropy": 2.794032335281372, |
| "loss/fcd": 0.4853515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2486870214343071, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.0074567243675099865, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 0.00010714431603749593, |
| "learning_rate": 0.0001, |
| "loss": 0.1797, |
| "loss/crossentropy": 2.4037466049194336, |
| "loss/fcd": 0.408203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1796710044145584, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.007471119588296686, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.00010959208011627198, |
| "learning_rate": 0.0001, |
| "loss": 0.2219, |
| "loss/crossentropy": 2.3638296127319336, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22193115949630737, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.007485514809083384, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.00010979076226552328, |
| "learning_rate": 0.0001, |
| "loss": 0.2223, |
| "loss/crossentropy": 2.555932879447937, |
| "loss/fcd": 0.458984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22230461984872818, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.007499910029870083, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 0.00010768473148345948, |
| "learning_rate": 0.0001, |
| "loss": 0.2183, |
| "loss/crossentropy": 2.5372231006622314, |
| "loss/fcd": 0.4541015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21830307692289352, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.0075143052506567816, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.00010279715061187745, |
| "learning_rate": 0.0001, |
| "loss": 0.2068, |
| "loss/crossentropy": 2.566136121749878, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20676826685667038, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.007528700471443481, |
| "grad_norm": 0.119140625, |
| "grad_norm_var": 0.00010263025760650634, |
| "learning_rate": 0.0001, |
| "loss": 0.2323, |
| "loss/crossentropy": 2.4165902137756348, |
| "loss/fcd": 0.5078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23230554163455963, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.00754309569223018, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 9.721020857493083e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2198, |
| "loss/crossentropy": 2.5744664669036865, |
| "loss/fcd": 0.4638671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21984682232141495, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.007557490913016878, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 8.722543716430665e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2083, |
| "loss/crossentropy": 2.0694758892059326, |
| "loss/fcd": 0.4228515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20834185183048248, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.0075718861338035775, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 8.707046508789062e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2306, |
| "loss/crossentropy": 2.5832005739212036, |
| "loss/fcd": 0.474609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23062562197446823, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.007586281354590276, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 7.978677749633789e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2216, |
| "loss/crossentropy": 2.38311767578125, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2215922325849533, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.007600676575376975, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 8.81791114807129e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1902, |
| "loss/crossentropy": 1.8877107501029968, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19022603332996368, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.007615071796163673, |
| "grad_norm": 0.12060546875, |
| "grad_norm_var": 8.454223473866781e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2042, |
| "loss/crossentropy": 2.158120632171631, |
| "loss/fcd": 0.40234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20415493100881577, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.0076294670169503725, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 8.933444817860921e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2037, |
| "loss/crossentropy": 2.460996627807617, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20372942835092545, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.007643862237737071, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 4.8951307932535806e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2132, |
| "loss/crossentropy": 2.226336717605591, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21315942704677582, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.00765825745852377, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 4.7647953033447264e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2321, |
| "loss/crossentropy": 2.446126341819763, |
| "loss/fcd": 0.462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2321249470114708, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.007672652679310469, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 5.016326904296875e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2082, |
| "loss/crossentropy": 2.3785619735717773, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.208193838596344, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.007687047900097168, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 3.9878487586975095e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2113, |
| "loss/crossentropy": 2.2807793617248535, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21130456030368805, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.007701443120883867, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 3.865162531534831e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.231, |
| "loss/crossentropy": 2.65705668926239, |
| "loss/fcd": 0.4619140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23100796341896057, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.007715838341670565, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 3.920296827952067e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2427, |
| "loss/crossentropy": 2.493618130683899, |
| "loss/fcd": 0.474609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24267538636922836, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.007730233562457264, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 4.4710437456766765e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2032, |
| "loss/crossentropy": 2.3469570875167847, |
| "loss/fcd": 0.3955078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2031807154417038, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.007744628783243963, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 4.851023356119792e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2267, |
| "loss/crossentropy": 2.262601613998413, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22673919051885605, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.007759024004030662, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 4.749198754628499e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2243, |
| "loss/crossentropy": 2.8090314865112305, |
| "loss/fcd": 0.4482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22425533086061478, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.00777341922481736, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 4.793703556060791e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2073, |
| "loss/crossentropy": 2.41420841217041, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20730414986610413, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.007787814445604059, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 5.024174849192301e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1982, |
| "loss/crossentropy": 2.338608145713806, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19816286861896515, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.007802209666390759, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 5.1875909169514976e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1913, |
| "loss/crossentropy": 2.199298143386841, |
| "loss/fcd": 0.3994140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19127565622329712, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.007816604887177458, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 5.159278710683187e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2308, |
| "loss/crossentropy": 2.403664708137512, |
| "loss/fcd": 0.4345703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23076358437538147, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.007831000107964156, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 3.05334726969401e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2145, |
| "loss/crossentropy": 2.511462450027466, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21448855847120285, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.007845395328750854, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 2.3965040842692057e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2069, |
| "loss/crossentropy": 2.543255090713501, |
| "loss/fcd": 0.4072265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20689202100038528, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.007859790549537553, |
| "grad_norm": 0.09765625, |
| "grad_norm_var": 3.03576389948527e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2131, |
| "loss/crossentropy": 2.618165135383606, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2130560278892517, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.007874185770324253, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 2.9260913530985515e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.205, |
| "loss/crossentropy": 2.3171777725219727, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20504355430603027, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.007888580991110951, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 2.8092662493387858e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2055, |
| "loss/crossentropy": 2.3242313861846924, |
| "loss/fcd": 0.4365234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20549335330724716, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.00790297621189765, |
| "grad_norm": 0.287109375, |
| "grad_norm_var": 0.0020447880029678344, |
| "learning_rate": 0.0001, |
| "loss": 0.2562, |
| "loss/crossentropy": 2.264755129814148, |
| "loss/fcd": 0.548828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2562015801668167, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.00791737143268435, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.002042093873023987, |
| "learning_rate": 0.0001, |
| "loss": 0.1887, |
| "loss/crossentropy": 2.053212523460388, |
| "loss/fcd": 0.3916015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18873201310634613, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.007931766653471048, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.002041463057200114, |
| "learning_rate": 0.0001, |
| "loss": 0.1847, |
| "loss/crossentropy": 2.1234816908836365, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18469391763210297, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.007946161874257746, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.0020542532205581666, |
| "learning_rate": 0.0001, |
| "loss": 0.2206, |
| "loss/crossentropy": 2.5538755655288696, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22058459371328354, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.007960557095044445, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.0020552794138590496, |
| "learning_rate": 0.0001, |
| "loss": 0.2014, |
| "loss/crossentropy": 2.2996666431427, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20140548795461655, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.007974952315831145, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 0.0020551522572835284, |
| "learning_rate": 0.0001, |
| "loss": 0.1931, |
| "loss/crossentropy": 2.076995849609375, |
| "loss/fcd": 0.3876953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19310477375984192, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.007989347536617843, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 0.0020657857259114582, |
| "learning_rate": 0.0001, |
| "loss": 0.2072, |
| "loss/crossentropy": 2.5844489336013794, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20723149180412292, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.008003742757404541, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.0020593394835789996, |
| "learning_rate": 0.0001, |
| "loss": 0.2353, |
| "loss/crossentropy": 2.580026626586914, |
| "loss/fcd": 0.4541015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23526855558156967, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.00801813797819124, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 0.0020690351724624635, |
| "learning_rate": 0.0001, |
| "loss": 0.2148, |
| "loss/crossentropy": 2.526800036430359, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21484342962503433, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.00803253319897794, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.002070759733517965, |
| "learning_rate": 0.0001, |
| "loss": 0.203, |
| "loss/crossentropy": 2.459173560142517, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2029871866106987, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.008046928419764638, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 0.002061744530995687, |
| "learning_rate": 0.0001, |
| "loss": 0.238, |
| "loss/crossentropy": 2.560517430305481, |
| "loss/fcd": 0.4775390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23796609044075012, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.008061323640551336, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.002065872152646383, |
| "learning_rate": 0.0001, |
| "loss": 0.203, |
| "loss/crossentropy": 2.476174235343933, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20299049466848373, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.008075718861338036, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.0020527432362238566, |
| "learning_rate": 0.0001, |
| "loss": 0.1991, |
| "loss/crossentropy": 2.5808521509170532, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19911371916532516, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.008090114082124735, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.002040464679400126, |
| "learning_rate": 0.0001, |
| "loss": 0.1829, |
| "loss/crossentropy": 2.2305572628974915, |
| "loss/fcd": 0.3935546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.182855024933815, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.008104509302911433, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 0.0020505974690119425, |
| "learning_rate": 0.0001, |
| "loss": 0.197, |
| "loss/crossentropy": 2.2332805395126343, |
| "loss/fcd": 0.40625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19702833145856857, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.008118904523698131, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.002046417196591695, |
| "learning_rate": 0.0001, |
| "loss": 0.2281, |
| "loss/crossentropy": 2.634607434272766, |
| "loss/fcd": 0.4599609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2280866503715515, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.008133299744484832, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 4.386504491170247e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2263, |
| "loss/crossentropy": 2.566808342933655, |
| "loss/fcd": 0.458984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22630243003368378, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.00814769496527153, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 4.404385884602864e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2021, |
| "loss/crossentropy": 2.404551863670349, |
| "loss/fcd": 0.4326171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20205579698085785, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.008162090186058228, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 4.396339257558187e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1902, |
| "loss/crossentropy": 2.052983283996582, |
| "loss/fcd": 0.4033203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19023562967777252, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.008176485406844928, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 4.5942266782124835e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1904, |
| "loss/crossentropy": 2.1084887981414795, |
| "loss/fcd": 0.3916015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1903728023171425, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.008190880627631627, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 4.348357518513997e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2044, |
| "loss/crossentropy": 2.528154492378235, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20440030097961426, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.008205275848418325, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 3.998180230458577e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2031, |
| "loss/crossentropy": 2.2640358209609985, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20313097536563873, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.008219671069205023, |
| "grad_norm": 0.125, |
| "grad_norm_var": 5.50230344136556e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2647, |
| "loss/crossentropy": 2.533176898956299, |
| "loss/fcd": 0.5107421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2647128999233246, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.008234066289991723, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 4.928807417551676e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2343, |
| "loss/crossentropy": 2.5634536743164062, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23427864164113998, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.008248461510778422, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 4.5518080393473305e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2025, |
| "loss/crossentropy": 2.4560309648513794, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20253371447324753, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.00826285673156512, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 4.742046197255453e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1913, |
| "loss/crossentropy": 2.2056825160980225, |
| "loss/fcd": 0.392578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1913457065820694, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.00827725195235182, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 4.228651523590088e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2063, |
| "loss/crossentropy": 2.330709218978882, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20634697377681732, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.008291647173138518, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 4.224777221679687e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1815, |
| "loss/crossentropy": 1.9496164321899414, |
| "loss/fcd": 0.3935546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18145756423473358, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.008306042393925217, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 4.2000412940979e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2194, |
| "loss/crossentropy": 2.6926685571670532, |
| "loss/fcd": 0.4365234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21936995536088943, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.008320437614711915, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 4.031558831532796e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1976, |
| "loss/crossentropy": 2.299630641937256, |
| "loss/fcd": 0.4072265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1975780501961708, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.008334832835498615, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 3.9418538411458336e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1928, |
| "loss/crossentropy": 2.3579829931259155, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19275517761707306, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.008349228056285313, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 3.998180230458577e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1892, |
| "loss/crossentropy": 2.1995487213134766, |
| "loss/fcd": 0.404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18920866400003433, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.008363623277072012, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 3.161331017812093e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2302, |
| "loss/crossentropy": 2.54054057598114, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23019887506961823, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.00837801849785871, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 3.3692518870035806e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2467, |
| "loss/crossentropy": 2.5756444931030273, |
| "loss/fcd": 0.490234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2466834932565689, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.00839241371864541, |
| "grad_norm": 0.1259765625, |
| "grad_norm_var": 5.541543165842692e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.217, |
| "loss/crossentropy": 2.225999653339386, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21695519983768463, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.008406808939432108, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 5.202194054921468e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2196, |
| "loss/crossentropy": 2.315016746520996, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21962474286556244, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.008421204160218807, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 5.244811375935872e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2172, |
| "loss/crossentropy": 2.390581250190735, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21722210943698883, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.008435599381005507, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 5.18798828125e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.202, |
| "loss/crossentropy": 2.451754093170166, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20203383266925812, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.008449994601792205, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 5.8710575103759766e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2503, |
| "loss/crossentropy": 2.1609503030776978, |
| "loss/fcd": 0.4873046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2503489702939987, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.008464389822578904, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 5.839268366495768e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1994, |
| "loss/crossentropy": 2.347867727279663, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1994006633758545, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.008478785043365602, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 5.947351455688476e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2185, |
| "loss/crossentropy": 2.502691388130188, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2184857726097107, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.008493180264152302, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 5.555152893066406e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2103, |
| "loss/crossentropy": 2.1813002228736877, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21025604009628296, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.008507575484939, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 5.262692769368489e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2165, |
| "loss/crossentropy": 2.544050931930542, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21648868918418884, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.008521970705725699, |
| "grad_norm": 0.126953125, |
| "grad_norm_var": 7.121463616689046e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2075, |
| "loss/crossentropy": 2.072811484336853, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20745252817869186, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.008536365926512399, |
| "grad_norm": 0.09375, |
| "grad_norm_var": 8.921523888905843e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1907, |
| "loss/crossentropy": 2.657747268676758, |
| "loss/fcd": 0.423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1907452642917633, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.008550761147299097, |
| "grad_norm": 0.18359375, |
| "grad_norm_var": 0.0004295577605565389, |
| "learning_rate": 0.0001, |
| "loss": 0.2969, |
| "loss/crossentropy": 2.365026593208313, |
| "loss/fcd": 0.5234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.29685717821121216, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.008565156368085795, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.00042495330174764, |
| "learning_rate": 0.0001, |
| "loss": 0.2275, |
| "loss/crossentropy": 2.617425799369812, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2274792492389679, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.008579551588872494, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 0.0004302342732747396, |
| "learning_rate": 0.0001, |
| "loss": 0.2138, |
| "loss/crossentropy": 2.589759111404419, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2137622982263565, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.008593946809659194, |
| "grad_norm": 0.123046875, |
| "grad_norm_var": 0.00042901734511057533, |
| "learning_rate": 0.0001, |
| "loss": 0.2186, |
| "loss/crossentropy": 2.165451228618622, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21863602101802826, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.008608342030445892, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.00042969385782877604, |
| "learning_rate": 0.0001, |
| "loss": 0.2004, |
| "loss/crossentropy": 2.421903610229492, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20041261613368988, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.00862273725123259, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 0.0004218568404515584, |
| "learning_rate": 0.0001, |
| "loss": 0.249, |
| "loss/crossentropy": 2.555266857147217, |
| "loss/fcd": 0.478515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2490156590938568, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.008637132472019289, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.00041990180810292564, |
| "learning_rate": 0.0001, |
| "loss": 0.2241, |
| "loss/crossentropy": 2.4431397914886475, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22414565831422806, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.008651527692805989, |
| "grad_norm": 0.12158203125, |
| "grad_norm_var": 0.0004164050022761027, |
| "learning_rate": 0.0001, |
| "loss": 0.1972, |
| "loss/crossentropy": 2.086324453353882, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19717370718717575, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.008665922913592687, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 0.000419496496518453, |
| "learning_rate": 0.0001, |
| "loss": 0.1806, |
| "loss/crossentropy": 2.2410671710968018, |
| "loss/fcd": 0.3984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18064773827791214, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.008680318134379385, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.00041954914728800456, |
| "learning_rate": 0.0001, |
| "loss": 0.214, |
| "loss/crossentropy": 2.3243794441223145, |
| "loss/fcd": 0.4345703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2140304446220398, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.008694713355166086, |
| "grad_norm": 0.12353515625, |
| "grad_norm_var": 0.0004225889841715495, |
| "learning_rate": 0.0001, |
| "loss": 0.2533, |
| "loss/crossentropy": 2.4268319606781006, |
| "loss/fcd": 0.48828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.25334879010915756, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.008709108575952784, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 0.00042932828267415365, |
| "learning_rate": 0.0001, |
| "loss": 0.1851, |
| "loss/crossentropy": 2.3854864835739136, |
| "loss/fcd": 0.408203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18514161556959152, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.008723503796739482, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.0004343261321385702, |
| "learning_rate": 0.0001, |
| "loss": 0.1993, |
| "loss/crossentropy": 2.385258913040161, |
| "loss/fcd": 0.4482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19932958483695984, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.00873789901752618, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.00043377876281738283, |
| "learning_rate": 0.0001, |
| "loss": 0.2085, |
| "loss/crossentropy": 2.4880837202072144, |
| "loss/fcd": 0.4326171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20852985978126526, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.00875229423831288, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.0004233519236246745, |
| "learning_rate": 0.0001, |
| "loss": 0.2413, |
| "loss/crossentropy": 2.486106753349304, |
| "loss/fcd": 0.4619140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2413138523697853, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.008766689459099579, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 0.0004109054803848267, |
| "learning_rate": 0.0001, |
| "loss": 0.2237, |
| "loss/crossentropy": 2.713275671005249, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22369590401649475, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.008781084679886277, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 6.965001424153646e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2017, |
| "loss/crossentropy": 2.4143831729888916, |
| "loss/fcd": 0.4013671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2017301544547081, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.008795479900672977, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 7.179578145345052e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2116, |
| "loss/crossentropy": 2.3723723888397217, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21161457151174545, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.008809875121459676, |
| "grad_norm": 0.095703125, |
| "grad_norm_var": 7.739067077636719e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1875, |
| "loss/crossentropy": 2.3493517637252808, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18747683614492416, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.008824270342246374, |
| "grad_norm": 0.0986328125, |
| "grad_norm_var": 6.656249364217122e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1934, |
| "loss/crossentropy": 2.484821081161499, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1934322491288185, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.008838665563033072, |
| "grad_norm": 0.2138671875, |
| "grad_norm_var": 0.000786288579305013, |
| "learning_rate": 0.0001, |
| "loss": 0.2388, |
| "loss/crossentropy": 2.2311092615127563, |
| "loss/fcd": 0.521484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23880772292613983, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.008853060783819772, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 0.0007862001657485962, |
| "learning_rate": 0.0001, |
| "loss": 0.209, |
| "loss/crossentropy": 2.1388099193573, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20903942734003067, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.00886745600460647, |
| "grad_norm": 0.1396484375, |
| "grad_norm_var": 0.0008295287688573201, |
| "learning_rate": 0.0001, |
| "loss": 0.2045, |
| "loss/crossentropy": 2.118674635887146, |
| "loss/fcd": 0.501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20450318604707718, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.008881851225393169, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.0008352239926656087, |
| "learning_rate": 0.0001, |
| "loss": 0.1834, |
| "loss/crossentropy": 2.149811267852783, |
| "loss/fcd": 0.3935546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18339695036411285, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.008896246446179869, |
| "grad_norm": 0.12109375, |
| "grad_norm_var": 0.0008298943440119426, |
| "learning_rate": 0.0001, |
| "loss": 0.2338, |
| "loss/crossentropy": 2.324687123298645, |
| "loss/fcd": 0.4658203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23375140875577927, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.008910641666966567, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 0.0008182843526204427, |
| "learning_rate": 0.0001, |
| "loss": 0.2211, |
| "loss/crossentropy": 2.2215802669525146, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22114143520593643, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.008925036887753266, |
| "grad_norm": 0.0986328125, |
| "grad_norm_var": 0.0008311023314793905, |
| "learning_rate": 0.0001, |
| "loss": 0.2057, |
| "loss/crossentropy": 2.406686782836914, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20571539551019669, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.008939432108539964, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 0.0008170286814371745, |
| "learning_rate": 0.0001, |
| "loss": 0.2058, |
| "loss/crossentropy": 2.327828884124756, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2057729959487915, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.008953827329326664, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.0008163203795750936, |
| "learning_rate": 0.0001, |
| "loss": 0.2041, |
| "loss/crossentropy": 2.394818425178528, |
| "loss/fcd": 0.4228515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2041458711028099, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.008968222550113363, |
| "grad_norm": 0.09765625, |
| "grad_norm_var": 0.0008313407500584921, |
| "learning_rate": 0.0001, |
| "loss": 0.1949, |
| "loss/crossentropy": 2.384241223335266, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19486035406589508, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.00898261777090006, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.0008399953444798787, |
| "learning_rate": 0.0001, |
| "loss": 0.2079, |
| "loss/crossentropy": 2.4613648653030396, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20785125344991684, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.00899701299168676, |
| "grad_norm": 0.123046875, |
| "grad_norm_var": 0.0008281668027242025, |
| "learning_rate": 0.0001, |
| "loss": 0.2737, |
| "loss/crossentropy": 2.572801351547241, |
| "loss/fcd": 0.54296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.27374986559152603, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.00901140821247346, |
| "grad_norm": 0.123046875, |
| "grad_norm_var": 0.0008179575204849243, |
| "learning_rate": 0.0001, |
| "loss": 0.2015, |
| "loss/crossentropy": 1.862765610218048, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2015407457947731, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.009025803433260158, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 0.0008110642433166504, |
| "learning_rate": 0.0001, |
| "loss": 0.2186, |
| "loss/crossentropy": 2.5048106908798218, |
| "loss/fcd": 0.4541015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21860718727111816, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.009040198654046856, |
| "grad_norm": 0.1337890625, |
| "grad_norm_var": 0.0007929325103759766, |
| "learning_rate": 0.0001, |
| "loss": 0.22, |
| "loss/crossentropy": 2.3713510036468506, |
| "loss/fcd": 0.4541015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21999357640743256, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.009054593874833556, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 0.0007665634155273437, |
| "learning_rate": 0.0001, |
| "loss": 0.2194, |
| "loss/crossentropy": 2.5511568784713745, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2194477617740631, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.009068989095620254, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 0.00014481544494628906, |
| "learning_rate": 0.0001, |
| "loss": 0.2324, |
| "loss/crossentropy": 2.371564745903015, |
| "loss/fcd": 0.4443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2323940396308899, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.009083384316406953, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 0.00014898677666982016, |
| "learning_rate": 0.0001, |
| "loss": 0.2114, |
| "loss/crossentropy": 2.4888617992401123, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21141232550144196, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.009097779537193651, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 0.0001020421584447225, |
| "learning_rate": 0.0001, |
| "loss": 0.2337, |
| "loss/crossentropy": 2.7666863203048706, |
| "loss/fcd": 0.484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23369022458791733, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.009112174757980351, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.0001020421584447225, |
| "learning_rate": 0.0001, |
| "loss": 0.2183, |
| "loss/crossentropy": 2.369840621948242, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21829679608345032, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.00912656997876705, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 9.677310784657796e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2211, |
| "loss/crossentropy": 2.469444990158081, |
| "loss/fcd": 0.4775390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22109205275774002, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.009140965199553748, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 9.50247049331665e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2079, |
| "loss/crossentropy": 2.3658159971237183, |
| "loss/fcd": 0.4619140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20790337026119232, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.009155360420340448, |
| "grad_norm": 0.09423828125, |
| "grad_norm_var": 0.0001034379005432129, |
| "learning_rate": 0.0001, |
| "loss": 0.1845, |
| "loss/crossentropy": 2.618008255958557, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18447843939065933, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.009169755641127146, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.00010196268558502198, |
| "learning_rate": 0.0001, |
| "loss": 0.2325, |
| "loss/crossentropy": 2.4641441106796265, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23245185613632202, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.009184150861913844, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 0.00010348955790201823, |
| "learning_rate": 0.0001, |
| "loss": 0.2099, |
| "loss/crossentropy": 2.5920947790145874, |
| "loss/fcd": 0.5068359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20993127673864365, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.009198546082700543, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 9.453992048899332e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2037, |
| "loss/crossentropy": 2.207823634147644, |
| "loss/fcd": 0.4130859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20368105918169022, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.009212941303487243, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 8.891324202219645e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1993, |
| "loss/crossentropy": 2.3137396574020386, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1993313431739807, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.009227336524273941, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 8.830626805623372e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2031, |
| "loss/crossentropy": 2.4254961013793945, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20313136279582977, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.00924173174506064, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 7.775227228800456e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1889, |
| "loss/crossentropy": 1.978569746017456, |
| "loss/fcd": 0.400390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18890459090471268, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.00925612696584734, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 7.740259170532226e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2315, |
| "loss/crossentropy": 2.575870633125305, |
| "loss/fcd": 0.484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2314896583557129, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.009270522186634038, |
| "grad_norm": 0.095703125, |
| "grad_norm_var": 4.6253204345703125e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1942, |
| "loss/crossentropy": 2.4864895343780518, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19422397762537003, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.009284917407420736, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 4.5433640480041504e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2053, |
| "loss/crossentropy": 2.3608009815216064, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20532061159610748, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.009299312628207435, |
| "grad_norm": 0.12109375, |
| "grad_norm_var": 5.466838677724202e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2398, |
| "loss/crossentropy": 2.3986343145370483, |
| "loss/fcd": 0.5078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23981131613254547, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.009313707848994135, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 5.496243635813395e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2281, |
| "loss/crossentropy": 2.7443615198135376, |
| "loss/fcd": 0.4541015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2280602902173996, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.009328103069780833, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 5.771319071451823e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2143, |
| "loss/crossentropy": 2.785035014152527, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2142793908715248, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.009342498290567531, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 5.6962172190348305e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2117, |
| "loss/crossentropy": 2.3756792545318604, |
| "loss/fcd": 0.462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21174004673957825, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.00935689351135423, |
| "grad_norm": 0.0986328125, |
| "grad_norm_var": 5.784034729003906e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1971, |
| "loss/crossentropy": 2.4738396406173706, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19714603573083878, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.00937128873214093, |
| "grad_norm": 0.09619140625, |
| "grad_norm_var": 6.304482618967692e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1734, |
| "loss/crossentropy": 2.1993648409843445, |
| "loss/fcd": 0.376953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1733626276254654, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.009385683952927628, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 8.175770441691081e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2205, |
| "loss/crossentropy": 2.3313381671905518, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22045698016881943, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.009400079173714326, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 8.491575717926026e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2458, |
| "loss/crossentropy": 2.5517263412475586, |
| "loss/fcd": 0.47265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2457558810710907, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.009414474394501026, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 7.773935794830322e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2048, |
| "loss/crossentropy": 2.1996500492095947, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20480218529701233, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.009428869615287725, |
| "grad_norm": 0.1396484375, |
| "grad_norm_var": 0.00014075835545857747, |
| "learning_rate": 0.0001, |
| "loss": 0.2842, |
| "loss/crossentropy": 2.34015429019928, |
| "loss/fcd": 0.544921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2842213958501816, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.009443264836074423, |
| "grad_norm": 0.1455078125, |
| "grad_norm_var": 0.00022115310033162436, |
| "learning_rate": 0.0001, |
| "loss": 0.2686, |
| "loss/crossentropy": 2.409281849861145, |
| "loss/fcd": 0.5390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.26864343136548996, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.009457660056861121, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.0002117753028869629, |
| "learning_rate": 0.0001, |
| "loss": 0.2035, |
| "loss/crossentropy": 2.250716805458069, |
| "loss/fcd": 0.400390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20348752290010452, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.009472055277647821, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.00021069447199503581, |
| "learning_rate": 0.0001, |
| "loss": 0.1903, |
| "loss/crossentropy": 2.2704538106918335, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19032004475593567, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.00948645049843452, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.0002124945322672526, |
| "learning_rate": 0.0001, |
| "loss": 0.2032, |
| "loss/crossentropy": 2.31364369392395, |
| "loss/fcd": 0.4052734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20321927964687347, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.009500845719221218, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 0.00019279221693674725, |
| "learning_rate": 0.0001, |
| "loss": 0.1964, |
| "loss/crossentropy": 1.959843933582306, |
| "loss/fcd": 0.3955078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19641809910535812, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.009515240940007918, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.00019235511620839437, |
| "learning_rate": 0.0001, |
| "loss": 0.2212, |
| "loss/crossentropy": 2.4466131925582886, |
| "loss/fcd": 0.4765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22118167579174042, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.009529636160794617, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 0.00019238789876302082, |
| "learning_rate": 0.0001, |
| "loss": 0.1816, |
| "loss/crossentropy": 2.186416506767273, |
| "loss/fcd": 0.4052734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18155072629451752, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.009544031381581315, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 0.00018840531508127847, |
| "learning_rate": 0.0001, |
| "loss": 0.2391, |
| "loss/crossentropy": 2.504140853881836, |
| "loss/fcd": 0.490234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2391308844089508, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.009558426602368013, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 0.0001780986785888672, |
| "learning_rate": 0.0001, |
| "loss": 0.2317, |
| "loss/crossentropy": 2.4283803701400757, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23169831186532974, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.009572821823154713, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 0.0001889824867248535, |
| "learning_rate": 0.0001, |
| "loss": 0.2064, |
| "loss/crossentropy": 2.438134789466858, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20637594163417816, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.009587217043941412, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 0.00017477273941040038, |
| "learning_rate": 0.0001, |
| "loss": 0.1955, |
| "loss/crossentropy": 2.309617757797241, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1954583376646042, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.00960161226472811, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.00015706121921539308, |
| "learning_rate": 0.0001, |
| "loss": 0.2246, |
| "loss/crossentropy": 2.53112256526947, |
| "loss/fcd": 0.4482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22456367313861847, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.009616007485514808, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 0.00015153884887695313, |
| "learning_rate": 0.0001, |
| "loss": 0.2352, |
| "loss/crossentropy": 2.456951379776001, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23520664870738983, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.009630402706301508, |
| "grad_norm": 0.1220703125, |
| "grad_norm_var": 0.0001564621925354004, |
| "learning_rate": 0.0001, |
| "loss": 0.2193, |
| "loss/crossentropy": 2.065362870693207, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2193107306957245, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.009644797927088207, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 0.0001514345407485962, |
| "learning_rate": 0.0001, |
| "loss": 0.2085, |
| "loss/crossentropy": 2.2472126483917236, |
| "loss/fcd": 0.47265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20847148448228836, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.009659193147874905, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.00010944604873657227, |
| "learning_rate": 0.0001, |
| "loss": 0.2067, |
| "loss/crossentropy": 2.3741711378097534, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20672930777072906, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.009673588368661605, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 3.067255020141602e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2208, |
| "loss/crossentropy": 2.336190938949585, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22081031650304794, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.009687983589448303, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 3.2389163970947264e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2063, |
| "loss/crossentropy": 2.3808083534240723, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20631127804517746, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.009702378810235002, |
| "grad_norm": 0.09716796875, |
| "grad_norm_var": 4.1007002194722494e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1958, |
| "loss/crossentropy": 2.3818055391311646, |
| "loss/fcd": 0.4130859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19577700644731522, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.0097167740310217, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 4.0813287099202475e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2047, |
| "loss/crossentropy": 2.5091700553894043, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2047055885195732, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.0097311692518084, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 4.01457150777181e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2221, |
| "loss/crossentropy": 2.404844641685486, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2221018671989441, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.009745564472595098, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 4.01457150777181e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2118, |
| "loss/crossentropy": 2.279817581176758, |
| "loss/fcd": 0.4052734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2117534652352333, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.009759959693381797, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 3.9767225583394365e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2293, |
| "loss/crossentropy": 2.5266642570495605, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22927331924438477, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.009774354914168497, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 4.020929336547852e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2077, |
| "loss/crossentropy": 2.128249764442444, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20772311836481094, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.009788750134955195, |
| "grad_norm": 0.09814453125, |
| "grad_norm_var": 4.331966241200765e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2052, |
| "loss/crossentropy": 2.6459118127822876, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20518244057893753, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.009803145355741893, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 4.623730977376302e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2262, |
| "loss/crossentropy": 2.3065195083618164, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2262207344174385, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.009817540576528592, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 4.8061211903889976e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2083, |
| "loss/crossentropy": 2.4284178018569946, |
| "loss/fcd": 0.4228515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20825288444757462, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.009831935797315292, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 4.942814509073893e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1944, |
| "loss/crossentropy": 2.3323564529418945, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1943565011024475, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.00984633101810199, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 4.841486612955729e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2041, |
| "loss/crossentropy": 2.2371606826782227, |
| "loss/fcd": 0.4052734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2040523663163185, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.009860726238888689, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 3.3997495969136556e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2427, |
| "loss/crossentropy": 2.6680363416671753, |
| "loss/fcd": 0.4697265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24272434413433075, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.009875121459675389, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 3.038942813873291e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2114, |
| "loss/crossentropy": 2.392301321029663, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2113867551088333, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.009889516680462087, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 3.0055642127990723e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.238, |
| "loss/crossentropy": 2.646833062171936, |
| "loss/fcd": 0.458984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23803511261940002, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.009903911901248785, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 3.134310245513916e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1935, |
| "loss/crossentropy": 2.256480574607849, |
| "loss/fcd": 0.4013671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19345563650131226, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.009918307122035484, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 3.155072530110677e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2321, |
| "loss/crossentropy": 2.425878643989563, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23210398107767105, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.009932702342822184, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 2.6098887125651042e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2257, |
| "loss/crossentropy": 2.565882086753845, |
| "loss/fcd": 0.478515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2256726175546646, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.009947097563608882, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 3.0152002970377605e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1932, |
| "loss/crossentropy": 2.3051689863204956, |
| "loss/fcd": 0.39453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1932462379336357, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.00996149278439558, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 2.981424331665039e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2124, |
| "loss/crossentropy": 2.275663137435913, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21243004500865936, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.009975888005182279, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 2.9221177101135254e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2011, |
| "loss/crossentropy": 2.344420909881592, |
| "loss/fcd": 0.408203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20112024247646332, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.009990283225968979, |
| "grad_norm": 0.1455078125, |
| "grad_norm_var": 0.00012252231438954672, |
| "learning_rate": 0.0001, |
| "loss": 0.2729, |
| "loss/crossentropy": 2.1788020730018616, |
| "loss/fcd": 0.521484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.27289582788944244, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.010004678446755677, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.00012076298395792643, |
| "learning_rate": 0.0001, |
| "loss": 0.23, |
| "loss/crossentropy": 2.405027389526367, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22998760640621185, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.010019073667542375, |
| "grad_norm": 0.11767578125, |
| "grad_norm_var": 0.00011615355809529622, |
| "learning_rate": 0.0001, |
| "loss": 0.2227, |
| "loss/crossentropy": 2.9466445446014404, |
| "loss/fcd": 0.4755859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22269698232412338, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.010033468888329075, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 0.00011360545953114828, |
| "learning_rate": 0.0001, |
| "loss": 0.2136, |
| "loss/crossentropy": 3.066506266593933, |
| "loss/fcd": 0.46875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21359677612781525, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.010047864109115774, |
| "grad_norm": 0.09130859375, |
| "grad_norm_var": 0.0001313169797261556, |
| "learning_rate": 0.0001, |
| "loss": 0.1792, |
| "loss/crossentropy": 2.3103922605514526, |
| "loss/fcd": 0.388671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.17915956676006317, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.010062259329902472, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.000129854679107666, |
| "learning_rate": 0.0001, |
| "loss": 0.2095, |
| "loss/crossentropy": 2.201840400695801, |
| "loss/fcd": 0.4013671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2094813957810402, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.01007665455068917, |
| "grad_norm": 0.09619140625, |
| "grad_norm_var": 0.0001400272051493327, |
| "learning_rate": 0.0001, |
| "loss": 0.2055, |
| "loss/crossentropy": 2.5452860593795776, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20547957718372345, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.01009104977147587, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 0.00013989508152008058, |
| "learning_rate": 0.0001, |
| "loss": 0.1918, |
| "loss/crossentropy": 2.007373094558716, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19176460802555084, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.010105444992262569, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 0.00014006296793619792, |
| "learning_rate": 0.0001, |
| "loss": 0.2082, |
| "loss/crossentropy": 2.3383631706237793, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20822366327047348, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.010119840213049267, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 0.000139958659807841, |
| "learning_rate": 0.0001, |
| "loss": 0.1983, |
| "loss/crossentropy": 1.9882320761680603, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19827204197645187, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.010134235433835967, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.00014079014460245768, |
| "learning_rate": 0.0001, |
| "loss": 0.238, |
| "loss/crossentropy": 2.5094656944274902, |
| "loss/fcd": 0.46875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23796136677265167, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.010148630654622666, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 0.00014143685499827066, |
| "learning_rate": 0.0001, |
| "loss": 0.206, |
| "loss/crossentropy": 2.1021994948387146, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2059553563594818, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.010163025875409364, |
| "grad_norm": 0.09814453125, |
| "grad_norm_var": 0.00014835894107818605, |
| "learning_rate": 0.0001, |
| "loss": 0.2037, |
| "loss/crossentropy": 2.3918451070785522, |
| "loss/fcd": 0.4248046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20372479408979416, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.010177421096196062, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 0.00014328956604003906, |
| "learning_rate": 0.0001, |
| "loss": 0.2379, |
| "loss/crossentropy": 2.4593441486358643, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23789776116609573, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.010191816316982762, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.00014485915501912436, |
| "learning_rate": 0.0001, |
| "loss": 0.1983, |
| "loss/crossentropy": 2.2852306365966797, |
| "loss/fcd": 0.4013671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1982945054769516, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.01020621153776946, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 0.00014371474583943684, |
| "learning_rate": 0.0001, |
| "loss": 0.2481, |
| "loss/crossentropy": 2.5582568645477295, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24808169901371002, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.010220606758556159, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 5.040069421132406e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.241, |
| "loss/crossentropy": 2.4824811220169067, |
| "loss/fcd": 0.51171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2409602850675583, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.010235001979342857, |
| "grad_norm": 0.12158203125, |
| "grad_norm_var": 6.302197774251302e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2292, |
| "loss/crossentropy": 2.237234354019165, |
| "loss/fcd": 0.4619140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22918210923671722, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.010249397200129557, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 5.577405293782552e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.207, |
| "loss/crossentropy": 2.31795597076416, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20698396116495132, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.010263792420916256, |
| "grad_norm": 0.103515625, |
| "grad_norm_var": 5.6409835815429686e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2065, |
| "loss/crossentropy": 2.5415326356887817, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20651167631149292, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.010278187641702954, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 3.9859612782796225e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2139, |
| "loss/crossentropy": 2.2211133241653442, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21389107406139374, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.010292582862489654, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 4.161198933919271e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.212, |
| "loss/crossentropy": 2.3691943883895874, |
| "loss/fcd": 0.4521484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2119893953204155, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.010306978083276352, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 3.372828165690104e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2145, |
| "loss/crossentropy": 2.390496850013733, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2145363911986351, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.01032137330406305, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 3.650983174641927e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1619, |
| "loss/crossentropy": 1.7626497149467468, |
| "loss/fcd": 0.513671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.16188892722129822, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.010335768524849749, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 3.790855407714844e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2436, |
| "loss/crossentropy": 2.6944552659988403, |
| "loss/fcd": 0.4609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2435958907008171, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.01035016374563645, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 4.0665268898010254e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2072, |
| "loss/crossentropy": 2.383134961128235, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20715758204460144, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.010364558966423148, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 3.6764144897460935e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2088, |
| "loss/crossentropy": 2.5325286388397217, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20877134799957275, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.010378954187209846, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 3.8829445838928225e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2236, |
| "loss/crossentropy": 2.6585100889205933, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22355867177248, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.010393349407996546, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 3.544092178344727e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2016, |
| "loss/crossentropy": 2.3599932193756104, |
| "loss/fcd": 0.423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2016456127166748, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.010407744628783244, |
| "grad_norm": 0.1220703125, |
| "grad_norm_var": 4.926919937133789e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2227, |
| "loss/crossentropy": 2.1093697547912598, |
| "loss/fcd": 0.4326171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22268912196159363, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.010422139849569943, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 4.9097339312235516e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2061, |
| "loss/crossentropy": 2.120736837387085, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20611396431922913, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.010436535070356641, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 5.023380120595296e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2238, |
| "loss/crossentropy": 2.3321027755737305, |
| "loss/fcd": 0.4560546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22382070124149323, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.010450930291143341, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 4.936456680297852e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.206, |
| "loss/crossentropy": 2.2690643668174744, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20598538219928741, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.01046532551193004, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 3.4538904825846356e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2204, |
| "loss/crossentropy": 2.39444100856781, |
| "loss/fcd": 0.4404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22039655596017838, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.010479720732716738, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 3.5429000854492186e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2212, |
| "loss/crossentropy": 2.4713072776794434, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2212340533733368, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.010494115953503438, |
| "grad_norm": 0.138671875, |
| "grad_norm_var": 9.951591491699218e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2904, |
| "loss/crossentropy": 2.2529489994049072, |
| "loss/fcd": 0.5947265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.29038895666599274, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.010508511174290136, |
| "grad_norm": 0.142578125, |
| "grad_norm_var": 0.00017181138197580975, |
| "learning_rate": 0.0001, |
| "loss": 0.2931, |
| "loss/crossentropy": 2.3451786041259766, |
| "loss/fcd": 0.5263671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.29310375452041626, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.010522906395076834, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.00016589065392812093, |
| "learning_rate": 0.0001, |
| "loss": 0.2063, |
| "loss/crossentropy": 2.3045698404312134, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2062971591949463, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.010537301615863533, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.00016802847385406495, |
| "learning_rate": 0.0001, |
| "loss": 0.2105, |
| "loss/crossentropy": 2.5085275173187256, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21049045026302338, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.010551696836650233, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 0.00016735394795735676, |
| "learning_rate": 0.0001, |
| "loss": 0.2196, |
| "loss/crossentropy": 2.644802451133728, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2196320742368698, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.010566092057436931, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.00016781091690063477, |
| "learning_rate": 0.0001, |
| "loss": 0.1974, |
| "loss/crossentropy": 2.2515525817871094, |
| "loss/fcd": 0.3974609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19744951277971268, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.01058048727822363, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 0.0001671860615412394, |
| "learning_rate": 0.0001, |
| "loss": 0.1965, |
| "loss/crossentropy": 2.3382036685943604, |
| "loss/fcd": 0.3974609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1964586153626442, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.010594882499010328, |
| "grad_norm": 0.095703125, |
| "grad_norm_var": 0.00017541150252024332, |
| "learning_rate": 0.0001, |
| "loss": 0.2012, |
| "loss/crossentropy": 2.638480305671692, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20124144107103348, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.010609277719797028, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 0.00017036497592926025, |
| "learning_rate": 0.0001, |
| "loss": 0.222, |
| "loss/crossentropy": 2.498441696166992, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22197365015745163, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.010623672940583726, |
| "grad_norm": 0.09814453125, |
| "grad_norm_var": 0.00017648935317993164, |
| "learning_rate": 0.0001, |
| "loss": 0.1943, |
| "loss/crossentropy": 2.2127552032470703, |
| "loss/fcd": 0.3896484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19434216618537903, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.010638068161370424, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 0.00017264286677042643, |
| "learning_rate": 0.0001, |
| "loss": 0.2065, |
| "loss/crossentropy": 2.4787211418151855, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20646335184574127, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.010652463382157125, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 0.0001750628153483073, |
| "learning_rate": 0.0001, |
| "loss": 0.1945, |
| "loss/crossentropy": 2.167446494102478, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19451382011175156, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.010666858602943823, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.00017729500929514568, |
| "learning_rate": 0.0001, |
| "loss": 0.2069, |
| "loss/crossentropy": 2.3936961889266968, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20688295364379883, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.010681253823730521, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 0.00017769734064737957, |
| "learning_rate": 0.0001, |
| "loss": 0.2344, |
| "loss/crossentropy": 2.502206325531006, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23436500132083893, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.01069564904451722, |
| "grad_norm": 0.12451171875, |
| "grad_norm_var": 0.00019410053888956706, |
| "learning_rate": 0.0001, |
| "loss": 0.2446, |
| "loss/crossentropy": 2.7519075870513916, |
| "loss/fcd": 0.4853515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2446460798382759, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.01071004426530392, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.00019407967726389568, |
| "learning_rate": 0.0001, |
| "loss": 0.2061, |
| "loss/crossentropy": 2.3958401679992676, |
| "loss/fcd": 0.4345703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20611582696437836, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.010724439486090618, |
| "grad_norm": 0.09716796875, |
| "grad_norm_var": 0.00013910929361979167, |
| "learning_rate": 0.0001, |
| "loss": 0.2229, |
| "loss/crossentropy": 2.6051418781280518, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22285999357700348, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.010738834706877316, |
| "grad_norm": 0.09814453125, |
| "grad_norm_var": 4.988412062327067e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2096, |
| "loss/crossentropy": 2.5375572443008423, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20957274734973907, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.010753229927664016, |
| "grad_norm": 0.123046875, |
| "grad_norm_var": 7.005433241526285e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2273, |
| "loss/crossentropy": 2.2432570457458496, |
| "loss/fcd": 0.46484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22729168832302094, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.010767625148450715, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 7.160405317942301e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1981, |
| "loss/crossentropy": 2.451253056526184, |
| "loss/fcd": 0.404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1981128826737404, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.010782020369237413, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 7.164875666300455e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2238, |
| "loss/crossentropy": 2.6088002920150757, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22383547574281693, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.010796415590024111, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 7.559359073638916e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2204, |
| "loss/crossentropy": 2.3209699392318726, |
| "loss/fcd": 0.4443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2203991711139679, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.010810810810810811, |
| "grad_norm": 0.1240234375, |
| "grad_norm_var": 9.606579939524333e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2522, |
| "loss/crossentropy": 2.3715856075286865, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2521570920944214, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.01082520603159751, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 9.13769006729126e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.217, |
| "loss/crossentropy": 2.3642451763153076, |
| "loss/fcd": 0.4931640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21697237342596054, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.010839601252384208, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 9.104013442993165e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2155, |
| "loss/crossentropy": 2.4382712841033936, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21553221344947815, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.010853996473170906, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 8.729199568430583e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1936, |
| "loss/crossentropy": 2.401493191719055, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19359815120697021, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.010868391693957606, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 8.423725763956706e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1962, |
| "loss/crossentropy": 2.1797173619270325, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19620782881975174, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.010882786914744305, |
| "grad_norm": 0.11767578125, |
| "grad_norm_var": 8.459786574045817e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2162, |
| "loss/crossentropy": 2.2014777660369873, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21618105471134186, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.010897182135531003, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 8.204678694407145e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2144, |
| "loss/crossentropy": 2.5520023107528687, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2143661305308342, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.010911577356317703, |
| "grad_norm": 0.1357421875, |
| "grad_norm_var": 0.00012089014053344727, |
| "learning_rate": 0.0001, |
| "loss": 0.2172, |
| "loss/crossentropy": 2.605940818786621, |
| "loss/fcd": 0.470703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2171928584575653, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.010925972577104402, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 0.00011552075544993082, |
| "learning_rate": 0.0001, |
| "loss": 0.1896, |
| "loss/crossentropy": 2.260614037513733, |
| "loss/fcd": 0.3916015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1895817369222641, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.0109403677978911, |
| "grad_norm": 0.130859375, |
| "grad_norm_var": 0.00014096001784006754, |
| "learning_rate": 0.0001, |
| "loss": 0.2287, |
| "loss/crossentropy": 2.3699567317962646, |
| "loss/fcd": 0.5078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2286616861820221, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.010954763018677798, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 0.00012553135553995768, |
| "learning_rate": 0.0001, |
| "loss": 0.1979, |
| "loss/crossentropy": 2.0666418075561523, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19788716733455658, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.010969158239464498, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.0001106580098470052, |
| "learning_rate": 0.0001, |
| "loss": 0.2149, |
| "loss/crossentropy": 2.25100314617157, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21493691205978394, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.010983553460251197, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.00010553101698557536, |
| "learning_rate": 0.0001, |
| "loss": 0.2, |
| "loss/crossentropy": 2.3312637209892273, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19997263699769974, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.010997948681037895, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 9.52392816543579e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2213, |
| "loss/crossentropy": 2.4567571878433228, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2212778776884079, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.011012343901824595, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 0.00010437866051991781, |
| "learning_rate": 0.0001, |
| "loss": 0.2505, |
| "loss/crossentropy": 2.3997398614883423, |
| "loss/fcd": 0.48046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.25046147406101227, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.011026739122611293, |
| "grad_norm": 0.09716796875, |
| "grad_norm_var": 0.00012486775716145834, |
| "learning_rate": 0.0001, |
| "loss": 0.1976, |
| "loss/crossentropy": 2.327947497367859, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1975831389427185, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.011041134343397992, |
| "grad_norm": 0.125, |
| "grad_norm_var": 0.00012619892756144207, |
| "learning_rate": 0.0001, |
| "loss": 0.2108, |
| "loss/crossentropy": 2.3216136693954468, |
| "loss/fcd": 0.4521484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21076547354459763, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.01105552956418469, |
| "grad_norm": 0.1298828125, |
| "grad_norm_var": 0.00014139811197916668, |
| "learning_rate": 0.0001, |
| "loss": 0.2461, |
| "loss/crossentropy": 2.2610775232315063, |
| "loss/fcd": 0.5029296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2461041733622551, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.01106992478497139, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.00013906856377919515, |
| "learning_rate": 0.0001, |
| "loss": 0.199, |
| "loss/crossentropy": 2.2911869883537292, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19901156425476074, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.011084320005758088, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 0.00014190276463826496, |
| "learning_rate": 0.0001, |
| "loss": 0.21, |
| "loss/crossentropy": 2.432590365409851, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2100282460451126, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.011098715226544787, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 0.00014832417170206706, |
| "learning_rate": 0.0001, |
| "loss": 0.179, |
| "loss/crossentropy": 2.154644250869751, |
| "loss/fcd": 0.4853515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.17896521091461182, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.011113110447331487, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 0.00014781554539998373, |
| "learning_rate": 0.0001, |
| "loss": 0.2293, |
| "loss/crossentropy": 2.5124725103378296, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22927424311637878, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.011127505668118185, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.00014212032159169514, |
| "learning_rate": 0.0001, |
| "loss": 0.2246, |
| "loss/crossentropy": 2.411632537841797, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22455725073814392, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.011141900888904883, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.00011181831359863281, |
| "learning_rate": 0.0001, |
| "loss": 0.2336, |
| "loss/crossentropy": 2.4840848445892334, |
| "loss/fcd": 0.462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23362614214420319, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.011156296109691582, |
| "grad_norm": 0.1142578125, |
| "grad_norm_var": 0.00010143518447875976, |
| "learning_rate": 0.0001, |
| "loss": 0.2162, |
| "loss/crossentropy": 2.2171601057052612, |
| "loss/fcd": 0.4228515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2162095457315445, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.011170691330478282, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 8.772114912668864e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2101, |
| "loss/crossentropy": 2.466732382774353, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21009384095668793, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.01118508655126498, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 8.824268976847331e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2331, |
| "loss/crossentropy": 2.463024854660034, |
| "loss/fcd": 0.4765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2330816239118576, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.011199481772051679, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 8.945067723592122e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2142, |
| "loss/crossentropy": 2.1225094199180603, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2141725867986679, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.011213876992838377, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 8.852879206339518e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2065, |
| "loss/crossentropy": 2.0846282243728638, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20651061832904816, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.011228272213625077, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 8.51591428120931e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2156, |
| "loss/crossentropy": 2.2128478288650513, |
| "loss/fcd": 0.3984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21557357162237167, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.011242667434411775, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 7.343987623850504e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2015, |
| "loss/crossentropy": 2.3130797147750854, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20153620839118958, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.011257062655198474, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 6.468693415323893e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2148, |
| "loss/crossentropy": 2.5943338871002197, |
| "loss/fcd": 0.458984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2147517278790474, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.011271457875985174, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 5.4101149241129555e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.229, |
| "loss/crossentropy": 2.7100160121917725, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22897624969482422, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.011285853096771872, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 3.060400485992432e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2299, |
| "loss/crossentropy": 2.500633478164673, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22991500794887543, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.01130024831755857, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 3.067255020141602e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.226, |
| "loss/crossentropy": 2.4316182136535645, |
| "loss/fcd": 0.423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22597461938858032, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.011314643538345269, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 2.8839707374572755e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.217, |
| "loss/crossentropy": 2.592137098312378, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2169811800122261, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.011329038759131969, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 3.44236691792806e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1974, |
| "loss/crossentropy": 2.287144422531128, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1974037140607834, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.011343433979918667, |
| "grad_norm": 0.09521484375, |
| "grad_norm_var": 4.4854482014973957e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2052, |
| "loss/crossentropy": 2.4738489389419556, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20523115992546082, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.011357829200705365, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 4.7318140665690105e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.215, |
| "loss/crossentropy": 2.4524784088134766, |
| "loss/fcd": 0.4560546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21502291411161423, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.011372224421492065, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 4.888276259104411e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1999, |
| "loss/crossentropy": 2.2310436964035034, |
| "loss/fcd": 0.3916015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19993127137422562, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.011386619642278764, |
| "grad_norm": 0.09765625, |
| "grad_norm_var": 5.063911279042562e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2285, |
| "loss/crossentropy": 2.613986611366272, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22845745831727982, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.011401014863065462, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 5.238453547159831e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2083, |
| "loss/crossentropy": 2.5012824535369873, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20829569548368454, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.01141541008385216, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 5.238453547159831e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2444, |
| "loss/crossentropy": 2.225709557533264, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2444288209080696, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.01142980530463886, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 4.264513651529948e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2043, |
| "loss/crossentropy": 2.2551809549331665, |
| "loss/fcd": 0.3994140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20429246127605438, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.011444200525425559, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 3.8368503252665204e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2353, |
| "loss/crossentropy": 2.4520708322525024, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2352810874581337, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.011458595746212257, |
| "grad_norm": 0.09423828125, |
| "grad_norm_var": 3.733535607655843e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2003, |
| "loss/crossentropy": 2.3560184240341187, |
| "loss/fcd": 0.4130859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2003132924437523, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.011472990966998955, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 3.7534038225809735e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2268, |
| "loss/crossentropy": 2.6456328630447388, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22680091857910156, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.011487386187785656, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 3.840823968251546e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2003, |
| "loss/crossentropy": 2.5123294591903687, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20031608641147614, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.011501781408572354, |
| "grad_norm": 0.130859375, |
| "grad_norm_var": 8.675952752431233e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2347, |
| "loss/crossentropy": 2.2425618171691895, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23472215235233307, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.011516176629359052, |
| "grad_norm": 0.11767578125, |
| "grad_norm_var": 9.133716424306234e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2115, |
| "loss/crossentropy": 2.1281662583351135, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21148262917995453, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.011530571850145752, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 9.107192357381185e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2171, |
| "loss/crossentropy": 2.4536547660827637, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2171497568488121, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.01154496707093245, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 0.00010519027709960937, |
| "learning_rate": 0.0001, |
| "loss": 0.2157, |
| "loss/crossentropy": 2.3697547912597656, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21570491790771484, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.011559362291719149, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 0.00010393361250559489, |
| "learning_rate": 0.0001, |
| "loss": 0.2189, |
| "loss/crossentropy": 2.4509881734848022, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21891363710165024, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.011573757512505847, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 9.564956029256185e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2175, |
| "loss/crossentropy": 2.1731194853782654, |
| "loss/fcd": 0.474609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21748338639736176, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.011588152733292547, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 9.326040744781495e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2308, |
| "loss/crossentropy": 2.4915411472320557, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2308463379740715, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.011602547954079246, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 9.280840555826823e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2106, |
| "loss/crossentropy": 2.4593664407730103, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21063391864299774, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.011616943174865944, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 9.096364180246988e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2093, |
| "loss/crossentropy": 2.420872926712036, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20925325900316238, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.011631338395652644, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 9.145339330037434e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2035, |
| "loss/crossentropy": 2.4789732694625854, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2034958302974701, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.011645733616439342, |
| "grad_norm": 0.0986328125, |
| "grad_norm_var": 9.176631768544515e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2055, |
| "loss/crossentropy": 2.356053352355957, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20546124875545502, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.01166012883722604, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 9.134610493977864e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1982, |
| "loss/crossentropy": 2.286035180091858, |
| "loss/fcd": 0.40625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19822601974010468, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.011674524058012739, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 9.005467096964518e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2307, |
| "loss/crossentropy": 2.546161413192749, |
| "loss/fcd": 0.458984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23073262721300125, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.01168891927879944, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 7.832845052083334e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.214, |
| "loss/crossentropy": 2.4045225381851196, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21400006115436554, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.011703314499586137, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 7.412830988566081e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2324, |
| "loss/crossentropy": 2.3815245628356934, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23237691074609756, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.011717709720372836, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 7.412830988566081e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.197, |
| "loss/crossentropy": 2.2638756036758423, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19701003283262253, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.011732104941159536, |
| "grad_norm": 0.095703125, |
| "grad_norm_var": 4.5804182688395184e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1993, |
| "loss/crossentropy": 2.310957193374634, |
| "loss/fcd": 0.3994140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19931814819574356, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.011746500161946234, |
| "grad_norm": 0.13671875, |
| "grad_norm_var": 9.775857130686441e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2326, |
| "loss/crossentropy": 2.3524898290634155, |
| "loss/fcd": 0.490234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23264919221401215, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.011760895382732933, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 9.795725345611573e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2404, |
| "loss/crossentropy": 2.542204737663269, |
| "loss/fcd": 0.45703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2403649091720581, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.01177529060351963, |
| "grad_norm": 0.1298828125, |
| "grad_norm_var": 0.00012048780918121338, |
| "learning_rate": 0.0001, |
| "loss": 0.2259, |
| "loss/crossentropy": 2.300834894180298, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2259274125099182, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.011789685824306331, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.0001206040382385254, |
| "learning_rate": 0.0001, |
| "loss": 0.2003, |
| "loss/crossentropy": 2.309138298034668, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2003060281276703, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.01180408104509303, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 0.00012012720108032227, |
| "learning_rate": 0.0001, |
| "loss": 0.2192, |
| "loss/crossentropy": 2.516822099685669, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2192147672176361, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.011818476265879728, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.00012189547220865885, |
| "learning_rate": 0.0001, |
| "loss": 0.222, |
| "loss/crossentropy": 2.5142600536346436, |
| "loss/fcd": 0.478515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22199787199497223, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.011832871486666426, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 0.00011879603068033854, |
| "learning_rate": 0.0001, |
| "loss": 0.2006, |
| "loss/crossentropy": 2.166727066040039, |
| "loss/fcd": 0.3984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20062025636434555, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.011847266707453126, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 0.00011602640151977539, |
| "learning_rate": 0.0001, |
| "loss": 0.2171, |
| "loss/crossentropy": 2.2036046981811523, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21710190176963806, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.011861661928239824, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.00011036793390909831, |
| "learning_rate": 0.0001, |
| "loss": 0.2275, |
| "loss/crossentropy": 2.2625406980514526, |
| "loss/fcd": 0.4423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22749044001102448, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.011876057149026523, |
| "grad_norm": 0.7890625, |
| "grad_norm_var": 0.028886699676513673, |
| "learning_rate": 0.0001, |
| "loss": 0.2046, |
| "loss/crossentropy": 1.833857238292694, |
| "loss/fcd": 0.5595703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20455920696258545, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.011890452369813223, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.028860441843668618, |
| "learning_rate": 0.0001, |
| "loss": 0.2183, |
| "loss/crossentropy": 2.433130979537964, |
| "loss/fcd": 0.423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21825896203517914, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.011904847590599921, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 0.028843144575754803, |
| "learning_rate": 0.0001, |
| "loss": 0.2252, |
| "loss/crossentropy": 2.3956053256988525, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22523467242717743, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.01191924281138662, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.028831319014231364, |
| "learning_rate": 0.0001, |
| "loss": 0.2197, |
| "loss/crossentropy": 2.4500895738601685, |
| "loss/fcd": 0.458984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2196703627705574, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.011933638032173318, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 0.0288025697072347, |
| "learning_rate": 0.0001, |
| "loss": 0.2463, |
| "loss/crossentropy": 2.4316296577453613, |
| "loss/fcd": 0.470703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24632105976343155, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.011948033252960018, |
| "grad_norm": 0.11669921875, |
| "grad_norm_var": 0.028744553526242573, |
| "learning_rate": 0.0001, |
| "loss": 0.2168, |
| "loss/crossentropy": 2.432488799095154, |
| "loss/fcd": 0.462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21679828315973282, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.011962428473746716, |
| "grad_norm": 0.09375, |
| "grad_norm_var": 0.028760058681170146, |
| "learning_rate": 0.0001, |
| "loss": 0.1881, |
| "loss/crossentropy": 2.3647295236587524, |
| "loss/fcd": 0.3896484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18810325115919113, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.011976823694533414, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 0.02886225382486979, |
| "learning_rate": 0.0001, |
| "loss": 0.2179, |
| "loss/crossentropy": 2.4084588289260864, |
| "loss/fcd": 0.4345703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2179015353322029, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.011991218915320115, |
| "grad_norm": 0.10400390625, |
| "grad_norm_var": 0.02889500359694163, |
| "learning_rate": 0.0001, |
| "loss": 0.2007, |
| "loss/crossentropy": 2.3215763568878174, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20068107545375824, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.012005614136106813, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 0.029032798608144124, |
| "learning_rate": 0.0001, |
| "loss": 0.1899, |
| "loss/crossentropy": 2.1791869401931763, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18989010155200958, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.012020009356893511, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.02898623843987783, |
| "learning_rate": 0.0001, |
| "loss": 0.2187, |
| "loss/crossentropy": 2.292221188545227, |
| "loss/fcd": 0.4365234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2187333106994629, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.01203440457768021, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.028997563322385154, |
| "learning_rate": 0.0001, |
| "loss": 0.216, |
| "loss/crossentropy": 2.191875457763672, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21604549139738083, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.01204879979846691, |
| "grad_norm": 0.1298828125, |
| "grad_norm_var": 0.028929102420806884, |
| "learning_rate": 0.0001, |
| "loss": 0.2124, |
| "loss/crossentropy": 2.0077582597732544, |
| "loss/fcd": 0.4228515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21244481950998306, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.012063195019253608, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 0.028929102420806884, |
| "learning_rate": 0.0001, |
| "loss": 0.2144, |
| "loss/crossentropy": 2.4339540004730225, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21442482620477676, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.012077590240040306, |
| "grad_norm": 0.220703125, |
| "grad_norm_var": 0.0291112889846166, |
| "learning_rate": 0.0001, |
| "loss": 0.2018, |
| "loss/crossentropy": 1.9582195281982422, |
| "loss/fcd": 0.5849609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20181410014629364, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.012091985460827005, |
| "grad_norm": 0.14453125, |
| "grad_norm_var": 0.028948195775349937, |
| "learning_rate": 0.0001, |
| "loss": 0.2261, |
| "loss/crossentropy": 1.961089551448822, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22611552476882935, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.012106380681613705, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.0008943786223729451, |
| "learning_rate": 0.0001, |
| "loss": 0.208, |
| "loss/crossentropy": 2.2961446046829224, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20795201510190964, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.012120775902400403, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.0008890310923258464, |
| "learning_rate": 0.0001, |
| "loss": 0.2208, |
| "loss/crossentropy": 2.3987420797348022, |
| "loss/fcd": 0.4345703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2207762897014618, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.012135171123187101, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 0.0009084294239679972, |
| "learning_rate": 0.0001, |
| "loss": 0.1981, |
| "loss/crossentropy": 2.284560799598694, |
| "loss/fcd": 0.4033203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19807633757591248, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.012149566343973801, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 0.0009122679630915324, |
| "learning_rate": 0.0001, |
| "loss": 0.2087, |
| "loss/crossentropy": 2.5325080156326294, |
| "loss/fcd": 0.4560546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20872415602207184, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.0121639615647605, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.0009208361307779948, |
| "learning_rate": 0.0001, |
| "loss": 0.2326, |
| "loss/crossentropy": 2.4612646102905273, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23255135864019394, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.012178356785547198, |
| "grad_norm": 0.1259765625, |
| "grad_norm_var": 0.0009262154499689738, |
| "learning_rate": 0.0001, |
| "loss": 0.2151, |
| "loss/crossentropy": 2.463419198989868, |
| "loss/fcd": 0.4560546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2151269093155861, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.012192752006333896, |
| "grad_norm": 0.11474609375, |
| "grad_norm_var": 0.000887898604075114, |
| "learning_rate": 0.0001, |
| "loss": 0.2149, |
| "loss/crossentropy": 2.0733948945999146, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21493042260408401, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.012207147227120596, |
| "grad_norm": 0.09375, |
| "grad_norm_var": 0.000923815369606018, |
| "learning_rate": 0.0001, |
| "loss": 0.1887, |
| "loss/crossentropy": 2.6692737340927124, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18867085129022598, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.012221542447907295, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.0009139657020568847, |
| "learning_rate": 0.0001, |
| "loss": 0.2055, |
| "loss/crossentropy": 2.349723696708679, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2054726406931877, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.012235937668693993, |
| "grad_norm": 0.103515625, |
| "grad_norm_var": 0.0009088347355524699, |
| "learning_rate": 0.0001, |
| "loss": 0.2146, |
| "loss/crossentropy": 2.4036346673965454, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21456415951251984, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.012250332889480693, |
| "grad_norm": 0.12890625, |
| "grad_norm_var": 0.0009135882059733073, |
| "learning_rate": 0.0001, |
| "loss": 0.2798, |
| "loss/crossentropy": 2.601546049118042, |
| "loss/fcd": 0.515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.27978505194187164, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.012264728110267391, |
| "grad_norm": 0.12109375, |
| "grad_norm_var": 0.0009022037188212077, |
| "learning_rate": 0.0001, |
| "loss": 0.2426, |
| "loss/crossentropy": 2.657235622406006, |
| "loss/fcd": 0.48828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24257582426071167, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.01227912333105409, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.0009092291196187338, |
| "learning_rate": 0.0001, |
| "loss": 0.2069, |
| "loss/crossentropy": 2.515699028968811, |
| "loss/fcd": 0.4189453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20689593255519867, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.012293518551840788, |
| "grad_norm": 0.1298828125, |
| "grad_norm_var": 0.0009068479140599569, |
| "learning_rate": 0.0001, |
| "loss": 0.2659, |
| "loss/crossentropy": 2.6577337980270386, |
| "loss/fcd": 0.4970703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.26587389409542084, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.012307913772627488, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.00019206603368123373, |
| "learning_rate": 0.0001, |
| "loss": 0.2053, |
| "loss/crossentropy": 2.189553380012512, |
| "loss/fcd": 0.4228515625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2052648663520813, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.012322308993414187, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.0001191099484761556, |
| "learning_rate": 0.0001, |
| "loss": 0.2515, |
| "loss/crossentropy": 2.7488744258880615, |
| "loss/fcd": 0.47265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2514711171388626, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.012336704214200885, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.00011858046054840088, |
| "learning_rate": 0.0001, |
| "loss": 0.2299, |
| "loss/crossentropy": 2.5891239643096924, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2299317717552185, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.012351099434987585, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 0.00012637674808502197, |
| "learning_rate": 0.0001, |
| "loss": 0.196, |
| "loss/crossentropy": 2.2487235069274902, |
| "loss/fcd": 0.4013671875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19604943692684174, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.012365494655774283, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 0.00011815925439198812, |
| "learning_rate": 0.0001, |
| "loss": 0.2355, |
| "loss/crossentropy": 2.448120951652527, |
| "loss/fcd": 0.4658203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23546921461820602, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.012379889876560982, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 0.00011677742004394532, |
| "learning_rate": 0.0001, |
| "loss": 0.2299, |
| "loss/crossentropy": 2.6376739740371704, |
| "loss/fcd": 0.4765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22993575036525726, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.01239428509734768, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 0.00011804004510243734, |
| "learning_rate": 0.0001, |
| "loss": 0.2362, |
| "loss/crossentropy": 2.4012396335601807, |
| "loss/fcd": 0.4541015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2361709326505661, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.01240868031813438, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 0.0001058568557103475, |
| "learning_rate": 0.0001, |
| "loss": 0.2031, |
| "loss/crossentropy": 2.3792039155960083, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2031245082616806, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.012423075538921078, |
| "grad_norm": 0.1435546875, |
| "grad_norm_var": 0.00017355283101399738, |
| "learning_rate": 0.0001, |
| "loss": 0.1986, |
| "loss/crossentropy": 2.1046639680862427, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19857460260391235, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.012437470759707777, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 0.00016026397546132405, |
| "learning_rate": 0.0001, |
| "loss": 0.2247, |
| "loss/crossentropy": 2.6387428045272827, |
| "loss/fcd": 0.4599609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22474492341279984, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.012451865980494475, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 0.00016404787699381512, |
| "learning_rate": 0.0001, |
| "loss": 0.2216, |
| "loss/crossentropy": 2.420724868774414, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22158697247505188, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.012466261201281175, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 0.00015840431054433188, |
| "learning_rate": 0.0001, |
| "loss": 0.2081, |
| "loss/crossentropy": 2.1139690279960632, |
| "loss/fcd": 0.4599609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20810745656490326, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.012480656422067873, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 0.0001499404509862264, |
| "learning_rate": 0.0001, |
| "loss": 0.208, |
| "loss/crossentropy": 2.540156126022339, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2079625502228737, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.012495051642854572, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 0.0001485149065653483, |
| "learning_rate": 0.0001, |
| "loss": 0.2183, |
| "loss/crossentropy": 2.39568293094635, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21834557503461838, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.012509446863641272, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 0.00014786720275878907, |
| "learning_rate": 0.0001, |
| "loss": 0.2185, |
| "loss/crossentropy": 2.3164258003234863, |
| "loss/fcd": 0.4443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2184668406844139, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.01252384208442797, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.00011974573135375977, |
| "learning_rate": 0.0001, |
| "loss": 0.2123, |
| "loss/crossentropy": 2.3733065128326416, |
| "loss/fcd": 0.4384765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21230120956897736, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.012538237305214668, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 0.00011893908182779948, |
| "learning_rate": 0.0001, |
| "loss": 0.217, |
| "loss/crossentropy": 2.5337724685668945, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21697236597537994, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.012552632526001367, |
| "grad_norm": 0.1328125, |
| "grad_norm_var": 0.00015513102213541666, |
| "learning_rate": 0.0001, |
| "loss": 0.2258, |
| "loss/crossentropy": 2.4623916149139404, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2257620170712471, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.012567027746788067, |
| "grad_norm": 0.11767578125, |
| "grad_norm_var": 0.00015417635440826417, |
| "learning_rate": 0.0001, |
| "loss": 0.1991, |
| "loss/crossentropy": 2.1633788347244263, |
| "loss/fcd": 0.41015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1990898996591568, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.012581422967574765, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 0.00015268226464589438, |
| "learning_rate": 0.0001, |
| "loss": 0.2118, |
| "loss/crossentropy": 2.4863176345825195, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21175408363342285, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.012595818188361464, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.00015290478865305582, |
| "learning_rate": 0.0001, |
| "loss": 0.2287, |
| "loss/crossentropy": 2.6219388246536255, |
| "loss/fcd": 0.4541015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22867251932621002, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.012610213409148164, |
| "grad_norm": 0.10546875, |
| "grad_norm_var": 0.0001546849807103475, |
| "learning_rate": 0.0001, |
| "loss": 0.2051, |
| "loss/crossentropy": 2.1874676942825317, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.205148346722126, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.012624608629934862, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 0.00014908711115519207, |
| "learning_rate": 0.0001, |
| "loss": 0.2029, |
| "loss/crossentropy": 2.129917621612549, |
| "loss/fcd": 0.412109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20289119333028793, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.01263900385072156, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 0.0001484622557957967, |
| "learning_rate": 0.0001, |
| "loss": 0.2193, |
| "loss/crossentropy": 2.0245165824890137, |
| "loss/fcd": 0.4482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2193296253681183, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.012653399071508259, |
| "grad_norm": 0.10986328125, |
| "grad_norm_var": 7.06632932027181e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.197, |
| "loss/crossentropy": 2.1348493099212646, |
| "loss/fcd": 0.4072265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19701501727104187, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.012667794292294959, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 7.014175256093343e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2465, |
| "loss/crossentropy": 2.4276719093322754, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24645158648490906, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.012682189513081657, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 6.877581278483072e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2378, |
| "loss/crossentropy": 2.301609516143799, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23781420290470123, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.012696584733868355, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 6.821950276692708e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2172, |
| "loss/crossentropy": 2.4009501934051514, |
| "loss/fcd": 0.4345703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21715039014816284, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.012710979954655055, |
| "grad_norm": 0.09521484375, |
| "grad_norm_var": 7.529159386952718e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1797, |
| "loss/crossentropy": 2.402838706970215, |
| "loss/fcd": 0.3974609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1796911582350731, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.012725375175441754, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 7.056792577107748e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2222, |
| "loss/crossentropy": 2.573052167892456, |
| "loss/fcd": 0.46875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22222469747066498, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.012739770396228452, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 7.23510980606079e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2008, |
| "loss/crossentropy": 2.232303559780121, |
| "loss/fcd": 0.40234375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20078317821025848, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.01275416561701515, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 7.402002811431884e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2282, |
| "loss/crossentropy": 2.5020205974578857, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22823868691921234, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.01276856083780185, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 7.444620132446289e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.212, |
| "loss/crossentropy": 2.333465099334717, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.212021104991436, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.012782956058588549, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 3.565847873687744e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2017, |
| "loss/crossentropy": 2.190958023071289, |
| "loss/fcd": 0.404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20165172219276428, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.012797351279375247, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 3.0524532000223796e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2102, |
| "loss/crossentropy": 2.4441522359848022, |
| "loss/fcd": 0.4169921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21022119373083115, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.012811746500161945, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 2.795855204264323e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2209, |
| "loss/crossentropy": 2.504876732826233, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22085034102201462, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.012826141720948646, |
| "grad_norm": 0.11279296875, |
| "grad_norm_var": 2.9993057250976562e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2144, |
| "loss/crossentropy": 2.03822124004364, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2144273966550827, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.012840536941735344, |
| "grad_norm": 0.09521484375, |
| "grad_norm_var": 3.87340784072876e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1949, |
| "loss/crossentropy": 2.364703059196472, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19488562643527985, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.012854932162522042, |
| "grad_norm": 0.09716796875, |
| "grad_norm_var": 4.364649454752604e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2072, |
| "loss/crossentropy": 2.37164306640625, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2072392851114273, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.012869327383308742, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 3.96798054377238e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2372, |
| "loss/crossentropy": 2.513867974281311, |
| "loss/fcd": 0.474609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2371513769030571, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.01288372260409544, |
| "grad_norm": 0.11376953125, |
| "grad_norm_var": 4.2969981829325356e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2024, |
| "loss/crossentropy": 2.266432523727417, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20237434655427933, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.012898117824882139, |
| "grad_norm": 0.1083984375, |
| "grad_norm_var": 3.424386183420817e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2159, |
| "loss/crossentropy": 2.191688299179077, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21588657796382904, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.012912513045668837, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 3.24477752049764e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2044, |
| "loss/crossentropy": 2.1034794449806213, |
| "loss/fcd": 0.421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2044026404619217, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.012926908266455537, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 2.989669640858968e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1955, |
| "loss/crossentropy": 2.1368765830993652, |
| "loss/fcd": 0.4033203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19553573429584503, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.012941303487242236, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 2.3837884267171224e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2218, |
| "loss/crossentropy": 2.5139960050582886, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2217550054192543, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.012955698708028934, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 2.9818216959635416e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.224, |
| "loss/crossentropy": 2.2468815445899963, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22403018921613693, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.012970093928815634, |
| "grad_norm": 0.1220703125, |
| "grad_norm_var": 4.6284000078837076e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2005, |
| "loss/crossentropy": 1.8389571905136108, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20048467069864273, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.012984489149602332, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 4.6736995379130046e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2181, |
| "loss/crossentropy": 2.47100293636322, |
| "loss/fcd": 0.4404296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21807243674993515, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.01299888437038903, |
| "grad_norm": 0.142578125, |
| "grad_norm_var": 0.00012298325697580973, |
| "learning_rate": 0.0001, |
| "loss": 0.2141, |
| "loss/crossentropy": 2.011506676673889, |
| "loss/fcd": 0.4443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21408168226480484, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.013013279591175729, |
| "grad_norm": 0.1064453125, |
| "grad_norm_var": 0.00012222925821940103, |
| "learning_rate": 0.0001, |
| "loss": 0.2293, |
| "loss/crossentropy": 2.7555429935455322, |
| "loss/fcd": 0.4814453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22929980605840683, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.013027674811962429, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.00011736154556274414, |
| "learning_rate": 0.0001, |
| "loss": 0.2118, |
| "loss/crossentropy": 2.3159666061401367, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21182847768068314, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.013042070032749127, |
| "grad_norm": 0.09814453125, |
| "grad_norm_var": 0.0001245806614557902, |
| "learning_rate": 0.0001, |
| "loss": 0.2026, |
| "loss/crossentropy": 2.5774402618408203, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20259930193424225, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.013056465253535826, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.00012540817260742188, |
| "learning_rate": 0.0001, |
| "loss": 0.2085, |
| "loss/crossentropy": 2.3706518411636353, |
| "loss/fcd": 0.4306640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2085341438651085, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.013070860474322524, |
| "grad_norm": 0.103515625, |
| "grad_norm_var": 0.0001143127679824829, |
| "learning_rate": 0.0001, |
| "loss": 0.2094, |
| "loss/crossentropy": 2.5629345178604126, |
| "loss/fcd": 0.458984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2093740627169609, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.013085255695109224, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 0.0001051257054011027, |
| "learning_rate": 0.0001, |
| "loss": 0.1934, |
| "loss/crossentropy": 2.204862952232361, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1933920904994011, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.013099650915895922, |
| "grad_norm": 0.08935546875, |
| "grad_norm_var": 0.00013220707575480143, |
| "learning_rate": 0.0001, |
| "loss": 0.1996, |
| "loss/crossentropy": 2.6174755096435547, |
| "loss/fcd": 0.4140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19955138117074966, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.01311404613668262, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.0001332561175028483, |
| "learning_rate": 0.0001, |
| "loss": 0.2163, |
| "loss/crossentropy": 2.3615927696228027, |
| "loss/fcd": 0.4580078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21632999181747437, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.013128441357469321, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 0.00013492802778879802, |
| "learning_rate": 0.0001, |
| "loss": 0.2018, |
| "loss/crossentropy": 2.3113813400268555, |
| "loss/fcd": 0.435546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20180628448724747, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.01314283657825602, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 0.00013534228006998697, |
| "learning_rate": 0.0001, |
| "loss": 0.2166, |
| "loss/crossentropy": 2.575196623802185, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2166244387626648, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.013157231799042718, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 0.0001348008712132772, |
| "learning_rate": 0.0001, |
| "loss": 0.2273, |
| "loss/crossentropy": 2.4861690998077393, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22732173651456833, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.013171627019829416, |
| "grad_norm": 0.119140625, |
| "grad_norm_var": 0.00014147659142812094, |
| "learning_rate": 0.0001, |
| "loss": 0.2495, |
| "loss/crossentropy": 2.6098480224609375, |
| "loss/fcd": 0.5009765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24945074319839478, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.013186022240616116, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 0.00013860066731770834, |
| "learning_rate": 0.0001, |
| "loss": 0.2244, |
| "loss/crossentropy": 2.3869473934173584, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2244347631931305, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.013200417461402814, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.00012586911519368488, |
| "learning_rate": 0.0001, |
| "loss": 0.2015, |
| "loss/crossentropy": 2.152829647064209, |
| "loss/fcd": 0.3984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20145908743143082, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.013214812682189513, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 0.00012712081273396809, |
| "learning_rate": 0.0001, |
| "loss": 0.2334, |
| "loss/crossentropy": 2.607330799102783, |
| "loss/fcd": 0.4599609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23341640084981918, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.013229207902976213, |
| "grad_norm": 0.09619140625, |
| "grad_norm_var": 4.0013591448465986e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2237, |
| "loss/crossentropy": 2.6008039712905884, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22374649345874786, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.013243603123762911, |
| "grad_norm": 0.1259765625, |
| "grad_norm_var": 7.061064243316651e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2637, |
| "loss/crossentropy": 2.669323205947876, |
| "loss/fcd": 0.509765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.26368650794029236, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.01325799834454961, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 7.044076919555664e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2218, |
| "loss/crossentropy": 2.663564920425415, |
| "loss/fcd": 0.4560546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22182673960924149, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.013272393565336308, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 6.802777449289957e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2233, |
| "loss/crossentropy": 2.503962516784668, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22326037287712097, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.013286788786123008, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 6.968180338541666e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2051, |
| "loss/crossentropy": 2.488289475440979, |
| "loss/fcd": 0.4482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20506569743156433, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.013301184006909706, |
| "grad_norm": 0.1240234375, |
| "grad_norm_var": 9.196201960245768e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2772, |
| "loss/crossentropy": 2.7090861797332764, |
| "loss/fcd": 0.537109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2771788090467453, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.013315579227696404, |
| "grad_norm": 0.09765625, |
| "grad_norm_var": 9.65664784113566e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2027, |
| "loss/crossentropy": 2.4453471899032593, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.202697291970253, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.013329974448483104, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 7.879634698232015e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2021, |
| "loss/crossentropy": 2.4429107904434204, |
| "loss/fcd": 0.4248046875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20214182883501053, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.013344369669269803, |
| "grad_norm": 0.11181640625, |
| "grad_norm_var": 7.883608341217042e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2276, |
| "loss/crossentropy": 2.4106050729751587, |
| "loss/fcd": 0.509765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22757098823785782, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.013358764890056501, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 8.347431818644206e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1978, |
| "loss/crossentropy": 2.1090660095214844, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.197757326066494, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.0133731601108432, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 8.369187513987223e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2262, |
| "loss/crossentropy": 2.443942070007324, |
| "loss/fcd": 0.4521484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22621066123247147, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.0133875553316299, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 8.71966282526652e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1729, |
| "loss/crossentropy": 2.2424585819244385, |
| "loss/fcd": 0.4091796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.17293807864189148, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.013401950552416598, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 8.215804894765219e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1967, |
| "loss/crossentropy": 2.0759899616241455, |
| "loss/fcd": 0.41796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19673413038253784, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.013416345773203296, |
| "grad_norm": 0.12060546875, |
| "grad_norm_var": 9.310940901438395e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2183, |
| "loss/crossentropy": 2.1836588382720947, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21834751218557358, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.013430740993989995, |
| "grad_norm": 0.0966796875, |
| "grad_norm_var": 0.00010077059268951417, |
| "learning_rate": 0.0001, |
| "loss": 0.2045, |
| "loss/crossentropy": 2.431378960609436, |
| "loss/fcd": 0.408203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2045089453458786, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.013445136214776695, |
| "grad_norm": 0.1005859375, |
| "grad_norm_var": 0.00010162889957427978, |
| "learning_rate": 0.0001, |
| "loss": 0.2197, |
| "loss/crossentropy": 2.4605276584625244, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2197011262178421, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.013459531435563393, |
| "grad_norm": 0.1123046875, |
| "grad_norm_var": 9.326934814453125e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2397, |
| "loss/crossentropy": 2.551363468170166, |
| "loss/fcd": 0.470703125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23971816152334213, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.013473926656350091, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 7.578134536743165e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2057, |
| "loss/crossentropy": 2.3025097846984863, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20573781430721283, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.013488321877136791, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 7.72784153620402e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2001, |
| "loss/crossentropy": 2.358902096748352, |
| "loss/fcd": 0.4208984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20014435052871704, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.01350271709792349, |
| "grad_norm": 0.12890625, |
| "grad_norm_var": 0.00010542770226796468, |
| "learning_rate": 0.0001, |
| "loss": 0.2339, |
| "loss/crossentropy": 2.2731123566627502, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23394957929849625, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.013517112318710188, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 0.00010153353214263916, |
| "learning_rate": 0.0001, |
| "loss": 0.1838, |
| "loss/crossentropy": 2.0903587341308594, |
| "loss/fcd": 0.3876953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1838395819067955, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.013531507539496886, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 8.541345596313477e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.205, |
| "loss/crossentropy": 2.3965718746185303, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2049787938594818, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.013545902760283586, |
| "grad_norm": 0.11083984375, |
| "grad_norm_var": 7.835924625396729e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1831, |
| "loss/crossentropy": 2.0947141647338867, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1831398606300354, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.013560297981070285, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 7.657607396443685e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.223, |
| "loss/crossentropy": 2.4057345390319824, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22295735031366348, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.013574693201856983, |
| "grad_norm": 0.1103515625, |
| "grad_norm_var": 7.612605889638265e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2116, |
| "loss/crossentropy": 2.507383942604065, |
| "loss/fcd": 0.451171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21163207292556763, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.013589088422643683, |
| "grad_norm": 0.0966796875, |
| "grad_norm_var": 7.929702599843344e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.199, |
| "loss/crossentropy": 2.5167927742004395, |
| "loss/fcd": 0.4296875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19900661706924438, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.013603483643430381, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 8.271435896555583e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.223, |
| "loss/crossentropy": 2.495205879211426, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2230018451809883, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.01361787886421708, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 8.191963036855062e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1923, |
| "loss/crossentropy": 2.3246638774871826, |
| "loss/fcd": 0.4072265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19232943654060364, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.013632274085003778, |
| "grad_norm": 0.10791015625, |
| "grad_norm_var": 7.743438084920248e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2183, |
| "loss/crossentropy": 2.342094659805298, |
| "loss/fcd": 0.44140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21831049770116806, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.013646669305790478, |
| "grad_norm": 0.09765625, |
| "grad_norm_var": 6.79562489191691e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2115, |
| "loss/crossentropy": 2.395784616470337, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2115183100104332, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.013661064526577177, |
| "grad_norm": 0.09326171875, |
| "grad_norm_var": 7.262229919433594e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1866, |
| "loss/crossentropy": 2.3914581537246704, |
| "loss/fcd": 0.408203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1866021454334259, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.013675459747363875, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 7.293124993642171e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2197, |
| "loss/crossentropy": 2.7165035009384155, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21966220438480377, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.013689854968150573, |
| "grad_norm": 0.0947265625, |
| "grad_norm_var": 7.529159386952718e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2021, |
| "loss/crossentropy": 2.4402034282684326, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20212795585393906, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.013704250188937273, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 8.964439233144124e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2361, |
| "loss/crossentropy": 2.3232113122940063, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2360788732767105, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.013718645409723972, |
| "grad_norm": 0.09619140625, |
| "grad_norm_var": 9.429355462392171e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2187, |
| "loss/crossentropy": 2.51291286945343, |
| "loss/fcd": 0.4423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21865685284137726, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.01373304063051067, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 5.412002404530843e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2028, |
| "loss/crossentropy": 2.311350464820862, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20284704118967056, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.01374743585129737, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 5.3857763608296714e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2074, |
| "loss/crossentropy": 2.3779343366622925, |
| "loss/fcd": 0.42578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20741773396730423, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.013761831072084068, |
| "grad_norm": 0.10009765625, |
| "grad_norm_var": 4.9749016761779784e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1919, |
| "loss/crossentropy": 2.285262107849121, |
| "loss/fcd": 0.3984375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19189584255218506, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.013776226292870767, |
| "grad_norm": 0.10498046875, |
| "grad_norm_var": 4.519522190093994e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.2057, |
| "loss/crossentropy": 2.509137988090515, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20570625364780426, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.013790621513657465, |
| "grad_norm": 0.09375, |
| "grad_norm_var": 4.8692027727762856e-05, |
| "learning_rate": 0.0001, |
| "loss": 0.1856, |
| "loss/crossentropy": 2.298775553703308, |
| "loss/fcd": 0.400390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18564346432685852, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.013805016734444165, |
| "grad_norm": 0.1396484375, |
| "grad_norm_var": 0.000137979785601298, |
| "learning_rate": 0.0001, |
| "loss": 0.255, |
| "loss/crossentropy": 2.497642993927002, |
| "loss/fcd": 0.4765625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2549555003643036, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.013819411955230863, |
| "grad_norm": 0.1025390625, |
| "grad_norm_var": 0.00013514260450998942, |
| "learning_rate": 0.0001, |
| "loss": 0.2086, |
| "loss/crossentropy": 2.250615358352661, |
| "loss/fcd": 0.427734375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20857169479131699, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.013833807176017562, |
| "grad_norm": 0.1162109375, |
| "grad_norm_var": 0.00014392435550689698, |
| "learning_rate": 0.0001, |
| "loss": 0.2192, |
| "loss/crossentropy": 2.349924087524414, |
| "loss/fcd": 0.4716796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2192462459206581, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.013848202396804262, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 0.00015734036763509113, |
| "learning_rate": 0.0001, |
| "loss": 0.2206, |
| "loss/crossentropy": 2.6663416624069214, |
| "loss/fcd": 0.46875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22060546278953552, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.01386259761759096, |
| "grad_norm": 0.119140625, |
| "grad_norm_var": 0.00016869604587554932, |
| "learning_rate": 0.0001, |
| "loss": 0.2306, |
| "loss/crossentropy": 2.5041009187698364, |
| "loss/fcd": 0.4501953125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23058706521987915, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.013876992838377658, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.00016364653905232747, |
| "learning_rate": 0.0001, |
| "loss": 0.2344, |
| "loss/crossentropy": 2.6687543392181396, |
| "loss/fcd": 0.4609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2344193086028099, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.013891388059164357, |
| "grad_norm": 0.091796875, |
| "grad_norm_var": 0.00016646285851796468, |
| "learning_rate": 0.0001, |
| "loss": 0.2073, |
| "loss/crossentropy": 2.6841739416122437, |
| "loss/fcd": 0.4150390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2073235660791397, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.013905783279951057, |
| "grad_norm": 0.10205078125, |
| "grad_norm_var": 0.00016492903232574464, |
| "learning_rate": 0.0001, |
| "loss": 0.23, |
| "loss/crossentropy": 2.511627197265625, |
| "loss/fcd": 0.47265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23003337532281876, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.013920178500737755, |
| "grad_norm": 0.111328125, |
| "grad_norm_var": 0.0001549313465754191, |
| "learning_rate": 0.0001, |
| "loss": 0.2113, |
| "loss/crossentropy": 2.2891138792037964, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21126385778188705, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.013934573721524453, |
| "grad_norm": 0.134765625, |
| "grad_norm_var": 0.0001918862263361613, |
| "learning_rate": 0.0001, |
| "loss": 0.2406, |
| "loss/crossentropy": 2.610072374343872, |
| "loss/fcd": 0.4755859375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24063490331172943, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.013948968942311154, |
| "grad_norm": 0.1181640625, |
| "grad_norm_var": 0.0001845995585123698, |
| "learning_rate": 0.0001, |
| "loss": 0.2615, |
| "loss/crossentropy": 2.6200684309005737, |
| "loss/fcd": 0.484375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2615353539586067, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.013963364163097852, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 0.00017747879028320312, |
| "learning_rate": 0.0001, |
| "loss": 0.2364, |
| "loss/crossentropy": 2.344622015953064, |
| "loss/fcd": 0.4375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23641209304332733, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.01397775938388455, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 0.00017270147800445556, |
| "learning_rate": 0.0001, |
| "loss": 0.2414, |
| "loss/crossentropy": 2.6739622354507446, |
| "loss/fcd": 0.50390625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.24144794046878815, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.013992154604671249, |
| "grad_norm": 0.13671875, |
| "grad_norm_var": 0.00019855499267578124, |
| "learning_rate": 0.0001, |
| "loss": 0.1994, |
| "loss/crossentropy": 2.1970399618148804, |
| "loss/fcd": 0.544921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19944548606872559, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.014006549825457949, |
| "grad_norm": 0.1279296875, |
| "grad_norm_var": 0.00020308395226796468, |
| "learning_rate": 0.0001, |
| "loss": 0.2182, |
| "loss/crossentropy": 2.0679745078086853, |
| "loss/fcd": 0.4267578125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21820590645074844, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.014020945046244647, |
| "grad_norm": 0.11572265625, |
| "grad_norm_var": 0.0001689751942952474, |
| "learning_rate": 0.0001, |
| "loss": 0.2122, |
| "loss/crossentropy": 2.2601789236068726, |
| "loss/fcd": 0.43359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21224602311849594, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.014035340267031345, |
| "grad_norm": 0.115234375, |
| "grad_norm_var": 0.00013271570205688476, |
| "learning_rate": 0.0001, |
| "loss": 0.2131, |
| "loss/crossentropy": 2.215391755104065, |
| "loss/fcd": 0.439453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21311646699905396, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.014049735487818044, |
| "grad_norm": 0.0908203125, |
| "grad_norm_var": 0.00016161203384399415, |
| "learning_rate": 0.0001, |
| "loss": 0.2055, |
| "loss/crossentropy": 2.593106508255005, |
| "loss/fcd": 0.4111328125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20546036958694458, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.014064130708604744, |
| "grad_norm": 0.1044921875, |
| "grad_norm_var": 0.000168001651763916, |
| "learning_rate": 0.0001, |
| "loss": 0.2212, |
| "loss/crossentropy": 2.4123164415359497, |
| "loss/fcd": 0.4482421875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22117872536182404, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.014078525929391442, |
| "grad_norm": 0.1357421875, |
| "grad_norm_var": 0.00019616186618804933, |
| "learning_rate": 0.0001, |
| "loss": 0.2037, |
| "loss/crossentropy": 2.111898362636566, |
| "loss/fcd": 0.47265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20368139445781708, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.01409292115017814, |
| "grad_norm": 0.10302734375, |
| "grad_norm_var": 0.00020366907119750977, |
| "learning_rate": 0.0001, |
| "loss": 0.2233, |
| "loss/crossentropy": 2.484625220298767, |
| "loss/fcd": 0.466796875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22334980964660645, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.01410731637096484, |
| "grad_norm": 0.103515625, |
| "grad_norm_var": 0.0002091874678929647, |
| "learning_rate": 0.0001, |
| "loss": 0.2286, |
| "loss/crossentropy": 2.5562527179718018, |
| "loss/fcd": 0.455078125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.22860489040613174, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.014121711591751539, |
| "grad_norm": 0.109375, |
| "grad_norm_var": 0.0001770724852879842, |
| "learning_rate": 0.0001, |
| "loss": 0.2195, |
| "loss/crossentropy": 2.372304320335388, |
| "loss/fcd": 0.4423828125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21949142217636108, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.014136106812538237, |
| "grad_norm": 0.10888671875, |
| "grad_norm_var": 0.00016833841800689697, |
| "learning_rate": 0.0001, |
| "loss": 0.2166, |
| "loss/crossentropy": 2.5525119304656982, |
| "loss/fcd": 0.453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21661998331546783, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.014150502033324935, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 0.00018307268619537352, |
| "learning_rate": 0.0001, |
| "loss": 0.205, |
| "loss/crossentropy": 2.346623182296753, |
| "loss/fcd": 0.416015625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.205020934343338, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.014164897254111635, |
| "grad_norm": 0.09814453125, |
| "grad_norm_var": 0.00016809701919555663, |
| "learning_rate": 0.0001, |
| "loss": 0.1916, |
| "loss/crossentropy": 2.372196674346924, |
| "loss/fcd": 0.408203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.19163141399621964, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.014179292474898334, |
| "grad_norm": 0.11962890625, |
| "grad_norm_var": 0.00016938745975494384, |
| "learning_rate": 0.0001, |
| "loss": 0.2246, |
| "loss/crossentropy": 2.2609957456588745, |
| "loss/fcd": 0.447265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2246478945016861, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.014193687695685032, |
| "grad_norm": 0.1201171875, |
| "grad_norm_var": 0.000172765056292216, |
| "learning_rate": 0.0001, |
| "loss": 0.2191, |
| "loss/crossentropy": 2.2087113857269287, |
| "loss/fcd": 0.4287109375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21905823051929474, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.014208082916471732, |
| "grad_norm": 0.1171875, |
| "grad_norm_var": 0.00017405251661936443, |
| "learning_rate": 0.0001, |
| "loss": 0.22, |
| "loss/crossentropy": 2.257576823234558, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2199638932943344, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.01422247813725843, |
| "grad_norm": 0.10693359375, |
| "grad_norm_var": 0.00013484557469685873, |
| "learning_rate": 0.0001, |
| "loss": 0.2398, |
| "loss/crossentropy": 2.626092791557312, |
| "loss/fcd": 0.462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23977234959602356, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.014236873358045129, |
| "grad_norm": 0.107421875, |
| "grad_norm_var": 0.00011490186055501302, |
| "learning_rate": 0.0001, |
| "loss": 0.2098, |
| "loss/crossentropy": 2.4047662019729614, |
| "loss/fcd": 0.4453125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.20984865725040436, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.014251268578831827, |
| "grad_norm": 0.11328125, |
| "grad_norm_var": 0.00011332730452219645, |
| "learning_rate": 0.0001, |
| "loss": 0.2132, |
| "loss/crossentropy": 2.4295172691345215, |
| "loss/fcd": 0.4462890625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21317294985055923, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.014265663799618527, |
| "grad_norm": 0.12451171875, |
| "grad_norm_var": 0.0001256903012593587, |
| "learning_rate": 0.0001, |
| "loss": 0.2325, |
| "loss/crossentropy": 2.5081902742385864, |
| "loss/fcd": 0.47265625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.23250433802604675, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.014280059020405226, |
| "grad_norm": 0.1376953125, |
| "grad_norm_var": 0.00014209349950154623, |
| "learning_rate": 0.0001, |
| "loss": 0.2266, |
| "loss/crossentropy": 2.0926729440689087, |
| "loss/fcd": 0.4951171875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.226626954972744, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.014294454241191924, |
| "grad_norm": 0.10595703125, |
| "grad_norm_var": 0.00014054675896962482, |
| "learning_rate": 0.0001, |
| "loss": 0.2136, |
| "loss/crossentropy": 2.383934497833252, |
| "loss/fcd": 0.4609375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21359023451805115, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.014308849461978622, |
| "grad_norm": 0.11865234375, |
| "grad_norm_var": 0.00010741154352823893, |
| "learning_rate": 0.0001, |
| "loss": 0.2225, |
| "loss/crossentropy": 2.4633511304855347, |
| "loss/fcd": 0.443359375, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2224937155842781, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.014323244682765322, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 0.00011001825332641601, |
| "learning_rate": 0.0001, |
| "loss": 0.1897, |
| "loss/crossentropy": 2.1795610189437866, |
| "loss/fcd": 0.3994140625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.18965643644332886, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.01433763990355202, |
| "grad_norm": 0.1015625, |
| "grad_norm_var": 0.00011246601740519205, |
| "learning_rate": 0.0001, |
| "loss": 0.1976, |
| "loss/crossentropy": 2.336984634399414, |
| "loss/fcd": 0.4033203125, |
| "loss/idx": 18.0, |
| "loss/logits": 0.1975797638297081, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.014352035124338719, |
| "grad_norm": 0.099609375, |
| "grad_norm_var": 0.00012168486913045247, |
| "learning_rate": 0.0001, |
| "loss": 0.2147, |
| "loss/crossentropy": 2.664496660232544, |
| "loss/fcd": 0.44921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.2147291675209999, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.014366430345125419, |
| "grad_norm": 0.09912109375, |
| "grad_norm_var": 0.00013074477513631185, |
| "learning_rate": 0.0001, |
| "loss": 0.2094, |
| "loss/crossentropy": 2.33840548992157, |
| "loss/fcd": 0.419921875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.209433451294899, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.014380825565912117, |
| "grad_norm": 0.10107421875, |
| "grad_norm_var": 0.00012872119744618735, |
| "learning_rate": 0.0001, |
| "loss": 0.2101, |
| "loss/crossentropy": 2.5498578548431396, |
| "loss/fcd": 0.431640625, |
| "loss/idx": 18.0, |
| "loss/logits": 0.210076242685318, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.014395220786698816, |
| "grad_norm": 0.103515625, |
| "grad_norm_var": 0.00012149810791015626, |
| "learning_rate": 0.0001, |
| "loss": 0.2192, |
| "loss/crossentropy": 2.547055721282959, |
| "loss/fcd": 0.4560546875, |
| "loss/idx": 18.0, |
| "loss/logits": 0.21922268718481064, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 100000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": true, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.51753290940416e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|