{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 35.97122302158273, "eval_steps": 500, "global_step": 40000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "action_loss": 2.3601033687591553, "epoch": 0, "step": 0 }, { "epoch": 0, "step": 0, "torque_loss": 1.228797435760498 }, { "epoch": 0.008992805755395683, "grad_norm": 25.071945190429688, "learning_rate": 3.0000000000000004e-07, "loss": 2.4776, "step": 10 }, { "action_loss": 1.3460880517959595, "epoch": 0.008992805755395683, "step": 10 }, { "epoch": 0.008992805755395683, "step": 10, "torque_loss": 1.1185355186462402 }, { "epoch": 0.017985611510791366, "grad_norm": 86.86817169189453, "learning_rate": 6.333333333333333e-07, "loss": 2.5981, "step": 20 }, { "action_loss": 1.8547900915145874, "epoch": 0.017985611510791366, "step": 20 }, { "epoch": 0.017985611510791366, "step": 20, "torque_loss": 1.0779401063919067 }, { "epoch": 0.02697841726618705, "grad_norm": 23.92437171936035, "learning_rate": 9.666666666666668e-07, "loss": 2.503, "step": 30 }, { "action_loss": 2.150662660598755, "epoch": 0.02697841726618705, "step": 30 }, { "epoch": 0.02697841726618705, "step": 30, "torque_loss": 1.2505098581314087 }, { "epoch": 0.03597122302158273, "grad_norm": 21.38734245300293, "learning_rate": 1.3e-06, "loss": 2.2972, "step": 40 }, { "action_loss": 1.7744098901748657, "epoch": 0.03597122302158273, "step": 40 }, { "epoch": 0.03597122302158273, "step": 40, "torque_loss": 1.1729792356491089 }, { "epoch": 0.044964028776978415, "grad_norm": 12.067179679870605, "learning_rate": 1.6333333333333333e-06, "loss": 2.1403, "step": 50 }, { "action_loss": 1.2721601724624634, "epoch": 0.044964028776978415, "step": 50 }, { "epoch": 0.044964028776978415, "step": 50, "torque_loss": 1.1698811054229736 }, { "epoch": 0.0539568345323741, "grad_norm": 8.9168119430542, "learning_rate": 1.9666666666666668e-06, "loss": 1.8372, "step": 60 }, { "action_loss": 0.7553159594535828, "epoch": 0.0539568345323741, "step": 60 }, { "epoch": 0.0539568345323741, "step": 60, "torque_loss": 1.2138123512268066 }, { "epoch": 0.06294964028776978, "grad_norm": 9.972052574157715, "learning_rate": 2.3e-06, "loss": 1.3542, "step": 70 }, { "action_loss": 0.5480057597160339, "epoch": 0.06294964028776978, "step": 70 }, { "epoch": 0.06294964028776978, "step": 70, "torque_loss": 1.1928168535232544 }, { "epoch": 0.07194244604316546, "grad_norm": 2.361851453781128, "learning_rate": 2.6333333333333337e-06, "loss": 1.2168, "step": 80 }, { "action_loss": 0.38906529545783997, "epoch": 0.07194244604316546, "step": 80 }, { "epoch": 0.07194244604316546, "step": 80, "torque_loss": 1.0835970640182495 }, { "epoch": 0.08093525179856115, "grad_norm": 1.4709742069244385, "learning_rate": 2.966666666666667e-06, "loss": 1.0607, "step": 90 }, { "action_loss": 0.2794106900691986, "epoch": 0.08093525179856115, "step": 90 }, { "epoch": 0.08093525179856115, "step": 90, "torque_loss": 1.2511152029037476 }, { "epoch": 0.08992805755395683, "grad_norm": 1.374706745147705, "learning_rate": 3.3e-06, "loss": 0.9656, "step": 100 }, { "action_loss": 0.2490370273590088, "epoch": 0.08992805755395683, "step": 100 }, { "epoch": 0.08992805755395683, "step": 100, "torque_loss": 1.1008645296096802 }, { "epoch": 0.09892086330935251, "grad_norm": 1.3973140716552734, "learning_rate": 3.633333333333334e-06, "loss": 0.8887, "step": 110 }, { "action_loss": 0.19860589504241943, "epoch": 0.09892086330935251, "step": 110 }, { "epoch": 0.09892086330935251, "step": 110, "torque_loss": 1.1784859895706177 }, { "epoch": 0.1079136690647482, "grad_norm": 1.5731942653656006, "learning_rate": 3.966666666666667e-06, "loss": 0.8657, "step": 120 }, { "action_loss": 0.14134247601032257, "epoch": 0.1079136690647482, "step": 120 }, { "epoch": 0.1079136690647482, "step": 120, "torque_loss": 1.142398715019226 }, { "epoch": 0.11690647482014388, "grad_norm": 1.0451860427856445, "learning_rate": 4.2999999999999995e-06, "loss": 0.8139, "step": 130 }, { "action_loss": 0.13740815222263336, "epoch": 0.11690647482014388, "step": 130 }, { "epoch": 0.11690647482014388, "step": 130, "torque_loss": 1.1404907703399658 }, { "epoch": 0.12589928057553956, "grad_norm": 1.1420588493347168, "learning_rate": 4.633333333333334e-06, "loss": 0.7882, "step": 140 }, { "action_loss": 0.10212137550115585, "epoch": 0.12589928057553956, "step": 140 }, { "epoch": 0.12589928057553956, "step": 140, "torque_loss": 1.1281633377075195 }, { "epoch": 0.13489208633093525, "grad_norm": 0.9105278849601746, "learning_rate": 4.966666666666667e-06, "loss": 0.7877, "step": 150 }, { "action_loss": 0.2378050684928894, "epoch": 0.13489208633093525, "step": 150 }, { "epoch": 0.13489208633093525, "step": 150, "torque_loss": 1.1047486066818237 }, { "epoch": 0.14388489208633093, "grad_norm": 0.6294947862625122, "learning_rate": 5.3e-06, "loss": 0.7813, "step": 160 }, { "action_loss": 0.1413825899362564, "epoch": 0.14388489208633093, "step": 160 }, { "epoch": 0.14388489208633093, "step": 160, "torque_loss": 1.1528915166854858 }, { "epoch": 0.1528776978417266, "grad_norm": 0.8054171204566956, "learning_rate": 5.633333333333333e-06, "loss": 0.7532, "step": 170 }, { "action_loss": 0.07351673394441605, "epoch": 0.1528776978417266, "step": 170 }, { "epoch": 0.1528776978417266, "step": 170, "torque_loss": 1.0601669549942017 }, { "epoch": 0.1618705035971223, "grad_norm": 1.0823380947113037, "learning_rate": 5.9666666666666666e-06, "loss": 0.7544, "step": 180 }, { "action_loss": 0.09853282570838928, "epoch": 0.1618705035971223, "step": 180 }, { "epoch": 0.1618705035971223, "step": 180, "torque_loss": 1.0987164974212646 }, { "epoch": 0.17086330935251798, "grad_norm": 0.8827524185180664, "learning_rate": 6.300000000000001e-06, "loss": 0.7753, "step": 190 }, { "action_loss": 0.07383780926465988, "epoch": 0.17086330935251798, "step": 190 }, { "epoch": 0.17086330935251798, "step": 190, "torque_loss": 1.1882470846176147 }, { "epoch": 0.17985611510791366, "grad_norm": 1.0798677206039429, "learning_rate": 6.633333333333333e-06, "loss": 0.7628, "step": 200 }, { "action_loss": 0.054432351142168045, "epoch": 0.17985611510791366, "step": 200 }, { "epoch": 0.17985611510791366, "step": 200, "torque_loss": 1.1141352653503418 }, { "epoch": 0.18884892086330934, "grad_norm": 0.6156890988349915, "learning_rate": 6.966666666666667e-06, "loss": 0.7606, "step": 210 }, { "action_loss": 0.09084396809339523, "epoch": 0.18884892086330934, "step": 210 }, { "epoch": 0.18884892086330934, "step": 210, "torque_loss": 1.0857702493667603 }, { "epoch": 0.19784172661870503, "grad_norm": 0.9973111152648926, "learning_rate": 7.2999999999999996e-06, "loss": 0.7624, "step": 220 }, { "action_loss": 0.06743448972702026, "epoch": 0.19784172661870503, "step": 220 }, { "epoch": 0.19784172661870503, "step": 220, "torque_loss": 1.1273317337036133 }, { "epoch": 0.2068345323741007, "grad_norm": 1.0328171253204346, "learning_rate": 7.633333333333334e-06, "loss": 0.7523, "step": 230 }, { "action_loss": 0.08155327290296555, "epoch": 0.2068345323741007, "step": 230 }, { "epoch": 0.2068345323741007, "step": 230, "torque_loss": 1.1435743570327759 }, { "epoch": 0.2158273381294964, "grad_norm": 1.2842570543289185, "learning_rate": 7.966666666666666e-06, "loss": 0.7395, "step": 240 }, { "action_loss": 0.08218440413475037, "epoch": 0.2158273381294964, "step": 240 }, { "epoch": 0.2158273381294964, "step": 240, "torque_loss": 1.1545201539993286 }, { "epoch": 0.22482014388489208, "grad_norm": 0.8005467653274536, "learning_rate": 8.3e-06, "loss": 0.7413, "step": 250 }, { "action_loss": 0.07422365248203278, "epoch": 0.22482014388489208, "step": 250 }, { "epoch": 0.22482014388489208, "step": 250, "torque_loss": 1.0796318054199219 }, { "epoch": 0.23381294964028776, "grad_norm": 0.9177917838096619, "learning_rate": 8.633333333333334e-06, "loss": 0.7332, "step": 260 }, { "action_loss": 0.05043135583400726, "epoch": 0.23381294964028776, "step": 260 }, { "epoch": 0.23381294964028776, "step": 260, "torque_loss": 1.121415138244629 }, { "epoch": 0.24280575539568344, "grad_norm": 0.9168376922607422, "learning_rate": 8.966666666666668e-06, "loss": 0.7469, "step": 270 }, { "action_loss": 0.06627322733402252, "epoch": 0.24280575539568344, "step": 270 }, { "epoch": 0.24280575539568344, "step": 270, "torque_loss": 1.1081852912902832 }, { "epoch": 0.2517985611510791, "grad_norm": 0.8672727346420288, "learning_rate": 9.3e-06, "loss": 0.7168, "step": 280 }, { "action_loss": 0.04705033823847771, "epoch": 0.2517985611510791, "step": 280 }, { "epoch": 0.2517985611510791, "step": 280, "torque_loss": 1.1210312843322754 }, { "epoch": 0.2607913669064748, "grad_norm": 1.0553734302520752, "learning_rate": 9.633333333333335e-06, "loss": 0.7282, "step": 290 }, { "action_loss": 0.07608530670404434, "epoch": 0.2607913669064748, "step": 290 }, { "epoch": 0.2607913669064748, "step": 290, "torque_loss": 1.0456403493881226 }, { "epoch": 0.2697841726618705, "grad_norm": 0.9795993566513062, "learning_rate": 9.966666666666667e-06, "loss": 0.7146, "step": 300 }, { "action_loss": 0.058746304363012314, "epoch": 0.2697841726618705, "step": 300 }, { "epoch": 0.2697841726618705, "step": 300, "torque_loss": 1.0842264890670776 }, { "epoch": 0.2787769784172662, "grad_norm": 1.2752305269241333, "learning_rate": 1.03e-05, "loss": 0.7066, "step": 310 }, { "action_loss": 0.08919032663106918, "epoch": 0.2787769784172662, "step": 310 }, { "epoch": 0.2787769784172662, "step": 310, "torque_loss": 1.030057430267334 }, { "epoch": 0.28776978417266186, "grad_norm": 0.9920501112937927, "learning_rate": 1.0633333333333334e-05, "loss": 0.7087, "step": 320 }, { "action_loss": 0.050046157091856, "epoch": 0.28776978417266186, "step": 320 }, { "epoch": 0.28776978417266186, "step": 320, "torque_loss": 0.9957067370414734 }, { "epoch": 0.29676258992805754, "grad_norm": 1.4219967126846313, "learning_rate": 1.0966666666666666e-05, "loss": 0.7085, "step": 330 }, { "action_loss": 0.061535853892564774, "epoch": 0.29676258992805754, "step": 330 }, { "epoch": 0.29676258992805754, "step": 330, "torque_loss": 1.0251423120498657 }, { "epoch": 0.3057553956834532, "grad_norm": 1.1964991092681885, "learning_rate": 1.13e-05, "loss": 0.6825, "step": 340 }, { "action_loss": 0.04989084601402283, "epoch": 0.3057553956834532, "step": 340 }, { "epoch": 0.3057553956834532, "step": 340, "torque_loss": 1.1280646324157715 }, { "epoch": 0.3147482014388489, "grad_norm": 1.7309211492538452, "learning_rate": 1.1633333333333334e-05, "loss": 0.714, "step": 350 }, { "action_loss": 0.05466362461447716, "epoch": 0.3147482014388489, "step": 350 }, { "epoch": 0.3147482014388489, "step": 350, "torque_loss": 0.9835445284843445 }, { "epoch": 0.3237410071942446, "grad_norm": 1.741851568222046, "learning_rate": 1.1966666666666668e-05, "loss": 0.7045, "step": 360 }, { "action_loss": 0.04053666815161705, "epoch": 0.3237410071942446, "step": 360 }, { "epoch": 0.3237410071942446, "step": 360, "torque_loss": 1.1115670204162598 }, { "epoch": 0.3327338129496403, "grad_norm": 1.0163975954055786, "learning_rate": 1.23e-05, "loss": 0.6874, "step": 370 }, { "action_loss": 0.0805637538433075, "epoch": 0.3327338129496403, "step": 370 }, { "epoch": 0.3327338129496403, "step": 370, "torque_loss": 1.0344704389572144 }, { "epoch": 0.34172661870503596, "grad_norm": 1.9901119470596313, "learning_rate": 1.2633333333333333e-05, "loss": 0.6787, "step": 380 }, { "action_loss": 0.033416908234357834, "epoch": 0.34172661870503596, "step": 380 }, { "epoch": 0.34172661870503596, "step": 380, "torque_loss": 1.10007905960083 }, { "epoch": 0.35071942446043164, "grad_norm": 0.9794852137565613, "learning_rate": 1.2966666666666669e-05, "loss": 0.701, "step": 390 }, { "action_loss": 0.028374990448355675, "epoch": 0.35071942446043164, "step": 390 }, { "epoch": 0.35071942446043164, "step": 390, "torque_loss": 1.0804682970046997 }, { "epoch": 0.3597122302158273, "grad_norm": 1.4453119039535522, "learning_rate": 1.3300000000000001e-05, "loss": 0.6647, "step": 400 }, { "action_loss": 0.08073117583990097, "epoch": 0.3597122302158273, "step": 400 }, { "epoch": 0.3597122302158273, "step": 400, "torque_loss": 1.0079985857009888 }, { "epoch": 0.368705035971223, "grad_norm": 1.3261090517044067, "learning_rate": 1.3633333333333334e-05, "loss": 0.6729, "step": 410 }, { "action_loss": 0.04264439269900322, "epoch": 0.368705035971223, "step": 410 }, { "epoch": 0.368705035971223, "step": 410, "torque_loss": 1.0483264923095703 }, { "epoch": 0.3776978417266187, "grad_norm": 0.8749484419822693, "learning_rate": 1.3966666666666666e-05, "loss": 0.6926, "step": 420 }, { "action_loss": 0.0471532940864563, "epoch": 0.3776978417266187, "step": 420 }, { "epoch": 0.3776978417266187, "step": 420, "torque_loss": 0.9825827479362488 }, { "epoch": 0.38669064748201437, "grad_norm": 1.7766046524047852, "learning_rate": 1.43e-05, "loss": 0.6597, "step": 430 }, { "action_loss": 0.10714120417833328, "epoch": 0.38669064748201437, "step": 430 }, { "epoch": 0.38669064748201437, "step": 430, "torque_loss": 1.040794849395752 }, { "epoch": 0.39568345323741005, "grad_norm": 1.182275414466858, "learning_rate": 1.4633333333333334e-05, "loss": 0.6666, "step": 440 }, { "action_loss": 0.09162747114896774, "epoch": 0.39568345323741005, "step": 440 }, { "epoch": 0.39568345323741005, "step": 440, "torque_loss": 0.9759108424186707 }, { "epoch": 0.40467625899280574, "grad_norm": 0.9825280904769897, "learning_rate": 1.4966666666666668e-05, "loss": 0.6657, "step": 450 }, { "action_loss": 0.05365939438343048, "epoch": 0.40467625899280574, "step": 450 }, { "epoch": 0.40467625899280574, "step": 450, "torque_loss": 0.9661161303520203 }, { "epoch": 0.4136690647482014, "grad_norm": 1.0763386487960815, "learning_rate": 1.53e-05, "loss": 0.6463, "step": 460 }, { "action_loss": 0.06864164024591446, "epoch": 0.4136690647482014, "step": 460 }, { "epoch": 0.4136690647482014, "step": 460, "torque_loss": 1.0370157957077026 }, { "epoch": 0.4226618705035971, "grad_norm": 0.9412219524383545, "learning_rate": 1.563333333333333e-05, "loss": 0.6573, "step": 470 }, { "action_loss": 0.03223542869091034, "epoch": 0.4226618705035971, "step": 470 }, { "epoch": 0.4226618705035971, "step": 470, "torque_loss": 1.011393666267395 }, { "epoch": 0.4316546762589928, "grad_norm": 1.2407145500183105, "learning_rate": 1.5966666666666667e-05, "loss": 0.6541, "step": 480 }, { "action_loss": 0.053185660392045975, "epoch": 0.4316546762589928, "step": 480 }, { "epoch": 0.4316546762589928, "step": 480, "torque_loss": 0.994636058807373 }, { "epoch": 0.44064748201438847, "grad_norm": 0.9485853314399719, "learning_rate": 1.63e-05, "loss": 0.6652, "step": 490 }, { "action_loss": 0.09030773490667343, "epoch": 0.44064748201438847, "step": 490 }, { "epoch": 0.44064748201438847, "step": 490, "torque_loss": 0.9554548263549805 }, { "epoch": 0.44964028776978415, "grad_norm": 0.8695769309997559, "learning_rate": 1.6633333333333336e-05, "loss": 0.642, "step": 500 }, { "action_loss": 0.06017867848277092, "epoch": 0.44964028776978415, "step": 500 }, { "epoch": 0.44964028776978415, "step": 500, "torque_loss": 0.9414789080619812 }, { "epoch": 0.45863309352517984, "grad_norm": 1.0067031383514404, "learning_rate": 1.6966666666666668e-05, "loss": 0.6581, "step": 510 }, { "action_loss": 0.03633496165275574, "epoch": 0.45863309352517984, "step": 510 }, { "epoch": 0.45863309352517984, "step": 510, "torque_loss": 1.0161365270614624 }, { "epoch": 0.4676258992805755, "grad_norm": 0.8872912526130676, "learning_rate": 1.73e-05, "loss": 0.6373, "step": 520 }, { "action_loss": 0.07821891456842422, "epoch": 0.4676258992805755, "step": 520 }, { "epoch": 0.4676258992805755, "step": 520, "torque_loss": 1.0442619323730469 }, { "epoch": 0.4766187050359712, "grad_norm": 1.149326205253601, "learning_rate": 1.7633333333333336e-05, "loss": 0.6611, "step": 530 }, { "action_loss": 0.10009270906448364, "epoch": 0.4766187050359712, "step": 530 }, { "epoch": 0.4766187050359712, "step": 530, "torque_loss": 1.0289759635925293 }, { "epoch": 0.4856115107913669, "grad_norm": 1.413682222366333, "learning_rate": 1.796666666666667e-05, "loss": 0.6763, "step": 540 }, { "action_loss": 0.07956963032484055, "epoch": 0.4856115107913669, "step": 540 }, { "epoch": 0.4856115107913669, "step": 540, "torque_loss": 1.0494105815887451 }, { "epoch": 0.49460431654676257, "grad_norm": 1.2833068370819092, "learning_rate": 1.83e-05, "loss": 0.6525, "step": 550 }, { "action_loss": 0.07492244988679886, "epoch": 0.49460431654676257, "step": 550 }, { "epoch": 0.49460431654676257, "step": 550, "torque_loss": 1.023133635520935 }, { "epoch": 0.5035971223021583, "grad_norm": 0.9770055413246155, "learning_rate": 1.8633333333333333e-05, "loss": 0.6443, "step": 560 }, { "action_loss": 0.03979291394352913, "epoch": 0.5035971223021583, "step": 560 }, { "epoch": 0.5035971223021583, "step": 560, "torque_loss": 1.0212955474853516 }, { "epoch": 0.512589928057554, "grad_norm": 1.280092716217041, "learning_rate": 1.896666666666667e-05, "loss": 0.6657, "step": 570 }, { "action_loss": 0.055417004972696304, "epoch": 0.512589928057554, "step": 570 }, { "epoch": 0.512589928057554, "step": 570, "torque_loss": 1.0120583772659302 }, { "epoch": 0.5215827338129496, "grad_norm": 1.0747944116592407, "learning_rate": 1.93e-05, "loss": 0.6597, "step": 580 }, { "action_loss": 0.06836161762475967, "epoch": 0.5215827338129496, "step": 580 }, { "epoch": 0.5215827338129496, "step": 580, "torque_loss": 0.9599930644035339 }, { "epoch": 0.5305755395683454, "grad_norm": 0.8699008822441101, "learning_rate": 1.9633333333333334e-05, "loss": 0.6526, "step": 590 }, { "action_loss": 0.04463890194892883, "epoch": 0.5305755395683454, "step": 590 }, { "epoch": 0.5305755395683454, "step": 590, "torque_loss": 1.0378084182739258 }, { "epoch": 0.539568345323741, "grad_norm": 1.2482844591140747, "learning_rate": 1.9966666666666666e-05, "loss": 0.6426, "step": 600 }, { "action_loss": 0.09986943751573563, "epoch": 0.539568345323741, "step": 600 }, { "epoch": 0.539568345323741, "step": 600, "torque_loss": 0.9161393046379089 }, { "epoch": 0.5485611510791367, "grad_norm": 1.6824803352355957, "learning_rate": 2.0300000000000002e-05, "loss": 0.6325, "step": 610 }, { "action_loss": 0.06485525518655777, "epoch": 0.5485611510791367, "step": 610 }, { "epoch": 0.5485611510791367, "step": 610, "torque_loss": 1.0097626447677612 }, { "epoch": 0.5575539568345323, "grad_norm": 1.7079585790634155, "learning_rate": 2.0633333333333335e-05, "loss": 0.6462, "step": 620 }, { "action_loss": 0.06220120191574097, "epoch": 0.5575539568345323, "step": 620 }, { "epoch": 0.5575539568345323, "step": 620, "torque_loss": 1.0302258729934692 }, { "epoch": 0.5665467625899281, "grad_norm": 1.087373971939087, "learning_rate": 2.0966666666666667e-05, "loss": 0.6357, "step": 630 }, { "action_loss": 0.036185454577207565, "epoch": 0.5665467625899281, "step": 630 }, { "epoch": 0.5665467625899281, "step": 630, "torque_loss": 1.0345574617385864 }, { "epoch": 0.5755395683453237, "grad_norm": 0.9478173851966858, "learning_rate": 2.13e-05, "loss": 0.6462, "step": 640 }, { "action_loss": 0.0908084511756897, "epoch": 0.5755395683453237, "step": 640 }, { "epoch": 0.5755395683453237, "step": 640, "torque_loss": 1.0032485723495483 }, { "epoch": 0.5845323741007195, "grad_norm": 1.3268548250198364, "learning_rate": 2.1633333333333332e-05, "loss": 0.642, "step": 650 }, { "action_loss": 0.027489980682730675, "epoch": 0.5845323741007195, "step": 650 }, { "epoch": 0.5845323741007195, "step": 650, "torque_loss": 0.8783957958221436 }, { "epoch": 0.5935251798561151, "grad_norm": 1.0037832260131836, "learning_rate": 2.1966666666666668e-05, "loss": 0.6287, "step": 660 }, { "action_loss": 0.04270133376121521, "epoch": 0.5935251798561151, "step": 660 }, { "epoch": 0.5935251798561151, "step": 660, "torque_loss": 0.9307217597961426 }, { "epoch": 0.6025179856115108, "grad_norm": 1.3270783424377441, "learning_rate": 2.23e-05, "loss": 0.6143, "step": 670 }, { "action_loss": 0.06936189532279968, "epoch": 0.6025179856115108, "step": 670 }, { "epoch": 0.6025179856115108, "step": 670, "torque_loss": 0.9971422553062439 }, { "epoch": 0.6115107913669064, "grad_norm": 1.3350270986557007, "learning_rate": 2.2633333333333336e-05, "loss": 0.6395, "step": 680 }, { "action_loss": 0.061157580465078354, "epoch": 0.6115107913669064, "step": 680 }, { "epoch": 0.6115107913669064, "step": 680, "torque_loss": 0.9963483810424805 }, { "epoch": 0.6205035971223022, "grad_norm": 0.9526259899139404, "learning_rate": 2.2966666666666668e-05, "loss": 0.6141, "step": 690 }, { "action_loss": 0.04836845025420189, "epoch": 0.6205035971223022, "step": 690 }, { "epoch": 0.6205035971223022, "step": 690, "torque_loss": 0.9504715800285339 }, { "epoch": 0.6294964028776978, "grad_norm": 1.5896618366241455, "learning_rate": 2.3300000000000004e-05, "loss": 0.6235, "step": 700 }, { "action_loss": 0.028065389022231102, "epoch": 0.6294964028776978, "step": 700 }, { "epoch": 0.6294964028776978, "step": 700, "torque_loss": 0.967683732509613 }, { "epoch": 0.6384892086330936, "grad_norm": 1.3163323402404785, "learning_rate": 2.3633333333333336e-05, "loss": 0.625, "step": 710 }, { "action_loss": 0.02837451733648777, "epoch": 0.6384892086330936, "step": 710 }, { "epoch": 0.6384892086330936, "step": 710, "torque_loss": 0.987034022808075 }, { "epoch": 0.6474820143884892, "grad_norm": 1.3351598978042603, "learning_rate": 2.396666666666667e-05, "loss": 0.6106, "step": 720 }, { "action_loss": 0.07958420366048813, "epoch": 0.6474820143884892, "step": 720 }, { "epoch": 0.6474820143884892, "step": 720, "torque_loss": 0.9238533973693848 }, { "epoch": 0.6564748201438849, "grad_norm": 1.6972004175186157, "learning_rate": 2.43e-05, "loss": 0.6223, "step": 730 }, { "action_loss": 0.07562819868326187, "epoch": 0.6564748201438849, "step": 730 }, { "epoch": 0.6564748201438849, "step": 730, "torque_loss": 0.959472119808197 }, { "epoch": 0.6654676258992805, "grad_norm": 1.3841592073440552, "learning_rate": 2.4633333333333334e-05, "loss": 0.5774, "step": 740 }, { "action_loss": 0.047848593443632126, "epoch": 0.6654676258992805, "step": 740 }, { "epoch": 0.6654676258992805, "step": 740, "torque_loss": 0.8421330451965332 }, { "epoch": 0.6744604316546763, "grad_norm": 1.5274842977523804, "learning_rate": 2.496666666666667e-05, "loss": 0.5657, "step": 750 }, { "action_loss": 0.03686165064573288, "epoch": 0.6744604316546763, "step": 750 }, { "epoch": 0.6744604316546763, "step": 750, "torque_loss": 0.8640791773796082 }, { "epoch": 0.6834532374100719, "grad_norm": 1.6186784505844116, "learning_rate": 2.5300000000000002e-05, "loss": 0.5536, "step": 760 }, { "action_loss": 0.06724251061677933, "epoch": 0.6834532374100719, "step": 760 }, { "epoch": 0.6834532374100719, "step": 760, "torque_loss": 0.8407055735588074 }, { "epoch": 0.6924460431654677, "grad_norm": 1.3599365949630737, "learning_rate": 2.5633333333333338e-05, "loss": 0.5387, "step": 770 }, { "action_loss": 0.030956314876675606, "epoch": 0.6924460431654677, "step": 770 }, { "epoch": 0.6924460431654677, "step": 770, "torque_loss": 0.8808550834655762 }, { "epoch": 0.7014388489208633, "grad_norm": 1.8494763374328613, "learning_rate": 2.5966666666666667e-05, "loss": 0.544, "step": 780 }, { "action_loss": 0.029672781005501747, "epoch": 0.7014388489208633, "step": 780 }, { "epoch": 0.7014388489208633, "step": 780, "torque_loss": 0.7976288199424744 }, { "epoch": 0.710431654676259, "grad_norm": 2.1473541259765625, "learning_rate": 2.6300000000000002e-05, "loss": 0.5437, "step": 790 }, { "action_loss": 0.04914906248450279, "epoch": 0.710431654676259, "step": 790 }, { "epoch": 0.710431654676259, "step": 790, "torque_loss": 0.851344883441925 }, { "epoch": 0.7194244604316546, "grad_norm": 1.415934443473816, "learning_rate": 2.663333333333333e-05, "loss": 0.5456, "step": 800 }, { "action_loss": 0.03175240382552147, "epoch": 0.7194244604316546, "step": 800 }, { "epoch": 0.7194244604316546, "step": 800, "torque_loss": 0.8138476014137268 }, { "epoch": 0.7284172661870504, "grad_norm": 1.5186996459960938, "learning_rate": 2.6966666666666667e-05, "loss": 0.5462, "step": 810 }, { "action_loss": 0.07499395310878754, "epoch": 0.7284172661870504, "step": 810 }, { "epoch": 0.7284172661870504, "step": 810, "torque_loss": 0.8412261605262756 }, { "epoch": 0.737410071942446, "grad_norm": 1.5389217138290405, "learning_rate": 2.7300000000000003e-05, "loss": 0.5573, "step": 820 }, { "action_loss": 0.020929694175720215, "epoch": 0.737410071942446, "step": 820 }, { "epoch": 0.737410071942446, "step": 820, "torque_loss": 0.8161070942878723 }, { "epoch": 0.7464028776978417, "grad_norm": 1.770845651626587, "learning_rate": 2.7633333333333332e-05, "loss": 0.551, "step": 830 }, { "action_loss": 0.029146989807486534, "epoch": 0.7464028776978417, "step": 830 }, { "epoch": 0.7464028776978417, "step": 830, "torque_loss": 0.7613658308982849 }, { "epoch": 0.7553956834532374, "grad_norm": 1.254033088684082, "learning_rate": 2.7966666666666668e-05, "loss": 0.5347, "step": 840 }, { "action_loss": 0.05798740312457085, "epoch": 0.7553956834532374, "step": 840 }, { "epoch": 0.7553956834532374, "step": 840, "torque_loss": 0.8369202613830566 }, { "epoch": 0.7643884892086331, "grad_norm": 1.243747353553772, "learning_rate": 2.83e-05, "loss": 0.534, "step": 850 }, { "action_loss": 0.059894103556871414, "epoch": 0.7643884892086331, "step": 850 }, { "epoch": 0.7643884892086331, "step": 850, "torque_loss": 0.7920210957527161 }, { "epoch": 0.7733812949640287, "grad_norm": 1.788097620010376, "learning_rate": 2.8633333333333336e-05, "loss": 0.5387, "step": 860 }, { "action_loss": 0.03971414268016815, "epoch": 0.7733812949640287, "step": 860 }, { "epoch": 0.7733812949640287, "step": 860, "torque_loss": 0.8520753979682922 }, { "epoch": 0.7823741007194245, "grad_norm": 1.3764604330062866, "learning_rate": 2.8966666666666668e-05, "loss": 0.5191, "step": 870 }, { "action_loss": 0.05784929171204567, "epoch": 0.7823741007194245, "step": 870 }, { "epoch": 0.7823741007194245, "step": 870, "torque_loss": 0.7419889569282532 }, { "epoch": 0.7913669064748201, "grad_norm": 1.7099690437316895, "learning_rate": 2.93e-05, "loss": 0.5002, "step": 880 }, { "action_loss": 0.05348843336105347, "epoch": 0.7913669064748201, "step": 880 }, { "epoch": 0.7913669064748201, "step": 880, "torque_loss": 0.7348904609680176 }, { "epoch": 0.8003597122302158, "grad_norm": 1.9816800355911255, "learning_rate": 2.9633333333333336e-05, "loss": 0.4825, "step": 890 }, { "action_loss": 0.02894936501979828, "epoch": 0.8003597122302158, "step": 890 }, { "epoch": 0.8003597122302158, "step": 890, "torque_loss": 0.6457857489585876 }, { "epoch": 0.8093525179856115, "grad_norm": 2.1590847969055176, "learning_rate": 2.9966666666666672e-05, "loss": 0.4717, "step": 900 }, { "action_loss": 0.07757400721311569, "epoch": 0.8093525179856115, "step": 900 }, { "epoch": 0.8093525179856115, "step": 900, "torque_loss": 0.7191212773323059 }, { "epoch": 0.8183453237410072, "grad_norm": 1.27239990234375, "learning_rate": 3.03e-05, "loss": 0.4694, "step": 910 }, { "action_loss": 0.05625969544053078, "epoch": 0.8183453237410072, "step": 910 }, { "epoch": 0.8183453237410072, "step": 910, "torque_loss": 0.6140779852867126 }, { "epoch": 0.8273381294964028, "grad_norm": 1.5078861713409424, "learning_rate": 3.063333333333334e-05, "loss": 0.4639, "step": 920 }, { "action_loss": 0.04621373116970062, "epoch": 0.8273381294964028, "step": 920 }, { "epoch": 0.8273381294964028, "step": 920, "torque_loss": 0.6801791191101074 }, { "epoch": 0.8363309352517986, "grad_norm": 1.6948964595794678, "learning_rate": 3.096666666666666e-05, "loss": 0.4745, "step": 930 }, { "action_loss": 0.02737528644502163, "epoch": 0.8363309352517986, "step": 930 }, { "epoch": 0.8363309352517986, "step": 930, "torque_loss": 0.6310020685195923 }, { "epoch": 0.8453237410071942, "grad_norm": 1.4358534812927246, "learning_rate": 3.13e-05, "loss": 0.4229, "step": 940 }, { "action_loss": 0.05302533507347107, "epoch": 0.8453237410071942, "step": 940 }, { "epoch": 0.8453237410071942, "step": 940, "torque_loss": 0.6118227243423462 }, { "epoch": 0.85431654676259, "grad_norm": 1.9527961015701294, "learning_rate": 3.1633333333333334e-05, "loss": 0.4121, "step": 950 }, { "action_loss": 0.05449235811829567, "epoch": 0.85431654676259, "step": 950 }, { "epoch": 0.85431654676259, "step": 950, "torque_loss": 0.6620091795921326 }, { "epoch": 0.8633093525179856, "grad_norm": 2.800851583480835, "learning_rate": 3.196666666666667e-05, "loss": 0.4108, "step": 960 }, { "action_loss": 0.037330277264118195, "epoch": 0.8633093525179856, "step": 960 }, { "epoch": 0.8633093525179856, "step": 960, "torque_loss": 0.6791467070579529 }, { "epoch": 0.8723021582733813, "grad_norm": 1.9069832563400269, "learning_rate": 3.2300000000000006e-05, "loss": 0.3863, "step": 970 }, { "action_loss": 0.034259162843227386, "epoch": 0.8723021582733813, "step": 970 }, { "epoch": 0.8723021582733813, "step": 970, "torque_loss": 0.4727899730205536 }, { "epoch": 0.8812949640287769, "grad_norm": 1.7734878063201904, "learning_rate": 3.263333333333333e-05, "loss": 0.3902, "step": 980 }, { "action_loss": 0.029537290334701538, "epoch": 0.8812949640287769, "step": 980 }, { "epoch": 0.8812949640287769, "step": 980, "torque_loss": 0.47530388832092285 }, { "epoch": 0.8902877697841727, "grad_norm": 1.350583791732788, "learning_rate": 3.296666666666667e-05, "loss": 0.3696, "step": 990 }, { "action_loss": 0.027830520644783974, "epoch": 0.8902877697841727, "step": 990 }, { "epoch": 0.8902877697841727, "step": 990, "torque_loss": 0.5462077260017395 }, { "epoch": 0.8992805755395683, "grad_norm": 1.6345831155776978, "learning_rate": 3.33e-05, "loss": 0.3773, "step": 1000 }, { "action_loss": 0.06700121611356735, "epoch": 0.8992805755395683, "step": 1000 }, { "epoch": 0.8992805755395683, "step": 1000, "torque_loss": 0.5600783228874207 }, { "epoch": 0.908273381294964, "grad_norm": 1.2257014513015747, "learning_rate": 3.3633333333333335e-05, "loss": 0.3941, "step": 1010 }, { "action_loss": 0.03146505355834961, "epoch": 0.908273381294964, "step": 1010 }, { "epoch": 0.908273381294964, "step": 1010, "torque_loss": 0.46373534202575684 }, { "epoch": 0.9172661870503597, "grad_norm": 2.255980968475342, "learning_rate": 3.396666666666667e-05, "loss": 0.3504, "step": 1020 }, { "action_loss": 0.06270324438810349, "epoch": 0.9172661870503597, "step": 1020 }, { "epoch": 0.9172661870503597, "step": 1020, "torque_loss": 0.5182711482048035 }, { "epoch": 0.9262589928057554, "grad_norm": 1.1929795742034912, "learning_rate": 3.430000000000001e-05, "loss": 0.3732, "step": 1030 }, { "action_loss": 0.02586393989622593, "epoch": 0.9262589928057554, "step": 1030 }, { "epoch": 0.9262589928057554, "step": 1030, "torque_loss": 0.5341915488243103 }, { "epoch": 0.935251798561151, "grad_norm": 1.3577488660812378, "learning_rate": 3.463333333333333e-05, "loss": 0.3652, "step": 1040 }, { "action_loss": 0.03985318914055824, "epoch": 0.935251798561151, "step": 1040 }, { "epoch": 0.935251798561151, "step": 1040, "torque_loss": 0.5703646540641785 }, { "epoch": 0.9442446043165468, "grad_norm": 1.2191908359527588, "learning_rate": 3.496666666666667e-05, "loss": 0.3694, "step": 1050 }, { "action_loss": 0.054227229207754135, "epoch": 0.9442446043165468, "step": 1050 }, { "epoch": 0.9442446043165468, "step": 1050, "torque_loss": 0.5273457169532776 }, { "epoch": 0.9532374100719424, "grad_norm": 1.700364351272583, "learning_rate": 3.53e-05, "loss": 0.3783, "step": 1060 }, { "action_loss": 0.03287883475422859, "epoch": 0.9532374100719424, "step": 1060 }, { "epoch": 0.9532374100719424, "step": 1060, "torque_loss": 0.4685949385166168 }, { "epoch": 0.9622302158273381, "grad_norm": 1.4304977655410767, "learning_rate": 3.563333333333334e-05, "loss": 0.3365, "step": 1070 }, { "action_loss": 0.055376071482896805, "epoch": 0.9622302158273381, "step": 1070 }, { "epoch": 0.9622302158273381, "step": 1070, "torque_loss": 0.5627819895744324 }, { "epoch": 0.9712230215827338, "grad_norm": 1.5578420162200928, "learning_rate": 3.596666666666667e-05, "loss": 0.3618, "step": 1080 }, { "action_loss": 0.02446863241493702, "epoch": 0.9712230215827338, "step": 1080 }, { "epoch": 0.9712230215827338, "step": 1080, "torque_loss": 0.45511510968208313 }, { "epoch": 0.9802158273381295, "grad_norm": 1.9137933254241943, "learning_rate": 3.63e-05, "loss": 0.3342, "step": 1090 }, { "action_loss": 0.055748242884874344, "epoch": 0.9802158273381295, "step": 1090 }, { "epoch": 0.9802158273381295, "step": 1090, "torque_loss": 0.516370415687561 }, { "epoch": 0.9892086330935251, "grad_norm": 2.237147331237793, "learning_rate": 3.6633333333333334e-05, "loss": 0.2912, "step": 1100 }, { "action_loss": 0.060275573283433914, "epoch": 0.9892086330935251, "step": 1100 }, { "epoch": 0.9892086330935251, "step": 1100, "torque_loss": 0.48215457797050476 }, { "epoch": 0.9982014388489209, "grad_norm": 1.8865132331848145, "learning_rate": 3.6966666666666666e-05, "loss": 0.2759, "step": 1110 }, { "action_loss": 0.03896287828683853, "epoch": 0.9982014388489209, "step": 1110 }, { "epoch": 0.9982014388489209, "step": 1110, "torque_loss": 0.38514623045921326 }, { "epoch": 1.0071942446043165, "grad_norm": 1.7204033136367798, "learning_rate": 3.73e-05, "loss": 0.2459, "step": 1120 }, { "action_loss": 0.054887790232896805, "epoch": 1.0071942446043165, "step": 1120 }, { "epoch": 1.0071942446043165, "step": 1120, "torque_loss": 0.4213196337223053 }, { "epoch": 1.0161870503597121, "grad_norm": 2.1309096813201904, "learning_rate": 3.763333333333334e-05, "loss": 0.239, "step": 1130 }, { "action_loss": 0.026898035779595375, "epoch": 1.0161870503597121, "step": 1130 }, { "epoch": 1.0161870503597121, "step": 1130, "torque_loss": 0.2522331476211548 }, { "epoch": 1.025179856115108, "grad_norm": 2.1035940647125244, "learning_rate": 3.796666666666667e-05, "loss": 0.2141, "step": 1140 }, { "action_loss": 0.05629356577992439, "epoch": 1.025179856115108, "step": 1140 }, { "epoch": 1.025179856115108, "step": 1140, "torque_loss": 0.4360061585903168 }, { "epoch": 1.0341726618705036, "grad_norm": 3.730388641357422, "learning_rate": 3.83e-05, "loss": 0.2251, "step": 1150 }, { "action_loss": 0.03564196825027466, "epoch": 1.0341726618705036, "step": 1150 }, { "epoch": 1.0341726618705036, "step": 1150, "torque_loss": 0.3083765208721161 }, { "epoch": 1.0431654676258992, "grad_norm": 2.3928487300872803, "learning_rate": 3.8633333333333335e-05, "loss": 0.2054, "step": 1160 }, { "action_loss": 0.04846004769206047, "epoch": 1.0431654676258992, "step": 1160 }, { "epoch": 1.0431654676258992, "step": 1160, "torque_loss": 0.30115827918052673 }, { "epoch": 1.0521582733812949, "grad_norm": 1.6622577905654907, "learning_rate": 3.896666666666667e-05, "loss": 0.2448, "step": 1170 }, { "action_loss": 0.05029189586639404, "epoch": 1.0521582733812949, "step": 1170 }, { "epoch": 1.0521582733812949, "step": 1170, "torque_loss": 0.33787909150123596 }, { "epoch": 1.0611510791366907, "grad_norm": 2.1903748512268066, "learning_rate": 3.9300000000000007e-05, "loss": 0.2333, "step": 1180 }, { "action_loss": 0.053037893027067184, "epoch": 1.0611510791366907, "step": 1180 }, { "epoch": 1.0611510791366907, "step": 1180, "torque_loss": 0.29551196098327637 }, { "epoch": 1.0701438848920863, "grad_norm": 1.7137657403945923, "learning_rate": 3.963333333333333e-05, "loss": 0.2071, "step": 1190 }, { "action_loss": 0.06009425222873688, "epoch": 1.0701438848920863, "step": 1190 }, { "epoch": 1.0701438848920863, "step": 1190, "torque_loss": 0.3259294331073761 }, { "epoch": 1.079136690647482, "grad_norm": 1.088654637336731, "learning_rate": 3.996666666666667e-05, "loss": 0.2118, "step": 1200 }, { "action_loss": 0.032147180289030075, "epoch": 1.079136690647482, "step": 1200 }, { "epoch": 1.079136690647482, "step": 1200, "torque_loss": 0.18672464787960052 }, { "epoch": 1.0881294964028776, "grad_norm": 1.6597812175750732, "learning_rate": 4.0300000000000004e-05, "loss": 0.2366, "step": 1210 }, { "action_loss": 0.05867820605635643, "epoch": 1.0881294964028776, "step": 1210 }, { "epoch": 1.0881294964028776, "step": 1210, "torque_loss": 0.3271464705467224 }, { "epoch": 1.0971223021582734, "grad_norm": 2.328322410583496, "learning_rate": 4.0633333333333336e-05, "loss": 0.2363, "step": 1220 }, { "action_loss": 0.04877522215247154, "epoch": 1.0971223021582734, "step": 1220 }, { "epoch": 1.0971223021582734, "step": 1220, "torque_loss": 0.34894898533821106 }, { "epoch": 1.106115107913669, "grad_norm": 0.8066007494926453, "learning_rate": 4.096666666666667e-05, "loss": 0.2057, "step": 1230 }, { "action_loss": 0.0323597714304924, "epoch": 1.106115107913669, "step": 1230 }, { "epoch": 1.106115107913669, "step": 1230, "torque_loss": 0.24356679618358612 }, { "epoch": 1.1151079136690647, "grad_norm": 1.0903337001800537, "learning_rate": 4.13e-05, "loss": 0.2004, "step": 1240 }, { "action_loss": 0.0677773654460907, "epoch": 1.1151079136690647, "step": 1240 }, { "epoch": 1.1151079136690647, "step": 1240, "torque_loss": 0.29066458344459534 }, { "epoch": 1.1241007194244603, "grad_norm": 1.604366660118103, "learning_rate": 4.1633333333333333e-05, "loss": 0.1936, "step": 1250 }, { "action_loss": 0.03668946027755737, "epoch": 1.1241007194244603, "step": 1250 }, { "epoch": 1.1241007194244603, "step": 1250, "torque_loss": 0.21138960123062134 }, { "epoch": 1.1330935251798562, "grad_norm": 1.8731218576431274, "learning_rate": 4.196666666666667e-05, "loss": 0.1847, "step": 1260 }, { "action_loss": 0.044336676597595215, "epoch": 1.1330935251798562, "step": 1260 }, { "epoch": 1.1330935251798562, "step": 1260, "torque_loss": 0.18436622619628906 }, { "epoch": 1.1420863309352518, "grad_norm": 1.7642568349838257, "learning_rate": 4.23e-05, "loss": 0.1751, "step": 1270 }, { "action_loss": 0.0641457661986351, "epoch": 1.1420863309352518, "step": 1270 }, { "epoch": 1.1420863309352518, "step": 1270, "torque_loss": 0.3361654281616211 }, { "epoch": 1.1510791366906474, "grad_norm": 1.336024284362793, "learning_rate": 4.263333333333334e-05, "loss": 0.207, "step": 1280 }, { "action_loss": 0.025867722928524017, "epoch": 1.1510791366906474, "step": 1280 }, { "epoch": 1.1510791366906474, "step": 1280, "torque_loss": 0.2318500280380249 }, { "epoch": 1.1600719424460433, "grad_norm": 1.1879180669784546, "learning_rate": 4.296666666666666e-05, "loss": 0.2091, "step": 1290 }, { "action_loss": 0.04849165678024292, "epoch": 1.1600719424460433, "step": 1290 }, { "epoch": 1.1600719424460433, "step": 1290, "torque_loss": 0.2995452582836151 }, { "epoch": 1.169064748201439, "grad_norm": 1.7940224409103394, "learning_rate": 4.33e-05, "loss": 0.2089, "step": 1300 }, { "action_loss": 0.02219189517199993, "epoch": 1.169064748201439, "step": 1300 }, { "epoch": 1.169064748201439, "step": 1300, "torque_loss": 0.20711804926395416 }, { "epoch": 1.1780575539568345, "grad_norm": 1.5748834609985352, "learning_rate": 4.3633333333333335e-05, "loss": 0.1926, "step": 1310 }, { "action_loss": 0.0565982460975647, "epoch": 1.1780575539568345, "step": 1310 }, { "epoch": 1.1780575539568345, "step": 1310, "torque_loss": 0.26344195008277893 }, { "epoch": 1.1870503597122302, "grad_norm": 1.2831159830093384, "learning_rate": 4.396666666666667e-05, "loss": 0.2149, "step": 1320 }, { "action_loss": 0.04810456559062004, "epoch": 1.1870503597122302, "step": 1320 }, { "epoch": 1.1870503597122302, "step": 1320, "torque_loss": 0.28174030780792236 }, { "epoch": 1.1960431654676258, "grad_norm": 1.3472036123275757, "learning_rate": 4.43e-05, "loss": 0.2051, "step": 1330 }, { "action_loss": 0.03747788071632385, "epoch": 1.1960431654676258, "step": 1330 }, { "epoch": 1.1960431654676258, "step": 1330, "torque_loss": 0.22272752225399017 }, { "epoch": 1.2050359712230216, "grad_norm": 1.5106734037399292, "learning_rate": 4.463333333333334e-05, "loss": 0.2429, "step": 1340 }, { "action_loss": 0.04182146117091179, "epoch": 1.2050359712230216, "step": 1340 }, { "epoch": 1.2050359712230216, "step": 1340, "torque_loss": 0.26510393619537354 }, { "epoch": 1.2140287769784173, "grad_norm": 1.3023890256881714, "learning_rate": 4.496666666666667e-05, "loss": 0.1991, "step": 1350 }, { "action_loss": 0.05001044645905495, "epoch": 1.2140287769784173, "step": 1350 }, { "epoch": 1.2140287769784173, "step": 1350, "torque_loss": 0.24331529438495636 }, { "epoch": 1.223021582733813, "grad_norm": 1.6315251588821411, "learning_rate": 4.53e-05, "loss": 0.23, "step": 1360 }, { "action_loss": 0.033759232610464096, "epoch": 1.223021582733813, "step": 1360 }, { "epoch": 1.223021582733813, "step": 1360, "torque_loss": 0.1999327689409256 }, { "epoch": 1.2320143884892087, "grad_norm": 1.6175017356872559, "learning_rate": 4.5633333333333336e-05, "loss": 0.198, "step": 1370 }, { "action_loss": 0.04159950837492943, "epoch": 1.2320143884892087, "step": 1370 }, { "epoch": 1.2320143884892087, "step": 1370, "torque_loss": 0.1472519040107727 }, { "epoch": 1.2410071942446044, "grad_norm": 2.2657501697540283, "learning_rate": 4.596666666666667e-05, "loss": 0.2003, "step": 1380 }, { "action_loss": 0.03667006269097328, "epoch": 1.2410071942446044, "step": 1380 }, { "epoch": 1.2410071942446044, "step": 1380, "torque_loss": 0.2072998732328415 }, { "epoch": 1.25, "grad_norm": 1.7759621143341064, "learning_rate": 4.630000000000001e-05, "loss": 0.1993, "step": 1390 }, { "action_loss": 0.03233005106449127, "epoch": 1.25, "step": 1390 }, { "epoch": 1.25, "step": 1390, "torque_loss": 0.1902974247932434 }, { "epoch": 1.2589928057553956, "grad_norm": 1.433287501335144, "learning_rate": 4.663333333333333e-05, "loss": 0.198, "step": 1400 }, { "action_loss": 0.03840872272849083, "epoch": 1.2589928057553956, "step": 1400 }, { "epoch": 1.2589928057553956, "step": 1400, "torque_loss": 0.1836148053407669 }, { "epoch": 1.2679856115107913, "grad_norm": 1.4578485488891602, "learning_rate": 4.696666666666667e-05, "loss": 0.2316, "step": 1410 }, { "action_loss": 0.0493522547185421, "epoch": 1.2679856115107913, "step": 1410 }, { "epoch": 1.2679856115107913, "step": 1410, "torque_loss": 0.27771809697151184 }, { "epoch": 1.276978417266187, "grad_norm": 1.4004695415496826, "learning_rate": 4.73e-05, "loss": 0.2089, "step": 1420 }, { "action_loss": 0.07767688482999802, "epoch": 1.276978417266187, "step": 1420 }, { "epoch": 1.276978417266187, "step": 1420, "torque_loss": 0.4274906814098358 }, { "epoch": 1.2859712230215827, "grad_norm": 2.3029985427856445, "learning_rate": 4.763333333333334e-05, "loss": 0.225, "step": 1430 }, { "action_loss": 0.07624570280313492, "epoch": 1.2859712230215827, "step": 1430 }, { "epoch": 1.2859712230215827, "step": 1430, "torque_loss": 0.30630865693092346 }, { "epoch": 1.2949640287769784, "grad_norm": 2.743070602416992, "learning_rate": 4.796666666666667e-05, "loss": 0.1924, "step": 1440 }, { "action_loss": 0.05436474457383156, "epoch": 1.2949640287769784, "step": 1440 }, { "epoch": 1.2949640287769784, "step": 1440, "torque_loss": 0.32865357398986816 }, { "epoch": 1.3039568345323742, "grad_norm": 0.9835125803947449, "learning_rate": 4.83e-05, "loss": 0.2012, "step": 1450 }, { "action_loss": 0.05473802983760834, "epoch": 1.3039568345323742, "step": 1450 }, { "epoch": 1.3039568345323742, "step": 1450, "torque_loss": 0.2832389175891876 }, { "epoch": 1.3129496402877698, "grad_norm": 2.2664992809295654, "learning_rate": 4.8633333333333334e-05, "loss": 0.1989, "step": 1460 }, { "action_loss": 0.027439633384346962, "epoch": 1.3129496402877698, "step": 1460 }, { "epoch": 1.3129496402877698, "step": 1460, "torque_loss": 0.18204765021800995 }, { "epoch": 1.3219424460431655, "grad_norm": 1.500249981880188, "learning_rate": 4.8966666666666667e-05, "loss": 0.177, "step": 1470 }, { "action_loss": 0.051999881863594055, "epoch": 1.3219424460431655, "step": 1470 }, { "epoch": 1.3219424460431655, "step": 1470, "torque_loss": 0.23033757507801056 }, { "epoch": 1.330935251798561, "grad_norm": 1.5382485389709473, "learning_rate": 4.93e-05, "loss": 0.1798, "step": 1480 }, { "action_loss": 0.02041007950901985, "epoch": 1.330935251798561, "step": 1480 }, { "epoch": 1.330935251798561, "step": 1480, "torque_loss": 0.19124417006969452 }, { "epoch": 1.3399280575539567, "grad_norm": 1.6923507452011108, "learning_rate": 4.963333333333334e-05, "loss": 0.1834, "step": 1490 }, { "action_loss": 0.0513102225959301, "epoch": 1.3399280575539567, "step": 1490 }, { "epoch": 1.3399280575539567, "step": 1490, "torque_loss": 0.24583090841770172 }, { "epoch": 1.3489208633093526, "grad_norm": 1.2972358465194702, "learning_rate": 4.996666666666667e-05, "loss": 0.2039, "step": 1500 }, { "action_loss": 0.03534022346138954, "epoch": 1.3489208633093526, "step": 1500 }, { "epoch": 1.3489208633093526, "step": 1500, "torque_loss": 0.23883770406246185 }, { "epoch": 1.3579136690647482, "grad_norm": 0.7992076277732849, "learning_rate": 5.03e-05, "loss": 0.1595, "step": 1510 }, { "action_loss": 0.056145694106817245, "epoch": 1.3579136690647482, "step": 1510 }, { "epoch": 1.3579136690647482, "step": 1510, "torque_loss": 0.31424859166145325 }, { "epoch": 1.3669064748201438, "grad_norm": 1.9746333360671997, "learning_rate": 5.0633333333333335e-05, "loss": 0.1943, "step": 1520 }, { "action_loss": 0.03471263125538826, "epoch": 1.3669064748201438, "step": 1520 }, { "epoch": 1.3669064748201438, "step": 1520, "torque_loss": 0.2010846883058548 }, { "epoch": 1.3758992805755397, "grad_norm": 1.0390357971191406, "learning_rate": 5.0966666666666674e-05, "loss": 0.2047, "step": 1530 }, { "action_loss": 0.03669046238064766, "epoch": 1.3758992805755397, "step": 1530 }, { "epoch": 1.3758992805755397, "step": 1530, "torque_loss": 0.22810983657836914 }, { "epoch": 1.3848920863309353, "grad_norm": 1.1564204692840576, "learning_rate": 5.130000000000001e-05, "loss": 0.1848, "step": 1540 }, { "action_loss": 0.02542993612587452, "epoch": 1.3848920863309353, "step": 1540 }, { "epoch": 1.3848920863309353, "step": 1540, "torque_loss": 0.22431211173534393 }, { "epoch": 1.393884892086331, "grad_norm": 1.3474466800689697, "learning_rate": 5.163333333333333e-05, "loss": 0.2154, "step": 1550 }, { "action_loss": 0.04140591248869896, "epoch": 1.393884892086331, "step": 1550 }, { "epoch": 1.393884892086331, "step": 1550, "torque_loss": 0.24919819831848145 }, { "epoch": 1.4028776978417266, "grad_norm": 1.3492481708526611, "learning_rate": 5.196666666666667e-05, "loss": 0.172, "step": 1560 }, { "action_loss": 0.04144539684057236, "epoch": 1.4028776978417266, "step": 1560 }, { "epoch": 1.4028776978417266, "step": 1560, "torque_loss": 0.19199323654174805 }, { "epoch": 1.4118705035971222, "grad_norm": 1.5253697633743286, "learning_rate": 5.2300000000000004e-05, "loss": 0.186, "step": 1570 }, { "action_loss": 0.04017492011189461, "epoch": 1.4118705035971222, "step": 1570 }, { "epoch": 1.4118705035971222, "step": 1570, "torque_loss": 0.18769705295562744 }, { "epoch": 1.420863309352518, "grad_norm": 1.7465101480484009, "learning_rate": 5.2633333333333336e-05, "loss": 0.1947, "step": 1580 }, { "action_loss": 0.027137726545333862, "epoch": 1.420863309352518, "step": 1580 }, { "epoch": 1.420863309352518, "step": 1580, "torque_loss": 0.2069772630929947 }, { "epoch": 1.4298561151079137, "grad_norm": 1.9963868856430054, "learning_rate": 5.296666666666666e-05, "loss": 0.1886, "step": 1590 }, { "action_loss": 0.033583179116249084, "epoch": 1.4298561151079137, "step": 1590 }, { "epoch": 1.4298561151079137, "step": 1590, "torque_loss": 0.2676877975463867 }, { "epoch": 1.4388489208633093, "grad_norm": 1.4588085412979126, "learning_rate": 5.330000000000001e-05, "loss": 0.1855, "step": 1600 }, { "action_loss": 0.036470960825681686, "epoch": 1.4388489208633093, "step": 1600 }, { "epoch": 1.4388489208633093, "step": 1600, "torque_loss": 0.28059422969818115 }, { "epoch": 1.4478417266187051, "grad_norm": 1.7007945775985718, "learning_rate": 5.3633333333333334e-05, "loss": 0.2131, "step": 1610 }, { "action_loss": 0.06419728696346283, "epoch": 1.4478417266187051, "step": 1610 }, { "epoch": 1.4478417266187051, "step": 1610, "torque_loss": 0.22035379707813263 }, { "epoch": 1.4568345323741008, "grad_norm": 1.0284358263015747, "learning_rate": 5.3966666666666666e-05, "loss": 0.2027, "step": 1620 }, { "action_loss": 0.04524751007556915, "epoch": 1.4568345323741008, "step": 1620 }, { "epoch": 1.4568345323741008, "step": 1620, "torque_loss": 0.2338164895772934 }, { "epoch": 1.4658273381294964, "grad_norm": 1.2820512056350708, "learning_rate": 5.4300000000000005e-05, "loss": 0.1989, "step": 1630 }, { "action_loss": 0.037725020200014114, "epoch": 1.4658273381294964, "step": 1630 }, { "epoch": 1.4658273381294964, "step": 1630, "torque_loss": 0.17981772124767303 }, { "epoch": 1.474820143884892, "grad_norm": 1.3845694065093994, "learning_rate": 5.463333333333334e-05, "loss": 0.1961, "step": 1640 }, { "action_loss": 0.0403883196413517, "epoch": 1.474820143884892, "step": 1640 }, { "epoch": 1.474820143884892, "step": 1640, "torque_loss": 0.2424883395433426 }, { "epoch": 1.4838129496402876, "grad_norm": 0.9319870471954346, "learning_rate": 5.496666666666666e-05, "loss": 0.1828, "step": 1650 }, { "action_loss": 0.028459584340453148, "epoch": 1.4838129496402876, "step": 1650 }, { "epoch": 1.4838129496402876, "step": 1650, "torque_loss": 0.1869153529405594 }, { "epoch": 1.4928057553956835, "grad_norm": 1.3589965105056763, "learning_rate": 5.530000000000001e-05, "loss": 0.2126, "step": 1660 }, { "action_loss": 0.03954601660370827, "epoch": 1.4928057553956835, "step": 1660 }, { "epoch": 1.4928057553956835, "step": 1660, "torque_loss": 0.20188188552856445 }, { "epoch": 1.5017985611510791, "grad_norm": 1.3298144340515137, "learning_rate": 5.5633333333333335e-05, "loss": 0.1823, "step": 1670 }, { "action_loss": 0.06515335291624069, "epoch": 1.5017985611510791, "step": 1670 }, { "epoch": 1.5017985611510791, "step": 1670, "torque_loss": 0.2163563221693039 }, { "epoch": 1.5107913669064748, "grad_norm": 1.1110050678253174, "learning_rate": 5.596666666666667e-05, "loss": 0.1885, "step": 1680 }, { "action_loss": 0.029764940962195396, "epoch": 1.5107913669064748, "step": 1680 }, { "epoch": 1.5107913669064748, "step": 1680, "torque_loss": 0.19621829688549042 }, { "epoch": 1.5197841726618706, "grad_norm": 1.2508468627929688, "learning_rate": 5.63e-05, "loss": 0.2143, "step": 1690 }, { "action_loss": 0.02922545187175274, "epoch": 1.5197841726618706, "step": 1690 }, { "epoch": 1.5197841726618706, "step": 1690, "torque_loss": 0.18200482428073883 }, { "epoch": 1.5287769784172662, "grad_norm": 1.5846954584121704, "learning_rate": 5.663333333333334e-05, "loss": 0.2063, "step": 1700 }, { "action_loss": 0.04093484953045845, "epoch": 1.5287769784172662, "step": 1700 }, { "epoch": 1.5287769784172662, "step": 1700, "torque_loss": 0.24212555587291718 }, { "epoch": 1.5377697841726619, "grad_norm": 1.1507614850997925, "learning_rate": 5.696666666666667e-05, "loss": 0.1931, "step": 1710 }, { "action_loss": 0.04107213020324707, "epoch": 1.5377697841726619, "step": 1710 }, { "epoch": 1.5377697841726619, "step": 1710, "torque_loss": 0.24990685284137726 }, { "epoch": 1.5467625899280577, "grad_norm": 1.482911467552185, "learning_rate": 5.73e-05, "loss": 0.216, "step": 1720 }, { "action_loss": 0.03748412802815437, "epoch": 1.5467625899280577, "step": 1720 }, { "epoch": 1.5467625899280577, "step": 1720, "torque_loss": 0.1568470001220703 }, { "epoch": 1.5557553956834531, "grad_norm": 1.1974776983261108, "learning_rate": 5.7633333333333336e-05, "loss": 0.1717, "step": 1730 }, { "action_loss": 0.0663800835609436, "epoch": 1.5557553956834531, "step": 1730 }, { "epoch": 1.5557553956834531, "step": 1730, "torque_loss": 0.45275673270225525 }, { "epoch": 1.564748201438849, "grad_norm": 1.324669361114502, "learning_rate": 5.796666666666667e-05, "loss": 0.2118, "step": 1740 }, { "action_loss": 0.04940791428089142, "epoch": 1.564748201438849, "step": 1740 }, { "epoch": 1.564748201438849, "step": 1740, "torque_loss": 0.2667076289653778 }, { "epoch": 1.5737410071942446, "grad_norm": 1.290582537651062, "learning_rate": 5.83e-05, "loss": 0.1892, "step": 1750 }, { "action_loss": 0.042719125747680664, "epoch": 1.5737410071942446, "step": 1750 }, { "epoch": 1.5737410071942446, "step": 1750, "torque_loss": 0.2376115322113037 }, { "epoch": 1.5827338129496402, "grad_norm": 1.2932226657867432, "learning_rate": 5.863333333333334e-05, "loss": 0.1903, "step": 1760 }, { "action_loss": 0.03939172253012657, "epoch": 1.5827338129496402, "step": 1760 }, { "epoch": 1.5827338129496402, "step": 1760, "torque_loss": 0.2828308641910553 }, { "epoch": 1.591726618705036, "grad_norm": 1.4118731021881104, "learning_rate": 5.896666666666667e-05, "loss": 0.1976, "step": 1770 }, { "action_loss": 0.02695036679506302, "epoch": 1.591726618705036, "step": 1770 }, { "epoch": 1.591726618705036, "step": 1770, "torque_loss": 0.2069142609834671 }, { "epoch": 1.6007194244604317, "grad_norm": 1.9598239660263062, "learning_rate": 5.93e-05, "loss": 0.163, "step": 1780 }, { "action_loss": 0.03470180556178093, "epoch": 1.6007194244604317, "step": 1780 }, { "epoch": 1.6007194244604317, "step": 1780, "torque_loss": 0.24922461807727814 }, { "epoch": 1.6097122302158273, "grad_norm": 1.0525918006896973, "learning_rate": 5.9633333333333344e-05, "loss": 0.2214, "step": 1790 }, { "action_loss": 0.02089432068169117, "epoch": 1.6097122302158273, "step": 1790 }, { "epoch": 1.6097122302158273, "step": 1790, "torque_loss": 0.2205437570810318 }, { "epoch": 1.6187050359712232, "grad_norm": 1.3010022640228271, "learning_rate": 5.996666666666667e-05, "loss": 0.1979, "step": 1800 }, { "action_loss": 0.025884171947836876, "epoch": 1.6187050359712232, "step": 1800 }, { "epoch": 1.6187050359712232, "step": 1800, "torque_loss": 0.23913271725177765 }, { "epoch": 1.6276978417266186, "grad_norm": 1.062484622001648, "learning_rate": 6.03e-05, "loss": 0.1871, "step": 1810 }, { "action_loss": 0.03451920673251152, "epoch": 1.6276978417266186, "step": 1810 }, { "epoch": 1.6276978417266186, "step": 1810, "torque_loss": 0.18789173662662506 }, { "epoch": 1.6366906474820144, "grad_norm": 1.412408471107483, "learning_rate": 6.063333333333333e-05, "loss": 0.1683, "step": 1820 }, { "action_loss": 0.028138145804405212, "epoch": 1.6366906474820144, "step": 1820 }, { "epoch": 1.6366906474820144, "step": 1820, "torque_loss": 0.21203310787677765 }, { "epoch": 1.64568345323741, "grad_norm": 1.0366275310516357, "learning_rate": 6.0966666666666674e-05, "loss": 0.1943, "step": 1830 }, { "action_loss": 0.04788921773433685, "epoch": 1.64568345323741, "step": 1830 }, { "epoch": 1.64568345323741, "step": 1830, "torque_loss": 0.22789700329303741 }, { "epoch": 1.6546762589928057, "grad_norm": 0.9584492444992065, "learning_rate": 6.13e-05, "loss": 0.2058, "step": 1840 }, { "action_loss": 0.033312465995550156, "epoch": 1.6546762589928057, "step": 1840 }, { "epoch": 1.6546762589928057, "step": 1840, "torque_loss": 0.23749573528766632 }, { "epoch": 1.6636690647482015, "grad_norm": 1.1597014665603638, "learning_rate": 6.163333333333333e-05, "loss": 0.1749, "step": 1850 }, { "action_loss": 0.028527170419692993, "epoch": 1.6636690647482015, "step": 1850 }, { "epoch": 1.6636690647482015, "step": 1850, "torque_loss": 0.28533241152763367 }, { "epoch": 1.6726618705035972, "grad_norm": 1.245605707168579, "learning_rate": 6.196666666666668e-05, "loss": 0.187, "step": 1860 }, { "action_loss": 0.039489325135946274, "epoch": 1.6726618705035972, "step": 1860 }, { "epoch": 1.6726618705035972, "step": 1860, "torque_loss": 0.2051321268081665 }, { "epoch": 1.6816546762589928, "grad_norm": 1.4763439893722534, "learning_rate": 6.23e-05, "loss": 0.1738, "step": 1870 }, { "action_loss": 0.029935233294963837, "epoch": 1.6816546762589928, "step": 1870 }, { "epoch": 1.6816546762589928, "step": 1870, "torque_loss": 0.3341348171234131 }, { "epoch": 1.6906474820143886, "grad_norm": 1.1533327102661133, "learning_rate": 6.263333333333333e-05, "loss": 0.1801, "step": 1880 }, { "action_loss": 0.02751818299293518, "epoch": 1.6906474820143886, "step": 1880 }, { "epoch": 1.6906474820143886, "step": 1880, "torque_loss": 0.16865409910678864 }, { "epoch": 1.699640287769784, "grad_norm": 1.0709233283996582, "learning_rate": 6.296666666666667e-05, "loss": 0.2152, "step": 1890 }, { "action_loss": 0.04189513996243477, "epoch": 1.699640287769784, "step": 1890 }, { "epoch": 1.699640287769784, "step": 1890, "torque_loss": 0.2824627161026001 }, { "epoch": 1.70863309352518, "grad_norm": 1.1156259775161743, "learning_rate": 6.330000000000001e-05, "loss": 0.1972, "step": 1900 }, { "action_loss": 0.036163460463285446, "epoch": 1.70863309352518, "step": 1900 }, { "epoch": 1.70863309352518, "step": 1900, "torque_loss": 0.22062461078166962 }, { "epoch": 1.7176258992805755, "grad_norm": 0.9872162342071533, "learning_rate": 6.363333333333334e-05, "loss": 0.1793, "step": 1910 }, { "action_loss": 0.02875983715057373, "epoch": 1.7176258992805755, "step": 1910 }, { "epoch": 1.7176258992805755, "step": 1910, "torque_loss": 0.19125783443450928 }, { "epoch": 1.7266187050359711, "grad_norm": 1.3411977291107178, "learning_rate": 6.396666666666667e-05, "loss": 0.1654, "step": 1920 }, { "action_loss": 0.07741799205541611, "epoch": 1.7266187050359711, "step": 1920 }, { "epoch": 1.7266187050359711, "step": 1920, "torque_loss": 0.2763902246952057 }, { "epoch": 1.735611510791367, "grad_norm": 1.1523159742355347, "learning_rate": 6.43e-05, "loss": 0.1918, "step": 1930 }, { "action_loss": 0.05739777162671089, "epoch": 1.735611510791367, "step": 1930 }, { "epoch": 1.735611510791367, "step": 1930, "torque_loss": 0.2693094313144684 }, { "epoch": 1.7446043165467626, "grad_norm": 1.256042242050171, "learning_rate": 6.463333333333334e-05, "loss": 0.1646, "step": 1940 }, { "action_loss": 0.03888488933444023, "epoch": 1.7446043165467626, "step": 1940 }, { "epoch": 1.7446043165467626, "step": 1940, "torque_loss": 0.32551199197769165 }, { "epoch": 1.7535971223021583, "grad_norm": 1.3437511920928955, "learning_rate": 6.496666666666667e-05, "loss": 0.2012, "step": 1950 }, { "action_loss": 0.046645790338516235, "epoch": 1.7535971223021583, "step": 1950 }, { "epoch": 1.7535971223021583, "step": 1950, "torque_loss": 0.2841830551624298 }, { "epoch": 1.762589928057554, "grad_norm": 1.489492416381836, "learning_rate": 6.53e-05, "loss": 0.187, "step": 1960 }, { "action_loss": 0.030797457322478294, "epoch": 1.762589928057554, "step": 1960 }, { "epoch": 1.762589928057554, "step": 1960, "torque_loss": 0.170052170753479 }, { "epoch": 1.7715827338129495, "grad_norm": 1.1059294939041138, "learning_rate": 6.563333333333333e-05, "loss": 0.2005, "step": 1970 }, { "action_loss": 0.06381866335868835, "epoch": 1.7715827338129495, "step": 1970 }, { "epoch": 1.7715827338129495, "step": 1970, "torque_loss": 0.2972308099269867 }, { "epoch": 1.7805755395683454, "grad_norm": 0.9871699213981628, "learning_rate": 6.596666666666667e-05, "loss": 0.1858, "step": 1980 }, { "action_loss": 0.028540687635540962, "epoch": 1.7805755395683454, "step": 1980 }, { "epoch": 1.7805755395683454, "step": 1980, "torque_loss": 0.18525183200836182 }, { "epoch": 1.789568345323741, "grad_norm": 1.0717177391052246, "learning_rate": 6.630000000000001e-05, "loss": 0.1863, "step": 1990 }, { "action_loss": 0.01802976243197918, "epoch": 1.789568345323741, "step": 1990 }, { "epoch": 1.789568345323741, "step": 1990, "torque_loss": 0.19889824092388153 }, { "epoch": 1.7985611510791366, "grad_norm": 0.8776652812957764, "learning_rate": 6.663333333333333e-05, "loss": 0.1904, "step": 2000 }, { "action_loss": 0.03754677623510361, "epoch": 1.7985611510791366, "step": 2000 }, { "epoch": 1.7985611510791366, "step": 2000, "torque_loss": 0.22252561151981354 }, { "epoch": 1.8075539568345325, "grad_norm": 1.4249894618988037, "learning_rate": 6.696666666666666e-05, "loss": 0.1701, "step": 2010 }, { "action_loss": 0.04412977024912834, "epoch": 1.8075539568345325, "step": 2010 }, { "epoch": 1.8075539568345325, "step": 2010, "torque_loss": 0.22114042937755585 }, { "epoch": 1.816546762589928, "grad_norm": 0.8910714983940125, "learning_rate": 6.730000000000001e-05, "loss": 0.1718, "step": 2020 }, { "action_loss": 0.03859308362007141, "epoch": 1.816546762589928, "step": 2020 }, { "epoch": 1.816546762589928, "step": 2020, "torque_loss": 0.29290032386779785 }, { "epoch": 1.8255395683453237, "grad_norm": 0.8825613260269165, "learning_rate": 6.763333333333334e-05, "loss": 0.1924, "step": 2030 }, { "action_loss": 0.030887776985764503, "epoch": 1.8255395683453237, "step": 2030 }, { "epoch": 1.8255395683453237, "step": 2030, "torque_loss": 0.273565411567688 }, { "epoch": 1.8345323741007196, "grad_norm": 1.0531922578811646, "learning_rate": 6.796666666666666e-05, "loss": 0.1729, "step": 2040 }, { "action_loss": 0.039413902908563614, "epoch": 1.8345323741007196, "step": 2040 }, { "epoch": 1.8345323741007196, "step": 2040, "torque_loss": 0.24809543788433075 }, { "epoch": 1.843525179856115, "grad_norm": 0.7149152755737305, "learning_rate": 6.83e-05, "loss": 0.1755, "step": 2050 }, { "action_loss": 0.01792014203965664, "epoch": 1.843525179856115, "step": 2050 }, { "epoch": 1.843525179856115, "step": 2050, "torque_loss": 0.14377032220363617 }, { "epoch": 1.8525179856115108, "grad_norm": 1.066757321357727, "learning_rate": 6.863333333333334e-05, "loss": 0.188, "step": 2060 }, { "action_loss": 0.03233494237065315, "epoch": 1.8525179856115108, "step": 2060 }, { "epoch": 1.8525179856115108, "step": 2060, "torque_loss": 0.19698774814605713 }, { "epoch": 1.8615107913669064, "grad_norm": 1.2496477365493774, "learning_rate": 6.896666666666667e-05, "loss": 0.1894, "step": 2070 }, { "action_loss": 0.029381627216935158, "epoch": 1.8615107913669064, "step": 2070 }, { "epoch": 1.8615107913669064, "step": 2070, "torque_loss": 0.2580276429653168 }, { "epoch": 1.870503597122302, "grad_norm": 1.1071528196334839, "learning_rate": 6.93e-05, "loss": 0.1929, "step": 2080 }, { "action_loss": 0.04345273971557617, "epoch": 1.870503597122302, "step": 2080 }, { "epoch": 1.870503597122302, "step": 2080, "torque_loss": 0.25307270884513855 }, { "epoch": 1.879496402877698, "grad_norm": 0.9540648460388184, "learning_rate": 6.963333333333334e-05, "loss": 0.1714, "step": 2090 }, { "action_loss": 0.03308546915650368, "epoch": 1.879496402877698, "step": 2090 }, { "epoch": 1.879496402877698, "step": 2090, "torque_loss": 0.31015744805336 }, { "epoch": 1.8884892086330936, "grad_norm": 1.203490972518921, "learning_rate": 6.996666666666667e-05, "loss": 0.1711, "step": 2100 }, { "action_loss": 0.025437256321310997, "epoch": 1.8884892086330936, "step": 2100 }, { "epoch": 1.8884892086330936, "step": 2100, "torque_loss": 0.175981804728508 }, { "epoch": 1.8974820143884892, "grad_norm": 0.8149719834327698, "learning_rate": 7.03e-05, "loss": 0.1889, "step": 2110 }, { "action_loss": 0.03627795726060867, "epoch": 1.8974820143884892, "step": 2110 }, { "epoch": 1.8974820143884892, "step": 2110, "torque_loss": 0.22580397129058838 }, { "epoch": 1.906474820143885, "grad_norm": 0.9615484476089478, "learning_rate": 7.063333333333333e-05, "loss": 0.2119, "step": 2120 }, { "action_loss": 0.03163142874836922, "epoch": 1.906474820143885, "step": 2120 }, { "epoch": 1.906474820143885, "step": 2120, "torque_loss": 0.1841520071029663 }, { "epoch": 1.9154676258992804, "grad_norm": 1.1510553359985352, "learning_rate": 7.096666666666667e-05, "loss": 0.2084, "step": 2130 }, { "action_loss": 0.03735082969069481, "epoch": 1.9154676258992804, "step": 2130 }, { "epoch": 1.9154676258992804, "step": 2130, "torque_loss": 0.26284560561180115 }, { "epoch": 1.9244604316546763, "grad_norm": 1.2142341136932373, "learning_rate": 7.13e-05, "loss": 0.1878, "step": 2140 }, { "action_loss": 0.041151657700538635, "epoch": 1.9244604316546763, "step": 2140 }, { "epoch": 1.9244604316546763, "step": 2140, "torque_loss": 0.32396605610847473 }, { "epoch": 1.933453237410072, "grad_norm": 0.9198023080825806, "learning_rate": 7.163333333333334e-05, "loss": 0.1763, "step": 2150 }, { "action_loss": 0.025605956092476845, "epoch": 1.933453237410072, "step": 2150 }, { "epoch": 1.933453237410072, "step": 2150, "torque_loss": 0.2530941963195801 }, { "epoch": 1.9424460431654675, "grad_norm": 1.2277367115020752, "learning_rate": 7.196666666666668e-05, "loss": 0.1936, "step": 2160 }, { "action_loss": 0.046468645334243774, "epoch": 1.9424460431654675, "step": 2160 }, { "epoch": 1.9424460431654675, "step": 2160, "torque_loss": 0.23827606439590454 }, { "epoch": 1.9514388489208634, "grad_norm": 1.3778629302978516, "learning_rate": 7.23e-05, "loss": 0.1738, "step": 2170 }, { "action_loss": 0.03623973950743675, "epoch": 1.9514388489208634, "step": 2170 }, { "epoch": 1.9514388489208634, "step": 2170, "torque_loss": 0.17318372428417206 }, { "epoch": 1.960431654676259, "grad_norm": 1.2485857009887695, "learning_rate": 7.263333333333334e-05, "loss": 0.1507, "step": 2180 }, { "action_loss": 0.037312742322683334, "epoch": 1.960431654676259, "step": 2180 }, { "epoch": 1.960431654676259, "step": 2180, "torque_loss": 0.2241894006729126 }, { "epoch": 1.9694244604316546, "grad_norm": 1.521353840827942, "learning_rate": 7.296666666666667e-05, "loss": 0.1805, "step": 2190 }, { "action_loss": 0.04329133406281471, "epoch": 1.9694244604316546, "step": 2190 }, { "epoch": 1.9694244604316546, "step": 2190, "torque_loss": 0.24648988246917725 }, { "epoch": 1.9784172661870505, "grad_norm": 0.9644017815589905, "learning_rate": 7.33e-05, "loss": 0.1773, "step": 2200 }, { "action_loss": 0.05346617102622986, "epoch": 1.9784172661870505, "step": 2200 }, { "epoch": 1.9784172661870505, "step": 2200, "torque_loss": 0.36603572964668274 }, { "epoch": 1.987410071942446, "grad_norm": 0.8555987477302551, "learning_rate": 7.363333333333334e-05, "loss": 0.1965, "step": 2210 }, { "action_loss": 0.0448712594807148, "epoch": 1.987410071942446, "step": 2210 }, { "epoch": 1.987410071942446, "step": 2210, "torque_loss": 0.2682015597820282 }, { "epoch": 1.9964028776978417, "grad_norm": 1.064080834388733, "learning_rate": 7.396666666666667e-05, "loss": 0.1794, "step": 2220 }, { "action_loss": 0.03338354453444481, "epoch": 1.9964028776978417, "step": 2220 }, { "epoch": 1.9964028776978417, "step": 2220, "torque_loss": 0.211073637008667 }, { "epoch": 2.0053956834532376, "grad_norm": 1.2192391157150269, "learning_rate": 7.43e-05, "loss": 0.1857, "step": 2230 }, { "action_loss": 0.056077226996421814, "epoch": 2.0053956834532376, "step": 2230 }, { "epoch": 2.0053956834532376, "step": 2230, "torque_loss": 0.34936535358428955 }, { "epoch": 2.014388489208633, "grad_norm": 1.4691566228866577, "learning_rate": 7.463333333333334e-05, "loss": 0.179, "step": 2240 }, { "action_loss": 0.040664639323949814, "epoch": 2.014388489208633, "step": 2240 }, { "epoch": 2.014388489208633, "step": 2240, "torque_loss": 0.30719685554504395 }, { "epoch": 2.023381294964029, "grad_norm": 0.9464696645736694, "learning_rate": 7.496666666666667e-05, "loss": 0.1979, "step": 2250 }, { "action_loss": 0.027231967076659203, "epoch": 2.023381294964029, "step": 2250 }, { "epoch": 2.023381294964029, "step": 2250, "torque_loss": 0.19606085121631622 }, { "epoch": 2.0323741007194243, "grad_norm": 0.6981268525123596, "learning_rate": 7.53e-05, "loss": 0.2021, "step": 2260 }, { "action_loss": 0.02166188694536686, "epoch": 2.0323741007194243, "step": 2260 }, { "epoch": 2.0323741007194243, "step": 2260, "torque_loss": 0.18720275163650513 }, { "epoch": 2.04136690647482, "grad_norm": 1.0285924673080444, "learning_rate": 7.563333333333333e-05, "loss": 0.1924, "step": 2270 }, { "action_loss": 0.06088582053780556, "epoch": 2.04136690647482, "step": 2270 }, { "epoch": 2.04136690647482, "step": 2270, "torque_loss": 0.2850090265274048 }, { "epoch": 2.050359712230216, "grad_norm": 1.2473974227905273, "learning_rate": 7.596666666666668e-05, "loss": 0.1833, "step": 2280 }, { "action_loss": 0.055059004575014114, "epoch": 2.050359712230216, "step": 2280 }, { "epoch": 2.050359712230216, "step": 2280, "torque_loss": 0.30954524874687195 }, { "epoch": 2.0593525179856114, "grad_norm": 0.7912524342536926, "learning_rate": 7.630000000000001e-05, "loss": 0.1921, "step": 2290 }, { "action_loss": 0.03146032989025116, "epoch": 2.0593525179856114, "step": 2290 }, { "epoch": 2.0593525179856114, "step": 2290, "torque_loss": 0.2310253381729126 }, { "epoch": 2.068345323741007, "grad_norm": 1.1133698225021362, "learning_rate": 7.663333333333333e-05, "loss": 0.1768, "step": 2300 }, { "action_loss": 0.023026784881949425, "epoch": 2.068345323741007, "step": 2300 }, { "epoch": 2.068345323741007, "step": 2300, "torque_loss": 0.19194848835468292 }, { "epoch": 2.077338129496403, "grad_norm": 0.865003228187561, "learning_rate": 7.696666666666668e-05, "loss": 0.1678, "step": 2310 }, { "action_loss": 0.02465830184519291, "epoch": 2.077338129496403, "step": 2310 }, { "epoch": 2.077338129496403, "step": 2310, "torque_loss": 0.17816364765167236 }, { "epoch": 2.0863309352517985, "grad_norm": 1.1504647731781006, "learning_rate": 7.730000000000001e-05, "loss": 0.1897, "step": 2320 }, { "action_loss": 0.03480272367596626, "epoch": 2.0863309352517985, "step": 2320 }, { "epoch": 2.0863309352517985, "step": 2320, "torque_loss": 0.18526776134967804 }, { "epoch": 2.0953237410071943, "grad_norm": 0.9174593687057495, "learning_rate": 7.763333333333334e-05, "loss": 0.188, "step": 2330 }, { "action_loss": 0.03690126910805702, "epoch": 2.0953237410071943, "step": 2330 }, { "epoch": 2.0953237410071943, "step": 2330, "torque_loss": 0.2459900975227356 }, { "epoch": 2.1043165467625897, "grad_norm": 1.098821759223938, "learning_rate": 7.796666666666666e-05, "loss": 0.1764, "step": 2340 }, { "action_loss": 0.030632534995675087, "epoch": 2.1043165467625897, "step": 2340 }, { "epoch": 2.1043165467625897, "step": 2340, "torque_loss": 0.24314117431640625 }, { "epoch": 2.1133093525179856, "grad_norm": 0.7877588868141174, "learning_rate": 7.83e-05, "loss": 0.1843, "step": 2350 }, { "action_loss": 0.0456099770963192, "epoch": 2.1133093525179856, "step": 2350 }, { "epoch": 2.1133093525179856, "step": 2350, "torque_loss": 0.24205006659030914 }, { "epoch": 2.1223021582733814, "grad_norm": 1.012729287147522, "learning_rate": 7.863333333333334e-05, "loss": 0.1722, "step": 2360 }, { "action_loss": 0.04457930102944374, "epoch": 2.1223021582733814, "step": 2360 }, { "epoch": 2.1223021582733814, "step": 2360, "torque_loss": 0.19897128641605377 }, { "epoch": 2.131294964028777, "grad_norm": 0.8656269311904907, "learning_rate": 7.896666666666667e-05, "loss": 0.1778, "step": 2370 }, { "action_loss": 0.02142155170440674, "epoch": 2.131294964028777, "step": 2370 }, { "epoch": 2.131294964028777, "step": 2370, "torque_loss": 0.21871991455554962 }, { "epoch": 2.1402877697841727, "grad_norm": 0.9156185388565063, "learning_rate": 7.93e-05, "loss": 0.1746, "step": 2380 }, { "action_loss": 0.051273662596940994, "epoch": 2.1402877697841727, "step": 2380 }, { "epoch": 2.1402877697841727, "step": 2380, "torque_loss": 0.3112185299396515 }, { "epoch": 2.1492805755395685, "grad_norm": 0.6909753084182739, "learning_rate": 7.963333333333334e-05, "loss": 0.1992, "step": 2390 }, { "action_loss": 0.029538124799728394, "epoch": 2.1492805755395685, "step": 2390 }, { "epoch": 2.1492805755395685, "step": 2390, "torque_loss": 0.19576160609722137 }, { "epoch": 2.158273381294964, "grad_norm": 0.874372661113739, "learning_rate": 7.996666666666667e-05, "loss": 0.1775, "step": 2400 }, { "action_loss": 0.04146328195929527, "epoch": 2.158273381294964, "step": 2400 }, { "epoch": 2.158273381294964, "step": 2400, "torque_loss": 0.14249853789806366 }, { "epoch": 2.16726618705036, "grad_norm": 1.0559849739074707, "learning_rate": 8.030000000000001e-05, "loss": 0.1855, "step": 2410 }, { "action_loss": 0.03743477165699005, "epoch": 2.16726618705036, "step": 2410 }, { "epoch": 2.16726618705036, "step": 2410, "torque_loss": 0.19504757225513458 }, { "epoch": 2.176258992805755, "grad_norm": 0.8279873132705688, "learning_rate": 8.063333333333333e-05, "loss": 0.1773, "step": 2420 }, { "action_loss": 0.04920132830739021, "epoch": 2.176258992805755, "step": 2420 }, { "epoch": 2.176258992805755, "step": 2420, "torque_loss": 0.3146803081035614 }, { "epoch": 2.185251798561151, "grad_norm": 1.530151128768921, "learning_rate": 8.096666666666667e-05, "loss": 0.1756, "step": 2430 }, { "action_loss": 0.04897041618824005, "epoch": 2.185251798561151, "step": 2430 }, { "epoch": 2.185251798561151, "step": 2430, "torque_loss": 0.2706270217895508 }, { "epoch": 2.194244604316547, "grad_norm": 0.7952126860618591, "learning_rate": 8.13e-05, "loss": 0.1752, "step": 2440 }, { "action_loss": 0.030906708911061287, "epoch": 2.194244604316547, "step": 2440 }, { "epoch": 2.194244604316547, "step": 2440, "torque_loss": 0.17272688448429108 }, { "epoch": 2.2032374100719423, "grad_norm": 1.0380154848098755, "learning_rate": 8.163333333333334e-05, "loss": 0.1822, "step": 2450 }, { "action_loss": 0.036600567400455475, "epoch": 2.2032374100719423, "step": 2450 }, { "epoch": 2.2032374100719423, "step": 2450, "torque_loss": 0.2009994387626648 }, { "epoch": 2.212230215827338, "grad_norm": 1.3534965515136719, "learning_rate": 8.196666666666668e-05, "loss": 0.1737, "step": 2460 }, { "action_loss": 0.029478082433342934, "epoch": 2.212230215827338, "step": 2460 }, { "epoch": 2.212230215827338, "step": 2460, "torque_loss": 0.27246344089508057 }, { "epoch": 2.221223021582734, "grad_norm": 0.9862401485443115, "learning_rate": 8.23e-05, "loss": 0.1629, "step": 2470 }, { "action_loss": 0.025289371609687805, "epoch": 2.221223021582734, "step": 2470 }, { "epoch": 2.221223021582734, "step": 2470, "torque_loss": 0.1336299031972885 }, { "epoch": 2.2302158273381294, "grad_norm": 0.5524766445159912, "learning_rate": 8.263333333333334e-05, "loss": 0.1778, "step": 2480 }, { "action_loss": 0.059403061866760254, "epoch": 2.2302158273381294, "step": 2480 }, { "epoch": 2.2302158273381294, "step": 2480, "torque_loss": 0.21764595806598663 }, { "epoch": 2.2392086330935252, "grad_norm": 0.8829817771911621, "learning_rate": 8.296666666666667e-05, "loss": 0.1911, "step": 2490 }, { "action_loss": 0.0365251749753952, "epoch": 2.2392086330935252, "step": 2490 }, { "epoch": 2.2392086330935252, "step": 2490, "torque_loss": 0.21073229610919952 }, { "epoch": 2.2482014388489207, "grad_norm": 0.8790428042411804, "learning_rate": 8.33e-05, "loss": 0.1796, "step": 2500 }, { "action_loss": 0.04717804864048958, "epoch": 2.2482014388489207, "step": 2500 }, { "epoch": 2.2482014388489207, "step": 2500, "torque_loss": 0.3137301504611969 }, { "epoch": 2.2571942446043165, "grad_norm": 0.8811512589454651, "learning_rate": 8.363333333333334e-05, "loss": 0.1936, "step": 2510 }, { "action_loss": 0.04025311395525932, "epoch": 2.2571942446043165, "step": 2510 }, { "epoch": 2.2571942446043165, "step": 2510, "torque_loss": 0.3026411533355713 }, { "epoch": 2.2661870503597124, "grad_norm": 1.097564458847046, "learning_rate": 8.396666666666667e-05, "loss": 0.1579, "step": 2520 }, { "action_loss": 0.056756194680929184, "epoch": 2.2661870503597124, "step": 2520 }, { "epoch": 2.2661870503597124, "step": 2520, "torque_loss": 0.29115059971809387 }, { "epoch": 2.2751798561151078, "grad_norm": 1.4205818176269531, "learning_rate": 8.43e-05, "loss": 0.161, "step": 2530 }, { "action_loss": 0.05868129804730415, "epoch": 2.2751798561151078, "step": 2530 }, { "epoch": 2.2751798561151078, "step": 2530, "torque_loss": 0.27166321873664856 }, { "epoch": 2.2841726618705036, "grad_norm": 0.5193299055099487, "learning_rate": 8.463333333333335e-05, "loss": 0.2144, "step": 2540 }, { "action_loss": 0.037607885897159576, "epoch": 2.2841726618705036, "step": 2540 }, { "epoch": 2.2841726618705036, "step": 2540, "torque_loss": 0.24810700118541718 }, { "epoch": 2.2931654676258995, "grad_norm": 0.9774449467658997, "learning_rate": 8.496666666666667e-05, "loss": 0.1729, "step": 2550 }, { "action_loss": 0.025868279859423637, "epoch": 2.2931654676258995, "step": 2550 }, { "epoch": 2.2931654676258995, "step": 2550, "torque_loss": 0.17854245007038116 }, { "epoch": 2.302158273381295, "grad_norm": 0.9375455975532532, "learning_rate": 8.53e-05, "loss": 0.1996, "step": 2560 }, { "action_loss": 0.03848273679614067, "epoch": 2.302158273381295, "step": 2560 }, { "epoch": 2.302158273381295, "step": 2560, "torque_loss": 0.2734481990337372 }, { "epoch": 2.3111510791366907, "grad_norm": 0.915597140789032, "learning_rate": 8.563333333333333e-05, "loss": 0.1831, "step": 2570 }, { "action_loss": 0.03490402176976204, "epoch": 2.3111510791366907, "step": 2570 }, { "epoch": 2.3111510791366907, "step": 2570, "torque_loss": 0.1620626151561737 }, { "epoch": 2.3201438848920866, "grad_norm": 1.3078819513320923, "learning_rate": 8.596666666666668e-05, "loss": 0.1947, "step": 2580 }, { "action_loss": 0.044929202646017075, "epoch": 2.3201438848920866, "step": 2580 }, { "epoch": 2.3201438848920866, "step": 2580, "torque_loss": 0.2800597846508026 }, { "epoch": 2.329136690647482, "grad_norm": 0.971284031867981, "learning_rate": 8.63e-05, "loss": 0.1783, "step": 2590 }, { "action_loss": 0.038797806948423386, "epoch": 2.329136690647482, "step": 2590 }, { "epoch": 2.329136690647482, "step": 2590, "torque_loss": 0.2307034581899643 }, { "epoch": 2.338129496402878, "grad_norm": 0.8855559229850769, "learning_rate": 8.663333333333333e-05, "loss": 0.1829, "step": 2600 }, { "action_loss": 0.04769815132021904, "epoch": 2.338129496402878, "step": 2600 }, { "epoch": 2.338129496402878, "step": 2600, "torque_loss": 0.1770680993795395 }, { "epoch": 2.347122302158273, "grad_norm": 1.1670985221862793, "learning_rate": 8.696666666666668e-05, "loss": 0.199, "step": 2610 }, { "action_loss": 0.028244605287909508, "epoch": 2.347122302158273, "step": 2610 }, { "epoch": 2.347122302158273, "step": 2610, "torque_loss": 0.21536843478679657 }, { "epoch": 2.356115107913669, "grad_norm": 0.7691673636436462, "learning_rate": 8.730000000000001e-05, "loss": 0.1765, "step": 2620 }, { "action_loss": 0.05115498974919319, "epoch": 2.356115107913669, "step": 2620 }, { "epoch": 2.356115107913669, "step": 2620, "torque_loss": 0.2772064208984375 }, { "epoch": 2.365107913669065, "grad_norm": 1.0487374067306519, "learning_rate": 8.763333333333334e-05, "loss": 0.1882, "step": 2630 }, { "action_loss": 0.04736854508519173, "epoch": 2.365107913669065, "step": 2630 }, { "epoch": 2.365107913669065, "step": 2630, "torque_loss": 0.3105471432209015 }, { "epoch": 2.3741007194244603, "grad_norm": 0.9945613145828247, "learning_rate": 8.796666666666667e-05, "loss": 0.1912, "step": 2640 }, { "action_loss": 0.0694267675280571, "epoch": 2.3741007194244603, "step": 2640 }, { "epoch": 2.3741007194244603, "step": 2640, "torque_loss": 0.2645087242126465 }, { "epoch": 2.383093525179856, "grad_norm": 0.6112527847290039, "learning_rate": 8.83e-05, "loss": 0.1681, "step": 2650 }, { "action_loss": 0.04595302417874336, "epoch": 2.383093525179856, "step": 2650 }, { "epoch": 2.383093525179856, "step": 2650, "torque_loss": 0.17838972806930542 }, { "epoch": 2.3920863309352516, "grad_norm": 0.9825464487075806, "learning_rate": 8.863333333333334e-05, "loss": 0.1704, "step": 2660 }, { "action_loss": 0.06639229506254196, "epoch": 2.3920863309352516, "step": 2660 }, { "epoch": 2.3920863309352516, "step": 2660, "torque_loss": 0.23566614091396332 }, { "epoch": 2.4010791366906474, "grad_norm": 0.575599730014801, "learning_rate": 8.896666666666667e-05, "loss": 0.1831, "step": 2670 }, { "action_loss": 0.021346965804696083, "epoch": 2.4010791366906474, "step": 2670 }, { "epoch": 2.4010791366906474, "step": 2670, "torque_loss": 0.197331503033638 }, { "epoch": 2.4100719424460433, "grad_norm": 0.8426250219345093, "learning_rate": 8.93e-05, "loss": 0.1646, "step": 2680 }, { "action_loss": 0.032143108546733856, "epoch": 2.4100719424460433, "step": 2680 }, { "epoch": 2.4100719424460433, "step": 2680, "torque_loss": 0.1904972791671753 }, { "epoch": 2.4190647482014387, "grad_norm": 0.8711763024330139, "learning_rate": 8.963333333333333e-05, "loss": 0.157, "step": 2690 }, { "action_loss": 0.03259913995862007, "epoch": 2.4190647482014387, "step": 2690 }, { "epoch": 2.4190647482014387, "step": 2690, "torque_loss": 0.30608853697776794 }, { "epoch": 2.4280575539568345, "grad_norm": 0.8301617503166199, "learning_rate": 8.996666666666667e-05, "loss": 0.1648, "step": 2700 }, { "action_loss": 0.02459898591041565, "epoch": 2.4280575539568345, "step": 2700 }, { "epoch": 2.4280575539568345, "step": 2700, "torque_loss": 0.18209002912044525 }, { "epoch": 2.4370503597122304, "grad_norm": 0.5862883925437927, "learning_rate": 9.030000000000001e-05, "loss": 0.1653, "step": 2710 }, { "action_loss": 0.028349494561553, "epoch": 2.4370503597122304, "step": 2710 }, { "epoch": 2.4370503597122304, "step": 2710, "torque_loss": 0.21465925872325897 }, { "epoch": 2.446043165467626, "grad_norm": 0.9890007376670837, "learning_rate": 9.063333333333333e-05, "loss": 0.1874, "step": 2720 }, { "action_loss": 0.012776019982993603, "epoch": 2.446043165467626, "step": 2720 }, { "epoch": 2.446043165467626, "step": 2720, "torque_loss": 0.13926582038402557 }, { "epoch": 2.4550359712230216, "grad_norm": 0.6599013209342957, "learning_rate": 9.096666666666666e-05, "loss": 0.1471, "step": 2730 }, { "action_loss": 0.024034298956394196, "epoch": 2.4550359712230216, "step": 2730 }, { "epoch": 2.4550359712230216, "step": 2730, "torque_loss": 0.22410959005355835 }, { "epoch": 2.4640287769784175, "grad_norm": 0.7020226716995239, "learning_rate": 9.130000000000001e-05, "loss": 0.1634, "step": 2740 }, { "action_loss": 0.03747546672821045, "epoch": 2.4640287769784175, "step": 2740 }, { "epoch": 2.4640287769784175, "step": 2740, "torque_loss": 0.29399803280830383 }, { "epoch": 2.473021582733813, "grad_norm": 0.9042898416519165, "learning_rate": 9.163333333333334e-05, "loss": 0.1882, "step": 2750 }, { "action_loss": 0.06875535845756531, "epoch": 2.473021582733813, "step": 2750 }, { "epoch": 2.473021582733813, "step": 2750, "torque_loss": 0.23685447871685028 }, { "epoch": 2.4820143884892087, "grad_norm": 1.1190818548202515, "learning_rate": 9.196666666666666e-05, "loss": 0.1829, "step": 2760 }, { "action_loss": 0.041641730815172195, "epoch": 2.4820143884892087, "step": 2760 }, { "epoch": 2.4820143884892087, "step": 2760, "torque_loss": 0.3415559232234955 }, { "epoch": 2.491007194244604, "grad_norm": 0.9239036440849304, "learning_rate": 9.230000000000001e-05, "loss": 0.1644, "step": 2770 }, { "action_loss": 0.04984307289123535, "epoch": 2.491007194244604, "step": 2770 }, { "epoch": 2.491007194244604, "step": 2770, "torque_loss": 0.2521437108516693 }, { "epoch": 2.5, "grad_norm": 0.7768085598945618, "learning_rate": 9.263333333333334e-05, "loss": 0.1621, "step": 2780 }, { "action_loss": 0.03405581787228584, "epoch": 2.5, "step": 2780 }, { "epoch": 2.5, "step": 2780, "torque_loss": 0.25922760367393494 }, { "epoch": 2.508992805755396, "grad_norm": 0.7812137603759766, "learning_rate": 9.296666666666667e-05, "loss": 0.1895, "step": 2790 }, { "action_loss": 0.0214341189712286, "epoch": 2.508992805755396, "step": 2790 }, { "epoch": 2.508992805755396, "step": 2790, "torque_loss": 0.18661729991436005 }, { "epoch": 2.5179856115107913, "grad_norm": 0.9062943458557129, "learning_rate": 9.33e-05, "loss": 0.1678, "step": 2800 }, { "action_loss": 0.03733142837882042, "epoch": 2.5179856115107913, "step": 2800 }, { "epoch": 2.5179856115107913, "step": 2800, "torque_loss": 0.26082900166511536 }, { "epoch": 2.526978417266187, "grad_norm": 0.7451797127723694, "learning_rate": 9.363333333333334e-05, "loss": 0.1705, "step": 2810 }, { "action_loss": 0.0340036042034626, "epoch": 2.526978417266187, "step": 2810 }, { "epoch": 2.526978417266187, "step": 2810, "torque_loss": 0.20398761332035065 }, { "epoch": 2.5359712230215825, "grad_norm": 0.9242757558822632, "learning_rate": 9.396666666666667e-05, "loss": 0.1636, "step": 2820 }, { "action_loss": 0.019967570900917053, "epoch": 2.5359712230215825, "step": 2820 }, { "epoch": 2.5359712230215825, "step": 2820, "torque_loss": 0.17101150751113892 }, { "epoch": 2.5449640287769784, "grad_norm": 0.5743494629859924, "learning_rate": 9.43e-05, "loss": 0.1833, "step": 2830 }, { "action_loss": 0.033128365874290466, "epoch": 2.5449640287769784, "step": 2830 }, { "epoch": 2.5449640287769784, "step": 2830, "torque_loss": 0.2086247354745865 }, { "epoch": 2.553956834532374, "grad_norm": 1.0134040117263794, "learning_rate": 9.463333333333333e-05, "loss": 0.1736, "step": 2840 }, { "action_loss": 0.017834598198533058, "epoch": 2.553956834532374, "step": 2840 }, { "epoch": 2.553956834532374, "step": 2840, "torque_loss": 0.1474757343530655 }, { "epoch": 2.56294964028777, "grad_norm": 0.7921326756477356, "learning_rate": 9.496666666666667e-05, "loss": 0.1641, "step": 2850 }, { "action_loss": 0.03435974195599556, "epoch": 2.56294964028777, "step": 2850 }, { "epoch": 2.56294964028777, "step": 2850, "torque_loss": 0.2518726885318756 }, { "epoch": 2.5719424460431655, "grad_norm": 0.8380470871925354, "learning_rate": 9.53e-05, "loss": 0.1817, "step": 2860 }, { "action_loss": 0.024473972618579865, "epoch": 2.5719424460431655, "step": 2860 }, { "epoch": 2.5719424460431655, "step": 2860, "torque_loss": 0.2516339123249054 }, { "epoch": 2.5809352517985613, "grad_norm": 0.6142778992652893, "learning_rate": 9.563333333333334e-05, "loss": 0.1877, "step": 2870 }, { "action_loss": 0.040658190846443176, "epoch": 2.5809352517985613, "step": 2870 }, { "epoch": 2.5809352517985613, "step": 2870, "torque_loss": 0.2592298090457916 }, { "epoch": 2.5899280575539567, "grad_norm": 0.8638019561767578, "learning_rate": 9.596666666666668e-05, "loss": 0.1621, "step": 2880 }, { "action_loss": 0.045856598764657974, "epoch": 2.5899280575539567, "step": 2880 }, { "epoch": 2.5899280575539567, "step": 2880, "torque_loss": 0.21954099833965302 }, { "epoch": 2.5989208633093526, "grad_norm": 0.8769229650497437, "learning_rate": 9.63e-05, "loss": 0.1721, "step": 2890 }, { "action_loss": 0.045179594308137894, "epoch": 2.5989208633093526, "step": 2890 }, { "epoch": 2.5989208633093526, "step": 2890, "torque_loss": 0.27072277665138245 }, { "epoch": 2.6079136690647484, "grad_norm": 0.6910836696624756, "learning_rate": 9.663333333333334e-05, "loss": 0.1754, "step": 2900 }, { "action_loss": 0.025974944233894348, "epoch": 2.6079136690647484, "step": 2900 }, { "epoch": 2.6079136690647484, "step": 2900, "torque_loss": 0.19095559418201447 }, { "epoch": 2.616906474820144, "grad_norm": 0.7392135858535767, "learning_rate": 9.696666666666667e-05, "loss": 0.1631, "step": 2910 }, { "action_loss": 0.03671032562851906, "epoch": 2.616906474820144, "step": 2910 }, { "epoch": 2.616906474820144, "step": 2910, "torque_loss": 0.31771746277809143 }, { "epoch": 2.6258992805755397, "grad_norm": 1.2275060415267944, "learning_rate": 9.730000000000001e-05, "loss": 0.1751, "step": 2920 }, { "action_loss": 0.03621062636375427, "epoch": 2.6258992805755397, "step": 2920 }, { "epoch": 2.6258992805755397, "step": 2920, "torque_loss": 0.15818250179290771 }, { "epoch": 2.634892086330935, "grad_norm": 0.7934859395027161, "learning_rate": 9.763333333333334e-05, "loss": 0.1572, "step": 2930 }, { "action_loss": 0.033138055354356766, "epoch": 2.634892086330935, "step": 2930 }, { "epoch": 2.634892086330935, "step": 2930, "torque_loss": 0.3371175527572632 }, { "epoch": 2.643884892086331, "grad_norm": 0.9872663021087646, "learning_rate": 9.796666666666667e-05, "loss": 0.1873, "step": 2940 }, { "action_loss": 0.07665334641933441, "epoch": 2.643884892086331, "step": 2940 }, { "epoch": 2.643884892086331, "step": 2940, "torque_loss": 0.32208451628685 }, { "epoch": 2.652877697841727, "grad_norm": 0.8891186714172363, "learning_rate": 9.83e-05, "loss": 0.1809, "step": 2950 }, { "action_loss": 0.05144814774394035, "epoch": 2.652877697841727, "step": 2950 }, { "epoch": 2.652877697841727, "step": 2950, "torque_loss": 0.2725497782230377 }, { "epoch": 2.661870503597122, "grad_norm": 0.7142044901847839, "learning_rate": 9.863333333333334e-05, "loss": 0.1779, "step": 2960 }, { "action_loss": 0.017679506912827492, "epoch": 2.661870503597122, "step": 2960 }, { "epoch": 2.661870503597122, "step": 2960, "torque_loss": 0.1468411535024643 }, { "epoch": 2.670863309352518, "grad_norm": 0.8411307334899902, "learning_rate": 9.896666666666667e-05, "loss": 0.1655, "step": 2970 }, { "action_loss": 0.04583049193024635, "epoch": 2.670863309352518, "step": 2970 }, { "epoch": 2.670863309352518, "step": 2970, "torque_loss": 0.26762655377388 }, { "epoch": 2.6798561151079134, "grad_norm": 0.8158789873123169, "learning_rate": 9.93e-05, "loss": 0.1552, "step": 2980 }, { "action_loss": 0.030100546777248383, "epoch": 2.6798561151079134, "step": 2980 }, { "epoch": 2.6798561151079134, "step": 2980, "torque_loss": 0.20539379119873047 }, { "epoch": 2.6888489208633093, "grad_norm": 0.9888566732406616, "learning_rate": 9.963333333333333e-05, "loss": 0.1835, "step": 2990 }, { "action_loss": 0.03856873884797096, "epoch": 2.6888489208633093, "step": 2990 }, { "epoch": 2.6888489208633093, "step": 2990, "torque_loss": 0.40404191613197327 }, { "epoch": 2.697841726618705, "grad_norm": 0.897696852684021, "learning_rate": 9.996666666666668e-05, "loss": 0.1976, "step": 3000 }, { "action_loss": 0.026626795530319214, "epoch": 2.697841726618705, "step": 3000 }, { "epoch": 2.697841726618705, "step": 3000, "torque_loss": 0.21663641929626465 }, { "epoch": 2.706834532374101, "grad_norm": 0.6242678165435791, "learning_rate": 9.999999384858465e-05, "loss": 0.1614, "step": 3010 }, { "action_loss": 0.028543060645461082, "epoch": 2.706834532374101, "step": 3010 }, { "epoch": 2.706834532374101, "step": 3010, "torque_loss": 0.2040422111749649 }, { "epoch": 2.7158273381294964, "grad_norm": 0.6033580303192139, "learning_rate": 9.999997258443473e-05, "loss": 0.1743, "step": 3020 }, { "action_loss": 0.03732160106301308, "epoch": 2.7158273381294964, "step": 3020 }, { "epoch": 2.7158273381294964, "step": 3020, "torque_loss": 0.2828497886657715 }, { "epoch": 2.7248201438848922, "grad_norm": 0.8652268052101135, "learning_rate": 9.999993613161331e-05, "loss": 0.1641, "step": 3030 }, { "action_loss": 0.042367223650217056, "epoch": 2.7248201438848922, "step": 3030 }, { "epoch": 2.7248201438848922, "step": 3030, "torque_loss": 0.27860018610954285 }, { "epoch": 2.7338129496402876, "grad_norm": 0.6465263962745667, "learning_rate": 9.999988449013146e-05, "loss": 0.1797, "step": 3040 }, { "action_loss": 0.03038354031741619, "epoch": 2.7338129496402876, "step": 3040 }, { "epoch": 2.7338129496402876, "step": 3040, "torque_loss": 0.2349443882703781 }, { "epoch": 2.7428057553956835, "grad_norm": 1.012575626373291, "learning_rate": 9.99998176600049e-05, "loss": 0.1989, "step": 3050 }, { "action_loss": 0.02037590928375721, "epoch": 2.7428057553956835, "step": 3050 }, { "epoch": 2.7428057553956835, "step": 3050, "torque_loss": 0.15306538343429565 }, { "epoch": 2.7517985611510793, "grad_norm": 0.8973447680473328, "learning_rate": 9.999973564125389e-05, "loss": 0.1583, "step": 3060 }, { "action_loss": 0.020966636016964912, "epoch": 2.7517985611510793, "step": 3060 }, { "epoch": 2.7517985611510793, "step": 3060, "torque_loss": 0.23605318367481232 }, { "epoch": 2.7607913669064748, "grad_norm": 0.9076579809188843, "learning_rate": 9.999963843390335e-05, "loss": 0.1619, "step": 3070 }, { "action_loss": 0.02813025377690792, "epoch": 2.7607913669064748, "step": 3070 }, { "epoch": 2.7607913669064748, "step": 3070, "torque_loss": 0.208149716258049 }, { "epoch": 2.7697841726618706, "grad_norm": 0.9132898449897766, "learning_rate": 9.999952603798282e-05, "loss": 0.2004, "step": 3080 }, { "action_loss": 0.04241865873336792, "epoch": 2.7697841726618706, "step": 3080 }, { "epoch": 2.7697841726618706, "step": 3080, "torque_loss": 0.21946799755096436 }, { "epoch": 2.778776978417266, "grad_norm": 0.9363276958465576, "learning_rate": 9.999939845352646e-05, "loss": 0.1702, "step": 3090 }, { "action_loss": 0.02192108891904354, "epoch": 2.778776978417266, "step": 3090 }, { "epoch": 2.778776978417266, "step": 3090, "torque_loss": 0.22751887142658234 }, { "epoch": 2.787769784172662, "grad_norm": 0.9137658476829529, "learning_rate": 9.999925568057298e-05, "loss": 0.1718, "step": 3100 }, { "action_loss": 0.031889889389276505, "epoch": 2.787769784172662, "step": 3100 }, { "epoch": 2.787769784172662, "step": 3100, "torque_loss": 0.13643886148929596 }, { "epoch": 2.7967625899280577, "grad_norm": 0.8435372114181519, "learning_rate": 9.999909771916578e-05, "loss": 0.1673, "step": 3110 }, { "action_loss": 0.03972965106368065, "epoch": 2.7967625899280577, "step": 3110 }, { "epoch": 2.7967625899280577, "step": 3110, "torque_loss": 0.20372998714447021 }, { "epoch": 2.805755395683453, "grad_norm": 0.6084837317466736, "learning_rate": 9.999892456935285e-05, "loss": 0.174, "step": 3120 }, { "action_loss": 0.0517246313393116, "epoch": 2.805755395683453, "step": 3120 }, { "epoch": 2.805755395683453, "step": 3120, "torque_loss": 0.3727372884750366 }, { "epoch": 2.814748201438849, "grad_norm": 0.7857725620269775, "learning_rate": 9.999873623118679e-05, "loss": 0.1724, "step": 3130 }, { "action_loss": 0.024789273738861084, "epoch": 2.814748201438849, "step": 3130 }, { "epoch": 2.814748201438849, "step": 3130, "torque_loss": 0.2597333788871765 }, { "epoch": 2.8237410071942444, "grad_norm": 0.9307717680931091, "learning_rate": 9.999853270472479e-05, "loss": 0.1694, "step": 3140 }, { "action_loss": 0.026181140914559364, "epoch": 2.8237410071942444, "step": 3140 }, { "epoch": 2.8237410071942444, "step": 3140, "torque_loss": 0.28141531348228455 }, { "epoch": 2.83273381294964, "grad_norm": 0.608248770236969, "learning_rate": 9.999831399002871e-05, "loss": 0.1625, "step": 3150 }, { "action_loss": 0.029587173834443092, "epoch": 2.83273381294964, "step": 3150 }, { "epoch": 2.83273381294964, "step": 3150, "torque_loss": 0.24546201527118683 }, { "epoch": 2.841726618705036, "grad_norm": 0.857422947883606, "learning_rate": 9.999808008716494e-05, "loss": 0.1572, "step": 3160 }, { "action_loss": 0.05876150727272034, "epoch": 2.841726618705036, "step": 3160 }, { "epoch": 2.841726618705036, "step": 3160, "torque_loss": 0.3761621415615082 }, { "epoch": 2.850719424460432, "grad_norm": 0.7740470767021179, "learning_rate": 9.999783099620459e-05, "loss": 0.169, "step": 3170 }, { "action_loss": 0.014202896505594254, "epoch": 2.850719424460432, "step": 3170 }, { "epoch": 2.850719424460432, "step": 3170, "torque_loss": 0.19408686459064484 }, { "epoch": 2.8597122302158273, "grad_norm": 0.7465882301330566, "learning_rate": 9.999756671722328e-05, "loss": 0.1575, "step": 3180 }, { "action_loss": 0.021320229396224022, "epoch": 2.8597122302158273, "step": 3180 }, { "epoch": 2.8597122302158273, "step": 3180, "torque_loss": 0.18188445270061493 }, { "epoch": 2.868705035971223, "grad_norm": 0.5912874341011047, "learning_rate": 9.99972872503013e-05, "loss": 0.1555, "step": 3190 }, { "action_loss": 0.027159558609128, "epoch": 2.868705035971223, "step": 3190 }, { "epoch": 2.868705035971223, "step": 3190, "torque_loss": 0.2572857141494751 }, { "epoch": 2.8776978417266186, "grad_norm": 0.9510894417762756, "learning_rate": 9.999699259552359e-05, "loss": 0.1815, "step": 3200 }, { "action_loss": 0.015557602047920227, "epoch": 2.8776978417266186, "step": 3200 }, { "epoch": 2.8776978417266186, "step": 3200, "torque_loss": 0.1212419793009758 }, { "epoch": 2.8866906474820144, "grad_norm": 0.646770715713501, "learning_rate": 9.99966827529796e-05, "loss": 0.1465, "step": 3210 }, { "action_loss": 0.024984868243336678, "epoch": 2.8866906474820144, "step": 3210 }, { "epoch": 2.8866906474820144, "step": 3210, "torque_loss": 0.20868487656116486 }, { "epoch": 2.8956834532374103, "grad_norm": 0.8469381928443909, "learning_rate": 9.999635772276348e-05, "loss": 0.1594, "step": 3220 }, { "action_loss": 0.02082737535238266, "epoch": 2.8956834532374103, "step": 3220 }, { "epoch": 2.8956834532374103, "step": 3220, "torque_loss": 0.13652342557907104 }, { "epoch": 2.9046762589928057, "grad_norm": 0.6366978883743286, "learning_rate": 9.999601750497396e-05, "loss": 0.1496, "step": 3230 }, { "action_loss": 0.023640111088752747, "epoch": 2.9046762589928057, "step": 3230 }, { "epoch": 2.9046762589928057, "step": 3230, "torque_loss": 0.2213105410337448 }, { "epoch": 2.9136690647482015, "grad_norm": 0.783977746963501, "learning_rate": 9.99956620997144e-05, "loss": 0.1598, "step": 3240 }, { "action_loss": 0.025585750117897987, "epoch": 2.9136690647482015, "step": 3240 }, { "epoch": 2.9136690647482015, "step": 3240, "torque_loss": 0.19591379165649414 }, { "epoch": 2.922661870503597, "grad_norm": 0.8095446228981018, "learning_rate": 9.999529150709275e-05, "loss": 0.1703, "step": 3250 }, { "action_loss": 0.027026211842894554, "epoch": 2.922661870503597, "step": 3250 }, { "epoch": 2.922661870503597, "step": 3250, "torque_loss": 0.18704302608966827 }, { "epoch": 2.931654676258993, "grad_norm": 0.7060543298721313, "learning_rate": 9.999490572722158e-05, "loss": 0.1579, "step": 3260 }, { "action_loss": 0.02905534766614437, "epoch": 2.931654676258993, "step": 3260 }, { "epoch": 2.931654676258993, "step": 3260, "torque_loss": 0.2797172963619232 }, { "epoch": 2.9406474820143886, "grad_norm": 0.8726913928985596, "learning_rate": 9.99945047602181e-05, "loss": 0.1721, "step": 3270 }, { "action_loss": 0.020707283169031143, "epoch": 2.9406474820143886, "step": 3270 }, { "epoch": 2.9406474820143886, "step": 3270, "torque_loss": 0.17078639566898346 }, { "epoch": 2.949640287769784, "grad_norm": 0.6975228190422058, "learning_rate": 9.99940886062041e-05, "loss": 0.1594, "step": 3280 }, { "action_loss": 0.024200694635510445, "epoch": 2.949640287769784, "step": 3280 }, { "epoch": 2.949640287769784, "step": 3280, "torque_loss": 0.2540251910686493 }, { "epoch": 2.95863309352518, "grad_norm": 0.695429801940918, "learning_rate": 9.999365726530599e-05, "loss": 0.1601, "step": 3290 }, { "action_loss": 0.028896307572722435, "epoch": 2.95863309352518, "step": 3290 }, { "epoch": 2.95863309352518, "step": 3290, "torque_loss": 0.295061856508255 }, { "epoch": 2.9676258992805753, "grad_norm": 0.6173990368843079, "learning_rate": 9.999321073765481e-05, "loss": 0.1971, "step": 3300 }, { "action_loss": 0.025268493220210075, "epoch": 2.9676258992805753, "step": 3300 }, { "epoch": 2.9676258992805753, "step": 3300, "torque_loss": 0.23275358974933624 }, { "epoch": 2.976618705035971, "grad_norm": 0.5064751505851746, "learning_rate": 9.99927490233862e-05, "loss": 0.1433, "step": 3310 }, { "action_loss": 0.030221475288271904, "epoch": 2.976618705035971, "step": 3310 }, { "epoch": 2.976618705035971, "step": 3310, "torque_loss": 0.2279936820268631 }, { "epoch": 2.985611510791367, "grad_norm": 0.9989295601844788, "learning_rate": 9.999227212264043e-05, "loss": 0.1472, "step": 3320 }, { "action_loss": 0.02555588074028492, "epoch": 2.985611510791367, "step": 3320 }, { "epoch": 2.985611510791367, "step": 3320, "torque_loss": 0.16741611063480377 }, { "epoch": 2.994604316546763, "grad_norm": 0.8547114133834839, "learning_rate": 9.999178003556236e-05, "loss": 0.1683, "step": 3330 }, { "action_loss": 0.032825272530317307, "epoch": 2.994604316546763, "step": 3330 }, { "epoch": 2.994604316546763, "step": 3330, "torque_loss": 0.179527148604393 }, { "epoch": 3.0035971223021583, "grad_norm": 0.7222832441329956, "learning_rate": 9.999127276230146e-05, "loss": 0.1532, "step": 3340 }, { "action_loss": 0.03490176424384117, "epoch": 3.0035971223021583, "step": 3340 }, { "epoch": 3.0035971223021583, "step": 3340, "torque_loss": 0.19747082889080048 }, { "epoch": 3.012589928057554, "grad_norm": 1.060142159461975, "learning_rate": 9.999075030301184e-05, "loss": 0.1624, "step": 3350 }, { "action_loss": 0.029090115800499916, "epoch": 3.012589928057554, "step": 3350 }, { "epoch": 3.012589928057554, "step": 3350, "torque_loss": 0.21507571637630463 }, { "epoch": 3.0215827338129495, "grad_norm": 0.4572644829750061, "learning_rate": 9.999021265785221e-05, "loss": 0.1768, "step": 3360 }, { "action_loss": 0.030014747753739357, "epoch": 3.0215827338129495, "step": 3360 }, { "epoch": 3.0215827338129495, "step": 3360, "torque_loss": 0.2302626222372055 }, { "epoch": 3.0305755395683454, "grad_norm": 0.5789883136749268, "learning_rate": 9.998965982698589e-05, "loss": 0.178, "step": 3370 }, { "action_loss": 0.025535881519317627, "epoch": 3.0305755395683454, "step": 3370 }, { "epoch": 3.0305755395683454, "step": 3370, "torque_loss": 0.1938336342573166 }, { "epoch": 3.039568345323741, "grad_norm": 0.6226507425308228, "learning_rate": 9.998909181058082e-05, "loss": 0.1555, "step": 3380 }, { "action_loss": 0.03841463848948479, "epoch": 3.039568345323741, "step": 3380 }, { "epoch": 3.039568345323741, "step": 3380, "torque_loss": 0.19621367752552032 }, { "epoch": 3.0485611510791366, "grad_norm": 0.8098050951957703, "learning_rate": 9.998850860880953e-05, "loss": 0.1606, "step": 3390 }, { "action_loss": 0.02712087333202362, "epoch": 3.0485611510791366, "step": 3390 }, { "epoch": 3.0485611510791366, "step": 3390, "torque_loss": 0.21225547790527344 }, { "epoch": 3.0575539568345325, "grad_norm": 0.8120296597480774, "learning_rate": 9.998791022184922e-05, "loss": 0.163, "step": 3400 }, { "action_loss": 0.015243013389408588, "epoch": 3.0575539568345325, "step": 3400 }, { "epoch": 3.0575539568345325, "step": 3400, "torque_loss": 0.114946149289608 }, { "epoch": 3.066546762589928, "grad_norm": 0.6503015160560608, "learning_rate": 9.99872966498816e-05, "loss": 0.1485, "step": 3410 }, { "action_loss": 0.032093435525894165, "epoch": 3.066546762589928, "step": 3410 }, { "epoch": 3.066546762589928, "step": 3410, "torque_loss": 0.19455619156360626 }, { "epoch": 3.0755395683453237, "grad_norm": 0.6964169144630432, "learning_rate": 9.998666789309313e-05, "loss": 0.1732, "step": 3420 }, { "action_loss": 0.013531864620745182, "epoch": 3.0755395683453237, "step": 3420 }, { "epoch": 3.0755395683453237, "step": 3420, "torque_loss": 0.16221188008785248 }, { "epoch": 3.0845323741007196, "grad_norm": 0.5183081030845642, "learning_rate": 9.998602395167475e-05, "loss": 0.1595, "step": 3430 }, { "action_loss": 0.02847466617822647, "epoch": 3.0845323741007196, "step": 3430 }, { "epoch": 3.0845323741007196, "step": 3430, "torque_loss": 0.2411622405052185 }, { "epoch": 3.093525179856115, "grad_norm": 0.702516496181488, "learning_rate": 9.998536482582213e-05, "loss": 0.1779, "step": 3440 }, { "action_loss": 0.028873315081000328, "epoch": 3.093525179856115, "step": 3440 }, { "epoch": 3.093525179856115, "step": 3440, "torque_loss": 0.1874125599861145 }, { "epoch": 3.102517985611511, "grad_norm": 0.7235767841339111, "learning_rate": 9.998469051573544e-05, "loss": 0.1667, "step": 3450 }, { "action_loss": 0.03556182608008385, "epoch": 3.102517985611511, "step": 3450 }, { "epoch": 3.102517985611511, "step": 3450, "torque_loss": 0.22525489330291748 }, { "epoch": 3.1115107913669067, "grad_norm": 0.669238805770874, "learning_rate": 9.998400102161954e-05, "loss": 0.1666, "step": 3460 }, { "action_loss": 0.025445343926548958, "epoch": 3.1115107913669067, "step": 3460 }, { "epoch": 3.1115107913669067, "step": 3460, "torque_loss": 0.21836459636688232 }, { "epoch": 3.120503597122302, "grad_norm": 0.6236045956611633, "learning_rate": 9.998329634368388e-05, "loss": 0.1643, "step": 3470 }, { "action_loss": 0.029471328482031822, "epoch": 3.120503597122302, "step": 3470 }, { "epoch": 3.120503597122302, "step": 3470, "torque_loss": 0.17869198322296143 }, { "epoch": 3.129496402877698, "grad_norm": 0.6385704278945923, "learning_rate": 9.998257648214253e-05, "loss": 0.1544, "step": 3480 }, { "action_loss": 0.04957398399710655, "epoch": 3.129496402877698, "step": 3480 }, { "epoch": 3.129496402877698, "step": 3480, "torque_loss": 0.2810024917125702 }, { "epoch": 3.1384892086330933, "grad_norm": 0.8745490908622742, "learning_rate": 9.998184143721417e-05, "loss": 0.1711, "step": 3490 }, { "action_loss": 0.03289538249373436, "epoch": 3.1384892086330933, "step": 3490 }, { "epoch": 3.1384892086330933, "step": 3490, "torque_loss": 0.23052020370960236 }, { "epoch": 3.147482014388489, "grad_norm": 0.7269488573074341, "learning_rate": 9.998109120912206e-05, "loss": 0.1645, "step": 3500 }, { "action_loss": 0.03949172422289848, "epoch": 3.147482014388489, "step": 3500 }, { "epoch": 3.147482014388489, "step": 3500, "torque_loss": 0.2724590599536896 }, { "epoch": 3.156474820143885, "grad_norm": 0.6740773320198059, "learning_rate": 9.998032579809411e-05, "loss": 0.1578, "step": 3510 }, { "action_loss": 0.020019732415676117, "epoch": 3.156474820143885, "step": 3510 }, { "epoch": 3.156474820143885, "step": 3510, "torque_loss": 0.240207239985466 }, { "epoch": 3.1654676258992804, "grad_norm": 0.6950735449790955, "learning_rate": 9.997954520436286e-05, "loss": 0.1705, "step": 3520 }, { "action_loss": 0.02063731662929058, "epoch": 3.1654676258992804, "step": 3520 }, { "epoch": 3.1654676258992804, "step": 3520, "torque_loss": 0.29937854409217834 }, { "epoch": 3.1744604316546763, "grad_norm": 0.6845229864120483, "learning_rate": 9.997874942816538e-05, "loss": 0.1499, "step": 3530 }, { "action_loss": 0.05345747992396355, "epoch": 3.1744604316546763, "step": 3530 }, { "epoch": 3.1744604316546763, "step": 3530, "torque_loss": 0.22740735113620758 }, { "epoch": 3.183453237410072, "grad_norm": 0.7476611733436584, "learning_rate": 9.997793846974345e-05, "loss": 0.1827, "step": 3540 }, { "action_loss": 0.021720165386795998, "epoch": 3.183453237410072, "step": 3540 }, { "epoch": 3.183453237410072, "step": 3540, "torque_loss": 0.11882876604795456 }, { "epoch": 3.1924460431654675, "grad_norm": 0.9152717590332031, "learning_rate": 9.997711232934341e-05, "loss": 0.1538, "step": 3550 }, { "action_loss": 0.036371443420648575, "epoch": 3.1924460431654675, "step": 3550 }, { "epoch": 3.1924460431654675, "step": 3550, "torque_loss": 0.29139789938926697 }, { "epoch": 3.2014388489208634, "grad_norm": 0.4673975706100464, "learning_rate": 9.99762710072162e-05, "loss": 0.1633, "step": 3560 }, { "action_loss": 0.03934439644217491, "epoch": 3.2014388489208634, "step": 3560 }, { "epoch": 3.2014388489208634, "step": 3560, "torque_loss": 0.2281150072813034 }, { "epoch": 3.210431654676259, "grad_norm": 0.5388296246528625, "learning_rate": 9.997541450361743e-05, "loss": 0.1586, "step": 3570 }, { "action_loss": 0.05487588047981262, "epoch": 3.210431654676259, "step": 3570 }, { "epoch": 3.210431654676259, "step": 3570, "torque_loss": 0.251565545797348 }, { "epoch": 3.2194244604316546, "grad_norm": 0.7149212956428528, "learning_rate": 9.997454281880723e-05, "loss": 0.173, "step": 3580 }, { "action_loss": 0.015623525716364384, "epoch": 3.2194244604316546, "step": 3580 }, { "epoch": 3.2194244604316546, "step": 3580, "torque_loss": 0.13801313936710358 }, { "epoch": 3.2284172661870505, "grad_norm": 0.738354504108429, "learning_rate": 9.997365595305044e-05, "loss": 0.1615, "step": 3590 }, { "action_loss": 0.05091744661331177, "epoch": 3.2284172661870505, "step": 3590 }, { "epoch": 3.2284172661870505, "step": 3590, "torque_loss": 0.3991478979587555 }, { "epoch": 3.237410071942446, "grad_norm": 0.8139817118644714, "learning_rate": 9.997275390661644e-05, "loss": 0.1578, "step": 3600 }, { "action_loss": 0.020093463361263275, "epoch": 3.237410071942446, "step": 3600 }, { "epoch": 3.237410071942446, "step": 3600, "torque_loss": 0.14820335805416107 }, { "epoch": 3.2464028776978417, "grad_norm": 0.708817720413208, "learning_rate": 9.997183667977926e-05, "loss": 0.1636, "step": 3610 }, { "action_loss": 0.019221549853682518, "epoch": 3.2464028776978417, "step": 3610 }, { "epoch": 3.2464028776978417, "step": 3610, "torque_loss": 0.2165941745042801 }, { "epoch": 3.2553956834532376, "grad_norm": 0.6555910110473633, "learning_rate": 9.997090427281752e-05, "loss": 0.1559, "step": 3620 }, { "action_loss": 0.035430509597063065, "epoch": 3.2553956834532376, "step": 3620 }, { "epoch": 3.2553956834532376, "step": 3620, "torque_loss": 0.21334342658519745 }, { "epoch": 3.264388489208633, "grad_norm": 0.5643985271453857, "learning_rate": 9.996995668601448e-05, "loss": 0.166, "step": 3630 }, { "action_loss": 0.04423769190907478, "epoch": 3.264388489208633, "step": 3630 }, { "epoch": 3.264388489208633, "step": 3630, "torque_loss": 0.22323495149612427 }, { "epoch": 3.273381294964029, "grad_norm": 0.6168771982192993, "learning_rate": 9.996899391965798e-05, "loss": 0.1363, "step": 3640 }, { "action_loss": 0.03549988940358162, "epoch": 3.273381294964029, "step": 3640 }, { "epoch": 3.273381294964029, "step": 3640, "torque_loss": 0.25340354442596436 }, { "epoch": 3.2823741007194247, "grad_norm": 0.5054361820220947, "learning_rate": 9.996801597404048e-05, "loss": 0.1702, "step": 3650 }, { "action_loss": 0.030335085466504097, "epoch": 3.2823741007194247, "step": 3650 }, { "epoch": 3.2823741007194247, "step": 3650, "torque_loss": 0.2975369095802307 }, { "epoch": 3.29136690647482, "grad_norm": 0.6188281774520874, "learning_rate": 9.996702284945905e-05, "loss": 0.1506, "step": 3660 }, { "action_loss": 0.02726396918296814, "epoch": 3.29136690647482, "step": 3660 }, { "epoch": 3.29136690647482, "step": 3660, "torque_loss": 0.23064494132995605 }, { "epoch": 3.300359712230216, "grad_norm": 0.7555333375930786, "learning_rate": 9.996601454621539e-05, "loss": 0.1669, "step": 3670 }, { "action_loss": 0.022011086344718933, "epoch": 3.300359712230216, "step": 3670 }, { "epoch": 3.300359712230216, "step": 3670, "torque_loss": 0.2239406257867813 }, { "epoch": 3.3093525179856114, "grad_norm": 0.6106038689613342, "learning_rate": 9.996499106461577e-05, "loss": 0.1522, "step": 3680 }, { "action_loss": 0.016639990732073784, "epoch": 3.3093525179856114, "step": 3680 }, { "epoch": 3.3093525179856114, "step": 3680, "torque_loss": 0.16177964210510254 }, { "epoch": 3.318345323741007, "grad_norm": 0.6518740653991699, "learning_rate": 9.996395240497112e-05, "loss": 0.1594, "step": 3690 }, { "action_loss": 0.03151654824614525, "epoch": 3.318345323741007, "step": 3690 }, { "epoch": 3.318345323741007, "step": 3690, "torque_loss": 0.22145068645477295 }, { "epoch": 3.327338129496403, "grad_norm": 0.703041136264801, "learning_rate": 9.996289856759696e-05, "loss": 0.1619, "step": 3700 }, { "action_loss": 0.0610877126455307, "epoch": 3.327338129496403, "step": 3700 }, { "epoch": 3.327338129496403, "step": 3700, "torque_loss": 0.21703112125396729 }, { "epoch": 3.3363309352517985, "grad_norm": 0.6544398069381714, "learning_rate": 9.996182955281342e-05, "loss": 0.1668, "step": 3710 }, { "action_loss": 0.053961317986249924, "epoch": 3.3363309352517985, "step": 3710 }, { "epoch": 3.3363309352517985, "step": 3710, "torque_loss": 0.31532713770866394 }, { "epoch": 3.3453237410071943, "grad_norm": 0.7850503921508789, "learning_rate": 9.996074536094519e-05, "loss": 0.1718, "step": 3720 }, { "action_loss": 0.04034365341067314, "epoch": 3.3453237410071943, "step": 3720 }, { "epoch": 3.3453237410071943, "step": 3720, "torque_loss": 0.26011037826538086 }, { "epoch": 3.3543165467625897, "grad_norm": 0.6600376963615417, "learning_rate": 9.995964599232168e-05, "loss": 0.152, "step": 3730 }, { "action_loss": 0.04485657438635826, "epoch": 3.3543165467625897, "step": 3730 }, { "epoch": 3.3543165467625897, "step": 3730, "torque_loss": 0.29549235105514526 }, { "epoch": 3.3633093525179856, "grad_norm": 0.6431972980499268, "learning_rate": 9.995853144727683e-05, "loss": 0.165, "step": 3740 }, { "action_loss": 0.031164348125457764, "epoch": 3.3633093525179856, "step": 3740 }, { "epoch": 3.3633093525179856, "step": 3740, "torque_loss": 0.27727141976356506 }, { "epoch": 3.3723021582733814, "grad_norm": 0.8523962497711182, "learning_rate": 9.99574017261492e-05, "loss": 0.1569, "step": 3750 }, { "action_loss": 0.020992735400795937, "epoch": 3.3723021582733814, "step": 3750 }, { "epoch": 3.3723021582733814, "step": 3750, "torque_loss": 0.18412023782730103 }, { "epoch": 3.381294964028777, "grad_norm": 0.5939038395881653, "learning_rate": 9.995625682928198e-05, "loss": 0.1441, "step": 3760 }, { "action_loss": 0.01799250952899456, "epoch": 3.381294964028777, "step": 3760 }, { "epoch": 3.381294964028777, "step": 3760, "torque_loss": 0.24543572962284088 }, { "epoch": 3.3902877697841727, "grad_norm": 0.6511802673339844, "learning_rate": 9.995509675702295e-05, "loss": 0.1409, "step": 3770 }, { "action_loss": 0.01479797437787056, "epoch": 3.3902877697841727, "step": 3770 }, { "epoch": 3.3902877697841727, "step": 3770, "torque_loss": 0.1902928352355957 }, { "epoch": 3.3992805755395685, "grad_norm": 0.6929193735122681, "learning_rate": 9.995392150972451e-05, "loss": 0.1463, "step": 3780 }, { "action_loss": 0.058321867138147354, "epoch": 3.3992805755395685, "step": 3780 }, { "epoch": 3.3992805755395685, "step": 3780, "torque_loss": 0.2530662715435028 }, { "epoch": 3.408273381294964, "grad_norm": 0.637980043888092, "learning_rate": 9.995273108774366e-05, "loss": 0.1602, "step": 3790 }, { "action_loss": 0.013070132583379745, "epoch": 3.408273381294964, "step": 3790 }, { "epoch": 3.408273381294964, "step": 3790, "torque_loss": 0.209986612200737 }, { "epoch": 3.41726618705036, "grad_norm": 0.45474451780319214, "learning_rate": 9.995152549144205e-05, "loss": 0.1728, "step": 3800 }, { "action_loss": 0.0333232544362545, "epoch": 3.41726618705036, "step": 3800 }, { "epoch": 3.41726618705036, "step": 3800, "torque_loss": 0.2586748003959656 }, { "epoch": 3.4262589928057556, "grad_norm": 0.7453661561012268, "learning_rate": 9.995030472118587e-05, "loss": 0.1631, "step": 3810 }, { "action_loss": 0.025324106216430664, "epoch": 3.4262589928057556, "step": 3810 }, { "epoch": 3.4262589928057556, "step": 3810, "torque_loss": 0.2057429552078247 }, { "epoch": 3.435251798561151, "grad_norm": 0.5804601311683655, "learning_rate": 9.9949068777346e-05, "loss": 0.1666, "step": 3820 }, { "action_loss": 0.015803875401616096, "epoch": 3.435251798561151, "step": 3820 }, { "epoch": 3.435251798561151, "step": 3820, "torque_loss": 0.24794626235961914 }, { "epoch": 3.444244604316547, "grad_norm": 0.7009686231613159, "learning_rate": 9.994781766029786e-05, "loss": 0.1571, "step": 3830 }, { "action_loss": 0.050871122628450394, "epoch": 3.444244604316547, "step": 3830 }, { "epoch": 3.444244604316547, "step": 3830, "torque_loss": 0.3511470854282379 }, { "epoch": 3.4532374100719423, "grad_norm": 0.5677936673164368, "learning_rate": 9.994655137042151e-05, "loss": 0.1774, "step": 3840 }, { "action_loss": 0.057148948311805725, "epoch": 3.4532374100719423, "step": 3840 }, { "epoch": 3.4532374100719423, "step": 3840, "torque_loss": 0.26383838057518005 }, { "epoch": 3.462230215827338, "grad_norm": 0.6129845976829529, "learning_rate": 9.99452699081016e-05, "loss": 0.1545, "step": 3850 }, { "action_loss": 0.023469993844628334, "epoch": 3.462230215827338, "step": 3850 }, { "epoch": 3.462230215827338, "step": 3850, "torque_loss": 0.17072610557079315 }, { "epoch": 3.471223021582734, "grad_norm": 0.5939493179321289, "learning_rate": 9.994397327372743e-05, "loss": 0.1695, "step": 3860 }, { "action_loss": 0.02528201974928379, "epoch": 3.471223021582734, "step": 3860 }, { "epoch": 3.471223021582734, "step": 3860, "torque_loss": 0.2828733026981354 }, { "epoch": 3.4802158273381294, "grad_norm": 0.6165569424629211, "learning_rate": 9.994266146769286e-05, "loss": 0.1708, "step": 3870 }, { "action_loss": 0.015489608980715275, "epoch": 3.4802158273381294, "step": 3870 }, { "epoch": 3.4802158273381294, "step": 3870, "torque_loss": 0.16849462687969208 }, { "epoch": 3.4892086330935252, "grad_norm": 0.7076467275619507, "learning_rate": 9.994133449039642e-05, "loss": 0.1568, "step": 3880 }, { "action_loss": 0.018005935475230217, "epoch": 3.4892086330935252, "step": 3880 }, { "epoch": 3.4892086330935252, "step": 3880, "torque_loss": 0.20920109748840332 }, { "epoch": 3.4982014388489207, "grad_norm": 0.8013393878936768, "learning_rate": 9.993999234224118e-05, "loss": 0.1646, "step": 3890 }, { "action_loss": 0.036220941692590714, "epoch": 3.4982014388489207, "step": 3890 }, { "epoch": 3.4982014388489207, "step": 3890, "torque_loss": 0.26025936007499695 }, { "epoch": 3.5071942446043165, "grad_norm": 0.8663239479064941, "learning_rate": 9.993863502363485e-05, "loss": 0.15, "step": 3900 }, { "action_loss": 0.026436910033226013, "epoch": 3.5071942446043165, "step": 3900 }, { "epoch": 3.5071942446043165, "step": 3900, "torque_loss": 0.22075814008712769 }, { "epoch": 3.5161870503597124, "grad_norm": 0.7398295402526855, "learning_rate": 9.993726253498976e-05, "loss": 0.138, "step": 3910 }, { "action_loss": 0.016353776678442955, "epoch": 3.5161870503597124, "step": 3910 }, { "epoch": 3.5161870503597124, "step": 3910, "torque_loss": 0.14437468349933624 }, { "epoch": 3.5251798561151078, "grad_norm": 0.9377062320709229, "learning_rate": 9.993587487672282e-05, "loss": 0.1403, "step": 3920 }, { "action_loss": 0.023662908002734184, "epoch": 3.5251798561151078, "step": 3920 }, { "epoch": 3.5251798561151078, "step": 3920, "torque_loss": 0.26449456810951233 }, { "epoch": 3.5341726618705036, "grad_norm": 0.7559934854507446, "learning_rate": 9.993447204925558e-05, "loss": 0.1739, "step": 3930 }, { "action_loss": 0.039845388382673264, "epoch": 3.5341726618705036, "step": 3930 }, { "epoch": 3.5341726618705036, "step": 3930, "torque_loss": 0.2419026643037796 }, { "epoch": 3.543165467625899, "grad_norm": 0.7336196899414062, "learning_rate": 9.993305405301416e-05, "loss": 0.1827, "step": 3940 }, { "action_loss": 0.02796604298055172, "epoch": 3.543165467625899, "step": 3940 }, { "epoch": 3.543165467625899, "step": 3940, "torque_loss": 0.21675324440002441 }, { "epoch": 3.552158273381295, "grad_norm": 0.6412901282310486, "learning_rate": 9.993162088842935e-05, "loss": 0.1439, "step": 3950 }, { "action_loss": 0.050373177975416183, "epoch": 3.552158273381295, "step": 3950 }, { "epoch": 3.552158273381295, "step": 3950, "torque_loss": 0.3036406338214874 }, { "epoch": 3.5611510791366907, "grad_norm": 0.6730105876922607, "learning_rate": 9.993017255593646e-05, "loss": 0.1816, "step": 3960 }, { "action_loss": 0.020885786041617393, "epoch": 3.5611510791366907, "step": 3960 }, { "epoch": 3.5611510791366907, "step": 3960, "torque_loss": 0.1734883338212967 }, { "epoch": 3.5701438848920866, "grad_norm": 0.7729699611663818, "learning_rate": 9.992870905597548e-05, "loss": 0.1669, "step": 3970 }, { "action_loss": 0.02965478040277958, "epoch": 3.5701438848920866, "step": 3970 }, { "epoch": 3.5701438848920866, "step": 3970, "torque_loss": 0.20710110664367676 }, { "epoch": 3.579136690647482, "grad_norm": 0.8205403089523315, "learning_rate": 9.9927230388991e-05, "loss": 0.1559, "step": 3980 }, { "action_loss": 0.024399898946285248, "epoch": 3.579136690647482, "step": 3980 }, { "epoch": 3.579136690647482, "step": 3980, "torque_loss": 0.21590656042099 }, { "epoch": 3.588129496402878, "grad_norm": 0.6529257893562317, "learning_rate": 9.992573655543215e-05, "loss": 0.1583, "step": 3990 }, { "action_loss": 0.021138211712241173, "epoch": 3.588129496402878, "step": 3990 }, { "epoch": 3.588129496402878, "step": 3990, "torque_loss": 0.17043472826480865 }, { "epoch": 3.597122302158273, "grad_norm": 0.582210898399353, "learning_rate": 9.992422755575277e-05, "loss": 0.1389, "step": 4000 }, { "action_loss": 0.04525167867541313, "epoch": 3.597122302158273, "step": 4000 }, { "epoch": 3.597122302158273, "step": 4000, "torque_loss": 0.24272514879703522 }, { "epoch": 3.606115107913669, "grad_norm": 0.6165055632591248, "learning_rate": 9.992270339041123e-05, "loss": 0.1529, "step": 4010 }, { "action_loss": 0.017860351130366325, "epoch": 3.606115107913669, "step": 4010 }, { "epoch": 3.606115107913669, "step": 4010, "torque_loss": 0.25204554200172424 }, { "epoch": 3.615107913669065, "grad_norm": 0.6889089941978455, "learning_rate": 9.992116405987053e-05, "loss": 0.148, "step": 4020 }, { "action_loss": 0.03520755097270012, "epoch": 3.615107913669065, "step": 4020 }, { "epoch": 3.615107913669065, "step": 4020, "torque_loss": 0.21806569397449493 }, { "epoch": 3.6241007194244603, "grad_norm": 0.6763757467269897, "learning_rate": 9.991960956459828e-05, "loss": 0.1585, "step": 4030 }, { "action_loss": 0.02430984377861023, "epoch": 3.6241007194244603, "step": 4030 }, { "epoch": 3.6241007194244603, "step": 4030, "torque_loss": 0.1834084838628769 }, { "epoch": 3.633093525179856, "grad_norm": 0.6306350231170654, "learning_rate": 9.991803990506669e-05, "loss": 0.1676, "step": 4040 }, { "action_loss": 0.02154175378382206, "epoch": 3.633093525179856, "step": 4040 }, { "epoch": 3.633093525179856, "step": 4040, "torque_loss": 0.19273845851421356 }, { "epoch": 3.6420863309352516, "grad_norm": 0.5946177840232849, "learning_rate": 9.991645508175258e-05, "loss": 0.1704, "step": 4050 }, { "action_loss": 0.09357108920812607, "epoch": 3.6420863309352516, "step": 4050 }, { "epoch": 3.6420863309352516, "step": 4050, "torque_loss": 0.310708612203598 }, { "epoch": 3.6510791366906474, "grad_norm": 0.5814290642738342, "learning_rate": 9.99148550951374e-05, "loss": 0.1514, "step": 4060 }, { "action_loss": 0.0159620214253664, "epoch": 3.6510791366906474, "step": 4060 }, { "epoch": 3.6510791366906474, "step": 4060, "torque_loss": 0.2639828622341156 }, { "epoch": 3.6600719424460433, "grad_norm": 0.8040772676467896, "learning_rate": 9.991323994570716e-05, "loss": 0.1839, "step": 4070 }, { "action_loss": 0.017114395275712013, "epoch": 3.6600719424460433, "step": 4070 }, { "epoch": 3.6600719424460433, "step": 4070, "torque_loss": 0.13883236050605774 }, { "epoch": 3.6690647482014387, "grad_norm": 0.7416817545890808, "learning_rate": 9.99116096339525e-05, "loss": 0.161, "step": 4080 }, { "action_loss": 0.04111438989639282, "epoch": 3.6690647482014387, "step": 4080 }, { "epoch": 3.6690647482014387, "step": 4080, "torque_loss": 0.2873859405517578 }, { "epoch": 3.6780575539568345, "grad_norm": 0.8287255167961121, "learning_rate": 9.990996416036869e-05, "loss": 0.1582, "step": 4090 }, { "action_loss": 0.029431244358420372, "epoch": 3.6780575539568345, "step": 4090 }, { "epoch": 3.6780575539568345, "step": 4090, "torque_loss": 0.18149615824222565 }, { "epoch": 3.68705035971223, "grad_norm": 0.7998374700546265, "learning_rate": 9.990830352545555e-05, "loss": 0.1529, "step": 4100 }, { "action_loss": 0.013056258670985699, "epoch": 3.68705035971223, "step": 4100 }, { "epoch": 3.68705035971223, "step": 4100, "torque_loss": 0.1123645231127739 }, { "epoch": 3.696043165467626, "grad_norm": 0.6469091176986694, "learning_rate": 9.990662772971756e-05, "loss": 0.1293, "step": 4110 }, { "action_loss": 0.04452994093298912, "epoch": 3.696043165467626, "step": 4110 }, { "epoch": 3.696043165467626, "step": 4110, "torque_loss": 0.3020593822002411 }, { "epoch": 3.7050359712230216, "grad_norm": 1.0034425258636475, "learning_rate": 9.990493677366376e-05, "loss": 0.1547, "step": 4120 }, { "action_loss": 0.02899215556681156, "epoch": 3.7050359712230216, "step": 4120 }, { "epoch": 3.7050359712230216, "step": 4120, "torque_loss": 0.2035144567489624 }, { "epoch": 3.7140287769784175, "grad_norm": 0.5836743116378784, "learning_rate": 9.990323065780786e-05, "loss": 0.1425, "step": 4130 }, { "action_loss": 0.026828140020370483, "epoch": 3.7140287769784175, "step": 4130 }, { "epoch": 3.7140287769784175, "step": 4130, "torque_loss": 0.16946227848529816 }, { "epoch": 3.723021582733813, "grad_norm": 0.6180920600891113, "learning_rate": 9.990150938266808e-05, "loss": 0.1569, "step": 4140 }, { "action_loss": 0.030163874849677086, "epoch": 3.723021582733813, "step": 4140 }, { "epoch": 3.723021582733813, "step": 4140, "torque_loss": 0.19983188807964325 }, { "epoch": 3.7320143884892087, "grad_norm": 0.6022657752037048, "learning_rate": 9.989977294876733e-05, "loss": 0.1553, "step": 4150 }, { "action_loss": 0.025800002738833427, "epoch": 3.7320143884892087, "step": 4150 }, { "epoch": 3.7320143884892087, "step": 4150, "torque_loss": 0.21198010444641113 }, { "epoch": 3.741007194244604, "grad_norm": 0.5502544045448303, "learning_rate": 9.989802135663308e-05, "loss": 0.1339, "step": 4160 }, { "action_loss": 0.04976429417729378, "epoch": 3.741007194244604, "step": 4160 }, { "epoch": 3.741007194244604, "step": 4160, "torque_loss": 0.2514127194881439 }, { "epoch": 3.75, "grad_norm": 0.5433077812194824, "learning_rate": 9.989625460679743e-05, "loss": 0.1382, "step": 4170 }, { "action_loss": 0.04029426351189613, "epoch": 3.75, "step": 4170 }, { "epoch": 3.75, "step": 4170, "torque_loss": 0.2730608284473419 }, { "epoch": 3.758992805755396, "grad_norm": 0.5370930433273315, "learning_rate": 9.989447269979706e-05, "loss": 0.1475, "step": 4180 }, { "action_loss": 0.027814289554953575, "epoch": 3.758992805755396, "step": 4180 }, { "epoch": 3.758992805755396, "step": 4180, "torque_loss": 0.21030867099761963 }, { "epoch": 3.7679856115107913, "grad_norm": 0.5663517713546753, "learning_rate": 9.989267563617328e-05, "loss": 0.1472, "step": 4190 }, { "action_loss": 0.008376700803637505, "epoch": 3.7679856115107913, "step": 4190 }, { "epoch": 3.7679856115107913, "step": 4190, "torque_loss": 0.12208351492881775 }, { "epoch": 3.776978417266187, "grad_norm": 0.6702066659927368, "learning_rate": 9.989086341647198e-05, "loss": 0.1375, "step": 4200 }, { "action_loss": 0.03607834875583649, "epoch": 3.776978417266187, "step": 4200 }, { "epoch": 3.776978417266187, "step": 4200, "torque_loss": 0.23236925899982452 }, { "epoch": 3.7859712230215825, "grad_norm": 0.5242424011230469, "learning_rate": 9.988903604124366e-05, "loss": 0.1385, "step": 4210 }, { "action_loss": 0.013741972856223583, "epoch": 3.7859712230215825, "step": 4210 }, { "epoch": 3.7859712230215825, "step": 4210, "torque_loss": 0.17153899371623993 }, { "epoch": 3.7949640287769784, "grad_norm": 0.5670526623725891, "learning_rate": 9.988719351104343e-05, "loss": 0.1481, "step": 4220 }, { "action_loss": 0.019346686080098152, "epoch": 3.7949640287769784, "step": 4220 }, { "epoch": 3.7949640287769784, "step": 4220, "torque_loss": 0.17669928073883057 }, { "epoch": 3.803956834532374, "grad_norm": 0.6788485050201416, "learning_rate": 9.9885335826431e-05, "loss": 0.1494, "step": 4230 }, { "action_loss": 0.018203943967819214, "epoch": 3.803956834532374, "step": 4230 }, { "epoch": 3.803956834532374, "step": 4230, "torque_loss": 0.15677039325237274 }, { "epoch": 3.81294964028777, "grad_norm": 0.6910423040390015, "learning_rate": 9.988346298797071e-05, "loss": 0.1345, "step": 4240 }, { "action_loss": 0.02663491852581501, "epoch": 3.81294964028777, "step": 4240 }, { "epoch": 3.81294964028777, "step": 4240, "torque_loss": 0.20852720737457275 }, { "epoch": 3.8219424460431655, "grad_norm": 0.5988829731941223, "learning_rate": 9.988157499623146e-05, "loss": 0.1738, "step": 4250 }, { "action_loss": 0.023429477587342262, "epoch": 3.8219424460431655, "step": 4250 }, { "epoch": 3.8219424460431655, "step": 4250, "torque_loss": 0.19496536254882812 }, { "epoch": 3.8309352517985613, "grad_norm": 0.6244550943374634, "learning_rate": 9.987967185178677e-05, "loss": 0.1326, "step": 4260 }, { "action_loss": 0.024983035400509834, "epoch": 3.8309352517985613, "step": 4260 }, { "epoch": 3.8309352517985613, "step": 4260, "torque_loss": 0.21768300235271454 }, { "epoch": 3.8399280575539567, "grad_norm": 0.4387204945087433, "learning_rate": 9.987775355521476e-05, "loss": 0.1627, "step": 4270 }, { "action_loss": 0.012552856467664242, "epoch": 3.8399280575539567, "step": 4270 }, { "epoch": 3.8399280575539567, "step": 4270, "torque_loss": 0.19961251318454742 }, { "epoch": 3.8489208633093526, "grad_norm": 0.8260754346847534, "learning_rate": 9.987582010709817e-05, "loss": 0.1572, "step": 4280 }, { "action_loss": 0.07541808485984802, "epoch": 3.8489208633093526, "step": 4280 }, { "epoch": 3.8489208633093526, "step": 4280, "torque_loss": 0.3352765738964081 }, { "epoch": 3.8579136690647484, "grad_norm": 0.6814550161361694, "learning_rate": 9.987387150802431e-05, "loss": 0.1991, "step": 4290 }, { "action_loss": 0.012470086105167866, "epoch": 3.8579136690647484, "step": 4290 }, { "epoch": 3.8579136690647484, "step": 4290, "torque_loss": 0.19502033293247223 }, { "epoch": 3.866906474820144, "grad_norm": 0.5860317349433899, "learning_rate": 9.987190775858517e-05, "loss": 0.1342, "step": 4300 }, { "action_loss": 0.021741673350334167, "epoch": 3.866906474820144, "step": 4300 }, { "epoch": 3.866906474820144, "step": 4300, "torque_loss": 0.20000743865966797 }, { "epoch": 3.8758992805755397, "grad_norm": 0.7478262782096863, "learning_rate": 9.98699288593772e-05, "loss": 0.152, "step": 4310 }, { "action_loss": 0.01746954768896103, "epoch": 3.8758992805755397, "step": 4310 }, { "epoch": 3.8758992805755397, "step": 4310, "torque_loss": 0.1999153345823288 }, { "epoch": 3.884892086330935, "grad_norm": 0.5581755042076111, "learning_rate": 9.986793481100161e-05, "loss": 0.1569, "step": 4320 }, { "action_loss": 0.04933272674679756, "epoch": 3.884892086330935, "step": 4320 }, { "epoch": 3.884892086330935, "step": 4320, "torque_loss": 0.29448142647743225 }, { "epoch": 3.893884892086331, "grad_norm": 0.7816908955574036, "learning_rate": 9.986592561406412e-05, "loss": 0.149, "step": 4330 }, { "action_loss": 0.01940774917602539, "epoch": 3.893884892086331, "step": 4330 }, { "epoch": 3.893884892086331, "step": 4330, "torque_loss": 0.15239548683166504 }, { "epoch": 3.902877697841727, "grad_norm": 0.6837709546089172, "learning_rate": 9.986390126917503e-05, "loss": 0.1332, "step": 4340 }, { "action_loss": 0.020224491134285927, "epoch": 3.902877697841727, "step": 4340 }, { "epoch": 3.902877697841727, "step": 4340, "torque_loss": 0.1799001693725586 }, { "epoch": 3.911870503597122, "grad_norm": 0.5418379306793213, "learning_rate": 9.986186177694933e-05, "loss": 0.1365, "step": 4350 }, { "action_loss": 0.03507109358906746, "epoch": 3.911870503597122, "step": 4350 }, { "epoch": 3.911870503597122, "step": 4350, "torque_loss": 0.2329397201538086 }, { "epoch": 3.920863309352518, "grad_norm": 0.6915778517723083, "learning_rate": 9.985980713800656e-05, "loss": 0.1804, "step": 4360 }, { "action_loss": 0.014803054742515087, "epoch": 3.920863309352518, "step": 4360 }, { "epoch": 3.920863309352518, "step": 4360, "torque_loss": 0.09777305275201797 }, { "epoch": 3.9298561151079134, "grad_norm": 0.708846926689148, "learning_rate": 9.985773735297084e-05, "loss": 0.1281, "step": 4370 }, { "action_loss": 0.02440345101058483, "epoch": 3.9298561151079134, "step": 4370 }, { "epoch": 3.9298561151079134, "step": 4370, "torque_loss": 0.21750831604003906 }, { "epoch": 3.9388489208633093, "grad_norm": 0.6217613220214844, "learning_rate": 9.985565242247092e-05, "loss": 0.1507, "step": 4380 }, { "action_loss": 0.03284168615937233, "epoch": 3.9388489208633093, "step": 4380 }, { "epoch": 3.9388489208633093, "step": 4380, "torque_loss": 0.27241232991218567 }, { "epoch": 3.947841726618705, "grad_norm": 0.5560563206672668, "learning_rate": 9.985355234714016e-05, "loss": 0.1573, "step": 4390 }, { "action_loss": 0.013472981750965118, "epoch": 3.947841726618705, "step": 4390 }, { "epoch": 3.947841726618705, "step": 4390, "torque_loss": 0.1650732457637787 }, { "epoch": 3.956834532374101, "grad_norm": 0.662022590637207, "learning_rate": 9.985143712761652e-05, "loss": 0.1273, "step": 4400 }, { "action_loss": 0.02087295614182949, "epoch": 3.956834532374101, "step": 4400 }, { "epoch": 3.956834532374101, "step": 4400, "torque_loss": 0.1388367861509323 }, { "epoch": 3.9658273381294964, "grad_norm": 0.6145968437194824, "learning_rate": 9.984930676454252e-05, "loss": 0.1468, "step": 4410 }, { "action_loss": 0.03045114316046238, "epoch": 3.9658273381294964, "step": 4410 }, { "epoch": 3.9658273381294964, "step": 4410, "torque_loss": 0.26670947670936584 }, { "epoch": 3.9748201438848922, "grad_norm": 0.81181800365448, "learning_rate": 9.984716125856532e-05, "loss": 0.1482, "step": 4420 }, { "action_loss": 0.016581743955612183, "epoch": 3.9748201438848922, "step": 4420 }, { "epoch": 3.9748201438848922, "step": 4420, "torque_loss": 0.14960306882858276 }, { "epoch": 3.9838129496402876, "grad_norm": 0.7950723767280579, "learning_rate": 9.984500061033667e-05, "loss": 0.1345, "step": 4430 }, { "action_loss": 0.020091407001018524, "epoch": 3.9838129496402876, "step": 4430 }, { "epoch": 3.9838129496402876, "step": 4430, "torque_loss": 0.17451155185699463 }, { "epoch": 3.9928057553956835, "grad_norm": 0.8703324794769287, "learning_rate": 9.984282482051293e-05, "loss": 0.158, "step": 4440 }, { "action_loss": 0.05302907153964043, "epoch": 3.9928057553956835, "step": 4440 }, { "epoch": 3.9928057553956835, "step": 4440, "torque_loss": 0.32844358682632446 }, { "epoch": 4.001798561151079, "grad_norm": 0.5864046216011047, "learning_rate": 9.9840633889755e-05, "loss": 0.1486, "step": 4450 }, { "action_loss": 0.01273049134761095, "epoch": 4.001798561151079, "step": 4450 }, { "epoch": 4.001798561151079, "step": 4450, "torque_loss": 0.1867072582244873 }, { "epoch": 4.010791366906475, "grad_norm": 0.603525698184967, "learning_rate": 9.983842781872848e-05, "loss": 0.1586, "step": 4460 }, { "action_loss": 0.025639409199357033, "epoch": 4.010791366906475, "step": 4460 }, { "epoch": 4.010791366906475, "step": 4460, "torque_loss": 0.1666850596666336 }, { "epoch": 4.01978417266187, "grad_norm": 0.7813773155212402, "learning_rate": 9.98362066081035e-05, "loss": 0.1533, "step": 4470 }, { "action_loss": 0.034973446279764175, "epoch": 4.01978417266187, "step": 4470 }, { "epoch": 4.01978417266187, "step": 4470, "torque_loss": 0.1945546418428421 }, { "epoch": 4.028776978417266, "grad_norm": 0.6397462487220764, "learning_rate": 9.983397025855479e-05, "loss": 0.1326, "step": 4480 }, { "action_loss": 0.041529640555381775, "epoch": 4.028776978417266, "step": 4480 }, { "epoch": 4.028776978417266, "step": 4480, "torque_loss": 0.265989750623703 }, { "epoch": 4.037769784172662, "grad_norm": 0.5219550728797913, "learning_rate": 9.983171877076171e-05, "loss": 0.1467, "step": 4490 }, { "action_loss": 0.07969601452350616, "epoch": 4.037769784172662, "step": 4490 }, { "epoch": 4.037769784172662, "step": 4490, "torque_loss": 0.24267548322677612 }, { "epoch": 4.046762589928058, "grad_norm": 0.8426265120506287, "learning_rate": 9.98294521454082e-05, "loss": 0.1804, "step": 4500 }, { "action_loss": 0.018208418041467667, "epoch": 4.046762589928058, "step": 4500 }, { "epoch": 4.046762589928058, "step": 4500, "torque_loss": 0.13963933289051056 }, { "epoch": 4.055755395683454, "grad_norm": 0.64494788646698, "learning_rate": 9.98271703831828e-05, "loss": 0.1488, "step": 4510 }, { "action_loss": 0.03937200829386711, "epoch": 4.055755395683454, "step": 4510 }, { "epoch": 4.055755395683454, "step": 4510, "torque_loss": 0.26095038652420044 }, { "epoch": 4.0647482014388485, "grad_norm": 0.5073079466819763, "learning_rate": 9.982487348477865e-05, "loss": 0.1719, "step": 4520 }, { "action_loss": 0.049027878791093826, "epoch": 4.0647482014388485, "step": 4520 }, { "epoch": 4.0647482014388485, "step": 4520, "torque_loss": 0.22285199165344238 }, { "epoch": 4.073741007194244, "grad_norm": 0.6302840113639832, "learning_rate": 9.982256145089347e-05, "loss": 0.1501, "step": 4530 }, { "action_loss": 0.02102654241025448, "epoch": 4.073741007194244, "step": 4530 }, { "epoch": 4.073741007194244, "step": 4530, "torque_loss": 0.1349773406982422 }, { "epoch": 4.08273381294964, "grad_norm": 0.7318467497825623, "learning_rate": 9.982023428222962e-05, "loss": 0.1374, "step": 4540 }, { "action_loss": 0.06424128264188766, "epoch": 4.08273381294964, "step": 4540 }, { "epoch": 4.08273381294964, "step": 4540, "torque_loss": 0.31402671337127686 }, { "epoch": 4.091726618705036, "grad_norm": 0.4468785524368286, "learning_rate": 9.981789197949403e-05, "loss": 0.1565, "step": 4550 }, { "action_loss": 0.04947575926780701, "epoch": 4.091726618705036, "step": 4550 }, { "epoch": 4.091726618705036, "step": 4550, "torque_loss": 0.3256961703300476 }, { "epoch": 4.100719424460432, "grad_norm": 0.6995185613632202, "learning_rate": 9.98155345433982e-05, "loss": 0.1582, "step": 4560 }, { "action_loss": 0.0519779734313488, "epoch": 4.100719424460432, "step": 4560 }, { "epoch": 4.100719424460432, "step": 4560, "torque_loss": 0.26802995800971985 }, { "epoch": 4.109712230215828, "grad_norm": 0.6949416399002075, "learning_rate": 9.981316197465831e-05, "loss": 0.1466, "step": 4570 }, { "action_loss": 0.03747817501425743, "epoch": 4.109712230215828, "step": 4570 }, { "epoch": 4.109712230215828, "step": 4570, "torque_loss": 0.31246218085289 }, { "epoch": 4.118705035971223, "grad_norm": 0.610203742980957, "learning_rate": 9.981077427399504e-05, "loss": 0.16, "step": 4580 }, { "action_loss": 0.02879214845597744, "epoch": 4.118705035971223, "step": 4580 }, { "epoch": 4.118705035971223, "step": 4580, "torque_loss": 0.22938160598278046 }, { "epoch": 4.127697841726619, "grad_norm": 0.5127555131912231, "learning_rate": 9.980837144213371e-05, "loss": 0.1332, "step": 4590 }, { "action_loss": 0.026274897158145905, "epoch": 4.127697841726619, "step": 4590 }, { "epoch": 4.127697841726619, "step": 4590, "torque_loss": 0.1920972615480423 }, { "epoch": 4.136690647482014, "grad_norm": 0.6314271092414856, "learning_rate": 9.980595347980426e-05, "loss": 0.1338, "step": 4600 }, { "action_loss": 0.021787038072943687, "epoch": 4.136690647482014, "step": 4600 }, { "epoch": 4.136690647482014, "step": 4600, "torque_loss": 0.1622474044561386 }, { "epoch": 4.14568345323741, "grad_norm": 0.6408776640892029, "learning_rate": 9.980352038774119e-05, "loss": 0.1567, "step": 4610 }, { "action_loss": 0.029053613543510437, "epoch": 4.14568345323741, "step": 4610 }, { "epoch": 4.14568345323741, "step": 4610, "torque_loss": 0.2500464618206024 }, { "epoch": 4.154676258992806, "grad_norm": 0.4971717596054077, "learning_rate": 9.98010721666836e-05, "loss": 0.146, "step": 4620 }, { "action_loss": 0.019325116649270058, "epoch": 4.154676258992806, "step": 4620 }, { "epoch": 4.154676258992806, "step": 4620, "torque_loss": 0.10538844019174576 }, { "epoch": 4.163669064748201, "grad_norm": 0.6583346724510193, "learning_rate": 9.979860881737523e-05, "loss": 0.1254, "step": 4630 }, { "action_loss": 0.01001161988824606, "epoch": 4.163669064748201, "step": 4630 }, { "epoch": 4.163669064748201, "step": 4630, "torque_loss": 0.19564305245876312 }, { "epoch": 4.172661870503597, "grad_norm": 0.680870532989502, "learning_rate": 9.979613034056434e-05, "loss": 0.1486, "step": 4640 }, { "action_loss": 0.0649513304233551, "epoch": 4.172661870503597, "step": 4640 }, { "epoch": 4.172661870503597, "step": 4640, "torque_loss": 0.19289644062519073 }, { "epoch": 4.181654676258993, "grad_norm": 0.7566598057746887, "learning_rate": 9.979363673700386e-05, "loss": 0.1558, "step": 4650 }, { "action_loss": 0.022880597040057182, "epoch": 4.181654676258993, "step": 4650 }, { "epoch": 4.181654676258993, "step": 4650, "torque_loss": 0.19452667236328125 }, { "epoch": 4.190647482014389, "grad_norm": 0.48941585421562195, "learning_rate": 9.979112800745124e-05, "loss": 0.1646, "step": 4660 }, { "action_loss": 0.0448145754635334, "epoch": 4.190647482014389, "step": 4660 }, { "epoch": 4.190647482014389, "step": 4660, "torque_loss": 0.17879481613636017 }, { "epoch": 4.1996402877697845, "grad_norm": 0.7493339776992798, "learning_rate": 9.978860415266861e-05, "loss": 0.158, "step": 4670 }, { "action_loss": 0.05043886974453926, "epoch": 4.1996402877697845, "step": 4670 }, { "epoch": 4.1996402877697845, "step": 4670, "torque_loss": 0.21581220626831055 }, { "epoch": 4.2086330935251794, "grad_norm": 0.817766547203064, "learning_rate": 9.978606517342262e-05, "loss": 0.1402, "step": 4680 }, { "action_loss": 0.014638036489486694, "epoch": 4.2086330935251794, "step": 4680 }, { "epoch": 4.2086330935251794, "step": 4680, "torque_loss": 0.1692972630262375 }, { "epoch": 4.217625899280575, "grad_norm": 0.7414416670799255, "learning_rate": 9.978351107048456e-05, "loss": 0.1604, "step": 4690 }, { "action_loss": 0.009729860350489616, "epoch": 4.217625899280575, "step": 4690 }, { "epoch": 4.217625899280575, "step": 4690, "torque_loss": 0.14386916160583496 }, { "epoch": 4.226618705035971, "grad_norm": 0.7088095545768738, "learning_rate": 9.978094184463029e-05, "loss": 0.1536, "step": 4700 }, { "action_loss": 0.04376353695988655, "epoch": 4.226618705035971, "step": 4700 }, { "epoch": 4.226618705035971, "step": 4700, "torque_loss": 0.3280535936355591 }, { "epoch": 4.235611510791367, "grad_norm": 0.5300129055976868, "learning_rate": 9.977835749664029e-05, "loss": 0.1414, "step": 4710 }, { "action_loss": 0.018919862806797028, "epoch": 4.235611510791367, "step": 4710 }, { "epoch": 4.235611510791367, "step": 4710, "torque_loss": 0.2074086219072342 }, { "epoch": 4.244604316546763, "grad_norm": 0.7055826783180237, "learning_rate": 9.97757580272996e-05, "loss": 0.1329, "step": 4720 }, { "action_loss": 0.014083978720009327, "epoch": 4.244604316546763, "step": 4720 }, { "epoch": 4.244604316546763, "step": 4720, "torque_loss": 0.24970221519470215 }, { "epoch": 4.253597122302159, "grad_norm": 0.7342536449432373, "learning_rate": 9.977314343739786e-05, "loss": 0.1269, "step": 4730 }, { "action_loss": 0.0444553904235363, "epoch": 4.253597122302159, "step": 4730 }, { "epoch": 4.253597122302159, "step": 4730, "torque_loss": 0.25491562485694885 }, { "epoch": 4.262589928057554, "grad_norm": 0.6658812165260315, "learning_rate": 9.977051372772934e-05, "loss": 0.1578, "step": 4740 }, { "action_loss": 0.023178840056061745, "epoch": 4.262589928057554, "step": 4740 }, { "epoch": 4.262589928057554, "step": 4740, "torque_loss": 0.20881497859954834 }, { "epoch": 4.2715827338129495, "grad_norm": 0.6212975978851318, "learning_rate": 9.976786889909286e-05, "loss": 0.1367, "step": 4750 }, { "action_loss": 0.025107039138674736, "epoch": 4.2715827338129495, "step": 4750 }, { "epoch": 4.2715827338129495, "step": 4750, "torque_loss": 0.16921794414520264 }, { "epoch": 4.280575539568345, "grad_norm": 0.6254833936691284, "learning_rate": 9.976520895229185e-05, "loss": 0.1357, "step": 4760 }, { "action_loss": 0.025208353996276855, "epoch": 4.280575539568345, "step": 4760 }, { "epoch": 4.280575539568345, "step": 4760, "torque_loss": 0.17256473004817963 }, { "epoch": 4.289568345323741, "grad_norm": 0.6681833863258362, "learning_rate": 9.976253388813433e-05, "loss": 0.1509, "step": 4770 }, { "action_loss": 0.011959371156990528, "epoch": 4.289568345323741, "step": 4770 }, { "epoch": 4.289568345323741, "step": 4770, "torque_loss": 0.15658317506313324 }, { "epoch": 4.298561151079137, "grad_norm": 0.7261105179786682, "learning_rate": 9.975984370743293e-05, "loss": 0.1504, "step": 4780 }, { "action_loss": 0.024754365906119347, "epoch": 4.298561151079137, "step": 4780 }, { "epoch": 4.298561151079137, "step": 4780, "torque_loss": 0.2513566315174103 }, { "epoch": 4.307553956834532, "grad_norm": 0.7491858005523682, "learning_rate": 9.975713841100485e-05, "loss": 0.1623, "step": 4790 }, { "action_loss": 0.017602084204554558, "epoch": 4.307553956834532, "step": 4790 }, { "epoch": 4.307553956834532, "step": 4790, "torque_loss": 0.19910864531993866 }, { "epoch": 4.316546762589928, "grad_norm": 0.6964789628982544, "learning_rate": 9.975441799967187e-05, "loss": 0.1485, "step": 4800 }, { "action_loss": 0.020189911127090454, "epoch": 4.316546762589928, "step": 4800 }, { "epoch": 4.316546762589928, "step": 4800, "torque_loss": 0.13104169070720673 }, { "epoch": 4.325539568345324, "grad_norm": 0.6588019728660583, "learning_rate": 9.975168247426039e-05, "loss": 0.1442, "step": 4810 }, { "action_loss": 0.039116356521844864, "epoch": 4.325539568345324, "step": 4810 }, { "epoch": 4.325539568345324, "step": 4810, "torque_loss": 0.16774602234363556 }, { "epoch": 4.33453237410072, "grad_norm": 0.7456806302070618, "learning_rate": 9.974893183560139e-05, "loss": 0.1289, "step": 4820 }, { "action_loss": 0.0210821982473135, "epoch": 4.33453237410072, "step": 4820 }, { "epoch": 4.33453237410072, "step": 4820, "torque_loss": 0.1800859421491623 }, { "epoch": 4.343525179856115, "grad_norm": 0.5159510970115662, "learning_rate": 9.974616608453045e-05, "loss": 0.1364, "step": 4830 }, { "action_loss": 0.030595863237977028, "epoch": 4.343525179856115, "step": 4830 }, { "epoch": 4.343525179856115, "step": 4830, "torque_loss": 0.2174859493970871 }, { "epoch": 4.35251798561151, "grad_norm": 0.42422372102737427, "learning_rate": 9.974338522188772e-05, "loss": 0.1444, "step": 4840 }, { "action_loss": 0.02754923887550831, "epoch": 4.35251798561151, "step": 4840 }, { "epoch": 4.35251798561151, "step": 4840, "torque_loss": 0.24916620552539825 }, { "epoch": 4.361510791366906, "grad_norm": 0.5824350118637085, "learning_rate": 9.974058924851797e-05, "loss": 0.1448, "step": 4850 }, { "action_loss": 0.03276650607585907, "epoch": 4.361510791366906, "step": 4850 }, { "epoch": 4.361510791366906, "step": 4850, "torque_loss": 0.21122534573078156 }, { "epoch": 4.370503597122302, "grad_norm": 0.7973430752754211, "learning_rate": 9.973777816527051e-05, "loss": 0.1535, "step": 4860 }, { "action_loss": 0.05149819329380989, "epoch": 4.370503597122302, "step": 4860 }, { "epoch": 4.370503597122302, "step": 4860, "torque_loss": 0.3496286869049072 }, { "epoch": 4.379496402877698, "grad_norm": 0.5378323197364807, "learning_rate": 9.973495197299931e-05, "loss": 0.1627, "step": 4870 }, { "action_loss": 0.013384613208472729, "epoch": 4.379496402877698, "step": 4870 }, { "epoch": 4.379496402877698, "step": 4870, "torque_loss": 0.1706562489271164 }, { "epoch": 4.388489208633094, "grad_norm": 0.6621399521827698, "learning_rate": 9.973211067256287e-05, "loss": 0.1469, "step": 4880 }, { "action_loss": 0.01770607940852642, "epoch": 4.388489208633094, "step": 4880 }, { "epoch": 4.388489208633094, "step": 4880, "torque_loss": 0.20910155773162842 }, { "epoch": 4.39748201438849, "grad_norm": 0.6288256049156189, "learning_rate": 9.97292542648243e-05, "loss": 0.1625, "step": 4890 }, { "action_loss": 0.012295223772525787, "epoch": 4.39748201438849, "step": 4890 }, { "epoch": 4.39748201438849, "step": 4890, "torque_loss": 0.11600256711244583 }, { "epoch": 4.406474820143885, "grad_norm": 0.6413540244102478, "learning_rate": 9.972638275065131e-05, "loss": 0.1386, "step": 4900 }, { "action_loss": 0.03861216455698013, "epoch": 4.406474820143885, "step": 4900 }, { "epoch": 4.406474820143885, "step": 4900, "torque_loss": 0.19478142261505127 }, { "epoch": 4.41546762589928, "grad_norm": 0.5607053637504578, "learning_rate": 9.972349613091621e-05, "loss": 0.1558, "step": 4910 }, { "action_loss": 0.039844002574682236, "epoch": 4.41546762589928, "step": 4910 }, { "epoch": 4.41546762589928, "step": 4910, "torque_loss": 0.25864943861961365 }, { "epoch": 4.424460431654676, "grad_norm": 0.7476945519447327, "learning_rate": 9.972059440649584e-05, "loss": 0.1636, "step": 4920 }, { "action_loss": 0.020896300673484802, "epoch": 4.424460431654676, "step": 4920 }, { "epoch": 4.424460431654676, "step": 4920, "torque_loss": 0.15795820951461792 }, { "epoch": 4.433453237410072, "grad_norm": 0.5457589626312256, "learning_rate": 9.971767757827168e-05, "loss": 0.1372, "step": 4930 }, { "action_loss": 0.018241068348288536, "epoch": 4.433453237410072, "step": 4930 }, { "epoch": 4.433453237410072, "step": 4930, "torque_loss": 0.14755286276340485 }, { "epoch": 4.442446043165468, "grad_norm": 0.7167475819587708, "learning_rate": 9.971474564712982e-05, "loss": 0.1497, "step": 4940 }, { "action_loss": 0.028665468096733093, "epoch": 4.442446043165468, "step": 4940 }, { "epoch": 4.442446043165468, "step": 4940, "torque_loss": 0.14780820906162262 }, { "epoch": 4.451438848920863, "grad_norm": 0.7268010377883911, "learning_rate": 9.971179861396084e-05, "loss": 0.1446, "step": 4950 }, { "action_loss": 0.04411076381802559, "epoch": 4.451438848920863, "step": 4950 }, { "epoch": 4.451438848920863, "step": 4950, "torque_loss": 0.20660285651683807 }, { "epoch": 4.460431654676259, "grad_norm": 0.6925868391990662, "learning_rate": 9.970883647966003e-05, "loss": 0.1546, "step": 4960 }, { "action_loss": 0.03738158568739891, "epoch": 4.460431654676259, "step": 4960 }, { "epoch": 4.460431654676259, "step": 4960, "torque_loss": 0.30896177887916565 }, { "epoch": 4.469424460431655, "grad_norm": 0.9162440299987793, "learning_rate": 9.970585924512717e-05, "loss": 0.1432, "step": 4970 }, { "action_loss": 0.016121534630656242, "epoch": 4.469424460431655, "step": 4970 }, { "epoch": 4.469424460431655, "step": 4970, "torque_loss": 0.18823279440402985 }, { "epoch": 4.4784172661870505, "grad_norm": 0.4750308394432068, "learning_rate": 9.970286691126669e-05, "loss": 0.1353, "step": 4980 }, { "action_loss": 0.014760772697627544, "epoch": 4.4784172661870505, "step": 4980 }, { "epoch": 4.4784172661870505, "step": 4980, "torque_loss": 0.19003649055957794 }, { "epoch": 4.487410071942446, "grad_norm": 0.6422733664512634, "learning_rate": 9.969985947898756e-05, "loss": 0.1437, "step": 4990 }, { "action_loss": 0.03703463077545166, "epoch": 4.487410071942446, "step": 4990 }, { "epoch": 4.487410071942446, "step": 4990, "torque_loss": 0.2897397577762604 }, { "epoch": 4.496402877697841, "grad_norm": 0.5954152345657349, "learning_rate": 9.969683694920337e-05, "loss": 0.1715, "step": 5000 }, { "action_loss": 0.052507732063531876, "epoch": 4.496402877697841, "step": 5000 }, { "epoch": 4.496402877697841, "step": 5000, "torque_loss": 0.25783053040504456 }, { "epoch": 4.505395683453237, "grad_norm": 0.804078996181488, "learning_rate": 9.969379932283228e-05, "loss": 0.149, "step": 5010 }, { "action_loss": 0.02460164576768875, "epoch": 4.505395683453237, "step": 5010 }, { "epoch": 4.505395683453237, "step": 5010, "torque_loss": 0.18151597678661346 }, { "epoch": 4.514388489208633, "grad_norm": 0.6934306621551514, "learning_rate": 9.969074660079704e-05, "loss": 0.1463, "step": 5020 }, { "action_loss": 0.050948381423950195, "epoch": 4.514388489208633, "step": 5020 }, { "epoch": 4.514388489208633, "step": 5020, "torque_loss": 0.26605963706970215 }, { "epoch": 4.523381294964029, "grad_norm": 0.5864652395248413, "learning_rate": 9.968767878402501e-05, "loss": 0.1495, "step": 5030 }, { "action_loss": 0.01688360422849655, "epoch": 4.523381294964029, "step": 5030 }, { "epoch": 4.523381294964029, "step": 5030, "torque_loss": 0.17108316719532013 }, { "epoch": 4.532374100719425, "grad_norm": 0.5859226584434509, "learning_rate": 9.968459587344808e-05, "loss": 0.1337, "step": 5040 }, { "action_loss": 0.015177734196186066, "epoch": 4.532374100719425, "step": 5040 }, { "epoch": 4.532374100719425, "step": 5040, "torque_loss": 0.2415459156036377 }, { "epoch": 4.5413669064748206, "grad_norm": 0.6429872512817383, "learning_rate": 9.968149787000278e-05, "loss": 0.1534, "step": 5050 }, { "action_loss": 0.010898127220571041, "epoch": 4.5413669064748206, "step": 5050 }, { "epoch": 4.5413669064748206, "step": 5050, "torque_loss": 0.12798167765140533 }, { "epoch": 4.5503597122302155, "grad_norm": 0.536632776260376, "learning_rate": 9.967838477463018e-05, "loss": 0.1207, "step": 5060 }, { "action_loss": 0.02823948860168457, "epoch": 4.5503597122302155, "step": 5060 }, { "epoch": 4.5503597122302155, "step": 5060, "torque_loss": 0.12318258732557297 }, { "epoch": 4.559352517985611, "grad_norm": 0.4310348927974701, "learning_rate": 9.967525658827597e-05, "loss": 0.1492, "step": 5070 }, { "action_loss": 0.03695925697684288, "epoch": 4.559352517985611, "step": 5070 }, { "epoch": 4.559352517985611, "step": 5070, "torque_loss": 0.1681365817785263 }, { "epoch": 4.568345323741007, "grad_norm": 0.5831528306007385, "learning_rate": 9.967211331189042e-05, "loss": 0.1482, "step": 5080 }, { "action_loss": 0.02887253277003765, "epoch": 4.568345323741007, "step": 5080 }, { "epoch": 4.568345323741007, "step": 5080, "torque_loss": 0.18469424545764923 }, { "epoch": 4.577338129496403, "grad_norm": 0.5382142066955566, "learning_rate": 9.966895494642834e-05, "loss": 0.1603, "step": 5090 }, { "action_loss": 0.040478628128767014, "epoch": 4.577338129496403, "step": 5090 }, { "epoch": 4.577338129496403, "step": 5090, "torque_loss": 0.25957751274108887 }, { "epoch": 4.586330935251799, "grad_norm": 0.8423056602478027, "learning_rate": 9.96657814928492e-05, "loss": 0.1717, "step": 5100 }, { "action_loss": 0.020497119054198265, "epoch": 4.586330935251799, "step": 5100 }, { "epoch": 4.586330935251799, "step": 5100, "torque_loss": 0.17771615087985992 }, { "epoch": 4.595323741007194, "grad_norm": 0.4580293893814087, "learning_rate": 9.966259295211697e-05, "loss": 0.153, "step": 5110 }, { "action_loss": 0.026135222986340523, "epoch": 4.595323741007194, "step": 5110 }, { "epoch": 4.595323741007194, "step": 5110, "torque_loss": 0.18983419239521027 }, { "epoch": 4.60431654676259, "grad_norm": 0.6265316605567932, "learning_rate": 9.965938932520028e-05, "loss": 0.1416, "step": 5120 }, { "action_loss": 0.04611576721072197, "epoch": 4.60431654676259, "step": 5120 }, { "epoch": 4.60431654676259, "step": 5120, "torque_loss": 0.27970942854881287 }, { "epoch": 4.613309352517986, "grad_norm": 0.6505469679832458, "learning_rate": 9.965617061307229e-05, "loss": 0.146, "step": 5130 }, { "action_loss": 0.04323343560099602, "epoch": 4.613309352517986, "step": 5130 }, { "epoch": 4.613309352517986, "step": 5130, "torque_loss": 0.20262128114700317 }, { "epoch": 4.622302158273381, "grad_norm": 0.5022614002227783, "learning_rate": 9.965293681671077e-05, "loss": 0.1602, "step": 5140 }, { "action_loss": 0.06890880316495895, "epoch": 4.622302158273381, "step": 5140 }, { "epoch": 4.622302158273381, "step": 5140, "torque_loss": 0.30320122838020325 }, { "epoch": 4.631294964028777, "grad_norm": 0.6097627282142639, "learning_rate": 9.964968793709804e-05, "loss": 0.1529, "step": 5150 }, { "action_loss": 0.016457492485642433, "epoch": 4.631294964028777, "step": 5150 }, { "epoch": 4.631294964028777, "step": 5150, "torque_loss": 0.1747227907180786 }, { "epoch": 4.640287769784173, "grad_norm": 0.6818853616714478, "learning_rate": 9.964642397522106e-05, "loss": 0.1488, "step": 5160 }, { "action_loss": 0.025943463668227196, "epoch": 4.640287769784173, "step": 5160 }, { "epoch": 4.640287769784173, "step": 5160, "torque_loss": 0.2371092587709427 }, { "epoch": 4.649280575539568, "grad_norm": 0.7182847261428833, "learning_rate": 9.96431449320713e-05, "loss": 0.1446, "step": 5170 }, { "action_loss": 0.017890706658363342, "epoch": 4.649280575539568, "step": 5170 }, { "epoch": 4.649280575539568, "step": 5170, "torque_loss": 0.16075961291790009 }, { "epoch": 4.658273381294964, "grad_norm": 0.5409006476402283, "learning_rate": 9.963985080864486e-05, "loss": 0.1538, "step": 5180 }, { "action_loss": 0.02201598882675171, "epoch": 4.658273381294964, "step": 5180 }, { "epoch": 4.658273381294964, "step": 5180, "torque_loss": 0.2352110892534256 }, { "epoch": 4.66726618705036, "grad_norm": 0.5276452302932739, "learning_rate": 9.96365416059424e-05, "loss": 0.1416, "step": 5190 }, { "action_loss": 0.04028233885765076, "epoch": 4.66726618705036, "step": 5190 }, { "epoch": 4.66726618705036, "step": 5190, "torque_loss": 0.26825520396232605 }, { "epoch": 4.676258992805756, "grad_norm": 0.715116560459137, "learning_rate": 9.963321732496919e-05, "loss": 0.1493, "step": 5200 }, { "action_loss": 0.024695128202438354, "epoch": 4.676258992805756, "step": 5200 }, { "epoch": 4.676258992805756, "step": 5200, "torque_loss": 0.25081753730773926 }, { "epoch": 4.685251798561151, "grad_norm": 0.49023914337158203, "learning_rate": 9.962987796673506e-05, "loss": 0.1476, "step": 5210 }, { "action_loss": 0.027434712275862694, "epoch": 4.685251798561151, "step": 5210 }, { "epoch": 4.685251798561151, "step": 5210, "torque_loss": 0.23692171275615692 }, { "epoch": 4.694244604316546, "grad_norm": 0.6122496128082275, "learning_rate": 9.962652353225438e-05, "loss": 0.1266, "step": 5220 }, { "action_loss": 0.02177140675485134, "epoch": 4.694244604316546, "step": 5220 }, { "epoch": 4.694244604316546, "step": 5220, "torque_loss": 0.1427275836467743 }, { "epoch": 4.703237410071942, "grad_norm": 0.4970516264438629, "learning_rate": 9.962315402254619e-05, "loss": 0.1499, "step": 5230 }, { "action_loss": 0.02240573614835739, "epoch": 4.703237410071942, "step": 5230 }, { "epoch": 4.703237410071942, "step": 5230, "torque_loss": 0.19496099650859833 }, { "epoch": 4.712230215827338, "grad_norm": 0.6291744709014893, "learning_rate": 9.9619769438634e-05, "loss": 0.1338, "step": 5240 }, { "action_loss": 0.034097280353307724, "epoch": 4.712230215827338, "step": 5240 }, { "epoch": 4.712230215827338, "step": 5240, "torque_loss": 0.1975380778312683 }, { "epoch": 4.721223021582734, "grad_norm": 0.5432637333869934, "learning_rate": 9.9616369781546e-05, "loss": 0.1241, "step": 5250 }, { "action_loss": 0.009923787787556648, "epoch": 4.721223021582734, "step": 5250 }, { "epoch": 4.721223021582734, "step": 5250, "torque_loss": 0.14760862290859222 }, { "epoch": 4.73021582733813, "grad_norm": 0.5170552134513855, "learning_rate": 9.961295505231491e-05, "loss": 0.1422, "step": 5260 }, { "action_loss": 0.022851401939988136, "epoch": 4.73021582733813, "step": 5260 }, { "epoch": 4.73021582733813, "step": 5260, "torque_loss": 0.17782141268253326 }, { "epoch": 4.739208633093525, "grad_norm": 0.6533814668655396, "learning_rate": 9.960952525197804e-05, "loss": 0.1343, "step": 5270 }, { "action_loss": 0.009202363900840282, "epoch": 4.739208633093525, "step": 5270 }, { "epoch": 4.739208633093525, "step": 5270, "torque_loss": 0.18029475212097168 }, { "epoch": 4.748201438848921, "grad_norm": 0.507968544960022, "learning_rate": 9.960608038157724e-05, "loss": 0.1202, "step": 5280 }, { "action_loss": 0.024336040019989014, "epoch": 4.748201438848921, "step": 5280 }, { "epoch": 4.748201438848921, "step": 5280, "torque_loss": 0.23003393411636353 }, { "epoch": 4.7571942446043165, "grad_norm": 0.6242617964744568, "learning_rate": 9.960262044215901e-05, "loss": 0.1406, "step": 5290 }, { "action_loss": 0.05256226658821106, "epoch": 4.7571942446043165, "step": 5290 }, { "epoch": 4.7571942446043165, "step": 5290, "torque_loss": 0.310042142868042 }, { "epoch": 4.766187050359712, "grad_norm": 0.5238114595413208, "learning_rate": 9.959914543477435e-05, "loss": 0.1615, "step": 5300 }, { "action_loss": 0.07123535871505737, "epoch": 4.766187050359712, "step": 5300 }, { "epoch": 4.766187050359712, "step": 5300, "torque_loss": 0.29466021060943604 }, { "epoch": 4.775179856115108, "grad_norm": 0.5987823009490967, "learning_rate": 9.959565536047892e-05, "loss": 0.1635, "step": 5310 }, { "action_loss": 0.01409983355551958, "epoch": 4.775179856115108, "step": 5310 }, { "epoch": 4.775179856115108, "step": 5310, "torque_loss": 0.12135880440473557 }, { "epoch": 4.784172661870503, "grad_norm": 0.7255853414535522, "learning_rate": 9.959215022033288e-05, "loss": 0.1406, "step": 5320 }, { "action_loss": 0.028494983911514282, "epoch": 4.784172661870503, "step": 5320 }, { "epoch": 4.784172661870503, "step": 5320, "torque_loss": 0.26011598110198975 }, { "epoch": 4.793165467625899, "grad_norm": 0.5435407757759094, "learning_rate": 9.9588630015401e-05, "loss": 0.1496, "step": 5330 }, { "action_loss": 0.029404861852526665, "epoch": 4.793165467625899, "step": 5330 }, { "epoch": 4.793165467625899, "step": 5330, "torque_loss": 0.23921573162078857 }, { "epoch": 4.802158273381295, "grad_norm": 0.5056909322738647, "learning_rate": 9.958509474675264e-05, "loss": 0.1353, "step": 5340 }, { "action_loss": 0.01688995026051998, "epoch": 4.802158273381295, "step": 5340 }, { "epoch": 4.802158273381295, "step": 5340, "torque_loss": 0.20813946425914764 }, { "epoch": 4.811151079136691, "grad_norm": 0.6004248261451721, "learning_rate": 9.958154441546171e-05, "loss": 0.1535, "step": 5350 }, { "action_loss": 0.017984412610530853, "epoch": 4.811151079136691, "step": 5350 }, { "epoch": 4.811151079136691, "step": 5350, "torque_loss": 0.2282819002866745 }, { "epoch": 4.820143884892087, "grad_norm": 0.5769628882408142, "learning_rate": 9.957797902260673e-05, "loss": 0.1424, "step": 5360 }, { "action_loss": 0.026443615555763245, "epoch": 4.820143884892087, "step": 5360 }, { "epoch": 4.820143884892087, "step": 5360, "torque_loss": 0.2352173775434494 }, { "epoch": 4.829136690647482, "grad_norm": 0.9066013097763062, "learning_rate": 9.957439856927073e-05, "loss": 0.1277, "step": 5370 }, { "action_loss": 0.034022193402051926, "epoch": 4.829136690647482, "step": 5370 }, { "epoch": 4.829136690647482, "step": 5370, "torque_loss": 0.24937427043914795 }, { "epoch": 4.838129496402877, "grad_norm": 0.6660497188568115, "learning_rate": 9.957080305654139e-05, "loss": 0.1638, "step": 5380 }, { "action_loss": 0.020909858867526054, "epoch": 4.838129496402877, "step": 5380 }, { "epoch": 4.838129496402877, "step": 5380, "torque_loss": 0.2639136016368866 }, { "epoch": 4.847122302158273, "grad_norm": 0.6563220024108887, "learning_rate": 9.956719248551092e-05, "loss": 0.1553, "step": 5390 }, { "action_loss": 0.009315184317529202, "epoch": 4.847122302158273, "step": 5390 }, { "epoch": 4.847122302158273, "step": 5390, "torque_loss": 0.1401190310716629 }, { "epoch": 4.856115107913669, "grad_norm": 0.6764304637908936, "learning_rate": 9.956356685727612e-05, "loss": 0.1522, "step": 5400 }, { "action_loss": 0.02023950032889843, "epoch": 4.856115107913669, "step": 5400 }, { "epoch": 4.856115107913669, "step": 5400, "torque_loss": 0.1471051424741745 }, { "epoch": 4.865107913669065, "grad_norm": 0.4864187240600586, "learning_rate": 9.955992617293836e-05, "loss": 0.1451, "step": 5410 }, { "action_loss": 0.015987925231456757, "epoch": 4.865107913669065, "step": 5410 }, { "epoch": 4.865107913669065, "step": 5410, "torque_loss": 0.16826315224170685 }, { "epoch": 4.874100719424461, "grad_norm": 0.46791109442710876, "learning_rate": 9.955627043360358e-05, "loss": 0.1377, "step": 5420 }, { "action_loss": 0.01674521528184414, "epoch": 4.874100719424461, "step": 5420 }, { "epoch": 4.874100719424461, "step": 5420, "torque_loss": 0.24247176945209503 }, { "epoch": 4.883093525179856, "grad_norm": 0.5387957096099854, "learning_rate": 9.955259964038231e-05, "loss": 0.1413, "step": 5430 }, { "action_loss": 0.013654832728207111, "epoch": 4.883093525179856, "step": 5430 }, { "epoch": 4.883093525179856, "step": 5430, "torque_loss": 0.1960185170173645 }, { "epoch": 4.892086330935252, "grad_norm": 0.6663799285888672, "learning_rate": 9.954891379438962e-05, "loss": 0.153, "step": 5440 }, { "action_loss": 0.021597668528556824, "epoch": 4.892086330935252, "step": 5440 }, { "epoch": 4.892086330935252, "step": 5440, "torque_loss": 0.1845097541809082 }, { "epoch": 4.901079136690647, "grad_norm": 0.49657347798347473, "learning_rate": 9.954521289674519e-05, "loss": 0.1536, "step": 5450 }, { "action_loss": 0.015884900465607643, "epoch": 4.901079136690647, "step": 5450 }, { "epoch": 4.901079136690647, "step": 5450, "torque_loss": 0.18431270122528076 }, { "epoch": 4.910071942446043, "grad_norm": 0.6922895908355713, "learning_rate": 9.954149694857325e-05, "loss": 0.1471, "step": 5460 }, { "action_loss": 0.019438611343503, "epoch": 4.910071942446043, "step": 5460 }, { "epoch": 4.910071942446043, "step": 5460, "torque_loss": 0.12160726636648178 }, { "epoch": 4.919064748201439, "grad_norm": 0.5057725310325623, "learning_rate": 9.953776595100258e-05, "loss": 0.1598, "step": 5470 }, { "action_loss": 0.0268804132938385, "epoch": 4.919064748201439, "step": 5470 }, { "epoch": 4.919064748201439, "step": 5470, "torque_loss": 0.21067476272583008 }, { "epoch": 4.928057553956835, "grad_norm": 0.7903064489364624, "learning_rate": 9.95340199051666e-05, "loss": 0.1327, "step": 5480 }, { "action_loss": 0.011137762106955051, "epoch": 4.928057553956835, "step": 5480 }, { "epoch": 4.928057553956835, "step": 5480, "torque_loss": 0.2272164672613144 }, { "epoch": 4.93705035971223, "grad_norm": 0.5267750024795532, "learning_rate": 9.953025881220325e-05, "loss": 0.1329, "step": 5490 }, { "action_loss": 0.0150243379175663, "epoch": 4.93705035971223, "step": 5490 }, { "epoch": 4.93705035971223, "step": 5490, "torque_loss": 0.17521989345550537 }, { "epoch": 4.946043165467626, "grad_norm": 0.5156149864196777, "learning_rate": 9.952648267325504e-05, "loss": 0.1325, "step": 5500 }, { "action_loss": 0.029559483751654625, "epoch": 4.946043165467626, "step": 5500 }, { "epoch": 4.946043165467626, "step": 5500, "torque_loss": 0.21972627937793732 }, { "epoch": 4.955035971223022, "grad_norm": 0.5856043100357056, "learning_rate": 9.952269148946905e-05, "loss": 0.1415, "step": 5510 }, { "action_loss": 0.015427912585437298, "epoch": 4.955035971223022, "step": 5510 }, { "epoch": 4.955035971223022, "step": 5510, "torque_loss": 0.14944954216480255 }, { "epoch": 4.9640287769784175, "grad_norm": 0.7778714299201965, "learning_rate": 9.951888526199697e-05, "loss": 0.1391, "step": 5520 }, { "action_loss": 0.01884440891444683, "epoch": 4.9640287769784175, "step": 5520 }, { "epoch": 4.9640287769784175, "step": 5520, "torque_loss": 0.17745937407016754 }, { "epoch": 4.9730215827338125, "grad_norm": 0.6587964296340942, "learning_rate": 9.951506399199501e-05, "loss": 0.1428, "step": 5530 }, { "action_loss": 0.013799979351460934, "epoch": 4.9730215827338125, "step": 5530 }, { "epoch": 4.9730215827338125, "step": 5530, "torque_loss": 0.17662020027637482 }, { "epoch": 4.982014388489208, "grad_norm": 0.44622960686683655, "learning_rate": 9.951122768062399e-05, "loss": 0.1281, "step": 5540 }, { "action_loss": 0.016183240339159966, "epoch": 4.982014388489208, "step": 5540 }, { "epoch": 4.982014388489208, "step": 5540, "torque_loss": 0.21955859661102295 }, { "epoch": 4.991007194244604, "grad_norm": 0.585726261138916, "learning_rate": 9.950737632904927e-05, "loss": 0.1385, "step": 5550 }, { "action_loss": 0.013170632533729076, "epoch": 4.991007194244604, "step": 5550 }, { "epoch": 4.991007194244604, "step": 5550, "torque_loss": 0.13952837884426117 }, { "epoch": 5.0, "grad_norm": 0.649267852306366, "learning_rate": 9.950350993844077e-05, "loss": 0.141, "step": 5560 }, { "action_loss": 0.024133553728461266, "epoch": 5.0, "step": 5560 }, { "epoch": 5.0, "step": 5560, "torque_loss": 0.17963016033172607 }, { "epoch": 5.008992805755396, "grad_norm": 0.5341352820396423, "learning_rate": 9.949962850997303e-05, "loss": 0.1443, "step": 5570 }, { "action_loss": 0.011242099106311798, "epoch": 5.008992805755396, "step": 5570 }, { "epoch": 5.008992805755396, "step": 5570, "torque_loss": 0.16999705135822296 }, { "epoch": 5.017985611510792, "grad_norm": 0.5905406475067139, "learning_rate": 9.949573204482512e-05, "loss": 0.1267, "step": 5580 }, { "action_loss": 0.02215324342250824, "epoch": 5.017985611510792, "step": 5580 }, { "epoch": 5.017985611510792, "step": 5580, "torque_loss": 0.16735200583934784 }, { "epoch": 5.026978417266187, "grad_norm": 0.550835132598877, "learning_rate": 9.949182054418064e-05, "loss": 0.1317, "step": 5590 }, { "action_loss": 0.03683167323470116, "epoch": 5.026978417266187, "step": 5590 }, { "epoch": 5.026978417266187, "step": 5590, "torque_loss": 0.358081579208374 }, { "epoch": 5.0359712230215825, "grad_norm": 0.5519558191299438, "learning_rate": 9.948789400922787e-05, "loss": 0.1621, "step": 5600 }, { "action_loss": 0.03886045888066292, "epoch": 5.0359712230215825, "step": 5600 }, { "epoch": 5.0359712230215825, "step": 5600, "torque_loss": 0.17117787897586823 }, { "epoch": 5.044964028776978, "grad_norm": 0.7142127156257629, "learning_rate": 9.948395244115953e-05, "loss": 0.1414, "step": 5610 }, { "action_loss": 0.026641622185707092, "epoch": 5.044964028776978, "step": 5610 }, { "epoch": 5.044964028776978, "step": 5610, "torque_loss": 0.20262162387371063 }, { "epoch": 5.053956834532374, "grad_norm": 0.5479272603988647, "learning_rate": 9.9479995841173e-05, "loss": 0.1451, "step": 5620 }, { "action_loss": 0.023846713826060295, "epoch": 5.053956834532374, "step": 5620 }, { "epoch": 5.053956834532374, "step": 5620, "torque_loss": 0.11285867542028427 }, { "epoch": 5.06294964028777, "grad_norm": 0.5859777331352234, "learning_rate": 9.947602421047017e-05, "loss": 0.1108, "step": 5630 }, { "action_loss": 0.02362748794257641, "epoch": 5.06294964028777, "step": 5630 }, { "epoch": 5.06294964028777, "step": 5630, "torque_loss": 0.1724000722169876 }, { "epoch": 5.071942446043165, "grad_norm": 0.5172885656356812, "learning_rate": 9.947203755025753e-05, "loss": 0.1421, "step": 5640 }, { "action_loss": 0.030057614669203758, "epoch": 5.071942446043165, "step": 5640 }, { "epoch": 5.071942446043165, "step": 5640, "torque_loss": 0.1746872514486313 }, { "epoch": 5.080935251798561, "grad_norm": 0.4033794403076172, "learning_rate": 9.946803586174611e-05, "loss": 0.1368, "step": 5650 }, { "action_loss": 0.03697885200381279, "epoch": 5.080935251798561, "step": 5650 }, { "epoch": 5.080935251798561, "step": 5650, "torque_loss": 0.2778954803943634 }, { "epoch": 5.089928057553957, "grad_norm": 0.3841419219970703, "learning_rate": 9.946401914615151e-05, "loss": 0.1474, "step": 5660 }, { "action_loss": 0.017757287248969078, "epoch": 5.089928057553957, "step": 5660 }, { "epoch": 5.089928057553957, "step": 5660, "torque_loss": 0.1623672991991043 }, { "epoch": 5.098920863309353, "grad_norm": 0.5542572736740112, "learning_rate": 9.945998740469394e-05, "loss": 0.1182, "step": 5670 }, { "action_loss": 0.07763903588056564, "epoch": 5.098920863309353, "step": 5670 }, { "epoch": 5.098920863309353, "step": 5670, "torque_loss": 0.22029775381088257 }, { "epoch": 5.107913669064748, "grad_norm": 0.6385448575019836, "learning_rate": 9.945594063859809e-05, "loss": 0.1568, "step": 5680 }, { "action_loss": 0.007914519868791103, "epoch": 5.107913669064748, "step": 5680 }, { "epoch": 5.107913669064748, "step": 5680, "torque_loss": 0.19260920584201813 }, { "epoch": 5.116906474820144, "grad_norm": 0.5957069993019104, "learning_rate": 9.94518788490933e-05, "loss": 0.1312, "step": 5690 }, { "action_loss": 0.049867164343595505, "epoch": 5.116906474820144, "step": 5690 }, { "epoch": 5.116906474820144, "step": 5690, "torque_loss": 0.2794138491153717 }, { "epoch": 5.125899280575539, "grad_norm": 0.5382339954376221, "learning_rate": 9.944780203741341e-05, "loss": 0.1544, "step": 5700 }, { "action_loss": 0.017856886610388756, "epoch": 5.125899280575539, "step": 5700 }, { "epoch": 5.125899280575539, "step": 5700, "torque_loss": 0.14151223003864288 }, { "epoch": 5.134892086330935, "grad_norm": 0.5635290145874023, "learning_rate": 9.944371020479686e-05, "loss": 0.1378, "step": 5710 }, { "action_loss": 0.020891299471259117, "epoch": 5.134892086330935, "step": 5710 }, { "epoch": 5.134892086330935, "step": 5710, "torque_loss": 0.14831537008285522 }, { "epoch": 5.143884892086331, "grad_norm": 0.6257177591323853, "learning_rate": 9.943960335248662e-05, "loss": 0.1286, "step": 5720 }, { "action_loss": 0.011303327977657318, "epoch": 5.143884892086331, "step": 5720 }, { "epoch": 5.143884892086331, "step": 5720, "torque_loss": 0.1219455823302269 }, { "epoch": 5.152877697841727, "grad_norm": 0.6775807738304138, "learning_rate": 9.943548148173027e-05, "loss": 0.1439, "step": 5730 }, { "action_loss": 0.014654979109764099, "epoch": 5.152877697841727, "step": 5730 }, { "epoch": 5.152877697841727, "step": 5730, "torque_loss": 0.15974752604961395 }, { "epoch": 5.161870503597123, "grad_norm": 0.5781309008598328, "learning_rate": 9.943134459377992e-05, "loss": 0.125, "step": 5740 }, { "action_loss": 0.016419118270277977, "epoch": 5.161870503597123, "step": 5740 }, { "epoch": 5.161870503597123, "step": 5740, "torque_loss": 0.17371618747711182 }, { "epoch": 5.170863309352518, "grad_norm": 0.5089454650878906, "learning_rate": 9.942719268989222e-05, "loss": 0.1424, "step": 5750 }, { "action_loss": 0.0187834482640028, "epoch": 5.170863309352518, "step": 5750 }, { "epoch": 5.170863309352518, "step": 5750, "torque_loss": 0.16223473846912384 }, { "epoch": 5.179856115107913, "grad_norm": 0.6395071148872375, "learning_rate": 9.942302577132844e-05, "loss": 0.1301, "step": 5760 }, { "action_loss": 0.022559473291039467, "epoch": 5.179856115107913, "step": 5760 }, { "epoch": 5.179856115107913, "step": 5760, "torque_loss": 0.21222440898418427 }, { "epoch": 5.188848920863309, "grad_norm": 0.5692628622055054, "learning_rate": 9.941884383935438e-05, "loss": 0.1352, "step": 5770 }, { "action_loss": 0.03680765628814697, "epoch": 5.188848920863309, "step": 5770 }, { "epoch": 5.188848920863309, "step": 5770, "torque_loss": 0.2563823163509369 }, { "epoch": 5.197841726618705, "grad_norm": 0.552270233631134, "learning_rate": 9.941464689524039e-05, "loss": 0.1398, "step": 5780 }, { "action_loss": 0.018099790439009666, "epoch": 5.197841726618705, "step": 5780 }, { "epoch": 5.197841726618705, "step": 5780, "torque_loss": 0.19251960515975952 }, { "epoch": 5.206834532374101, "grad_norm": 0.5350431799888611, "learning_rate": 9.941043494026139e-05, "loss": 0.1476, "step": 5790 }, { "action_loss": 0.028808971866965294, "epoch": 5.206834532374101, "step": 5790 }, { "epoch": 5.206834532374101, "step": 5790, "torque_loss": 0.24028931558132172 }, { "epoch": 5.215827338129497, "grad_norm": 0.6225906610488892, "learning_rate": 9.940620797569685e-05, "loss": 0.1413, "step": 5800 }, { "action_loss": 0.02264578640460968, "epoch": 5.215827338129497, "step": 5800 }, { "epoch": 5.215827338129497, "step": 5800, "torque_loss": 0.17832307517528534 }, { "epoch": 5.224820143884892, "grad_norm": 0.48993316292762756, "learning_rate": 9.940196600283082e-05, "loss": 0.1668, "step": 5810 }, { "action_loss": 0.018269727006554604, "epoch": 5.224820143884892, "step": 5810 }, { "epoch": 5.224820143884892, "step": 5810, "torque_loss": 0.23943346738815308 }, { "epoch": 5.233812949640288, "grad_norm": 0.7786487936973572, "learning_rate": 9.939770902295192e-05, "loss": 0.1509, "step": 5820 }, { "action_loss": 0.011454696767032146, "epoch": 5.233812949640288, "step": 5820 }, { "epoch": 5.233812949640288, "step": 5820, "torque_loss": 0.14716951549053192 }, { "epoch": 5.2428057553956835, "grad_norm": 0.45754343271255493, "learning_rate": 9.939343703735329e-05, "loss": 0.1469, "step": 5830 }, { "action_loss": 0.017715981230139732, "epoch": 5.2428057553956835, "step": 5830 }, { "epoch": 5.2428057553956835, "step": 5830, "torque_loss": 0.1529974788427353 }, { "epoch": 5.251798561151079, "grad_norm": 0.6234074234962463, "learning_rate": 9.938915004733264e-05, "loss": 0.1281, "step": 5840 }, { "action_loss": 0.017504505813121796, "epoch": 5.251798561151079, "step": 5840 }, { "epoch": 5.251798561151079, "step": 5840, "torque_loss": 0.28541991114616394 }, { "epoch": 5.260791366906475, "grad_norm": 0.4601658880710602, "learning_rate": 9.938484805419224e-05, "loss": 0.1442, "step": 5850 }, { "action_loss": 0.021935993805527687, "epoch": 5.260791366906475, "step": 5850 }, { "epoch": 5.260791366906475, "step": 5850, "torque_loss": 0.2047911435365677 }, { "epoch": 5.26978417266187, "grad_norm": 0.6790842413902283, "learning_rate": 9.938053105923894e-05, "loss": 0.142, "step": 5860 }, { "action_loss": 0.016041500493884087, "epoch": 5.26978417266187, "step": 5860 }, { "epoch": 5.26978417266187, "step": 5860, "torque_loss": 0.18306773900985718 }, { "epoch": 5.278776978417266, "grad_norm": 0.7295709252357483, "learning_rate": 9.937619906378413e-05, "loss": 0.1297, "step": 5870 }, { "action_loss": 0.013760142028331757, "epoch": 5.278776978417266, "step": 5870 }, { "epoch": 5.278776978417266, "step": 5870, "torque_loss": 0.15456855297088623 }, { "epoch": 5.287769784172662, "grad_norm": 0.5662158131599426, "learning_rate": 9.937185206914374e-05, "loss": 0.1424, "step": 5880 }, { "action_loss": 0.0341128408908844, "epoch": 5.287769784172662, "step": 5880 }, { "epoch": 5.287769784172662, "step": 5880, "torque_loss": 0.18096835911273956 }, { "epoch": 5.296762589928058, "grad_norm": 0.6081100106239319, "learning_rate": 9.936749007663829e-05, "loss": 0.1305, "step": 5890 }, { "action_loss": 0.021209046244621277, "epoch": 5.296762589928058, "step": 5890 }, { "epoch": 5.296762589928058, "step": 5890, "torque_loss": 0.20209574699401855 }, { "epoch": 5.305755395683454, "grad_norm": 0.49237748980522156, "learning_rate": 9.93631130875928e-05, "loss": 0.134, "step": 5900 }, { "action_loss": 0.038805678486824036, "epoch": 5.305755395683454, "step": 5900 }, { "epoch": 5.305755395683454, "step": 5900, "torque_loss": 0.24759572744369507 }, { "epoch": 5.3147482014388485, "grad_norm": 0.7004761695861816, "learning_rate": 9.935872110333692e-05, "loss": 0.156, "step": 5910 }, { "action_loss": 0.02615748904645443, "epoch": 5.3147482014388485, "step": 5910 }, { "epoch": 5.3147482014388485, "step": 5910, "torque_loss": 0.21060477197170258 }, { "epoch": 5.323741007194244, "grad_norm": 0.5586017966270447, "learning_rate": 9.935431412520484e-05, "loss": 0.1475, "step": 5920 }, { "action_loss": 0.016202883794903755, "epoch": 5.323741007194244, "step": 5920 }, { "epoch": 5.323741007194244, "step": 5920, "torque_loss": 0.2089506834745407 }, { "epoch": 5.33273381294964, "grad_norm": 0.6177256107330322, "learning_rate": 9.934989215453523e-05, "loss": 0.1487, "step": 5930 }, { "action_loss": 0.023547274991869926, "epoch": 5.33273381294964, "step": 5930 }, { "epoch": 5.33273381294964, "step": 5930, "torque_loss": 0.22804264724254608 }, { "epoch": 5.341726618705036, "grad_norm": 0.6266664266586304, "learning_rate": 9.934545519267139e-05, "loss": 0.1445, "step": 5940 }, { "action_loss": 0.015717286616563797, "epoch": 5.341726618705036, "step": 5940 }, { "epoch": 5.341726618705036, "step": 5940, "torque_loss": 0.2411840707063675 }, { "epoch": 5.350719424460432, "grad_norm": 0.5441216826438904, "learning_rate": 9.934100324096117e-05, "loss": 0.1395, "step": 5950 }, { "action_loss": 0.017060918733477592, "epoch": 5.350719424460432, "step": 5950 }, { "epoch": 5.350719424460432, "step": 5950, "torque_loss": 0.23402619361877441 }, { "epoch": 5.359712230215827, "grad_norm": 0.5129082202911377, "learning_rate": 9.933653630075692e-05, "loss": 0.1293, "step": 5960 }, { "action_loss": 0.027217289432883263, "epoch": 5.359712230215827, "step": 5960 }, { "epoch": 5.359712230215827, "step": 5960, "torque_loss": 0.1945703774690628 }, { "epoch": 5.368705035971223, "grad_norm": 0.5436323881149292, "learning_rate": 9.93320543734156e-05, "loss": 0.1453, "step": 5970 }, { "action_loss": 0.00972067005932331, "epoch": 5.368705035971223, "step": 5970 }, { "epoch": 5.368705035971223, "step": 5970, "torque_loss": 0.16062100231647491 }, { "epoch": 5.377697841726619, "grad_norm": 0.4361002445220947, "learning_rate": 9.932755746029871e-05, "loss": 0.1241, "step": 5980 }, { "action_loss": 0.007001470308750868, "epoch": 5.377697841726619, "step": 5980 }, { "epoch": 5.377697841726619, "step": 5980, "torque_loss": 0.10597901791334152 }, { "epoch": 5.386690647482014, "grad_norm": 0.5322760343551636, "learning_rate": 9.932304556277228e-05, "loss": 0.1406, "step": 5990 }, { "action_loss": 0.021162159740924835, "epoch": 5.386690647482014, "step": 5990 }, { "epoch": 5.386690647482014, "step": 5990, "torque_loss": 0.17949752509593964 }, { "epoch": 5.39568345323741, "grad_norm": 0.540668249130249, "learning_rate": 9.93185186822069e-05, "loss": 0.1521, "step": 6000 }, { "action_loss": 0.02859322540462017, "epoch": 5.39568345323741, "step": 6000 }, { "epoch": 5.39568345323741, "step": 6000, "torque_loss": 0.20789416134357452 }, { "epoch": 5.404676258992806, "grad_norm": 0.63994961977005, "learning_rate": 9.931397681997773e-05, "loss": 0.1384, "step": 6010 }, { "action_loss": 0.014481914229691029, "epoch": 5.404676258992806, "step": 6010 }, { "epoch": 5.404676258992806, "step": 6010, "torque_loss": 0.1613428145647049 }, { "epoch": 5.413669064748201, "grad_norm": 0.570260226726532, "learning_rate": 9.930941997746446e-05, "loss": 0.1345, "step": 6020 }, { "action_loss": 0.024898409843444824, "epoch": 5.413669064748201, "step": 6020 }, { "epoch": 5.413669064748201, "step": 6020, "torque_loss": 0.1847987174987793 }, { "epoch": 5.422661870503597, "grad_norm": 0.3505989611148834, "learning_rate": 9.930484815605134e-05, "loss": 0.1411, "step": 6030 }, { "action_loss": 0.011401374824345112, "epoch": 5.422661870503597, "step": 6030 }, { "epoch": 5.422661870503597, "step": 6030, "torque_loss": 0.17315669357776642 }, { "epoch": 5.431654676258993, "grad_norm": 0.46978235244750977, "learning_rate": 9.930026135712717e-05, "loss": 0.1508, "step": 6040 }, { "action_loss": 0.008659337647259235, "epoch": 5.431654676258993, "step": 6040 }, { "epoch": 5.431654676258993, "step": 6040, "torque_loss": 0.11863281577825546 }, { "epoch": 5.440647482014389, "grad_norm": 0.5270165801048279, "learning_rate": 9.92956595820853e-05, "loss": 0.1407, "step": 6050 }, { "action_loss": 0.011349822394549847, "epoch": 5.440647482014389, "step": 6050 }, { "epoch": 5.440647482014389, "step": 6050, "torque_loss": 0.10461780428886414 }, { "epoch": 5.4496402877697845, "grad_norm": 0.587076723575592, "learning_rate": 9.929104283232362e-05, "loss": 0.1273, "step": 6060 }, { "action_loss": 0.03613879531621933, "epoch": 5.4496402877697845, "step": 6060 }, { "epoch": 5.4496402877697845, "step": 6060, "torque_loss": 0.27844932675361633 }, { "epoch": 5.4586330935251794, "grad_norm": 0.6695418953895569, "learning_rate": 9.92864111092446e-05, "loss": 0.1398, "step": 6070 }, { "action_loss": 0.008286813274025917, "epoch": 5.4586330935251794, "step": 6070 }, { "epoch": 5.4586330935251794, "step": 6070, "torque_loss": 0.12285073846578598 }, { "epoch": 5.467625899280575, "grad_norm": 0.5969694256782532, "learning_rate": 9.92817644142552e-05, "loss": 0.1172, "step": 6080 }, { "action_loss": 0.014564592391252518, "epoch": 5.467625899280575, "step": 6080 }, { "epoch": 5.467625899280575, "step": 6080, "torque_loss": 0.1815851479768753 }, { "epoch": 5.476618705035971, "grad_norm": 0.7406890988349915, "learning_rate": 9.927710274876698e-05, "loss": 0.1436, "step": 6090 }, { "action_loss": 0.01803181692957878, "epoch": 5.476618705035971, "step": 6090 }, { "epoch": 5.476618705035971, "step": 6090, "torque_loss": 0.26286736130714417 }, { "epoch": 5.485611510791367, "grad_norm": 0.6956456899642944, "learning_rate": 9.927242611419603e-05, "loss": 0.1534, "step": 6100 }, { "action_loss": 0.027797222137451172, "epoch": 5.485611510791367, "step": 6100 }, { "epoch": 5.485611510791367, "step": 6100, "torque_loss": 0.2277393788099289 }, { "epoch": 5.494604316546763, "grad_norm": 0.5152212381362915, "learning_rate": 9.926773451196301e-05, "loss": 0.1599, "step": 6110 }, { "action_loss": 0.01375400647521019, "epoch": 5.494604316546763, "step": 6110 }, { "epoch": 5.494604316546763, "step": 6110, "torque_loss": 0.1358250379562378 }, { "epoch": 5.503597122302159, "grad_norm": 0.49098020792007446, "learning_rate": 9.926302794349306e-05, "loss": 0.1358, "step": 6120 }, { "action_loss": 0.023830873891711235, "epoch": 5.503597122302159, "step": 6120 }, { "epoch": 5.503597122302159, "step": 6120, "torque_loss": 0.25719431042671204 }, { "epoch": 5.512589928057554, "grad_norm": 0.5354481339454651, "learning_rate": 9.925830641021594e-05, "loss": 0.1605, "step": 6130 }, { "action_loss": 0.030774682760238647, "epoch": 5.512589928057554, "step": 6130 }, { "epoch": 5.512589928057554, "step": 6130, "torque_loss": 0.27729594707489014 }, { "epoch": 5.5215827338129495, "grad_norm": 0.5995681285858154, "learning_rate": 9.925356991356593e-05, "loss": 0.1359, "step": 6140 }, { "action_loss": 0.06358995288610458, "epoch": 5.5215827338129495, "step": 6140 }, { "epoch": 5.5215827338129495, "step": 6140, "torque_loss": 0.2774023115634918 }, { "epoch": 5.530575539568345, "grad_norm": 0.6447850465774536, "learning_rate": 9.924881845498184e-05, "loss": 0.1347, "step": 6150 }, { "action_loss": 0.015296850353479385, "epoch": 5.530575539568345, "step": 6150 }, { "epoch": 5.530575539568345, "step": 6150, "torque_loss": 0.23633229732513428 }, { "epoch": 5.539568345323741, "grad_norm": 0.4467301368713379, "learning_rate": 9.924405203590705e-05, "loss": 0.1418, "step": 6160 }, { "action_loss": 0.019701246172189713, "epoch": 5.539568345323741, "step": 6160 }, { "epoch": 5.539568345323741, "step": 6160, "torque_loss": 0.1607493907213211 }, { "epoch": 5.548561151079137, "grad_norm": 0.5674362778663635, "learning_rate": 9.923927065778946e-05, "loss": 0.1353, "step": 6170 }, { "action_loss": 0.009004323743283749, "epoch": 5.548561151079137, "step": 6170 }, { "epoch": 5.548561151079137, "step": 6170, "torque_loss": 0.16978709399700165 }, { "epoch": 5.557553956834532, "grad_norm": 0.5341231226921082, "learning_rate": 9.923447432208154e-05, "loss": 0.1402, "step": 6180 }, { "action_loss": 0.017802415415644646, "epoch": 5.557553956834532, "step": 6180 }, { "epoch": 5.557553956834532, "step": 6180, "torque_loss": 0.28284937143325806 }, { "epoch": 5.566546762589928, "grad_norm": 0.5791513323783875, "learning_rate": 9.922966303024027e-05, "loss": 0.1338, "step": 6190 }, { "action_loss": 0.020266830921173096, "epoch": 5.566546762589928, "step": 6190 }, { "epoch": 5.566546762589928, "step": 6190, "torque_loss": 0.28979572653770447 }, { "epoch": 5.575539568345324, "grad_norm": 0.529244065284729, "learning_rate": 9.922483678372721e-05, "loss": 0.1638, "step": 6200 }, { "action_loss": 0.0430021695792675, "epoch": 5.575539568345324, "step": 6200 }, { "epoch": 5.575539568345324, "step": 6200, "torque_loss": 0.28823307156562805 }, { "epoch": 5.58453237410072, "grad_norm": 0.6983257532119751, "learning_rate": 9.921999558400845e-05, "loss": 0.1456, "step": 6210 }, { "action_loss": 0.0199684277176857, "epoch": 5.58453237410072, "step": 6210 }, { "epoch": 5.58453237410072, "step": 6210, "torque_loss": 0.1957160085439682 }, { "epoch": 5.593525179856115, "grad_norm": 0.6642294526100159, "learning_rate": 9.92151394325546e-05, "loss": 0.1661, "step": 6220 }, { "action_loss": 0.015183950774371624, "epoch": 5.593525179856115, "step": 6220 }, { "epoch": 5.593525179856115, "step": 6220, "torque_loss": 0.15461687743663788 }, { "epoch": 5.602517985611511, "grad_norm": 0.6250678896903992, "learning_rate": 9.921026833084084e-05, "loss": 0.1289, "step": 6230 }, { "action_loss": 0.01856544055044651, "epoch": 5.602517985611511, "step": 6230 }, { "epoch": 5.602517985611511, "step": 6230, "torque_loss": 0.22081594169139862 }, { "epoch": 5.611510791366906, "grad_norm": 0.46308889985084534, "learning_rate": 9.920538228034689e-05, "loss": 0.1286, "step": 6240 }, { "action_loss": 0.022802993655204773, "epoch": 5.611510791366906, "step": 6240 }, { "epoch": 5.611510791366906, "step": 6240, "torque_loss": 0.17201454937458038 }, { "epoch": 5.620503597122302, "grad_norm": 0.4377543330192566, "learning_rate": 9.920048128255699e-05, "loss": 0.1261, "step": 6250 }, { "action_loss": 0.017986664548516273, "epoch": 5.620503597122302, "step": 6250 }, { "epoch": 5.620503597122302, "step": 6250, "torque_loss": 0.1691322922706604 }, { "epoch": 5.629496402877698, "grad_norm": 0.5257594585418701, "learning_rate": 9.919556533895995e-05, "loss": 0.1553, "step": 6260 }, { "action_loss": 0.02768351323902607, "epoch": 5.629496402877698, "step": 6260 }, { "epoch": 5.629496402877698, "step": 6260, "torque_loss": 0.18707217276096344 }, { "epoch": 5.638489208633094, "grad_norm": 0.511772632598877, "learning_rate": 9.919063445104907e-05, "loss": 0.146, "step": 6270 }, { "action_loss": 0.013599961064755917, "epoch": 5.638489208633094, "step": 6270 }, { "epoch": 5.638489208633094, "step": 6270, "torque_loss": 0.22925643622875214 }, { "epoch": 5.647482014388489, "grad_norm": 0.6039383411407471, "learning_rate": 9.918568862032227e-05, "loss": 0.1424, "step": 6280 }, { "action_loss": 0.02400307171046734, "epoch": 5.647482014388489, "step": 6280 }, { "epoch": 5.647482014388489, "step": 6280, "torque_loss": 0.3119698464870453 }, { "epoch": 5.656474820143885, "grad_norm": 0.6196522116661072, "learning_rate": 9.918072784828194e-05, "loss": 0.1354, "step": 6290 }, { "action_loss": 0.03543631732463837, "epoch": 5.656474820143885, "step": 6290 }, { "epoch": 5.656474820143885, "step": 6290, "torque_loss": 0.2315060943365097 }, { "epoch": 5.66546762589928, "grad_norm": 0.5028137564659119, "learning_rate": 9.917575213643501e-05, "loss": 0.14, "step": 6300 }, { "action_loss": 0.01955563761293888, "epoch": 5.66546762589928, "step": 6300 }, { "epoch": 5.66546762589928, "step": 6300, "torque_loss": 0.21410799026489258 }, { "epoch": 5.674460431654676, "grad_norm": 0.49142923951148987, "learning_rate": 9.917076148629302e-05, "loss": 0.1321, "step": 6310 }, { "action_loss": 0.01146967988461256, "epoch": 5.674460431654676, "step": 6310 }, { "epoch": 5.674460431654676, "step": 6310, "torque_loss": 0.16865421831607819 }, { "epoch": 5.683453237410072, "grad_norm": 0.5261096358299255, "learning_rate": 9.916575589937196e-05, "loss": 0.1482, "step": 6320 }, { "action_loss": 0.01872357912361622, "epoch": 5.683453237410072, "step": 6320 }, { "epoch": 5.683453237410072, "step": 6320, "torque_loss": 0.21090908348560333 }, { "epoch": 5.692446043165468, "grad_norm": 0.5384975671768188, "learning_rate": 9.916073537719239e-05, "loss": 0.1349, "step": 6330 }, { "action_loss": 0.017394280061125755, "epoch": 5.692446043165468, "step": 6330 }, { "epoch": 5.692446043165468, "step": 6330, "torque_loss": 0.24160321056842804 }, { "epoch": 5.701438848920863, "grad_norm": 0.580590009689331, "learning_rate": 9.915569992127944e-05, "loss": 0.1351, "step": 6340 }, { "action_loss": 0.015036702156066895, "epoch": 5.701438848920863, "step": 6340 }, { "epoch": 5.701438848920863, "step": 6340, "torque_loss": 0.2496287077665329 }, { "epoch": 5.710431654676259, "grad_norm": 0.6509239077568054, "learning_rate": 9.915064953316273e-05, "loss": 0.133, "step": 6350 }, { "action_loss": 0.011092732660472393, "epoch": 5.710431654676259, "step": 6350 }, { "epoch": 5.710431654676259, "step": 6350, "torque_loss": 0.21777351200580597 }, { "epoch": 5.719424460431655, "grad_norm": 0.5085830092430115, "learning_rate": 9.914558421437645e-05, "loss": 0.1325, "step": 6360 }, { "action_loss": 0.014022848568856716, "epoch": 5.719424460431655, "step": 6360 }, { "epoch": 5.719424460431655, "step": 6360, "torque_loss": 0.14730991423130035 }, { "epoch": 5.7284172661870505, "grad_norm": 0.7094071507453918, "learning_rate": 9.914050396645929e-05, "loss": 0.1396, "step": 6370 }, { "action_loss": 0.021780962124466896, "epoch": 5.7284172661870505, "step": 6370 }, { "epoch": 5.7284172661870505, "step": 6370, "torque_loss": 0.17993251979351044 }, { "epoch": 5.737410071942446, "grad_norm": 0.5202098488807678, "learning_rate": 9.913540879095452e-05, "loss": 0.1389, "step": 6380 }, { "action_loss": 0.04251869395375252, "epoch": 5.737410071942446, "step": 6380 }, { "epoch": 5.737410071942446, "step": 6380, "torque_loss": 0.31853917241096497 }, { "epoch": 5.746402877697841, "grad_norm": 0.4869391918182373, "learning_rate": 9.913029868940987e-05, "loss": 0.1523, "step": 6390 }, { "action_loss": 0.024577610194683075, "epoch": 5.746402877697841, "step": 6390 }, { "epoch": 5.746402877697841, "step": 6390, "torque_loss": 0.18336807191371918 }, { "epoch": 5.755395683453237, "grad_norm": 0.4438258707523346, "learning_rate": 9.912517366337772e-05, "loss": 0.1455, "step": 6400 }, { "action_loss": 0.02254561521112919, "epoch": 5.755395683453237, "step": 6400 }, { "epoch": 5.755395683453237, "step": 6400, "torque_loss": 0.17257337272167206 }, { "epoch": 5.764388489208633, "grad_norm": 0.6030913591384888, "learning_rate": 9.912003371441487e-05, "loss": 0.1348, "step": 6410 }, { "action_loss": 0.027914149686694145, "epoch": 5.764388489208633, "step": 6410 }, { "epoch": 5.764388489208633, "step": 6410, "torque_loss": 0.21281373500823975 }, { "epoch": 5.773381294964029, "grad_norm": 0.687258243560791, "learning_rate": 9.911487884408271e-05, "loss": 0.1304, "step": 6420 }, { "action_loss": 0.00857588555663824, "epoch": 5.773381294964029, "step": 6420 }, { "epoch": 5.773381294964029, "step": 6420, "torque_loss": 0.1343742460012436 }, { "epoch": 5.782374100719425, "grad_norm": 0.4180031418800354, "learning_rate": 9.910970905394719e-05, "loss": 0.1373, "step": 6430 }, { "action_loss": 0.011482015252113342, "epoch": 5.782374100719425, "step": 6430 }, { "epoch": 5.782374100719425, "step": 6430, "torque_loss": 0.12796883285045624 }, { "epoch": 5.7913669064748206, "grad_norm": 0.7260056734085083, "learning_rate": 9.91045243455787e-05, "loss": 0.1272, "step": 6440 }, { "action_loss": 0.014509507454931736, "epoch": 5.7913669064748206, "step": 6440 }, { "epoch": 5.7913669064748206, "step": 6440, "torque_loss": 0.1279943436384201 }, { "epoch": 5.8003597122302155, "grad_norm": 0.647864580154419, "learning_rate": 9.909932472055225e-05, "loss": 0.1524, "step": 6450 }, { "action_loss": 0.01826428435742855, "epoch": 5.8003597122302155, "step": 6450 }, { "epoch": 5.8003597122302155, "step": 6450, "torque_loss": 0.11712998151779175 }, { "epoch": 5.809352517985611, "grad_norm": 0.6983824968338013, "learning_rate": 9.909411018044734e-05, "loss": 0.1142, "step": 6460 }, { "action_loss": 0.024804679676890373, "epoch": 5.809352517985611, "step": 6460 }, { "epoch": 5.809352517985611, "step": 6460, "torque_loss": 0.20562678575515747 }, { "epoch": 5.818345323741007, "grad_norm": 0.518579363822937, "learning_rate": 9.908888072684802e-05, "loss": 0.1383, "step": 6470 }, { "action_loss": 0.02822883427143097, "epoch": 5.818345323741007, "step": 6470 }, { "epoch": 5.818345323741007, "step": 6470, "torque_loss": 0.20650345087051392 }, { "epoch": 5.827338129496403, "grad_norm": 0.7092698812484741, "learning_rate": 9.908363636134285e-05, "loss": 0.1211, "step": 6480 }, { "action_loss": 0.018880248069763184, "epoch": 5.827338129496403, "step": 6480 }, { "epoch": 5.827338129496403, "step": 6480, "torque_loss": 0.211702361702919 }, { "epoch": 5.836330935251799, "grad_norm": 0.5707871913909912, "learning_rate": 9.907837708552493e-05, "loss": 0.1248, "step": 6490 }, { "action_loss": 0.018224850296974182, "epoch": 5.836330935251799, "step": 6490 }, { "epoch": 5.836330935251799, "step": 6490, "torque_loss": 0.20718121528625488 }, { "epoch": 5.845323741007194, "grad_norm": 0.6005966067314148, "learning_rate": 9.90731029009919e-05, "loss": 0.1285, "step": 6500 }, { "action_loss": 0.013688714243471622, "epoch": 5.845323741007194, "step": 6500 }, { "epoch": 5.845323741007194, "step": 6500, "torque_loss": 0.15566708147525787 }, { "epoch": 5.85431654676259, "grad_norm": 0.566154420375824, "learning_rate": 9.906781380934589e-05, "loss": 0.156, "step": 6510 }, { "action_loss": 0.026763953268527985, "epoch": 5.85431654676259, "step": 6510 }, { "epoch": 5.85431654676259, "step": 6510, "torque_loss": 0.25938138365745544 }, { "epoch": 5.863309352517986, "grad_norm": 0.593975841999054, "learning_rate": 9.906250981219362e-05, "loss": 0.1467, "step": 6520 }, { "action_loss": 0.0109262615442276, "epoch": 5.863309352517986, "step": 6520 }, { "epoch": 5.863309352517986, "step": 6520, "torque_loss": 0.18979495763778687 }, { "epoch": 5.872302158273381, "grad_norm": 0.6803259253501892, "learning_rate": 9.905719091114628e-05, "loss": 0.1217, "step": 6530 }, { "action_loss": 0.01661919243633747, "epoch": 5.872302158273381, "step": 6530 }, { "epoch": 5.872302158273381, "step": 6530, "torque_loss": 0.17390668392181396 }, { "epoch": 5.881294964028777, "grad_norm": 0.6654937863349915, "learning_rate": 9.905185710781964e-05, "loss": 0.1443, "step": 6540 }, { "action_loss": 0.012779696844518185, "epoch": 5.881294964028777, "step": 6540 }, { "epoch": 5.881294964028777, "step": 6540, "torque_loss": 0.15771134197711945 }, { "epoch": 5.890287769784173, "grad_norm": 0.4974304735660553, "learning_rate": 9.904650840383392e-05, "loss": 0.1319, "step": 6550 }, { "action_loss": 0.013018111698329449, "epoch": 5.890287769784173, "step": 6550 }, { "epoch": 5.890287769784173, "step": 6550, "torque_loss": 0.21054325997829437 }, { "epoch": 5.899280575539568, "grad_norm": 0.45250338315963745, "learning_rate": 9.904114480081397e-05, "loss": 0.1202, "step": 6560 }, { "action_loss": 0.011673684231936932, "epoch": 5.899280575539568, "step": 6560 }, { "epoch": 5.899280575539568, "step": 6560, "torque_loss": 0.1480821967124939 }, { "epoch": 5.908273381294964, "grad_norm": 0.4982425272464752, "learning_rate": 9.903576630038906e-05, "loss": 0.1417, "step": 6570 }, { "action_loss": 0.018374474719166756, "epoch": 5.908273381294964, "step": 6570 }, { "epoch": 5.908273381294964, "step": 6570, "torque_loss": 0.16930489242076874 }, { "epoch": 5.91726618705036, "grad_norm": 0.45154890418052673, "learning_rate": 9.903037290419309e-05, "loss": 0.1289, "step": 6580 }, { "action_loss": 0.01077913586050272, "epoch": 5.91726618705036, "step": 6580 }, { "epoch": 5.91726618705036, "step": 6580, "torque_loss": 0.1432366818189621 }, { "epoch": 5.926258992805756, "grad_norm": 0.6035374402999878, "learning_rate": 9.902496461386439e-05, "loss": 0.1253, "step": 6590 }, { "action_loss": 0.010621778666973114, "epoch": 5.926258992805756, "step": 6590 }, { "epoch": 5.926258992805756, "step": 6590, "torque_loss": 0.17748777568340302 }, { "epoch": 5.935251798561151, "grad_norm": 0.7617992758750916, "learning_rate": 9.901954143104588e-05, "loss": 0.1255, "step": 6600 }, { "action_loss": 0.011792276985943317, "epoch": 5.935251798561151, "step": 6600 }, { "epoch": 5.935251798561151, "step": 6600, "torque_loss": 0.1717759370803833 }, { "epoch": 5.944244604316546, "grad_norm": 0.5329558253288269, "learning_rate": 9.901410335738496e-05, "loss": 0.1107, "step": 6610 }, { "action_loss": 0.023324916139245033, "epoch": 5.944244604316546, "step": 6610 }, { "epoch": 5.944244604316546, "step": 6610, "torque_loss": 0.23085828125476837 }, { "epoch": 5.953237410071942, "grad_norm": 0.6099438071250916, "learning_rate": 9.900865039453358e-05, "loss": 0.1357, "step": 6620 }, { "action_loss": 0.010387777350842953, "epoch": 5.953237410071942, "step": 6620 }, { "epoch": 5.953237410071942, "step": 6620, "torque_loss": 0.17261017858982086 }, { "epoch": 5.962230215827338, "grad_norm": 0.494281142950058, "learning_rate": 9.900318254414821e-05, "loss": 0.1315, "step": 6630 }, { "action_loss": 0.02977617084980011, "epoch": 5.962230215827338, "step": 6630 }, { "epoch": 5.962230215827338, "step": 6630, "torque_loss": 0.24680566787719727 }, { "epoch": 5.971223021582734, "grad_norm": 0.5103464126586914, "learning_rate": 9.899769980788985e-05, "loss": 0.1295, "step": 6640 }, { "action_loss": 0.010959174484014511, "epoch": 5.971223021582734, "step": 6640 }, { "epoch": 5.971223021582734, "step": 6640, "torque_loss": 0.13393443822860718 }, { "epoch": 5.98021582733813, "grad_norm": 0.5276363492012024, "learning_rate": 9.899220218742398e-05, "loss": 0.1189, "step": 6650 }, { "action_loss": 0.010261159390211105, "epoch": 5.98021582733813, "step": 6650 }, { "epoch": 5.98021582733813, "step": 6650, "torque_loss": 0.16217941045761108 }, { "epoch": 5.989208633093525, "grad_norm": 0.607358992099762, "learning_rate": 9.898668968442066e-05, "loss": 0.1589, "step": 6660 }, { "action_loss": 0.025243153795599937, "epoch": 5.989208633093525, "step": 6660 }, { "epoch": 5.989208633093525, "step": 6660, "torque_loss": 0.22340263426303864 }, { "epoch": 5.998201438848921, "grad_norm": 0.7807033658027649, "learning_rate": 9.898116230055443e-05, "loss": 0.1331, "step": 6670 }, { "action_loss": 0.021006939932703972, "epoch": 5.998201438848921, "step": 6670 }, { "epoch": 5.998201438848921, "step": 6670, "torque_loss": 0.21579430997371674 }, { "epoch": 6.0071942446043165, "grad_norm": 0.7072218060493469, "learning_rate": 9.897562003750437e-05, "loss": 0.1291, "step": 6680 }, { "action_loss": 0.017358265817165375, "epoch": 6.0071942446043165, "step": 6680 }, { "epoch": 6.0071942446043165, "step": 6680, "torque_loss": 0.1687830239534378 }, { "epoch": 6.016187050359712, "grad_norm": 0.4368653893470764, "learning_rate": 9.897006289695407e-05, "loss": 0.1322, "step": 6690 }, { "action_loss": 0.030406737700104713, "epoch": 6.016187050359712, "step": 6690 }, { "epoch": 6.016187050359712, "step": 6690, "torque_loss": 0.22919882833957672 }, { "epoch": 6.025179856115108, "grad_norm": 0.5325899124145508, "learning_rate": 9.896449088059164e-05, "loss": 0.1521, "step": 6700 }, { "action_loss": 0.012899558991193771, "epoch": 6.025179856115108, "step": 6700 }, { "epoch": 6.025179856115108, "step": 6700, "torque_loss": 0.158796489238739 }, { "epoch": 6.034172661870503, "grad_norm": 0.4698043465614319, "learning_rate": 9.89589039901097e-05, "loss": 0.141, "step": 6710 }, { "action_loss": 0.013239274732768536, "epoch": 6.034172661870503, "step": 6710 }, { "epoch": 6.034172661870503, "step": 6710, "torque_loss": 0.1629478484392166 }, { "epoch": 6.043165467625899, "grad_norm": 0.5453435182571411, "learning_rate": 9.895330222720542e-05, "loss": 0.1284, "step": 6720 }, { "action_loss": 0.01877593994140625, "epoch": 6.043165467625899, "step": 6720 }, { "epoch": 6.043165467625899, "step": 6720, "torque_loss": 0.16894201934337616 }, { "epoch": 6.052158273381295, "grad_norm": 0.3586134910583496, "learning_rate": 9.894768559358047e-05, "loss": 0.157, "step": 6730 }, { "action_loss": 0.06417828053236008, "epoch": 6.052158273381295, "step": 6730 }, { "epoch": 6.052158273381295, "step": 6730, "torque_loss": 0.21290524303913116 }, { "epoch": 6.061151079136691, "grad_norm": 0.5188984274864197, "learning_rate": 9.894205409094101e-05, "loss": 0.1463, "step": 6740 }, { "action_loss": 0.03032868541777134, "epoch": 6.061151079136691, "step": 6740 }, { "epoch": 6.061151079136691, "step": 6740, "torque_loss": 0.2890433967113495 }, { "epoch": 6.070143884892087, "grad_norm": 0.611182689666748, "learning_rate": 9.893640772099777e-05, "loss": 0.125, "step": 6750 }, { "action_loss": 0.016491210088133812, "epoch": 6.070143884892087, "step": 6750 }, { "epoch": 6.070143884892087, "step": 6750, "torque_loss": 0.1883302927017212 }, { "epoch": 6.079136690647482, "grad_norm": 0.5243465304374695, "learning_rate": 9.893074648546595e-05, "loss": 0.1492, "step": 6760 }, { "action_loss": 0.02925245463848114, "epoch": 6.079136690647482, "step": 6760 }, { "epoch": 6.079136690647482, "step": 6760, "torque_loss": 0.20984280109405518 }, { "epoch": 6.088129496402877, "grad_norm": 0.5027456879615784, "learning_rate": 9.892507038606528e-05, "loss": 0.1421, "step": 6770 }, { "action_loss": 0.016264529898762703, "epoch": 6.088129496402877, "step": 6770 }, { "epoch": 6.088129496402877, "step": 6770, "torque_loss": 0.2536693811416626 }, { "epoch": 6.097122302158273, "grad_norm": 0.5248166918754578, "learning_rate": 9.891937942452003e-05, "loss": 0.1297, "step": 6780 }, { "action_loss": 0.011080104857683182, "epoch": 6.097122302158273, "step": 6780 }, { "epoch": 6.097122302158273, "step": 6780, "torque_loss": 0.2528373897075653 }, { "epoch": 6.106115107913669, "grad_norm": 0.4438765048980713, "learning_rate": 9.891367360255895e-05, "loss": 0.1291, "step": 6790 }, { "action_loss": 0.0375971756875515, "epoch": 6.106115107913669, "step": 6790 }, { "epoch": 6.106115107913669, "step": 6790, "torque_loss": 0.2747572958469391 }, { "epoch": 6.115107913669065, "grad_norm": 0.552991509437561, "learning_rate": 9.890795292191532e-05, "loss": 0.1606, "step": 6800 }, { "action_loss": 0.019379638135433197, "epoch": 6.115107913669065, "step": 6800 }, { "epoch": 6.115107913669065, "step": 6800, "torque_loss": 0.15614376962184906 }, { "epoch": 6.124100719424461, "grad_norm": 0.468839168548584, "learning_rate": 9.890221738432694e-05, "loss": 0.134, "step": 6810 }, { "action_loss": 0.014003965072333813, "epoch": 6.124100719424461, "step": 6810 }, { "epoch": 6.124100719424461, "step": 6810, "torque_loss": 0.1983504742383957 }, { "epoch": 6.133093525179856, "grad_norm": 0.42331600189208984, "learning_rate": 9.88964669915361e-05, "loss": 0.1279, "step": 6820 }, { "action_loss": 0.020038379356265068, "epoch": 6.133093525179856, "step": 6820 }, { "epoch": 6.133093525179856, "step": 6820, "torque_loss": 0.19073696434497833 }, { "epoch": 6.142086330935252, "grad_norm": 0.6574915051460266, "learning_rate": 9.889070174528963e-05, "loss": 0.1489, "step": 6830 }, { "action_loss": 0.014303489588201046, "epoch": 6.142086330935252, "step": 6830 }, { "epoch": 6.142086330935252, "step": 6830, "torque_loss": 0.17403078079223633 }, { "epoch": 6.151079136690647, "grad_norm": 0.5453472137451172, "learning_rate": 9.888492164733883e-05, "loss": 0.1277, "step": 6840 }, { "action_loss": 0.013868586160242558, "epoch": 6.151079136690647, "step": 6840 }, { "epoch": 6.151079136690647, "step": 6840, "torque_loss": 0.17677254974842072 }, { "epoch": 6.160071942446043, "grad_norm": 0.48788416385650635, "learning_rate": 9.88791266994396e-05, "loss": 0.1295, "step": 6850 }, { "action_loss": 0.01192426960915327, "epoch": 6.160071942446043, "step": 6850 }, { "epoch": 6.160071942446043, "step": 6850, "torque_loss": 0.14504440128803253 }, { "epoch": 6.169064748201439, "grad_norm": 0.4950529932975769, "learning_rate": 9.887331690335223e-05, "loss": 0.121, "step": 6860 }, { "action_loss": 0.02239970862865448, "epoch": 6.169064748201439, "step": 6860 }, { "epoch": 6.169064748201439, "step": 6860, "torque_loss": 0.27124491333961487 }, { "epoch": 6.178057553956835, "grad_norm": 0.5096455812454224, "learning_rate": 9.886749226084163e-05, "loss": 0.1225, "step": 6870 }, { "action_loss": 0.011669143103063107, "epoch": 6.178057553956835, "step": 6870 }, { "epoch": 6.178057553956835, "step": 6870, "torque_loss": 0.1369217336177826 }, { "epoch": 6.18705035971223, "grad_norm": 0.6057453155517578, "learning_rate": 9.886165277367714e-05, "loss": 0.1452, "step": 6880 }, { "action_loss": 0.022491440176963806, "epoch": 6.18705035971223, "step": 6880 }, { "epoch": 6.18705035971223, "step": 6880, "torque_loss": 0.1402449607849121 }, { "epoch": 6.196043165467626, "grad_norm": 0.47069185972213745, "learning_rate": 9.885579844363265e-05, "loss": 0.1217, "step": 6890 }, { "action_loss": 0.015501863323152065, "epoch": 6.196043165467626, "step": 6890 }, { "epoch": 6.196043165467626, "step": 6890, "torque_loss": 0.21059846878051758 }, { "epoch": 6.205035971223022, "grad_norm": 0.44601109623908997, "learning_rate": 9.884992927248656e-05, "loss": 0.1217, "step": 6900 }, { "action_loss": 0.007339660543948412, "epoch": 6.205035971223022, "step": 6900 }, { "epoch": 6.205035971223022, "step": 6900, "torque_loss": 0.1330493837594986 }, { "epoch": 6.2140287769784175, "grad_norm": 0.5231848359107971, "learning_rate": 9.884404526202178e-05, "loss": 0.1308, "step": 6910 }, { "action_loss": 0.020772747695446014, "epoch": 6.2140287769784175, "step": 6910 }, { "epoch": 6.2140287769784175, "step": 6910, "torque_loss": 0.2364349365234375 }, { "epoch": 6.223021582733813, "grad_norm": 0.4249134659767151, "learning_rate": 9.883814641402568e-05, "loss": 0.145, "step": 6920 }, { "action_loss": 0.014840682037174702, "epoch": 6.223021582733813, "step": 6920 }, { "epoch": 6.223021582733813, "step": 6920, "torque_loss": 0.13466106355190277 }, { "epoch": 6.232014388489208, "grad_norm": 0.5313286185264587, "learning_rate": 9.88322327302902e-05, "loss": 0.1199, "step": 6930 }, { "action_loss": 0.01581881009042263, "epoch": 6.232014388489208, "step": 6930 }, { "epoch": 6.232014388489208, "step": 6930, "torque_loss": 0.1301898956298828 }, { "epoch": 6.241007194244604, "grad_norm": 0.5017443895339966, "learning_rate": 9.882630421261176e-05, "loss": 0.1157, "step": 6940 }, { "action_loss": 0.009893429465591908, "epoch": 6.241007194244604, "step": 6940 }, { "epoch": 6.241007194244604, "step": 6940, "torque_loss": 0.16905276477336884 }, { "epoch": 6.25, "grad_norm": 0.5750678181648254, "learning_rate": 9.88203608627913e-05, "loss": 0.1267, "step": 6950 }, { "action_loss": 0.018552618101239204, "epoch": 6.25, "step": 6950 }, { "epoch": 6.25, "step": 6950, "torque_loss": 0.1566590815782547 }, { "epoch": 6.258992805755396, "grad_norm": 0.48383793234825134, "learning_rate": 9.881440268263422e-05, "loss": 0.1376, "step": 6960 }, { "action_loss": 0.010782641358673573, "epoch": 6.258992805755396, "step": 6960 }, { "epoch": 6.258992805755396, "step": 6960, "torque_loss": 0.09312129765748978 }, { "epoch": 6.267985611510792, "grad_norm": 0.49816271662712097, "learning_rate": 9.880842967395048e-05, "loss": 0.1282, "step": 6970 }, { "action_loss": 0.013085521757602692, "epoch": 6.267985611510792, "step": 6970 }, { "epoch": 6.267985611510792, "step": 6970, "torque_loss": 0.1651206761598587 }, { "epoch": 6.276978417266187, "grad_norm": 0.49774080514907837, "learning_rate": 9.880244183855452e-05, "loss": 0.1294, "step": 6980 }, { "action_loss": 0.015142402611672878, "epoch": 6.276978417266187, "step": 6980 }, { "epoch": 6.276978417266187, "step": 6980, "torque_loss": 0.22068889439105988 }, { "epoch": 6.2859712230215825, "grad_norm": 0.8211982846260071, "learning_rate": 9.879643917826527e-05, "loss": 0.1623, "step": 6990 }, { "action_loss": 0.035020917654037476, "epoch": 6.2859712230215825, "step": 6990 }, { "epoch": 6.2859712230215825, "step": 6990, "torque_loss": 0.2530559003353119 }, { "epoch": 6.294964028776978, "grad_norm": 0.5184189081192017, "learning_rate": 9.87904216949062e-05, "loss": 0.1393, "step": 7000 }, { "action_loss": 0.01928875595331192, "epoch": 6.294964028776978, "step": 7000 }, { "epoch": 6.294964028776978, "step": 7000, "torque_loss": 0.2204713374376297 }, { "epoch": 6.303956834532374, "grad_norm": 0.6239527463912964, "learning_rate": 9.878438939030526e-05, "loss": 0.151, "step": 7010 }, { "action_loss": 0.018945490941405296, "epoch": 6.303956834532374, "step": 7010 }, { "epoch": 6.303956834532374, "step": 7010, "torque_loss": 0.19711287319660187 }, { "epoch": 6.31294964028777, "grad_norm": 0.6229615807533264, "learning_rate": 9.877834226629489e-05, "loss": 0.1317, "step": 7020 }, { "action_loss": 0.005822690669447184, "epoch": 6.31294964028777, "step": 7020 }, { "epoch": 6.31294964028777, "step": 7020, "torque_loss": 0.141839399933815 }, { "epoch": 6.321942446043165, "grad_norm": 0.49270984530448914, "learning_rate": 9.877228032471206e-05, "loss": 0.1237, "step": 7030 }, { "action_loss": 0.020515812560915947, "epoch": 6.321942446043165, "step": 7030 }, { "epoch": 6.321942446043165, "step": 7030, "torque_loss": 0.19136019051074982 }, { "epoch": 6.330935251798561, "grad_norm": 0.31469276547431946, "learning_rate": 9.876620356739823e-05, "loss": 0.1219, "step": 7040 }, { "action_loss": 0.017836349084973335, "epoch": 6.330935251798561, "step": 7040 }, { "epoch": 6.330935251798561, "step": 7040, "torque_loss": 0.19780278205871582 }, { "epoch": 6.339928057553957, "grad_norm": 0.6137352585792542, "learning_rate": 9.876011199619935e-05, "loss": 0.123, "step": 7050 }, { "action_loss": 0.004797155037522316, "epoch": 6.339928057553957, "step": 7050 }, { "epoch": 6.339928057553957, "step": 7050, "torque_loss": 0.08126512914896011 }, { "epoch": 6.348920863309353, "grad_norm": 0.4362714886665344, "learning_rate": 9.875400561296589e-05, "loss": 0.1279, "step": 7060 }, { "action_loss": 0.01647365652024746, "epoch": 6.348920863309353, "step": 7060 }, { "epoch": 6.348920863309353, "step": 7060, "torque_loss": 0.204476997256279 }, { "epoch": 6.357913669064748, "grad_norm": 0.4284490644931793, "learning_rate": 9.874788441955278e-05, "loss": 0.1352, "step": 7070 }, { "action_loss": 0.011893530376255512, "epoch": 6.357913669064748, "step": 7070 }, { "epoch": 6.357913669064748, "step": 7070, "torque_loss": 0.13365910947322845 }, { "epoch": 6.366906474820144, "grad_norm": 0.4819446802139282, "learning_rate": 9.874174841781951e-05, "loss": 0.1461, "step": 7080 }, { "action_loss": 0.02895781397819519, "epoch": 6.366906474820144, "step": 7080 }, { "epoch": 6.366906474820144, "step": 7080, "torque_loss": 0.23287774622440338 }, { "epoch": 6.375899280575539, "grad_norm": 0.6260504126548767, "learning_rate": 9.873559760963003e-05, "loss": 0.1358, "step": 7090 }, { "action_loss": 0.026063168421387672, "epoch": 6.375899280575539, "step": 7090 }, { "epoch": 6.375899280575539, "step": 7090, "torque_loss": 0.17907555401325226 }, { "epoch": 6.384892086330935, "grad_norm": 0.589446485042572, "learning_rate": 9.872943199685278e-05, "loss": 0.129, "step": 7100 }, { "action_loss": 0.02155614085495472, "epoch": 6.384892086330935, "step": 7100 }, { "epoch": 6.384892086330935, "step": 7100, "torque_loss": 0.22990405559539795 }, { "epoch": 6.393884892086331, "grad_norm": 0.5256199240684509, "learning_rate": 9.872325158136071e-05, "loss": 0.1469, "step": 7110 }, { "action_loss": 0.009468899108469486, "epoch": 6.393884892086331, "step": 7110 }, { "epoch": 6.393884892086331, "step": 7110, "torque_loss": 0.1340576410293579 }, { "epoch": 6.402877697841727, "grad_norm": 0.6006612777709961, "learning_rate": 9.871705636503128e-05, "loss": 0.1334, "step": 7120 }, { "action_loss": 0.020253121852874756, "epoch": 6.402877697841727, "step": 7120 }, { "epoch": 6.402877697841727, "step": 7120, "torque_loss": 0.16795210540294647 }, { "epoch": 6.411870503597123, "grad_norm": 0.719935417175293, "learning_rate": 9.871084634974641e-05, "loss": 0.1385, "step": 7130 }, { "action_loss": 0.028610000386834145, "epoch": 6.411870503597123, "step": 7130 }, { "epoch": 6.411870503597123, "step": 7130, "torque_loss": 0.28119537234306335 }, { "epoch": 6.420863309352518, "grad_norm": 0.8488429188728333, "learning_rate": 9.870462153739257e-05, "loss": 0.156, "step": 7140 }, { "action_loss": 0.012149647809565067, "epoch": 6.420863309352518, "step": 7140 }, { "epoch": 6.420863309352518, "step": 7140, "torque_loss": 0.17656737565994263 }, { "epoch": 6.429856115107913, "grad_norm": 0.4457622468471527, "learning_rate": 9.869838192986067e-05, "loss": 0.1298, "step": 7150 }, { "action_loss": 0.011051966808736324, "epoch": 6.429856115107913, "step": 7150 }, { "epoch": 6.429856115107913, "step": 7150, "torque_loss": 0.16398723423480988 }, { "epoch": 6.438848920863309, "grad_norm": 0.6592724919319153, "learning_rate": 9.869212752904616e-05, "loss": 0.1424, "step": 7160 }, { "action_loss": 0.016201278194785118, "epoch": 6.438848920863309, "step": 7160 }, { "epoch": 6.438848920863309, "step": 7160, "torque_loss": 0.21290616691112518 }, { "epoch": 6.447841726618705, "grad_norm": 0.5303897261619568, "learning_rate": 9.868585833684894e-05, "loss": 0.1301, "step": 7170 }, { "action_loss": 0.01682237721979618, "epoch": 6.447841726618705, "step": 7170 }, { "epoch": 6.447841726618705, "step": 7170, "torque_loss": 0.10459347814321518 }, { "epoch": 6.456834532374101, "grad_norm": 0.4261557161808014, "learning_rate": 9.867957435517342e-05, "loss": 0.1292, "step": 7180 }, { "action_loss": 0.019317107275128365, "epoch": 6.456834532374101, "step": 7180 }, { "epoch": 6.456834532374101, "step": 7180, "torque_loss": 0.1639845222234726 }, { "epoch": 6.465827338129497, "grad_norm": 0.4588714838027954, "learning_rate": 9.867327558592854e-05, "loss": 0.1367, "step": 7190 }, { "action_loss": 0.013941660523414612, "epoch": 6.465827338129497, "step": 7190 }, { "epoch": 6.465827338129497, "step": 7190, "torque_loss": 0.12886081635951996 }, { "epoch": 6.474820143884892, "grad_norm": 0.4840279817581177, "learning_rate": 9.866696203102766e-05, "loss": 0.1405, "step": 7200 }, { "action_loss": 0.01027427427470684, "epoch": 6.474820143884892, "step": 7200 }, { "epoch": 6.474820143884892, "step": 7200, "torque_loss": 0.12506087124347687 }, { "epoch": 6.483812949640288, "grad_norm": 0.5582743287086487, "learning_rate": 9.86606336923887e-05, "loss": 0.1254, "step": 7210 }, { "action_loss": 0.011683876626193523, "epoch": 6.483812949640288, "step": 7210 }, { "epoch": 6.483812949640288, "step": 7210, "torque_loss": 0.1546328216791153 }, { "epoch": 6.4928057553956835, "grad_norm": 0.5572634339332581, "learning_rate": 9.865429057193403e-05, "loss": 0.1321, "step": 7220 }, { "action_loss": 0.03493089973926544, "epoch": 6.4928057553956835, "step": 7220 }, { "epoch": 6.4928057553956835, "step": 7220, "torque_loss": 0.21432511508464813 }, { "epoch": 6.501798561151079, "grad_norm": 0.6388423442840576, "learning_rate": 9.864793267159053e-05, "loss": 0.1315, "step": 7230 }, { "action_loss": 0.009366370737552643, "epoch": 6.501798561151079, "step": 7230 }, { "epoch": 6.501798561151079, "step": 7230, "torque_loss": 0.13317525386810303 }, { "epoch": 6.510791366906475, "grad_norm": 0.42183148860931396, "learning_rate": 9.864155999328957e-05, "loss": 0.1241, "step": 7240 }, { "action_loss": 0.008611700497567654, "epoch": 6.510791366906475, "step": 7240 }, { "epoch": 6.510791366906475, "step": 7240, "torque_loss": 0.15196886658668518 }, { "epoch": 6.51978417266187, "grad_norm": 0.583014726638794, "learning_rate": 9.8635172538967e-05, "loss": 0.1353, "step": 7250 }, { "action_loss": 0.023413831368088722, "epoch": 6.51978417266187, "step": 7250 }, { "epoch": 6.51978417266187, "step": 7250, "torque_loss": 0.15770076215267181 }, { "epoch": 6.528776978417266, "grad_norm": 0.3902617394924164, "learning_rate": 9.862877031056312e-05, "loss": 0.1271, "step": 7260 }, { "action_loss": 0.007882400415837765, "epoch": 6.528776978417266, "step": 7260 }, { "epoch": 6.528776978417266, "step": 7260, "torque_loss": 0.136775404214859 }, { "epoch": 6.537769784172662, "grad_norm": 0.4746233820915222, "learning_rate": 9.862235331002279e-05, "loss": 0.1433, "step": 7270 }, { "action_loss": 0.018964892253279686, "epoch": 6.537769784172662, "step": 7270 }, { "epoch": 6.537769784172662, "step": 7270, "torque_loss": 0.16460323333740234 }, { "epoch": 6.546762589928058, "grad_norm": 0.5689901113510132, "learning_rate": 9.861592153929533e-05, "loss": 0.1354, "step": 7280 }, { "action_loss": 0.01079903170466423, "epoch": 6.546762589928058, "step": 7280 }, { "epoch": 6.546762589928058, "step": 7280, "torque_loss": 0.09026835113763809 }, { "epoch": 6.555755395683454, "grad_norm": 0.5723434686660767, "learning_rate": 9.860947500033455e-05, "loss": 0.1159, "step": 7290 }, { "action_loss": 0.04919368028640747, "epoch": 6.555755395683454, "step": 7290 }, { "epoch": 6.555755395683454, "step": 7290, "torque_loss": 0.21033744513988495 }, { "epoch": 6.564748201438849, "grad_norm": 0.4833504557609558, "learning_rate": 9.86030136950987e-05, "loss": 0.1371, "step": 7300 }, { "action_loss": 0.013666502200067043, "epoch": 6.564748201438849, "step": 7300 }, { "epoch": 6.564748201438849, "step": 7300, "torque_loss": 0.16001008450984955 }, { "epoch": 6.573741007194244, "grad_norm": 0.4688897132873535, "learning_rate": 9.85965376255506e-05, "loss": 0.118, "step": 7310 }, { "action_loss": 0.011797520332038403, "epoch": 6.573741007194244, "step": 7310 }, { "epoch": 6.573741007194244, "step": 7310, "torque_loss": 0.16395753622055054 }, { "epoch": 6.58273381294964, "grad_norm": 0.5053675770759583, "learning_rate": 9.859004679365747e-05, "loss": 0.1177, "step": 7320 }, { "action_loss": 0.017963046208024025, "epoch": 6.58273381294964, "step": 7320 }, { "epoch": 6.58273381294964, "step": 7320, "torque_loss": 0.2286285012960434 }, { "epoch": 6.591726618705036, "grad_norm": 0.4577694237232208, "learning_rate": 9.858354120139108e-05, "loss": 0.1303, "step": 7330 }, { "action_loss": 0.020713087171316147, "epoch": 6.591726618705036, "step": 7330 }, { "epoch": 6.591726618705036, "step": 7330, "torque_loss": 0.2597671449184418 }, { "epoch": 6.600719424460432, "grad_norm": 0.6117475032806396, "learning_rate": 9.857702085072764e-05, "loss": 0.1306, "step": 7340 }, { "action_loss": 0.012313214130699635, "epoch": 6.600719424460432, "step": 7340 }, { "epoch": 6.600719424460432, "step": 7340, "torque_loss": 0.18457789719104767 }, { "epoch": 6.609712230215827, "grad_norm": 0.47476130723953247, "learning_rate": 9.857048574364787e-05, "loss": 0.1358, "step": 7350 }, { "action_loss": 0.009821786545217037, "epoch": 6.609712230215827, "step": 7350 }, { "epoch": 6.609712230215827, "step": 7350, "torque_loss": 0.14045122265815735 }, { "epoch": 6.618705035971223, "grad_norm": 0.6021174788475037, "learning_rate": 9.856393588213698e-05, "loss": 0.1336, "step": 7360 }, { "action_loss": 0.016008691862225533, "epoch": 6.618705035971223, "step": 7360 }, { "epoch": 6.618705035971223, "step": 7360, "torque_loss": 0.2502686083316803 }, { "epoch": 6.627697841726619, "grad_norm": 0.3490820825099945, "learning_rate": 9.855737126818458e-05, "loss": 0.1407, "step": 7370 }, { "action_loss": 0.011812195181846619, "epoch": 6.627697841726619, "step": 7370 }, { "epoch": 6.627697841726619, "step": 7370, "torque_loss": 0.1308145523071289 }, { "epoch": 6.636690647482014, "grad_norm": 0.4691697061061859, "learning_rate": 9.855079190378491e-05, "loss": 0.1421, "step": 7380 }, { "action_loss": 0.024934565648436546, "epoch": 6.636690647482014, "step": 7380 }, { "epoch": 6.636690647482014, "step": 7380, "torque_loss": 0.18195241689682007 }, { "epoch": 6.64568345323741, "grad_norm": 0.4612026810646057, "learning_rate": 9.854419779093655e-05, "loss": 0.1287, "step": 7390 }, { "action_loss": 0.018369270488619804, "epoch": 6.64568345323741, "step": 7390 }, { "epoch": 6.64568345323741, "step": 7390, "torque_loss": 0.1462731808423996 }, { "epoch": 6.654676258992806, "grad_norm": 0.534422755241394, "learning_rate": 9.853758893164264e-05, "loss": 0.1297, "step": 7400 }, { "action_loss": 0.013210180215537548, "epoch": 6.654676258992806, "step": 7400 }, { "epoch": 6.654676258992806, "step": 7400, "torque_loss": 0.13040977716445923 }, { "epoch": 6.663669064748201, "grad_norm": 0.49519893527030945, "learning_rate": 9.853096532791078e-05, "loss": 0.1227, "step": 7410 }, { "action_loss": 0.016162816435098648, "epoch": 6.663669064748201, "step": 7410 }, { "epoch": 6.663669064748201, "step": 7410, "torque_loss": 0.19201882183551788 }, { "epoch": 6.672661870503597, "grad_norm": 0.6436981558799744, "learning_rate": 9.852432698175304e-05, "loss": 0.1373, "step": 7420 }, { "action_loss": 0.014552711509168148, "epoch": 6.672661870503597, "step": 7420 }, { "epoch": 6.672661870503597, "step": 7420, "torque_loss": 0.14851857721805573 }, { "epoch": 6.681654676258993, "grad_norm": 0.5544201135635376, "learning_rate": 9.851767389518597e-05, "loss": 0.1431, "step": 7430 }, { "action_loss": 0.012077376246452332, "epoch": 6.681654676258993, "step": 7430 }, { "epoch": 6.681654676258993, "step": 7430, "torque_loss": 0.16721034049987793 }, { "epoch": 6.690647482014389, "grad_norm": 0.6473069787025452, "learning_rate": 9.85110060702306e-05, "loss": 0.1278, "step": 7440 }, { "action_loss": 0.008131624199450016, "epoch": 6.690647482014389, "step": 7440 }, { "epoch": 6.690647482014389, "step": 7440, "torque_loss": 0.12619581818580627 }, { "epoch": 6.6996402877697845, "grad_norm": 0.4321233034133911, "learning_rate": 9.850432350891245e-05, "loss": 0.119, "step": 7450 }, { "action_loss": 0.01448801625519991, "epoch": 6.6996402877697845, "step": 7450 }, { "epoch": 6.6996402877697845, "step": 7450, "torque_loss": 0.15228687226772308 }, { "epoch": 6.7086330935251794, "grad_norm": 0.5478025078773499, "learning_rate": 9.84976262132615e-05, "loss": 0.1224, "step": 7460 }, { "action_loss": 0.010106951929628849, "epoch": 6.7086330935251794, "step": 7460 }, { "epoch": 6.7086330935251794, "step": 7460, "torque_loss": 0.16480731964111328 }, { "epoch": 6.717625899280575, "grad_norm": 0.5932726263999939, "learning_rate": 9.849091418531222e-05, "loss": 0.129, "step": 7470 }, { "action_loss": 0.020502334460616112, "epoch": 6.717625899280575, "step": 7470 }, { "epoch": 6.717625899280575, "step": 7470, "torque_loss": 0.21006911993026733 }, { "epoch": 6.726618705035971, "grad_norm": 0.6282484531402588, "learning_rate": 9.848418742710353e-05, "loss": 0.1296, "step": 7480 }, { "action_loss": 0.009796182624995708, "epoch": 6.726618705035971, "step": 7480 }, { "epoch": 6.726618705035971, "step": 7480, "torque_loss": 0.131507009267807 }, { "epoch": 6.735611510791367, "grad_norm": 0.5970615148544312, "learning_rate": 9.847744594067885e-05, "loss": 0.1268, "step": 7490 }, { "action_loss": 0.019233427941799164, "epoch": 6.735611510791367, "step": 7490 }, { "epoch": 6.735611510791367, "step": 7490, "torque_loss": 0.16233742237091064 }, { "epoch": 6.744604316546763, "grad_norm": 0.4514527916908264, "learning_rate": 9.847068972808607e-05, "loss": 0.1326, "step": 7500 }, { "action_loss": 0.010349535383284092, "epoch": 6.744604316546763, "step": 7500 }, { "epoch": 6.744604316546763, "step": 7500, "torque_loss": 0.17022450268268585 }, { "epoch": 6.753597122302159, "grad_norm": 0.5431055426597595, "learning_rate": 9.846391879137756e-05, "loss": 0.1183, "step": 7510 }, { "action_loss": 0.011597511358559132, "epoch": 6.753597122302159, "step": 7510 }, { "epoch": 6.753597122302159, "step": 7510, "torque_loss": 0.1667826920747757 }, { "epoch": 6.762589928057554, "grad_norm": 0.38842251896858215, "learning_rate": 9.845713313261012e-05, "loss": 0.1283, "step": 7520 }, { "action_loss": 0.017845168709754944, "epoch": 6.762589928057554, "step": 7520 }, { "epoch": 6.762589928057554, "step": 7520, "torque_loss": 0.19281132519245148 }, { "epoch": 6.7715827338129495, "grad_norm": 0.5211015343666077, "learning_rate": 9.845033275384505e-05, "loss": 0.1535, "step": 7530 }, { "action_loss": 0.009760488756000996, "epoch": 6.7715827338129495, "step": 7530 }, { "epoch": 6.7715827338129495, "step": 7530, "torque_loss": 0.12222877889871597 }, { "epoch": 6.780575539568345, "grad_norm": 0.6835273504257202, "learning_rate": 9.844351765714818e-05, "loss": 0.1261, "step": 7540 }, { "action_loss": 0.024145126342773438, "epoch": 6.780575539568345, "step": 7540 }, { "epoch": 6.780575539568345, "step": 7540, "torque_loss": 0.2593745291233063 }, { "epoch": 6.789568345323741, "grad_norm": 0.561438262462616, "learning_rate": 9.843668784458971e-05, "loss": 0.1417, "step": 7550 }, { "action_loss": 0.008156786672770977, "epoch": 6.789568345323741, "step": 7550 }, { "epoch": 6.789568345323741, "step": 7550, "torque_loss": 0.15898247063159943 }, { "epoch": 6.798561151079137, "grad_norm": 0.5355768799781799, "learning_rate": 9.842984331824437e-05, "loss": 0.1254, "step": 7560 }, { "action_loss": 0.014936176128685474, "epoch": 6.798561151079137, "step": 7560 }, { "epoch": 6.798561151079137, "step": 7560, "torque_loss": 0.1805974841117859 }, { "epoch": 6.807553956834532, "grad_norm": 0.4801589846611023, "learning_rate": 9.842298408019133e-05, "loss": 0.1058, "step": 7570 }, { "action_loss": 0.019861290231347084, "epoch": 6.807553956834532, "step": 7570 }, { "epoch": 6.807553956834532, "step": 7570, "torque_loss": 0.20131666958332062 }, { "epoch": 6.816546762589928, "grad_norm": 0.43517670035362244, "learning_rate": 9.841611013251429e-05, "loss": 0.1386, "step": 7580 }, { "action_loss": 0.011176243424415588, "epoch": 6.816546762589928, "step": 7580 }, { "epoch": 6.816546762589928, "step": 7580, "torque_loss": 0.2152065485715866 }, { "epoch": 6.825539568345324, "grad_norm": 0.5461550354957581, "learning_rate": 9.840922147730133e-05, "loss": 0.1306, "step": 7590 }, { "action_loss": 0.02146032452583313, "epoch": 6.825539568345324, "step": 7590 }, { "epoch": 6.825539568345324, "step": 7590, "torque_loss": 0.24785394966602325 }, { "epoch": 6.83453237410072, "grad_norm": 0.47458750009536743, "learning_rate": 9.840231811664506e-05, "loss": 0.1348, "step": 7600 }, { "action_loss": 0.010415187105536461, "epoch": 6.83453237410072, "step": 7600 }, { "epoch": 6.83453237410072, "step": 7600, "torque_loss": 0.18352758884429932 }, { "epoch": 6.843525179856115, "grad_norm": 0.4778413772583008, "learning_rate": 9.839540005264252e-05, "loss": 0.1276, "step": 7610 }, { "action_loss": 0.011416471563279629, "epoch": 6.843525179856115, "step": 7610 }, { "epoch": 6.843525179856115, "step": 7610, "torque_loss": 0.1547328680753708 }, { "epoch": 6.852517985611511, "grad_norm": 0.4928676187992096, "learning_rate": 9.838846728739527e-05, "loss": 0.115, "step": 7620 }, { "action_loss": 0.006913152989000082, "epoch": 6.852517985611511, "step": 7620 }, { "epoch": 6.852517985611511, "step": 7620, "torque_loss": 0.12112357467412949 }, { "epoch": 6.861510791366906, "grad_norm": 0.5133723616600037, "learning_rate": 9.838151982300927e-05, "loss": 0.1509, "step": 7630 }, { "action_loss": 0.01631067879498005, "epoch": 6.861510791366906, "step": 7630 }, { "epoch": 6.861510791366906, "step": 7630, "torque_loss": 0.2573308050632477 }, { "epoch": 6.870503597122302, "grad_norm": 0.8805178999900818, "learning_rate": 9.8374557661595e-05, "loss": 0.1202, "step": 7640 }, { "action_loss": 0.02616722323000431, "epoch": 6.870503597122302, "step": 7640 }, { "epoch": 6.870503597122302, "step": 7640, "torque_loss": 0.25774767994880676 }, { "epoch": 6.879496402877698, "grad_norm": 0.45935991406440735, "learning_rate": 9.836758080526735e-05, "loss": 0.1513, "step": 7650 }, { "action_loss": 0.020315786823630333, "epoch": 6.879496402877698, "step": 7650 }, { "epoch": 6.879496402877698, "step": 7650, "torque_loss": 0.22506965696811676 }, { "epoch": 6.888489208633094, "grad_norm": 0.5348014831542969, "learning_rate": 9.836058925614575e-05, "loss": 0.1214, "step": 7660 }, { "action_loss": 0.008786645717918873, "epoch": 6.888489208633094, "step": 7660 }, { "epoch": 6.888489208633094, "step": 7660, "torque_loss": 0.15587422251701355 }, { "epoch": 6.897482014388489, "grad_norm": 0.43298330903053284, "learning_rate": 9.8353583016354e-05, "loss": 0.1249, "step": 7670 }, { "action_loss": 0.017447935417294502, "epoch": 6.897482014388489, "step": 7670 }, { "epoch": 6.897482014388489, "step": 7670, "torque_loss": 0.2386215478181839 }, { "epoch": 6.906474820143885, "grad_norm": 0.502255916595459, "learning_rate": 9.834656208802044e-05, "loss": 0.1329, "step": 7680 }, { "action_loss": 0.01877833716571331, "epoch": 6.906474820143885, "step": 7680 }, { "epoch": 6.906474820143885, "step": 7680, "torque_loss": 0.1953829973936081 }, { "epoch": 6.91546762589928, "grad_norm": 0.5233989357948303, "learning_rate": 9.833952647327784e-05, "loss": 0.1342, "step": 7690 }, { "action_loss": 0.008184658363461494, "epoch": 6.91546762589928, "step": 7690 }, { "epoch": 6.91546762589928, "step": 7690, "torque_loss": 0.12337478250265121 }, { "epoch": 6.924460431654676, "grad_norm": 0.464336633682251, "learning_rate": 9.833247617426342e-05, "loss": 0.1167, "step": 7700 }, { "action_loss": 0.009869935922324657, "epoch": 6.924460431654676, "step": 7700 }, { "epoch": 6.924460431654676, "step": 7700, "torque_loss": 0.15056614577770233 }, { "epoch": 6.933453237410072, "grad_norm": 0.534161388874054, "learning_rate": 9.832541119311889e-05, "loss": 0.1219, "step": 7710 }, { "action_loss": 0.028191963210701942, "epoch": 6.933453237410072, "step": 7710 }, { "epoch": 6.933453237410072, "step": 7710, "torque_loss": 0.25465014576911926 }, { "epoch": 6.942446043165468, "grad_norm": 0.5290017127990723, "learning_rate": 9.83183315319904e-05, "loss": 0.1394, "step": 7720 }, { "action_loss": 0.039206262677907944, "epoch": 6.942446043165468, "step": 7720 }, { "epoch": 6.942446043165468, "step": 7720, "torque_loss": 0.2538861334323883 }, { "epoch": 6.951438848920863, "grad_norm": 0.5696597695350647, "learning_rate": 9.831123719302855e-05, "loss": 0.1401, "step": 7730 }, { "action_loss": 0.013157927431166172, "epoch": 6.951438848920863, "step": 7730 }, { "epoch": 6.951438848920863, "step": 7730, "torque_loss": 0.1455109715461731 }, { "epoch": 6.960431654676259, "grad_norm": 0.7254674434661865, "learning_rate": 9.830412817838842e-05, "loss": 0.124, "step": 7740 }, { "action_loss": 0.026387033984065056, "epoch": 6.960431654676259, "step": 7740 }, { "epoch": 6.960431654676259, "step": 7740, "torque_loss": 0.206592857837677 }, { "epoch": 6.969424460431655, "grad_norm": 0.6177034974098206, "learning_rate": 9.829700449022956e-05, "loss": 0.1302, "step": 7750 }, { "action_loss": 0.032828379422426224, "epoch": 6.969424460431655, "step": 7750 }, { "epoch": 6.969424460431655, "step": 7750, "torque_loss": 0.23726487159729004 }, { "epoch": 6.9784172661870505, "grad_norm": 0.592140257358551, "learning_rate": 9.828986613071593e-05, "loss": 0.1249, "step": 7760 }, { "action_loss": 0.008199618197977543, "epoch": 6.9784172661870505, "step": 7760 }, { "epoch": 6.9784172661870505, "step": 7760, "torque_loss": 0.15350492298603058 }, { "epoch": 6.987410071942446, "grad_norm": 0.3964797258377075, "learning_rate": 9.828271310201601e-05, "loss": 0.1242, "step": 7770 }, { "action_loss": 0.007822681218385696, "epoch": 6.987410071942446, "step": 7770 }, { "epoch": 6.987410071942446, "step": 7770, "torque_loss": 0.18475012481212616 }, { "epoch": 6.996402877697841, "grad_norm": 0.679054319858551, "learning_rate": 9.827554540630268e-05, "loss": 0.1249, "step": 7780 }, { "action_loss": 0.02545514702796936, "epoch": 6.996402877697841, "step": 7780 }, { "epoch": 6.996402877697841, "step": 7780, "torque_loss": 0.28034940361976624 }, { "epoch": 7.005395683453237, "grad_norm": 0.4811067283153534, "learning_rate": 9.826836304575329e-05, "loss": 0.1559, "step": 7790 }, { "action_loss": 0.01835801638662815, "epoch": 7.005395683453237, "step": 7790 }, { "epoch": 7.005395683453237, "step": 7790, "torque_loss": 0.22255432605743408 }, { "epoch": 7.014388489208633, "grad_norm": 0.5271545052528381, "learning_rate": 9.826116602254966e-05, "loss": 0.1201, "step": 7800 }, { "action_loss": 0.01801934838294983, "epoch": 7.014388489208633, "step": 7800 }, { "epoch": 7.014388489208633, "step": 7800, "torque_loss": 0.2681841552257538 }, { "epoch": 7.023381294964029, "grad_norm": 0.3736742436885834, "learning_rate": 9.825395433887805e-05, "loss": 0.141, "step": 7810 }, { "action_loss": 0.013634989969432354, "epoch": 7.023381294964029, "step": 7810 }, { "epoch": 7.023381294964029, "step": 7810, "torque_loss": 0.18754731118679047 }, { "epoch": 7.032374100719425, "grad_norm": 0.5110796093940735, "learning_rate": 9.824672799692917e-05, "loss": 0.132, "step": 7820 }, { "action_loss": 0.012774703092873096, "epoch": 7.032374100719425, "step": 7820 }, { "epoch": 7.032374100719425, "step": 7820, "torque_loss": 0.17373324930667877 }, { "epoch": 7.0413669064748206, "grad_norm": 0.4417451322078705, "learning_rate": 9.823948699889823e-05, "loss": 0.1372, "step": 7830 }, { "action_loss": 0.012947794049978256, "epoch": 7.0413669064748206, "step": 7830 }, { "epoch": 7.0413669064748206, "step": 7830, "torque_loss": 0.1101313903927803 }, { "epoch": 7.0503597122302155, "grad_norm": 0.46324780583381653, "learning_rate": 9.823223134698483e-05, "loss": 0.122, "step": 7840 }, { "action_loss": 0.024414516985416412, "epoch": 7.0503597122302155, "step": 7840 }, { "epoch": 7.0503597122302155, "step": 7840, "torque_loss": 0.24531178176403046 }, { "epoch": 7.059352517985611, "grad_norm": 0.33867284655570984, "learning_rate": 9.822496104339303e-05, "loss": 0.1319, "step": 7850 }, { "action_loss": 0.014089641161262989, "epoch": 7.059352517985611, "step": 7850 }, { "epoch": 7.059352517985611, "step": 7850, "torque_loss": 0.17183156311511993 }, { "epoch": 7.068345323741007, "grad_norm": 0.6038378477096558, "learning_rate": 9.821767609033138e-05, "loss": 0.1322, "step": 7860 }, { "action_loss": 0.01213113497942686, "epoch": 7.068345323741007, "step": 7860 }, { "epoch": 7.068345323741007, "step": 7860, "torque_loss": 0.20271359384059906 }, { "epoch": 7.077338129496403, "grad_norm": 0.5215276479721069, "learning_rate": 9.821037649001284e-05, "loss": 0.135, "step": 7870 }, { "action_loss": 0.031092731282114983, "epoch": 7.077338129496403, "step": 7870 }, { "epoch": 7.077338129496403, "step": 7870, "torque_loss": 0.2689630389213562 }, { "epoch": 7.086330935251799, "grad_norm": 0.5377815365791321, "learning_rate": 9.820306224465486e-05, "loss": 0.1375, "step": 7880 }, { "action_loss": 0.008258363232016563, "epoch": 7.086330935251799, "step": 7880 }, { "epoch": 7.086330935251799, "step": 7880, "torque_loss": 0.10946919769048691 }, { "epoch": 7.095323741007194, "grad_norm": 0.4278516173362732, "learning_rate": 9.819573335647928e-05, "loss": 0.1251, "step": 7890 }, { "action_loss": 0.0071639493107795715, "epoch": 7.095323741007194, "step": 7890 }, { "epoch": 7.095323741007194, "step": 7890, "torque_loss": 0.13728365302085876 }, { "epoch": 7.10431654676259, "grad_norm": 0.35318711400032043, "learning_rate": 9.818838982771246e-05, "loss": 0.148, "step": 7900 }, { "action_loss": 0.008354694582521915, "epoch": 7.10431654676259, "step": 7900 }, { "epoch": 7.10431654676259, "step": 7900, "torque_loss": 0.21476125717163086 }, { "epoch": 7.113309352517986, "grad_norm": 0.5466328263282776, "learning_rate": 9.818103166058514e-05, "loss": 0.1342, "step": 7910 }, { "action_loss": 0.02578691951930523, "epoch": 7.113309352517986, "step": 7910 }, { "epoch": 7.113309352517986, "step": 7910, "torque_loss": 0.24299192428588867 }, { "epoch": 7.122302158273381, "grad_norm": 0.5533431768417358, "learning_rate": 9.817365885733254e-05, "loss": 0.1345, "step": 7920 }, { "action_loss": 0.011179403401911259, "epoch": 7.122302158273381, "step": 7920 }, { "epoch": 7.122302158273381, "step": 7920, "torque_loss": 0.17737846076488495 }, { "epoch": 7.131294964028777, "grad_norm": 0.5208855271339417, "learning_rate": 9.816627142019434e-05, "loss": 0.1264, "step": 7930 }, { "action_loss": 0.013655642978847027, "epoch": 7.131294964028777, "step": 7930 }, { "epoch": 7.131294964028777, "step": 7930, "torque_loss": 0.271430641412735 }, { "epoch": 7.140287769784172, "grad_norm": 0.46772921085357666, "learning_rate": 9.815886935141463e-05, "loss": 0.1492, "step": 7940 }, { "action_loss": 0.014166408218443394, "epoch": 7.140287769784172, "step": 7940 }, { "epoch": 7.140287769784172, "step": 7940, "torque_loss": 0.17782074213027954 }, { "epoch": 7.149280575539568, "grad_norm": 0.46522256731987, "learning_rate": 9.8151452653242e-05, "loss": 0.1237, "step": 7950 }, { "action_loss": 0.013634120114147663, "epoch": 7.149280575539568, "step": 7950 }, { "epoch": 7.149280575539568, "step": 7950, "torque_loss": 0.16126008331775665 }, { "epoch": 7.158273381294964, "grad_norm": 0.5392661690711975, "learning_rate": 9.814402132792939e-05, "loss": 0.1292, "step": 7960 }, { "action_loss": 0.03066641092300415, "epoch": 7.158273381294964, "step": 7960 }, { "epoch": 7.158273381294964, "step": 7960, "torque_loss": 0.19422554969787598 }, { "epoch": 7.16726618705036, "grad_norm": 0.419284462928772, "learning_rate": 9.813657537773428e-05, "loss": 0.1275, "step": 7970 }, { "action_loss": 0.009606080129742622, "epoch": 7.16726618705036, "step": 7970 }, { "epoch": 7.16726618705036, "step": 7970, "torque_loss": 0.12898629903793335 }, { "epoch": 7.176258992805756, "grad_norm": 0.44632062315940857, "learning_rate": 9.812911480491854e-05, "loss": 0.1165, "step": 7980 }, { "action_loss": 0.015307359397411346, "epoch": 7.176258992805756, "step": 7980 }, { "epoch": 7.176258992805756, "step": 7980, "torque_loss": 0.1707616001367569 }, { "epoch": 7.1852517985611515, "grad_norm": 0.5069199800491333, "learning_rate": 9.81216396117485e-05, "loss": 0.1379, "step": 7990 }, { "action_loss": 0.02205611951649189, "epoch": 7.1852517985611515, "step": 7990 }, { "epoch": 7.1852517985611515, "step": 7990, "torque_loss": 0.2385188192129135 }, { "epoch": 7.194244604316546, "grad_norm": 0.45145779848098755, "learning_rate": 9.811414980049491e-05, "loss": 0.1284, "step": 8000 }, { "action_loss": 0.029218042269349098, "epoch": 7.194244604316546, "step": 8000 }, { "epoch": 7.194244604316546, "step": 8000, "torque_loss": 0.2430303543806076 }, { "epoch": 7.203237410071942, "grad_norm": 0.5294502973556519, "learning_rate": 9.810664537343301e-05, "loss": 0.1255, "step": 8010 }, { "action_loss": 0.020094437524676323, "epoch": 7.203237410071942, "step": 8010 }, { "epoch": 7.203237410071942, "step": 8010, "torque_loss": 0.21394318342208862 }, { "epoch": 7.212230215827338, "grad_norm": 0.42837318778038025, "learning_rate": 9.809912633284243e-05, "loss": 0.1274, "step": 8020 }, { "action_loss": 0.009415646083652973, "epoch": 7.212230215827338, "step": 8020 }, { "epoch": 7.212230215827338, "step": 8020, "torque_loss": 0.1736125946044922 }, { "epoch": 7.221223021582734, "grad_norm": 0.514460027217865, "learning_rate": 9.809159268100725e-05, "loss": 0.122, "step": 8030 }, { "action_loss": 0.011503874324262142, "epoch": 7.221223021582734, "step": 8030 }, { "epoch": 7.221223021582734, "step": 8030, "torque_loss": 0.171620175242424 }, { "epoch": 7.23021582733813, "grad_norm": 0.38285645842552185, "learning_rate": 9.808404442021599e-05, "loss": 0.1297, "step": 8040 }, { "action_loss": 0.01682380586862564, "epoch": 7.23021582733813, "step": 8040 }, { "epoch": 7.23021582733813, "step": 8040, "torque_loss": 0.19129042327404022 }, { "epoch": 7.239208633093525, "grad_norm": 0.4512690603733063, "learning_rate": 9.807648155276163e-05, "loss": 0.1332, "step": 8050 }, { "action_loss": 0.028663218021392822, "epoch": 7.239208633093525, "step": 8050 }, { "epoch": 7.239208633093525, "step": 8050, "torque_loss": 0.20488502085208893 }, { "epoch": 7.248201438848921, "grad_norm": 0.4574315547943115, "learning_rate": 9.806890408094156e-05, "loss": 0.1324, "step": 8060 }, { "action_loss": 0.009677162393927574, "epoch": 7.248201438848921, "step": 8060 }, { "epoch": 7.248201438848921, "step": 8060, "torque_loss": 0.17265570163726807 }, { "epoch": 7.2571942446043165, "grad_norm": 0.47681814432144165, "learning_rate": 9.806131200705761e-05, "loss": 0.1228, "step": 8070 }, { "action_loss": 0.016324400901794434, "epoch": 7.2571942446043165, "step": 8070 }, { "epoch": 7.2571942446043165, "step": 8070, "torque_loss": 0.22875522077083588 }, { "epoch": 7.266187050359712, "grad_norm": 0.41090354323387146, "learning_rate": 9.805370533341605e-05, "loss": 0.1329, "step": 8080 }, { "action_loss": 0.016688494011759758, "epoch": 7.266187050359712, "step": 8080 }, { "epoch": 7.266187050359712, "step": 8080, "torque_loss": 0.22781126201152802 }, { "epoch": 7.275179856115108, "grad_norm": 0.37701892852783203, "learning_rate": 9.804608406232762e-05, "loss": 0.1177, "step": 8090 }, { "action_loss": 0.015987807884812355, "epoch": 7.275179856115108, "step": 8090 }, { "epoch": 7.275179856115108, "step": 8090, "torque_loss": 0.20025987923145294 }, { "epoch": 7.284172661870503, "grad_norm": 0.534715473651886, "learning_rate": 9.803844819610741e-05, "loss": 0.1318, "step": 8100 }, { "action_loss": 0.03532217815518379, "epoch": 7.284172661870503, "step": 8100 }, { "epoch": 7.284172661870503, "step": 8100, "torque_loss": 0.2862694561481476 }, { "epoch": 7.293165467625899, "grad_norm": 0.47590771317481995, "learning_rate": 9.803079773707504e-05, "loss": 0.1262, "step": 8110 }, { "action_loss": 0.015476390719413757, "epoch": 7.293165467625899, "step": 8110 }, { "epoch": 7.293165467625899, "step": 8110, "torque_loss": 0.27780136466026306 }, { "epoch": 7.302158273381295, "grad_norm": 0.5117525458335876, "learning_rate": 9.802313268755447e-05, "loss": 0.1518, "step": 8120 }, { "action_loss": 0.021830864250659943, "epoch": 7.302158273381295, "step": 8120 }, { "epoch": 7.302158273381295, "step": 8120, "torque_loss": 0.2170936018228531 }, { "epoch": 7.311151079136691, "grad_norm": 0.5229195952415466, "learning_rate": 9.801545304987419e-05, "loss": 0.137, "step": 8130 }, { "action_loss": 0.027498861774802208, "epoch": 7.311151079136691, "step": 8130 }, { "epoch": 7.311151079136691, "step": 8130, "torque_loss": 0.22548894584178925 }, { "epoch": 7.320143884892087, "grad_norm": 0.3562336266040802, "learning_rate": 9.800775882636704e-05, "loss": 0.1284, "step": 8140 }, { "action_loss": 0.026813847944140434, "epoch": 7.320143884892087, "step": 8140 }, { "epoch": 7.320143884892087, "step": 8140, "torque_loss": 0.20669694244861603 }, { "epoch": 7.329136690647482, "grad_norm": 0.3849460184574127, "learning_rate": 9.800005001937034e-05, "loss": 0.1179, "step": 8150 }, { "action_loss": 0.026478150859475136, "epoch": 7.329136690647482, "step": 8150 }, { "epoch": 7.329136690647482, "step": 8150, "torque_loss": 0.1835613250732422 }, { "epoch": 7.338129496402877, "grad_norm": 0.45471319556236267, "learning_rate": 9.79923266312258e-05, "loss": 0.1254, "step": 8160 }, { "action_loss": 0.026585916057229042, "epoch": 7.338129496402877, "step": 8160 }, { "epoch": 7.338129496402877, "step": 8160, "torque_loss": 0.1627868413925171 }, { "epoch": 7.347122302158273, "grad_norm": 0.41202235221862793, "learning_rate": 9.79845886642796e-05, "loss": 0.1238, "step": 8170 }, { "action_loss": 0.008480846881866455, "epoch": 7.347122302158273, "step": 8170 }, { "epoch": 7.347122302158273, "step": 8170, "torque_loss": 0.18745370209217072 }, { "epoch": 7.356115107913669, "grad_norm": 0.447020560503006, "learning_rate": 9.797683612088233e-05, "loss": 0.1198, "step": 8180 }, { "action_loss": 0.019362663850188255, "epoch": 7.356115107913669, "step": 8180 }, { "epoch": 7.356115107913669, "step": 8180, "torque_loss": 0.16178320348262787 }, { "epoch": 7.365107913669065, "grad_norm": 0.35116180777549744, "learning_rate": 9.796906900338898e-05, "loss": 0.1348, "step": 8190 }, { "action_loss": 0.013063580729067326, "epoch": 7.365107913669065, "step": 8190 }, { "epoch": 7.365107913669065, "step": 8190, "torque_loss": 0.19291476905345917 }, { "epoch": 7.374100719424461, "grad_norm": 0.5568772554397583, "learning_rate": 9.796128731415903e-05, "loss": 0.1443, "step": 8200 }, { "action_loss": 0.011339542455971241, "epoch": 7.374100719424461, "step": 8200 }, { "epoch": 7.374100719424461, "step": 8200, "torque_loss": 0.18447645008563995 }, { "epoch": 7.383093525179856, "grad_norm": 0.5435598492622375, "learning_rate": 9.795349105555634e-05, "loss": 0.1269, "step": 8210 }, { "action_loss": 0.01808335818350315, "epoch": 7.383093525179856, "step": 8210 }, { "epoch": 7.383093525179856, "step": 8210, "torque_loss": 0.15693648159503937 }, { "epoch": 7.392086330935252, "grad_norm": 0.5087056756019592, "learning_rate": 9.794568022994922e-05, "loss": 0.1357, "step": 8220 }, { "action_loss": 0.012843060307204723, "epoch": 7.392086330935252, "step": 8220 }, { "epoch": 7.392086330935252, "step": 8220, "torque_loss": 0.18942968547344208 }, { "epoch": 7.401079136690647, "grad_norm": 0.5740323662757874, "learning_rate": 9.793785483971034e-05, "loss": 0.1169, "step": 8230 }, { "action_loss": 0.010233073495328426, "epoch": 7.401079136690647, "step": 8230 }, { "epoch": 7.401079136690647, "step": 8230, "torque_loss": 0.1619015336036682 }, { "epoch": 7.410071942446043, "grad_norm": 0.39029058814048767, "learning_rate": 9.793001488721691e-05, "loss": 0.1389, "step": 8240 }, { "action_loss": 0.017934253439307213, "epoch": 7.410071942446043, "step": 8240 }, { "epoch": 7.410071942446043, "step": 8240, "torque_loss": 0.16206450760364532 }, { "epoch": 7.419064748201439, "grad_norm": 0.5188407897949219, "learning_rate": 9.792216037485047e-05, "loss": 0.1268, "step": 8250 }, { "action_loss": 0.019661476835608482, "epoch": 7.419064748201439, "step": 8250 }, { "epoch": 7.419064748201439, "step": 8250, "torque_loss": 0.25594887137413025 }, { "epoch": 7.428057553956835, "grad_norm": 0.4383695125579834, "learning_rate": 9.791429130499704e-05, "loss": 0.136, "step": 8260 }, { "action_loss": 0.021037571132183075, "epoch": 7.428057553956835, "step": 8260 }, { "epoch": 7.428057553956835, "step": 8260, "torque_loss": 0.2464589923620224 }, { "epoch": 7.43705035971223, "grad_norm": 0.519355058670044, "learning_rate": 9.790640768004698e-05, "loss": 0.1278, "step": 8270 }, { "action_loss": 0.01937875896692276, "epoch": 7.43705035971223, "step": 8270 }, { "epoch": 7.43705035971223, "step": 8270, "torque_loss": 0.16722293198108673 }, { "epoch": 7.446043165467626, "grad_norm": 0.543757975101471, "learning_rate": 9.789850950239518e-05, "loss": 0.1438, "step": 8280 }, { "action_loss": 0.020343219861388206, "epoch": 7.446043165467626, "step": 8280 }, { "epoch": 7.446043165467626, "step": 8280, "torque_loss": 0.2287444919347763 }, { "epoch": 7.455035971223022, "grad_norm": 0.5331960916519165, "learning_rate": 9.789059677444089e-05, "loss": 0.1328, "step": 8290 }, { "action_loss": 0.009542682208120823, "epoch": 7.455035971223022, "step": 8290 }, { "epoch": 7.455035971223022, "step": 8290, "torque_loss": 0.17521162331104279 }, { "epoch": 7.4640287769784175, "grad_norm": 0.3991667926311493, "learning_rate": 9.788266949858776e-05, "loss": 0.109, "step": 8300 }, { "action_loss": 0.006766708102077246, "epoch": 7.4640287769784175, "step": 8300 }, { "epoch": 7.4640287769784175, "step": 8300, "torque_loss": 0.1670873761177063 }, { "epoch": 7.473021582733813, "grad_norm": 0.438070684671402, "learning_rate": 9.787472767724392e-05, "loss": 0.116, "step": 8310 }, { "action_loss": 0.00886692013591528, "epoch": 7.473021582733813, "step": 8310 }, { "epoch": 7.473021582733813, "step": 8310, "torque_loss": 0.096455417573452 }, { "epoch": 7.482014388489208, "grad_norm": 0.6069698333740234, "learning_rate": 9.786677131282185e-05, "loss": 0.1276, "step": 8320 }, { "action_loss": 0.01635180227458477, "epoch": 7.482014388489208, "step": 8320 }, { "epoch": 7.482014388489208, "step": 8320, "torque_loss": 0.2427472621202469 }, { "epoch": 7.491007194244604, "grad_norm": 0.5885980725288391, "learning_rate": 9.785880040773853e-05, "loss": 0.1378, "step": 8330 }, { "action_loss": 0.010760863311588764, "epoch": 7.491007194244604, "step": 8330 }, { "epoch": 7.491007194244604, "step": 8330, "torque_loss": 0.1315302699804306 }, { "epoch": 7.5, "grad_norm": 0.52247154712677, "learning_rate": 9.785081496441527e-05, "loss": 0.1285, "step": 8340 }, { "action_loss": 0.01925058476626873, "epoch": 7.5, "step": 8340 }, { "epoch": 7.5, "step": 8340, "torque_loss": 0.23232907056808472 }, { "epoch": 7.508992805755396, "grad_norm": 0.581160306930542, "learning_rate": 9.784281498527785e-05, "loss": 0.1375, "step": 8350 }, { "action_loss": 0.01381192822009325, "epoch": 7.508992805755396, "step": 8350 }, { "epoch": 7.508992805755396, "step": 8350, "torque_loss": 0.16293232142925262 }, { "epoch": 7.517985611510792, "grad_norm": 0.4569307267665863, "learning_rate": 9.783480047275646e-05, "loss": 0.1404, "step": 8360 }, { "action_loss": 0.014746233820915222, "epoch": 7.517985611510792, "step": 8360 }, { "epoch": 7.517985611510792, "step": 8360, "torque_loss": 0.19093012809753418 }, { "epoch": 7.5269784172661875, "grad_norm": 0.502368688583374, "learning_rate": 9.78267714292857e-05, "loss": 0.1334, "step": 8370 }, { "action_loss": 0.008024840615689754, "epoch": 7.5269784172661875, "step": 8370 }, { "epoch": 7.5269784172661875, "step": 8370, "torque_loss": 0.11564627289772034 }, { "epoch": 7.5359712230215825, "grad_norm": 0.496195524930954, "learning_rate": 9.781872785730454e-05, "loss": 0.1278, "step": 8380 }, { "action_loss": 0.02058827131986618, "epoch": 7.5359712230215825, "step": 8380 }, { "epoch": 7.5359712230215825, "step": 8380, "torque_loss": 0.24084258079528809 }, { "epoch": 7.544964028776978, "grad_norm": 0.48931264877319336, "learning_rate": 9.781066975925646e-05, "loss": 0.1353, "step": 8390 }, { "action_loss": 0.01759384758770466, "epoch": 7.544964028776978, "step": 8390 }, { "epoch": 7.544964028776978, "step": 8390, "torque_loss": 0.21476741135120392 }, { "epoch": 7.553956834532374, "grad_norm": 0.4732513427734375, "learning_rate": 9.780259713758928e-05, "loss": 0.1248, "step": 8400 }, { "action_loss": 0.011902506463229656, "epoch": 7.553956834532374, "step": 8400 }, { "epoch": 7.553956834532374, "step": 8400, "torque_loss": 0.1355362981557846 }, { "epoch": 7.56294964028777, "grad_norm": 0.6066412329673767, "learning_rate": 9.779450999475524e-05, "loss": 0.1235, "step": 8410 }, { "action_loss": 0.014822044409811497, "epoch": 7.56294964028777, "step": 8410 }, { "epoch": 7.56294964028777, "step": 8410, "torque_loss": 0.1930159330368042 }, { "epoch": 7.571942446043165, "grad_norm": 0.6501162052154541, "learning_rate": 9.7786408333211e-05, "loss": 0.1198, "step": 8420 }, { "action_loss": 0.013729683123528957, "epoch": 7.571942446043165, "step": 8420 }, { "epoch": 7.571942446043165, "step": 8420, "torque_loss": 0.23134177923202515 }, { "epoch": 7.580935251798561, "grad_norm": 0.6942180395126343, "learning_rate": 9.777829215541764e-05, "loss": 0.1364, "step": 8430 }, { "action_loss": 0.010820134542882442, "epoch": 7.580935251798561, "step": 8430 }, { "epoch": 7.580935251798561, "step": 8430, "torque_loss": 0.16670948266983032 }, { "epoch": 7.589928057553957, "grad_norm": 0.3985198736190796, "learning_rate": 9.777016146384064e-05, "loss": 0.127, "step": 8440 }, { "action_loss": 0.04016125202178955, "epoch": 7.589928057553957, "step": 8440 }, { "epoch": 7.589928057553957, "step": 8440, "torque_loss": 0.2379484325647354 }, { "epoch": 7.598920863309353, "grad_norm": 0.4789911210536957, "learning_rate": 9.776201626094988e-05, "loss": 0.1141, "step": 8450 }, { "action_loss": 0.009574607945978642, "epoch": 7.598920863309353, "step": 8450 }, { "epoch": 7.598920863309353, "step": 8450, "torque_loss": 0.1551801711320877 }, { "epoch": 7.607913669064748, "grad_norm": 0.5104438066482544, "learning_rate": 9.775385654921965e-05, "loss": 0.1196, "step": 8460 }, { "action_loss": 0.01255288626998663, "epoch": 7.607913669064748, "step": 8460 }, { "epoch": 7.607913669064748, "step": 8460, "torque_loss": 0.20117564499378204 }, { "epoch": 7.616906474820144, "grad_norm": 0.38089948892593384, "learning_rate": 9.774568233112868e-05, "loss": 0.1281, "step": 8470 }, { "action_loss": 0.019243570044636726, "epoch": 7.616906474820144, "step": 8470 }, { "epoch": 7.616906474820144, "step": 8470, "torque_loss": 0.18782146275043488 }, { "epoch": 7.625899280575539, "grad_norm": 0.6269126534461975, "learning_rate": 9.773749360916007e-05, "loss": 0.1439, "step": 8480 }, { "action_loss": 0.018750840798020363, "epoch": 7.625899280575539, "step": 8480 }, { "epoch": 7.625899280575539, "step": 8480, "torque_loss": 0.16127048432826996 }, { "epoch": 7.634892086330935, "grad_norm": 0.4432384967803955, "learning_rate": 9.772929038580134e-05, "loss": 0.1341, "step": 8490 }, { "action_loss": 0.0241867545992136, "epoch": 7.634892086330935, "step": 8490 }, { "epoch": 7.634892086330935, "step": 8490, "torque_loss": 0.2322707176208496 }, { "epoch": 7.643884892086331, "grad_norm": 0.61858069896698, "learning_rate": 9.772107266354439e-05, "loss": 0.1495, "step": 8500 }, { "action_loss": 0.019896307960152626, "epoch": 7.643884892086331, "step": 8500 }, { "epoch": 7.643884892086331, "step": 8500, "torque_loss": 0.2112836390733719 }, { "epoch": 7.652877697841727, "grad_norm": 0.6006408929824829, "learning_rate": 9.77128404448856e-05, "loss": 0.1368, "step": 8510 }, { "action_loss": 0.019473137333989143, "epoch": 7.652877697841727, "step": 8510 }, { "epoch": 7.652877697841727, "step": 8510, "torque_loss": 0.1594061255455017 }, { "epoch": 7.661870503597123, "grad_norm": 0.6001814007759094, "learning_rate": 9.770459373232565e-05, "loss": 0.1076, "step": 8520 }, { "action_loss": 0.014276575297117233, "epoch": 7.661870503597123, "step": 8520 }, { "epoch": 7.661870503597123, "step": 8520, "torque_loss": 0.19915771484375 }, { "epoch": 7.670863309352518, "grad_norm": 0.504167914390564, "learning_rate": 9.769633252836969e-05, "loss": 0.131, "step": 8530 }, { "action_loss": 0.009696969762444496, "epoch": 7.670863309352518, "step": 8530 }, { "epoch": 7.670863309352518, "step": 8530, "torque_loss": 0.14801371097564697 }, { "epoch": 7.679856115107913, "grad_norm": 0.6212944388389587, "learning_rate": 9.768805683552724e-05, "loss": 0.1094, "step": 8540 }, { "action_loss": 0.017824223265051842, "epoch": 7.679856115107913, "step": 8540 }, { "epoch": 7.679856115107913, "step": 8540, "torque_loss": 0.1956912726163864 }, { "epoch": 7.688848920863309, "grad_norm": 0.510403573513031, "learning_rate": 9.767976665631228e-05, "loss": 0.1305, "step": 8550 }, { "action_loss": 0.005761189851909876, "epoch": 7.688848920863309, "step": 8550 }, { "epoch": 7.688848920863309, "step": 8550, "torque_loss": 0.13682235777378082 }, { "epoch": 7.697841726618705, "grad_norm": 0.46420323848724365, "learning_rate": 9.767146199324311e-05, "loss": 0.1251, "step": 8560 }, { "action_loss": 0.009354867041110992, "epoch": 7.697841726618705, "step": 8560 }, { "epoch": 7.697841726618705, "step": 8560, "torque_loss": 0.13763219118118286 }, { "epoch": 7.706834532374101, "grad_norm": 0.40926364064216614, "learning_rate": 9.766314284884249e-05, "loss": 0.1095, "step": 8570 }, { "action_loss": 0.0225040465593338, "epoch": 7.706834532374101, "step": 8570 }, { "epoch": 7.706834532374101, "step": 8570, "torque_loss": 0.2715313136577606 }, { "epoch": 7.715827338129497, "grad_norm": 0.4413151741027832, "learning_rate": 9.765480922563752e-05, "loss": 0.1318, "step": 8580 }, { "action_loss": 0.013568851165473461, "epoch": 7.715827338129497, "step": 8580 }, { "epoch": 7.715827338129497, "step": 8580, "torque_loss": 0.22352004051208496 }, { "epoch": 7.724820143884892, "grad_norm": 0.4308524429798126, "learning_rate": 9.764646112615978e-05, "loss": 0.1286, "step": 8590 }, { "action_loss": 0.02310362458229065, "epoch": 7.724820143884892, "step": 8590 }, { "epoch": 7.724820143884892, "step": 8590, "torque_loss": 0.19950789213180542 }, { "epoch": 7.733812949640288, "grad_norm": 0.4974185526371002, "learning_rate": 9.763809855294517e-05, "loss": 0.1242, "step": 8600 }, { "action_loss": 0.00893250573426485, "epoch": 7.733812949640288, "step": 8600 }, { "epoch": 7.733812949640288, "step": 8600, "torque_loss": 0.15440309047698975 }, { "epoch": 7.7428057553956835, "grad_norm": 0.5383493304252625, "learning_rate": 9.762972150853404e-05, "loss": 0.1154, "step": 8610 }, { "action_loss": 0.013286846689879894, "epoch": 7.7428057553956835, "step": 8610 }, { "epoch": 7.7428057553956835, "step": 8610, "torque_loss": 0.2079528123140335 }, { "epoch": 7.751798561151079, "grad_norm": 0.6772728562355042, "learning_rate": 9.762132999547111e-05, "loss": 0.1294, "step": 8620 }, { "action_loss": 0.01211314182728529, "epoch": 7.751798561151079, "step": 8620 }, { "epoch": 7.751798561151079, "step": 8620, "torque_loss": 0.13809029757976532 }, { "epoch": 7.760791366906475, "grad_norm": 0.4447677433490753, "learning_rate": 9.761292401630549e-05, "loss": 0.1198, "step": 8630 }, { "action_loss": 0.014621327631175518, "epoch": 7.760791366906475, "step": 8630 }, { "epoch": 7.760791366906475, "step": 8630, "torque_loss": 0.18309412896633148 }, { "epoch": 7.76978417266187, "grad_norm": 0.35509592294692993, "learning_rate": 9.76045035735907e-05, "loss": 0.1167, "step": 8640 }, { "action_loss": 0.014572039246559143, "epoch": 7.76978417266187, "step": 8640 }, { "epoch": 7.76978417266187, "step": 8640, "torque_loss": 0.1884545534849167 }, { "epoch": 7.778776978417266, "grad_norm": 0.3886178135871887, "learning_rate": 9.759606866988464e-05, "loss": 0.1192, "step": 8650 }, { "action_loss": 0.0070604439824819565, "epoch": 7.778776978417266, "step": 8650 }, { "epoch": 7.778776978417266, "step": 8650, "torque_loss": 0.15652215480804443 }, { "epoch": 7.787769784172662, "grad_norm": 0.45264196395874023, "learning_rate": 9.758761930774963e-05, "loss": 0.1081, "step": 8660 }, { "action_loss": 0.01444109808653593, "epoch": 7.787769784172662, "step": 8660 }, { "epoch": 7.787769784172662, "step": 8660, "torque_loss": 0.15931330621242523 }, { "epoch": 7.796762589928058, "grad_norm": 0.4253899157047272, "learning_rate": 9.757915548975235e-05, "loss": 0.1189, "step": 8670 }, { "action_loss": 0.021459022536873817, "epoch": 7.796762589928058, "step": 8670 }, { "epoch": 7.796762589928058, "step": 8670, "torque_loss": 0.20847196877002716 }, { "epoch": 7.805755395683454, "grad_norm": 0.4197235107421875, "learning_rate": 9.757067721846389e-05, "loss": 0.1285, "step": 8680 }, { "action_loss": 0.010928582400083542, "epoch": 7.805755395683454, "step": 8680 }, { "epoch": 7.805755395683454, "step": 8680, "torque_loss": 0.17596907913684845 }, { "epoch": 7.814748201438849, "grad_norm": 0.4040171205997467, "learning_rate": 9.756218449645971e-05, "loss": 0.1323, "step": 8690 }, { "action_loss": 0.007855945266783237, "epoch": 7.814748201438849, "step": 8690 }, { "epoch": 7.814748201438849, "step": 8690, "torque_loss": 0.2109944075345993 }, { "epoch": 7.823741007194244, "grad_norm": 0.45956745743751526, "learning_rate": 9.75536773263197e-05, "loss": 0.1239, "step": 8700 }, { "action_loss": 0.013208028860390186, "epoch": 7.823741007194244, "step": 8700 }, { "epoch": 7.823741007194244, "step": 8700, "torque_loss": 0.23710386455059052 }, { "epoch": 7.83273381294964, "grad_norm": 0.5709884762763977, "learning_rate": 9.75451557106281e-05, "loss": 0.114, "step": 8710 }, { "action_loss": 0.012577268294990063, "epoch": 7.83273381294964, "step": 8710 }, { "epoch": 7.83273381294964, "step": 8710, "torque_loss": 0.14755748212337494 }, { "epoch": 7.841726618705036, "grad_norm": 0.45803341269493103, "learning_rate": 9.753661965197354e-05, "loss": 0.1174, "step": 8720 }, { "action_loss": 0.014966499991714954, "epoch": 7.841726618705036, "step": 8720 }, { "epoch": 7.841726618705036, "step": 8720, "torque_loss": 0.23731322586536407 }, { "epoch": 7.850719424460432, "grad_norm": 0.5117339491844177, "learning_rate": 9.752806915294908e-05, "loss": 0.1263, "step": 8730 }, { "action_loss": 0.014371730387210846, "epoch": 7.850719424460432, "step": 8730 }, { "epoch": 7.850719424460432, "step": 8730, "torque_loss": 0.24075299501419067 }, { "epoch": 7.859712230215827, "grad_norm": 0.5395709276199341, "learning_rate": 9.75195042161521e-05, "loss": 0.1164, "step": 8740 }, { "action_loss": 0.012700870633125305, "epoch": 7.859712230215827, "step": 8740 }, { "epoch": 7.859712230215827, "step": 8740, "torque_loss": 0.14851343631744385 }, { "epoch": 7.868705035971223, "grad_norm": 0.7021387219429016, "learning_rate": 9.751092484418442e-05, "loss": 0.1065, "step": 8750 }, { "action_loss": 0.008488026447594166, "epoch": 7.868705035971223, "step": 8750 }, { "epoch": 7.868705035971223, "step": 8750, "torque_loss": 0.21624529361724854 }, { "epoch": 7.877697841726619, "grad_norm": 0.44025254249572754, "learning_rate": 9.750233103965224e-05, "loss": 0.1291, "step": 8760 }, { "action_loss": 0.013417561538517475, "epoch": 7.877697841726619, "step": 8760 }, { "epoch": 7.877697841726619, "step": 8760, "torque_loss": 0.14837495982646942 }, { "epoch": 7.886690647482014, "grad_norm": 0.4253762364387512, "learning_rate": 9.749372280516611e-05, "loss": 0.1323, "step": 8770 }, { "action_loss": 0.010339311324059963, "epoch": 7.886690647482014, "step": 8770 }, { "epoch": 7.886690647482014, "step": 8770, "torque_loss": 0.1405472457408905 }, { "epoch": 7.89568345323741, "grad_norm": 0.4553302526473999, "learning_rate": 9.748510014334097e-05, "loss": 0.1243, "step": 8780 }, { "action_loss": 0.02265390194952488, "epoch": 7.89568345323741, "step": 8780 }, { "epoch": 7.89568345323741, "step": 8780, "torque_loss": 0.16056634485721588 }, { "epoch": 7.904676258992806, "grad_norm": 0.5853199362754822, "learning_rate": 9.747646305679621e-05, "loss": 0.1472, "step": 8790 }, { "action_loss": 0.012551081366837025, "epoch": 7.904676258992806, "step": 8790 }, { "epoch": 7.904676258992806, "step": 8790, "torque_loss": 0.15900550782680511 }, { "epoch": 7.913669064748201, "grad_norm": 0.3493174612522125, "learning_rate": 9.74678115481555e-05, "loss": 0.1401, "step": 8800 }, { "action_loss": 0.03042864240705967, "epoch": 7.913669064748201, "step": 8800 }, { "epoch": 7.913669064748201, "step": 8800, "torque_loss": 0.18871372938156128 }, { "epoch": 7.922661870503597, "grad_norm": 0.49091625213623047, "learning_rate": 9.745914562004696e-05, "loss": 0.137, "step": 8810 }, { "action_loss": 0.00889533944427967, "epoch": 7.922661870503597, "step": 8810 }, { "epoch": 7.922661870503597, "step": 8810, "torque_loss": 0.14893537759780884 }, { "epoch": 7.931654676258993, "grad_norm": 0.3928019106388092, "learning_rate": 9.745046527510307e-05, "loss": 0.1171, "step": 8820 }, { "action_loss": 0.015004063956439495, "epoch": 7.931654676258993, "step": 8820 }, { "epoch": 7.931654676258993, "step": 8820, "torque_loss": 0.21656186878681183 }, { "epoch": 7.940647482014389, "grad_norm": 0.5823075771331787, "learning_rate": 9.744177051596068e-05, "loss": 0.1224, "step": 8830 }, { "action_loss": 0.015099924989044666, "epoch": 7.940647482014389, "step": 8830 }, { "epoch": 7.940647482014389, "step": 8830, "torque_loss": 0.1558617204427719 }, { "epoch": 7.9496402877697845, "grad_norm": 0.40585312247276306, "learning_rate": 9.743306134526105e-05, "loss": 0.132, "step": 8840 }, { "action_loss": 0.026659125462174416, "epoch": 7.9496402877697845, "step": 8840 }, { "epoch": 7.9496402877697845, "step": 8840, "torque_loss": 0.22189877927303314 }, { "epoch": 7.9586330935251794, "grad_norm": 0.38162681460380554, "learning_rate": 9.742433776564977e-05, "loss": 0.1106, "step": 8850 }, { "action_loss": 0.014770537614822388, "epoch": 7.9586330935251794, "step": 8850 }, { "epoch": 7.9586330935251794, "step": 8850, "torque_loss": 0.2636176347732544 }, { "epoch": 7.967625899280575, "grad_norm": 0.36957332491874695, "learning_rate": 9.741559977977683e-05, "loss": 0.1241, "step": 8860 }, { "action_loss": 0.009787512011826038, "epoch": 7.967625899280575, "step": 8860 }, { "epoch": 7.967625899280575, "step": 8860, "torque_loss": 0.14057999849319458 }, { "epoch": 7.976618705035971, "grad_norm": 0.4594159722328186, "learning_rate": 9.740684739029661e-05, "loss": 0.1239, "step": 8870 }, { "action_loss": 0.013259577564895153, "epoch": 7.976618705035971, "step": 8870 }, { "epoch": 7.976618705035971, "step": 8870, "torque_loss": 0.2063712328672409 }, { "epoch": 7.985611510791367, "grad_norm": 0.46681058406829834, "learning_rate": 9.739808059986789e-05, "loss": 0.1457, "step": 8880 }, { "action_loss": 0.009520606137812138, "epoch": 7.985611510791367, "step": 8880 }, { "epoch": 7.985611510791367, "step": 8880, "torque_loss": 0.2108025997877121 }, { "epoch": 7.994604316546763, "grad_norm": 0.3820553123950958, "learning_rate": 9.738929941115373e-05, "loss": 0.1312, "step": 8890 }, { "action_loss": 0.0048216842114925385, "epoch": 7.994604316546763, "step": 8890 }, { "epoch": 7.994604316546763, "step": 8890, "torque_loss": 0.07497472316026688 }, { "epoch": 8.003597122302159, "grad_norm": 0.37020114064216614, "learning_rate": 9.738050382682167e-05, "loss": 0.0964, "step": 8900 }, { "action_loss": 0.01106608659029007, "epoch": 8.003597122302159, "step": 8900 }, { "epoch": 8.003597122302159, "step": 8900, "torque_loss": 0.14515753090381622 }, { "epoch": 8.012589928057555, "grad_norm": 0.3799160420894623, "learning_rate": 9.737169384954355e-05, "loss": 0.1325, "step": 8910 }, { "action_loss": 0.034223686903715134, "epoch": 8.012589928057555, "step": 8910 }, { "epoch": 8.012589928057555, "step": 8910, "torque_loss": 0.23089082539081573 }, { "epoch": 8.02158273381295, "grad_norm": 0.38291990756988525, "learning_rate": 9.736286948199562e-05, "loss": 0.1277, "step": 8920 }, { "action_loss": 0.013255618512630463, "epoch": 8.02158273381295, "step": 8920 }, { "epoch": 8.02158273381295, "step": 8920, "torque_loss": 0.16467368602752686 }, { "epoch": 8.030575539568344, "grad_norm": 0.4596540629863739, "learning_rate": 9.735403072685848e-05, "loss": 0.1284, "step": 8930 }, { "action_loss": 0.018066920340061188, "epoch": 8.030575539568344, "step": 8930 }, { "epoch": 8.030575539568344, "step": 8930, "torque_loss": 0.19418483972549438 }, { "epoch": 8.03956834532374, "grad_norm": 0.5425064563751221, "learning_rate": 9.734517758681712e-05, "loss": 0.1342, "step": 8940 }, { "action_loss": 0.013067975640296936, "epoch": 8.03956834532374, "step": 8940 }, { "epoch": 8.03956834532374, "step": 8940, "torque_loss": 0.09815271943807602 }, { "epoch": 8.048561151079136, "grad_norm": 0.4246535003185272, "learning_rate": 9.733631006456088e-05, "loss": 0.1157, "step": 8950 }, { "action_loss": 0.00691187521442771, "epoch": 8.048561151079136, "step": 8950 }, { "epoch": 8.048561151079136, "step": 8950, "torque_loss": 0.13290263712406158 }, { "epoch": 8.057553956834532, "grad_norm": 0.47656333446502686, "learning_rate": 9.732742816278348e-05, "loss": 0.1244, "step": 8960 }, { "action_loss": 0.01259035337716341, "epoch": 8.057553956834532, "step": 8960 }, { "epoch": 8.057553956834532, "step": 8960, "torque_loss": 0.203605055809021 }, { "epoch": 8.066546762589928, "grad_norm": 0.4758337736129761, "learning_rate": 9.731853188418302e-05, "loss": 0.1182, "step": 8970 }, { "action_loss": 0.05070780590176582, "epoch": 8.066546762589928, "step": 8970 }, { "epoch": 8.066546762589928, "step": 8970, "torque_loss": 0.26275935769081116 }, { "epoch": 8.075539568345324, "grad_norm": 0.5166704654693604, "learning_rate": 9.730962123146194e-05, "loss": 0.1241, "step": 8980 }, { "action_loss": 0.009442843496799469, "epoch": 8.075539568345324, "step": 8980 }, { "epoch": 8.075539568345324, "step": 8980, "torque_loss": 0.12808434665203094 }, { "epoch": 8.08453237410072, "grad_norm": 0.518697202205658, "learning_rate": 9.730069620732709e-05, "loss": 0.1218, "step": 8990 }, { "action_loss": 0.0112702501937747, "epoch": 8.08453237410072, "step": 8990 }, { "epoch": 8.08453237410072, "step": 8990, "torque_loss": 0.14213766157627106 }, { "epoch": 8.093525179856115, "grad_norm": 0.5319736003875732, "learning_rate": 9.72917568144896e-05, "loss": 0.1116, "step": 9000 }, { "action_loss": 0.0175620224326849, "epoch": 8.093525179856115, "step": 9000 }, { "epoch": 8.093525179856115, "step": 9000, "torque_loss": 0.20823664963245392 }, { "epoch": 8.102517985611511, "grad_norm": 0.4432993531227112, "learning_rate": 9.728280305566509e-05, "loss": 0.1352, "step": 9010 }, { "action_loss": 0.02400945872068405, "epoch": 8.102517985611511, "step": 9010 }, { "epoch": 8.102517985611511, "step": 9010, "torque_loss": 0.2474827617406845 }, { "epoch": 8.111510791366907, "grad_norm": 0.4618120491504669, "learning_rate": 9.727383493357343e-05, "loss": 0.1392, "step": 9020 }, { "action_loss": 0.018212823197245598, "epoch": 8.111510791366907, "step": 9020 }, { "epoch": 8.111510791366907, "step": 9020, "torque_loss": 0.15906594693660736 }, { "epoch": 8.120503597122303, "grad_norm": 0.5330766439437866, "learning_rate": 9.726485245093891e-05, "loss": 0.1268, "step": 9030 }, { "action_loss": 0.01731635443866253, "epoch": 8.120503597122303, "step": 9030 }, { "epoch": 8.120503597122303, "step": 9030, "torque_loss": 0.16968272626399994 }, { "epoch": 8.129496402877697, "grad_norm": 0.4468296468257904, "learning_rate": 9.725585561049018e-05, "loss": 0.1188, "step": 9040 }, { "action_loss": 0.028880730271339417, "epoch": 8.129496402877697, "step": 9040 }, { "epoch": 8.129496402877697, "step": 9040, "torque_loss": 0.2160978466272354 }, { "epoch": 8.138489208633093, "grad_norm": 0.37862473726272583, "learning_rate": 9.724684441496022e-05, "loss": 0.1356, "step": 9050 }, { "action_loss": 0.010707742534577847, "epoch": 8.138489208633093, "step": 9050 }, { "epoch": 8.138489208633093, "step": 9050, "torque_loss": 0.1372060626745224 }, { "epoch": 8.147482014388489, "grad_norm": 0.4598006308078766, "learning_rate": 9.72378188670864e-05, "loss": 0.1245, "step": 9060 }, { "action_loss": 0.010638399980962276, "epoch": 8.147482014388489, "step": 9060 }, { "epoch": 8.147482014388489, "step": 9060, "torque_loss": 0.13654308021068573 }, { "epoch": 8.156474820143885, "grad_norm": 0.3693313002586365, "learning_rate": 9.722877896961047e-05, "loss": 0.1136, "step": 9070 }, { "action_loss": 0.01177168171852827, "epoch": 8.156474820143885, "step": 9070 }, { "epoch": 8.156474820143885, "step": 9070, "torque_loss": 0.2094889134168625 }, { "epoch": 8.16546762589928, "grad_norm": 0.5105636715888977, "learning_rate": 9.721972472527848e-05, "loss": 0.1216, "step": 9080 }, { "action_loss": 0.013351098634302616, "epoch": 8.16546762589928, "step": 9080 }, { "epoch": 8.16546762589928, "step": 9080, "torque_loss": 0.22422818839550018 }, { "epoch": 8.174460431654676, "grad_norm": 0.5082065463066101, "learning_rate": 9.721065613684089e-05, "loss": 0.1148, "step": 9090 }, { "action_loss": 0.009929473511874676, "epoch": 8.174460431654676, "step": 9090 }, { "epoch": 8.174460431654676, "step": 9090, "torque_loss": 0.12832914292812347 }, { "epoch": 8.183453237410072, "grad_norm": 0.39985528588294983, "learning_rate": 9.72015732070525e-05, "loss": 0.1089, "step": 9100 }, { "action_loss": 0.033137187361717224, "epoch": 8.183453237410072, "step": 9100 }, { "epoch": 8.183453237410072, "step": 9100, "torque_loss": 0.1871088147163391 }, { "epoch": 8.192446043165468, "grad_norm": 0.31700849533081055, "learning_rate": 9.719247593867244e-05, "loss": 0.107, "step": 9110 }, { "action_loss": 0.03052137792110443, "epoch": 8.192446043165468, "step": 9110 }, { "epoch": 8.192446043165468, "step": 9110, "torque_loss": 0.22861264646053314 }, { "epoch": 8.201438848920864, "grad_norm": 0.4203495383262634, "learning_rate": 9.718336433446423e-05, "loss": 0.122, "step": 9120 }, { "action_loss": 0.04632136598229408, "epoch": 8.201438848920864, "step": 9120 }, { "epoch": 8.201438848920864, "step": 9120, "torque_loss": 0.2225784808397293 }, { "epoch": 8.21043165467626, "grad_norm": 0.39213427901268005, "learning_rate": 9.717423839719574e-05, "loss": 0.1255, "step": 9130 }, { "action_loss": 0.015357919037342072, "epoch": 8.21043165467626, "step": 9130 }, { "epoch": 8.21043165467626, "step": 9130, "torque_loss": 0.1975366473197937 }, { "epoch": 8.219424460431656, "grad_norm": 0.38275980949401855, "learning_rate": 9.71650981296392e-05, "loss": 0.1181, "step": 9140 }, { "action_loss": 0.01748386025428772, "epoch": 8.219424460431656, "step": 9140 }, { "epoch": 8.219424460431656, "step": 9140, "torque_loss": 0.2185826301574707 }, { "epoch": 8.22841726618705, "grad_norm": 0.5826392769813538, "learning_rate": 9.715594353457118e-05, "loss": 0.1188, "step": 9150 }, { "action_loss": 0.010540912859141827, "epoch": 8.22841726618705, "step": 9150 }, { "epoch": 8.22841726618705, "step": 9150, "torque_loss": 0.14518888294696808 }, { "epoch": 8.237410071942445, "grad_norm": 0.5477155447006226, "learning_rate": 9.714677461477257e-05, "loss": 0.119, "step": 9160 }, { "action_loss": 0.016187643632292747, "epoch": 8.237410071942445, "step": 9160 }, { "epoch": 8.237410071942445, "step": 9160, "torque_loss": 0.25595536828041077 }, { "epoch": 8.246402877697841, "grad_norm": 0.5243854522705078, "learning_rate": 9.713759137302869e-05, "loss": 0.1302, "step": 9170 }, { "action_loss": 0.012308411300182343, "epoch": 8.246402877697841, "step": 9170 }, { "epoch": 8.246402877697841, "step": 9170, "torque_loss": 0.16894574463367462 }, { "epoch": 8.255395683453237, "grad_norm": 0.5150465369224548, "learning_rate": 9.712839381212914e-05, "loss": 0.1185, "step": 9180 }, { "action_loss": 0.010309618897736073, "epoch": 8.255395683453237, "step": 9180 }, { "epoch": 8.255395683453237, "step": 9180, "torque_loss": 0.19120760262012482 }, { "epoch": 8.264388489208633, "grad_norm": 0.46718132495880127, "learning_rate": 9.71191819348679e-05, "loss": 0.1214, "step": 9190 }, { "action_loss": 0.010355028323829174, "epoch": 8.264388489208633, "step": 9190 }, { "epoch": 8.264388489208633, "step": 9190, "torque_loss": 0.20957531034946442 }, { "epoch": 8.273381294964029, "grad_norm": 0.384653776884079, "learning_rate": 9.710995574404331e-05, "loss": 0.1245, "step": 9200 }, { "action_loss": 0.011941966600716114, "epoch": 8.273381294964029, "step": 9200 }, { "epoch": 8.273381294964029, "step": 9200, "torque_loss": 0.17953036725521088 }, { "epoch": 8.282374100719425, "grad_norm": 0.4971296787261963, "learning_rate": 9.710071524245802e-05, "loss": 0.1432, "step": 9210 }, { "action_loss": 0.020824557170271873, "epoch": 8.282374100719425, "step": 9210 }, { "epoch": 8.282374100719425, "step": 9210, "torque_loss": 0.2104729861021042 }, { "epoch": 8.29136690647482, "grad_norm": 0.3983381986618042, "learning_rate": 9.709146043291906e-05, "loss": 0.1056, "step": 9220 }, { "action_loss": 0.006538778077811003, "epoch": 8.29136690647482, "step": 9220 }, { "epoch": 8.29136690647482, "step": 9220, "torque_loss": 0.14214585721492767 }, { "epoch": 8.300359712230216, "grad_norm": 0.45923343300819397, "learning_rate": 9.70821913182378e-05, "loss": 0.125, "step": 9230 }, { "action_loss": 0.009515647776424885, "epoch": 8.300359712230216, "step": 9230 }, { "epoch": 8.300359712230216, "step": 9230, "torque_loss": 0.1939215064048767 }, { "epoch": 8.309352517985612, "grad_norm": 0.46234795451164246, "learning_rate": 9.707290790122995e-05, "loss": 0.1233, "step": 9240 }, { "action_loss": 0.006566318217664957, "epoch": 8.309352517985612, "step": 9240 }, { "epoch": 8.309352517985612, "step": 9240, "torque_loss": 0.10797557979822159 }, { "epoch": 8.318345323741006, "grad_norm": 0.31962674856185913, "learning_rate": 9.706361018471557e-05, "loss": 0.1224, "step": 9250 }, { "action_loss": 0.024162814021110535, "epoch": 8.318345323741006, "step": 9250 }, { "epoch": 8.318345323741006, "step": 9250, "torque_loss": 0.23182439804077148 }, { "epoch": 8.327338129496402, "grad_norm": 0.4186016023159027, "learning_rate": 9.705429817151906e-05, "loss": 0.1292, "step": 9260 }, { "action_loss": 0.019907796755433083, "epoch": 8.327338129496402, "step": 9260 }, { "epoch": 8.327338129496402, "step": 9260, "torque_loss": 0.16074630618095398 }, { "epoch": 8.336330935251798, "grad_norm": 0.28346574306488037, "learning_rate": 9.704497186446917e-05, "loss": 0.1115, "step": 9270 }, { "action_loss": 0.01850486733019352, "epoch": 8.336330935251798, "step": 9270 }, { "epoch": 8.336330935251798, "step": 9270, "torque_loss": 0.2640182077884674 }, { "epoch": 8.345323741007194, "grad_norm": 0.39197954535484314, "learning_rate": 9.703563126639896e-05, "loss": 0.1335, "step": 9280 }, { "action_loss": 0.014735457487404346, "epoch": 8.345323741007194, "step": 9280 }, { "epoch": 8.345323741007194, "step": 9280, "torque_loss": 0.1977490931749344 }, { "epoch": 8.35431654676259, "grad_norm": 0.41907158493995667, "learning_rate": 9.70262763801459e-05, "loss": 0.1241, "step": 9290 }, { "action_loss": 0.015270284377038479, "epoch": 8.35431654676259, "step": 9290 }, { "epoch": 8.35431654676259, "step": 9290, "torque_loss": 0.21384818851947784 }, { "epoch": 8.363309352517986, "grad_norm": 0.44996654987335205, "learning_rate": 9.701690720855171e-05, "loss": 0.1323, "step": 9300 }, { "action_loss": 0.008875972591340542, "epoch": 8.363309352517986, "step": 9300 }, { "epoch": 8.363309352517986, "step": 9300, "torque_loss": 0.14086298644542694 }, { "epoch": 8.372302158273381, "grad_norm": 0.5478487610816956, "learning_rate": 9.700752375446253e-05, "loss": 0.1263, "step": 9310 }, { "action_loss": 0.08794399350881577, "epoch": 8.372302158273381, "step": 9310 }, { "epoch": 8.372302158273381, "step": 9310, "torque_loss": 0.23630638420581818 }, { "epoch": 8.381294964028777, "grad_norm": 0.4902341961860657, "learning_rate": 9.69981260207288e-05, "loss": 0.1534, "step": 9320 }, { "action_loss": 0.01845490001142025, "epoch": 8.381294964028777, "step": 9320 }, { "epoch": 8.381294964028777, "step": 9320, "torque_loss": 0.27441099286079407 }, { "epoch": 8.390287769784173, "grad_norm": 0.5336071848869324, "learning_rate": 9.698871401020529e-05, "loss": 0.1293, "step": 9330 }, { "action_loss": 0.007888180203735828, "epoch": 8.390287769784173, "step": 9330 }, { "epoch": 8.390287769784173, "step": 9330, "torque_loss": 0.17781786620616913 }, { "epoch": 8.399280575539569, "grad_norm": 0.48312652111053467, "learning_rate": 9.697928772575112e-05, "loss": 0.1243, "step": 9340 }, { "action_loss": 0.01986815594136715, "epoch": 8.399280575539569, "step": 9340 }, { "epoch": 8.399280575539569, "step": 9340, "torque_loss": 0.18011640012264252 }, { "epoch": 8.408273381294965, "grad_norm": 0.4326934218406677, "learning_rate": 9.696984717022976e-05, "loss": 0.1084, "step": 9350 }, { "action_loss": 0.007398419082164764, "epoch": 8.408273381294965, "step": 9350 }, { "epoch": 8.408273381294965, "step": 9350, "torque_loss": 0.2242453694343567 }, { "epoch": 8.417266187050359, "grad_norm": 0.3518812358379364, "learning_rate": 9.6960392346509e-05, "loss": 0.1048, "step": 9360 }, { "action_loss": 0.025337889790534973, "epoch": 8.417266187050359, "step": 9360 }, { "epoch": 8.417266187050359, "step": 9360, "torque_loss": 0.20794324576854706 }, { "epoch": 8.426258992805755, "grad_norm": 0.450843870639801, "learning_rate": 9.695092325746097e-05, "loss": 0.1449, "step": 9370 }, { "action_loss": 0.008301991038024426, "epoch": 8.426258992805755, "step": 9370 }, { "epoch": 8.426258992805755, "step": 9370, "torque_loss": 0.15159308910369873 }, { "epoch": 8.43525179856115, "grad_norm": 0.3977252244949341, "learning_rate": 9.694143990596211e-05, "loss": 0.1173, "step": 9380 }, { "action_loss": 0.00784303154796362, "epoch": 8.43525179856115, "step": 9380 }, { "epoch": 8.43525179856115, "step": 9380, "torque_loss": 0.20684866607189178 }, { "epoch": 8.444244604316546, "grad_norm": 0.3932156264781952, "learning_rate": 9.693194229489325e-05, "loss": 0.1401, "step": 9390 }, { "action_loss": 0.011764060705900192, "epoch": 8.444244604316546, "step": 9390 }, { "epoch": 8.444244604316546, "step": 9390, "torque_loss": 0.2314382791519165 }, { "epoch": 8.453237410071942, "grad_norm": 0.4994092285633087, "learning_rate": 9.692243042713944e-05, "loss": 0.1189, "step": 9400 }, { "action_loss": 0.025753242895007133, "epoch": 8.453237410071942, "step": 9400 }, { "epoch": 8.453237410071942, "step": 9400, "torque_loss": 0.22950701415538788 }, { "epoch": 8.462230215827338, "grad_norm": 0.4657718241214752, "learning_rate": 9.691290430559022e-05, "loss": 0.1276, "step": 9410 }, { "action_loss": 0.017267147079110146, "epoch": 8.462230215827338, "step": 9410 }, { "epoch": 8.462230215827338, "step": 9410, "torque_loss": 0.1747240275144577 }, { "epoch": 8.471223021582734, "grad_norm": 0.510161280632019, "learning_rate": 9.690336393313932e-05, "loss": 0.1244, "step": 9420 }, { "action_loss": 0.011211887001991272, "epoch": 8.471223021582734, "step": 9420 }, { "epoch": 8.471223021582734, "step": 9420, "torque_loss": 0.1676759123802185 }, { "epoch": 8.48021582733813, "grad_norm": 0.5586357712745667, "learning_rate": 9.689380931268487e-05, "loss": 0.136, "step": 9430 }, { "action_loss": 0.01919718086719513, "epoch": 8.48021582733813, "step": 9430 }, { "epoch": 8.48021582733813, "step": 9430, "torque_loss": 0.19655247032642365 }, { "epoch": 8.489208633093526, "grad_norm": 0.4865509569644928, "learning_rate": 9.688424044712932e-05, "loss": 0.1288, "step": 9440 }, { "action_loss": 0.013063198886811733, "epoch": 8.489208633093526, "step": 9440 }, { "epoch": 8.489208633093526, "step": 9440, "torque_loss": 0.20591825246810913 }, { "epoch": 8.498201438848922, "grad_norm": 0.5846719145774841, "learning_rate": 9.687465733937942e-05, "loss": 0.136, "step": 9450 }, { "action_loss": 0.01157485693693161, "epoch": 8.498201438848922, "step": 9450 }, { "epoch": 8.498201438848922, "step": 9450, "torque_loss": 0.15718533098697662 }, { "epoch": 8.507194244604317, "grad_norm": 0.543043315410614, "learning_rate": 9.686505999234627e-05, "loss": 0.1151, "step": 9460 }, { "action_loss": 0.018908223137259483, "epoch": 8.507194244604317, "step": 9460 }, { "epoch": 8.507194244604317, "step": 9460, "torque_loss": 0.1880689412355423 }, { "epoch": 8.516187050359711, "grad_norm": 0.4042539596557617, "learning_rate": 9.685544840894529e-05, "loss": 0.1165, "step": 9470 }, { "action_loss": 0.010813377797603607, "epoch": 8.516187050359711, "step": 9470 }, { "epoch": 8.516187050359711, "step": 9470, "torque_loss": 0.13193173706531525 }, { "epoch": 8.525179856115107, "grad_norm": 0.4601670503616333, "learning_rate": 9.684582259209624e-05, "loss": 0.1158, "step": 9480 }, { "action_loss": 0.01742357201874256, "epoch": 8.525179856115107, "step": 9480 }, { "epoch": 8.525179856115107, "step": 9480, "torque_loss": 0.19499576091766357 }, { "epoch": 8.534172661870503, "grad_norm": 0.49086371064186096, "learning_rate": 9.683618254472317e-05, "loss": 0.1266, "step": 9490 }, { "action_loss": 0.010001328773796558, "epoch": 8.534172661870503, "step": 9490 }, { "epoch": 8.534172661870503, "step": 9490, "torque_loss": 0.18191027641296387 }, { "epoch": 8.543165467625899, "grad_norm": 0.44153517484664917, "learning_rate": 9.682652826975449e-05, "loss": 0.1285, "step": 9500 }, { "action_loss": 0.029786253347992897, "epoch": 8.543165467625899, "step": 9500 }, { "epoch": 8.543165467625899, "step": 9500, "torque_loss": 0.23889446258544922 }, { "epoch": 8.552158273381295, "grad_norm": 0.4262647032737732, "learning_rate": 9.681685977012291e-05, "loss": 0.1161, "step": 9510 }, { "action_loss": 0.011699003167450428, "epoch": 8.552158273381295, "step": 9510 }, { "epoch": 8.552158273381295, "step": 9510, "torque_loss": 0.11363162845373154 }, { "epoch": 8.56115107913669, "grad_norm": 0.3665526211261749, "learning_rate": 9.680717704876546e-05, "loss": 0.125, "step": 9520 }, { "action_loss": 0.021360665559768677, "epoch": 8.56115107913669, "step": 9520 }, { "epoch": 8.56115107913669, "step": 9520, "torque_loss": 0.23983485996723175 }, { "epoch": 8.570143884892087, "grad_norm": 0.4807360768318176, "learning_rate": 9.679748010862349e-05, "loss": 0.13, "step": 9530 }, { "action_loss": 0.009313342161476612, "epoch": 8.570143884892087, "step": 9530 }, { "epoch": 8.570143884892087, "step": 9530, "torque_loss": 0.1353350728750229 }, { "epoch": 8.579136690647482, "grad_norm": 0.45040029287338257, "learning_rate": 9.678776895264267e-05, "loss": 0.1293, "step": 9540 }, { "action_loss": 0.024559304118156433, "epoch": 8.579136690647482, "step": 9540 }, { "epoch": 8.579136690647482, "step": 9540, "torque_loss": 0.23845148086547852 }, { "epoch": 8.588129496402878, "grad_norm": 0.4615209102630615, "learning_rate": 9.6778043583773e-05, "loss": 0.138, "step": 9550 }, { "action_loss": 0.010110054165124893, "epoch": 8.588129496402878, "step": 9550 }, { "epoch": 8.588129496402878, "step": 9550, "torque_loss": 0.1467440277338028 }, { "epoch": 8.597122302158274, "grad_norm": 0.4202478528022766, "learning_rate": 9.67683040049688e-05, "loss": 0.1095, "step": 9560 }, { "action_loss": 0.012987866997718811, "epoch": 8.597122302158274, "step": 9560 }, { "epoch": 8.597122302158274, "step": 9560, "torque_loss": 0.17712320387363434 }, { "epoch": 8.60611510791367, "grad_norm": 0.4071405231952667, "learning_rate": 9.675855021918869e-05, "loss": 0.1294, "step": 9570 }, { "action_loss": 0.004799955058842897, "epoch": 8.60611510791367, "step": 9570 }, { "epoch": 8.60611510791367, "step": 9570, "torque_loss": 0.14813750982284546 }, { "epoch": 8.615107913669064, "grad_norm": 0.4693993330001831, "learning_rate": 9.674878222939561e-05, "loss": 0.1107, "step": 9580 }, { "action_loss": 0.006180922035127878, "epoch": 8.615107913669064, "step": 9580 }, { "epoch": 8.615107913669064, "step": 9580, "torque_loss": 0.08553435653448105 }, { "epoch": 8.62410071942446, "grad_norm": 0.41689392924308777, "learning_rate": 9.673900003855681e-05, "loss": 0.1081, "step": 9590 }, { "action_loss": 0.016785630956292152, "epoch": 8.62410071942446, "step": 9590 }, { "epoch": 8.62410071942446, "step": 9590, "torque_loss": 0.15153580904006958 }, { "epoch": 8.633093525179856, "grad_norm": 0.4039299488067627, "learning_rate": 9.672920364964389e-05, "loss": 0.1306, "step": 9600 }, { "action_loss": 0.018613776192069054, "epoch": 8.633093525179856, "step": 9600 }, { "epoch": 8.633093525179856, "step": 9600, "torque_loss": 0.18208126723766327 }, { "epoch": 8.642086330935252, "grad_norm": 0.5340539216995239, "learning_rate": 9.671939306563269e-05, "loss": 0.1125, "step": 9610 }, { "action_loss": 0.019468052312731743, "epoch": 8.642086330935252, "step": 9610 }, { "epoch": 8.642086330935252, "step": 9610, "torque_loss": 0.23496408760547638 }, { "epoch": 8.651079136690647, "grad_norm": 0.4716508984565735, "learning_rate": 9.670956828950345e-05, "loss": 0.1283, "step": 9620 }, { "action_loss": 0.01530071347951889, "epoch": 8.651079136690647, "step": 9620 }, { "epoch": 8.651079136690647, "step": 9620, "torque_loss": 0.25956544280052185 }, { "epoch": 8.660071942446043, "grad_norm": 0.5991566777229309, "learning_rate": 9.669972932424065e-05, "loss": 0.1217, "step": 9630 }, { "action_loss": 0.00883942935615778, "epoch": 8.660071942446043, "step": 9630 }, { "epoch": 8.660071942446043, "step": 9630, "torque_loss": 0.14751480519771576 }, { "epoch": 8.66906474820144, "grad_norm": 0.485465943813324, "learning_rate": 9.668987617283312e-05, "loss": 0.1059, "step": 9640 }, { "action_loss": 0.007879102602601051, "epoch": 8.66906474820144, "step": 9640 }, { "epoch": 8.66906474820144, "step": 9640, "torque_loss": 0.11929741501808167 }, { "epoch": 8.678057553956835, "grad_norm": 0.4124605655670166, "learning_rate": 9.668000883827397e-05, "loss": 0.1352, "step": 9650 }, { "action_loss": 0.011573473922908306, "epoch": 8.678057553956835, "step": 9650 }, { "epoch": 8.678057553956835, "step": 9650, "torque_loss": 0.18275435268878937 }, { "epoch": 8.68705035971223, "grad_norm": 0.4154244363307953, "learning_rate": 9.667012732356067e-05, "loss": 0.1382, "step": 9660 }, { "action_loss": 0.008607749827206135, "epoch": 8.68705035971223, "step": 9660 }, { "epoch": 8.68705035971223, "step": 9660, "torque_loss": 0.1502809375524521 }, { "epoch": 8.696043165467627, "grad_norm": 0.5339526534080505, "learning_rate": 9.666023163169493e-05, "loss": 0.1444, "step": 9670 }, { "action_loss": 0.02605229616165161, "epoch": 8.696043165467627, "step": 9670 }, { "epoch": 8.696043165467627, "step": 9670, "torque_loss": 0.2846374213695526 }, { "epoch": 8.70503597122302, "grad_norm": 0.4067680537700653, "learning_rate": 9.665032176568281e-05, "loss": 0.1177, "step": 9680 }, { "action_loss": 0.010515411384403706, "epoch": 8.70503597122302, "step": 9680 }, { "epoch": 8.70503597122302, "step": 9680, "torque_loss": 0.19051019847393036 }, { "epoch": 8.714028776978417, "grad_norm": 0.4399321377277374, "learning_rate": 9.664039772853469e-05, "loss": 0.1141, "step": 9690 }, { "action_loss": 0.010641154833137989, "epoch": 8.714028776978417, "step": 9690 }, { "epoch": 8.714028776978417, "step": 9690, "torque_loss": 0.1444190889596939 }, { "epoch": 8.723021582733812, "grad_norm": 0.3675147593021393, "learning_rate": 9.663045952326518e-05, "loss": 0.1202, "step": 9700 }, { "action_loss": 0.012462337501347065, "epoch": 8.723021582733812, "step": 9700 }, { "epoch": 8.723021582733812, "step": 9700, "torque_loss": 0.14776010811328888 }, { "epoch": 8.732014388489208, "grad_norm": 0.3896152079105377, "learning_rate": 9.662050715289328e-05, "loss": 0.1052, "step": 9710 }, { "action_loss": 0.015616278164088726, "epoch": 8.732014388489208, "step": 9710 }, { "epoch": 8.732014388489208, "step": 9710, "torque_loss": 0.16885100305080414 }, { "epoch": 8.741007194244604, "grad_norm": 0.3992328941822052, "learning_rate": 9.661054062044226e-05, "loss": 0.1371, "step": 9720 }, { "action_loss": 0.026854107156395912, "epoch": 8.741007194244604, "step": 9720 }, { "epoch": 8.741007194244604, "step": 9720, "torque_loss": 0.17129473388195038 }, { "epoch": 8.75, "grad_norm": 0.46807244420051575, "learning_rate": 9.660055992893968e-05, "loss": 0.1132, "step": 9730 }, { "action_loss": 0.019644243642687798, "epoch": 8.75, "step": 9730 }, { "epoch": 8.75, "step": 9730, "torque_loss": 0.18448512256145477 }, { "epoch": 8.758992805755396, "grad_norm": 0.3762849271297455, "learning_rate": 9.659056508141739e-05, "loss": 0.12, "step": 9740 }, { "action_loss": 0.015103553421795368, "epoch": 8.758992805755396, "step": 9740 }, { "epoch": 8.758992805755396, "step": 9740, "torque_loss": 0.17598360776901245 }, { "epoch": 8.767985611510792, "grad_norm": 0.4322441816329956, "learning_rate": 9.658055608091161e-05, "loss": 0.125, "step": 9750 }, { "action_loss": 0.008082128129899502, "epoch": 8.767985611510792, "step": 9750 }, { "epoch": 8.767985611510792, "step": 9750, "torque_loss": 0.21881616115570068 }, { "epoch": 8.776978417266188, "grad_norm": 0.4017515480518341, "learning_rate": 9.657053293046276e-05, "loss": 0.1271, "step": 9760 }, { "action_loss": 0.017189739271998405, "epoch": 8.776978417266188, "step": 9760 }, { "epoch": 8.776978417266188, "step": 9760, "torque_loss": 0.21024282276630402 }, { "epoch": 8.785971223021583, "grad_norm": 0.3997935354709625, "learning_rate": 9.656049563311564e-05, "loss": 0.1143, "step": 9770 }, { "action_loss": 0.015563917346298695, "epoch": 8.785971223021583, "step": 9770 }, { "epoch": 8.785971223021583, "step": 9770, "torque_loss": 0.2019478678703308 }, { "epoch": 8.79496402877698, "grad_norm": 0.40936192870140076, "learning_rate": 9.655044419191929e-05, "loss": 0.1361, "step": 9780 }, { "action_loss": 0.004833715036511421, "epoch": 8.79496402877698, "step": 9780 }, { "epoch": 8.79496402877698, "step": 9780, "torque_loss": 0.145766481757164 }, { "epoch": 8.803956834532373, "grad_norm": 0.5065961480140686, "learning_rate": 9.654037860992711e-05, "loss": 0.1206, "step": 9790 }, { "action_loss": 0.020352505147457123, "epoch": 8.803956834532373, "step": 9790 }, { "epoch": 8.803956834532373, "step": 9790, "torque_loss": 0.23433633148670197 }, { "epoch": 8.81294964028777, "grad_norm": 0.6027885675430298, "learning_rate": 9.653029889019672e-05, "loss": 0.1352, "step": 9800 }, { "action_loss": 0.016018254682421684, "epoch": 8.81294964028777, "step": 9800 }, { "epoch": 8.81294964028777, "step": 9800, "torque_loss": 0.21208810806274414 }, { "epoch": 8.821942446043165, "grad_norm": 0.4542813301086426, "learning_rate": 9.65202050357901e-05, "loss": 0.1218, "step": 9810 }, { "action_loss": 0.03170063719153404, "epoch": 8.821942446043165, "step": 9810 }, { "epoch": 8.821942446043165, "step": 9810, "torque_loss": 0.2922048568725586 }, { "epoch": 8.83093525179856, "grad_norm": 0.4734850525856018, "learning_rate": 9.651009704977347e-05, "loss": 0.1146, "step": 9820 }, { "action_loss": 0.0045386929996311665, "epoch": 8.83093525179856, "step": 9820 }, { "epoch": 8.83093525179856, "step": 9820, "torque_loss": 0.08861645311117172 }, { "epoch": 8.839928057553957, "grad_norm": 0.4085341989994049, "learning_rate": 9.649997493521738e-05, "loss": 0.1193, "step": 9830 }, { "action_loss": 0.010842788964509964, "epoch": 8.839928057553957, "step": 9830 }, { "epoch": 8.839928057553957, "step": 9830, "torque_loss": 0.16666586697101593 }, { "epoch": 8.848920863309353, "grad_norm": 0.44090697169303894, "learning_rate": 9.64898386951967e-05, "loss": 0.1148, "step": 9840 }, { "action_loss": 0.02690141648054123, "epoch": 8.848920863309353, "step": 9840 }, { "epoch": 8.848920863309353, "step": 9840, "torque_loss": 0.23622210323810577 }, { "epoch": 8.857913669064748, "grad_norm": 0.43154194951057434, "learning_rate": 9.647968833279049e-05, "loss": 0.1306, "step": 9850 }, { "action_loss": 0.011315837502479553, "epoch": 8.857913669064748, "step": 9850 }, { "epoch": 8.857913669064748, "step": 9850, "torque_loss": 0.21253633499145508 }, { "epoch": 8.866906474820144, "grad_norm": 0.5080624222755432, "learning_rate": 9.646952385108218e-05, "loss": 0.1313, "step": 9860 }, { "action_loss": 0.005588958505541086, "epoch": 8.866906474820144, "step": 9860 }, { "epoch": 8.866906474820144, "step": 9860, "torque_loss": 0.1354256123304367 }, { "epoch": 8.87589928057554, "grad_norm": 0.44944247603416443, "learning_rate": 9.645934525315951e-05, "loss": 0.1125, "step": 9870 }, { "action_loss": 0.004378997255116701, "epoch": 8.87589928057554, "step": 9870 }, { "epoch": 8.87589928057554, "step": 9870, "torque_loss": 0.0942763015627861 }, { "epoch": 8.884892086330936, "grad_norm": 0.47695425152778625, "learning_rate": 9.644915254211442e-05, "loss": 0.1199, "step": 9880 }, { "action_loss": 0.016572315245866776, "epoch": 8.884892086330936, "step": 9880 }, { "epoch": 8.884892086330936, "step": 9880, "torque_loss": 0.15449188649654388 }, { "epoch": 8.89388489208633, "grad_norm": 0.39264053106307983, "learning_rate": 9.643894572104321e-05, "loss": 0.1183, "step": 9890 }, { "action_loss": 0.006752135697752237, "epoch": 8.89388489208633, "step": 9890 }, { "epoch": 8.89388489208633, "step": 9890, "torque_loss": 0.0934368148446083 }, { "epoch": 8.902877697841726, "grad_norm": 0.609044075012207, "learning_rate": 9.642872479304644e-05, "loss": 0.1221, "step": 9900 }, { "action_loss": 0.01847894862294197, "epoch": 8.902877697841726, "step": 9900 }, { "epoch": 8.902877697841726, "step": 9900, "torque_loss": 0.21770340204238892 }, { "epoch": 8.911870503597122, "grad_norm": 0.5208342671394348, "learning_rate": 9.641848976122895e-05, "loss": 0.1233, "step": 9910 }, { "action_loss": 0.013692252337932587, "epoch": 8.911870503597122, "step": 9910 }, { "epoch": 8.911870503597122, "step": 9910, "torque_loss": 0.17836254835128784 }, { "epoch": 8.920863309352518, "grad_norm": 0.3474958539009094, "learning_rate": 9.64082406286999e-05, "loss": 0.1065, "step": 9920 }, { "action_loss": 0.007189995143562555, "epoch": 8.920863309352518, "step": 9920 }, { "epoch": 8.920863309352518, "step": 9920, "torque_loss": 0.10255801677703857 }, { "epoch": 8.929856115107913, "grad_norm": 0.47556135058403015, "learning_rate": 9.639797739857269e-05, "loss": 0.1345, "step": 9930 }, { "action_loss": 0.010625015012919903, "epoch": 8.929856115107913, "step": 9930 }, { "epoch": 8.929856115107913, "step": 9930, "torque_loss": 0.1572597771883011 }, { "epoch": 8.93884892086331, "grad_norm": 0.40367087721824646, "learning_rate": 9.638770007396498e-05, "loss": 0.1232, "step": 9940 }, { "action_loss": 0.006553275045007467, "epoch": 8.93884892086331, "step": 9940 }, { "epoch": 8.93884892086331, "step": 9940, "torque_loss": 0.15448519587516785 }, { "epoch": 8.947841726618705, "grad_norm": 0.5162588357925415, "learning_rate": 9.63774086579988e-05, "loss": 0.1311, "step": 9950 }, { "action_loss": 0.017808325588703156, "epoch": 8.947841726618705, "step": 9950 }, { "epoch": 8.947841726618705, "step": 9950, "torque_loss": 0.19936828315258026 }, { "epoch": 8.956834532374101, "grad_norm": 0.45563095808029175, "learning_rate": 9.63671031538004e-05, "loss": 0.1153, "step": 9960 }, { "action_loss": 0.012215512804687023, "epoch": 8.956834532374101, "step": 9960 }, { "epoch": 8.956834532374101, "step": 9960, "torque_loss": 0.14783908426761627 }, { "epoch": 8.965827338129497, "grad_norm": 0.42397308349609375, "learning_rate": 9.635678356450031e-05, "loss": 0.1203, "step": 9970 }, { "action_loss": 0.007179430220276117, "epoch": 8.965827338129497, "step": 9970 }, { "epoch": 8.965827338129497, "step": 9970, "torque_loss": 0.11556407064199448 }, { "epoch": 8.974820143884893, "grad_norm": 0.5363574028015137, "learning_rate": 9.634644989323336e-05, "loss": 0.1204, "step": 9980 }, { "action_loss": 0.017149390652775764, "epoch": 8.974820143884893, "step": 9980 }, { "epoch": 8.974820143884893, "step": 9980, "torque_loss": 0.17190246284008026 }, { "epoch": 8.983812949640289, "grad_norm": 0.41752371191978455, "learning_rate": 9.633610214313861e-05, "loss": 0.1337, "step": 9990 }, { "action_loss": 0.011635024100542068, "epoch": 8.983812949640289, "step": 9990 }, { "epoch": 8.983812949640289, "step": 9990, "torque_loss": 0.10699370503425598 }, { "epoch": 8.992805755395683, "grad_norm": 0.49704304337501526, "learning_rate": 9.632574031735951e-05, "loss": 0.1211, "step": 10000 }, { "action_loss": 0.008961674757301807, "epoch": 8.992805755395683, "step": 10000 }, { "epoch": 8.992805755395683, "step": 10000, "torque_loss": 0.16144798696041107 }, { "epoch": 9.001798561151078, "grad_norm": 0.4061869978904724, "learning_rate": 9.631536441904364e-05, "loss": 0.116, "step": 10010 }, { "action_loss": 0.010416320525109768, "epoch": 9.001798561151078, "step": 10010 }, { "epoch": 9.001798561151078, "step": 10010, "torque_loss": 0.1688687950372696 }, { "epoch": 9.010791366906474, "grad_norm": 0.4648078680038452, "learning_rate": 9.630497445134293e-05, "loss": 0.1328, "step": 10020 }, { "action_loss": 0.01092468947172165, "epoch": 9.010791366906474, "step": 10020 }, { "epoch": 9.010791366906474, "step": 10020, "torque_loss": 0.12863893806934357 }, { "epoch": 9.01978417266187, "grad_norm": 0.4573740065097809, "learning_rate": 9.62945704174136e-05, "loss": 0.128, "step": 10030 }, { "action_loss": 0.015325249172747135, "epoch": 9.01978417266187, "step": 10030 }, { "epoch": 9.01978417266187, "step": 10030, "torque_loss": 0.18305586278438568 }, { "epoch": 9.028776978417266, "grad_norm": 0.4526602625846863, "learning_rate": 9.628415232041612e-05, "loss": 0.12, "step": 10040 }, { "action_loss": 0.03471803665161133, "epoch": 9.028776978417266, "step": 10040 }, { "epoch": 9.028776978417266, "step": 10040, "torque_loss": 0.2786595821380615 }, { "epoch": 9.037769784172662, "grad_norm": 0.5724684596061707, "learning_rate": 9.627372016351524e-05, "loss": 0.1271, "step": 10050 }, { "action_loss": 0.01504210289567709, "epoch": 9.037769784172662, "step": 10050 }, { "epoch": 9.037769784172662, "step": 10050, "torque_loss": 0.19115744531154633 }, { "epoch": 9.046762589928058, "grad_norm": 0.49063995480537415, "learning_rate": 9.626327394987995e-05, "loss": 0.1167, "step": 10060 }, { "action_loss": 0.009053497575223446, "epoch": 9.046762589928058, "step": 10060 }, { "epoch": 9.046762589928058, "step": 10060, "torque_loss": 0.1381485015153885 }, { "epoch": 9.055755395683454, "grad_norm": 0.5002123117446899, "learning_rate": 9.625281368268355e-05, "loss": 0.1163, "step": 10070 }, { "action_loss": 0.026405850425362587, "epoch": 9.055755395683454, "step": 10070 }, { "epoch": 9.055755395683454, "step": 10070, "torque_loss": 0.23424220085144043 }, { "epoch": 9.06474820143885, "grad_norm": 0.39268648624420166, "learning_rate": 9.624233936510357e-05, "loss": 0.1172, "step": 10080 }, { "action_loss": 0.02272970974445343, "epoch": 9.06474820143885, "step": 10080 }, { "epoch": 9.06474820143885, "step": 10080, "torque_loss": 0.16604270040988922 }, { "epoch": 9.073741007194245, "grad_norm": 0.5140688419342041, "learning_rate": 9.623185100032187e-05, "loss": 0.126, "step": 10090 }, { "action_loss": 0.008617707528173923, "epoch": 9.073741007194245, "step": 10090 }, { "epoch": 9.073741007194245, "step": 10090, "torque_loss": 0.12400487810373306 }, { "epoch": 9.082733812949641, "grad_norm": 0.49479517340660095, "learning_rate": 9.62213485915245e-05, "loss": 0.1176, "step": 10100 }, { "action_loss": 0.009624400176107883, "epoch": 9.082733812949641, "step": 10100 }, { "epoch": 9.082733812949641, "step": 10100, "torque_loss": 0.19230319559574127 }, { "epoch": 9.091726618705035, "grad_norm": 0.3314875364303589, "learning_rate": 9.621083214190186e-05, "loss": 0.1125, "step": 10110 }, { "action_loss": 0.0056791906245052814, "epoch": 9.091726618705035, "step": 10110 }, { "epoch": 9.091726618705035, "step": 10110, "torque_loss": 0.14419186115264893 }, { "epoch": 9.100719424460431, "grad_norm": 0.39851951599121094, "learning_rate": 9.62003016546485e-05, "loss": 0.1206, "step": 10120 }, { "action_loss": 0.015323038212954998, "epoch": 9.100719424460431, "step": 10120 }, { "epoch": 9.100719424460431, "step": 10120, "torque_loss": 0.20547960698604584 }, { "epoch": 9.109712230215827, "grad_norm": 0.37061625719070435, "learning_rate": 9.618975713296339e-05, "loss": 0.1131, "step": 10130 }, { "action_loss": 0.00608823960646987, "epoch": 9.109712230215827, "step": 10130 }, { "epoch": 9.109712230215827, "step": 10130, "torque_loss": 0.17129181325435638 }, { "epoch": 9.118705035971223, "grad_norm": 0.4035986661911011, "learning_rate": 9.61791985800496e-05, "loss": 0.1001, "step": 10140 }, { "action_loss": 0.016774741932749748, "epoch": 9.118705035971223, "step": 10140 }, { "epoch": 9.118705035971223, "step": 10140, "torque_loss": 0.190884068608284 }, { "epoch": 9.127697841726619, "grad_norm": 0.3150959312915802, "learning_rate": 9.616862599911458e-05, "loss": 0.1224, "step": 10150 }, { "action_loss": 0.013543777167797089, "epoch": 9.127697841726619, "step": 10150 }, { "epoch": 9.127697841726619, "step": 10150, "torque_loss": 0.15822987258434296 }, { "epoch": 9.136690647482014, "grad_norm": 0.41453588008880615, "learning_rate": 9.615803939337e-05, "loss": 0.1142, "step": 10160 }, { "action_loss": 0.01904173195362091, "epoch": 9.136690647482014, "step": 10160 }, { "epoch": 9.136690647482014, "step": 10160, "torque_loss": 0.15787892043590546 }, { "epoch": 9.14568345323741, "grad_norm": 0.4786461889743805, "learning_rate": 9.614743876603178e-05, "loss": 0.1199, "step": 10170 }, { "action_loss": 0.022084763273596764, "epoch": 9.14568345323741, "step": 10170 }, { "epoch": 9.14568345323741, "step": 10170, "torque_loss": 0.24228300154209137 }, { "epoch": 9.154676258992806, "grad_norm": 0.5761147141456604, "learning_rate": 9.613682412032013e-05, "loss": 0.1293, "step": 10180 }, { "action_loss": 0.02102603204548359, "epoch": 9.154676258992806, "step": 10180 }, { "epoch": 9.154676258992806, "step": 10180, "torque_loss": 0.2400507777929306 }, { "epoch": 9.163669064748202, "grad_norm": 0.4693073630332947, "learning_rate": 9.612619545945947e-05, "loss": 0.1404, "step": 10190 }, { "action_loss": 0.005140981171280146, "epoch": 9.163669064748202, "step": 10190 }, { "epoch": 9.163669064748202, "step": 10190, "torque_loss": 0.16987276077270508 }, { "epoch": 9.172661870503598, "grad_norm": 0.41858670115470886, "learning_rate": 9.611555278667852e-05, "loss": 0.1118, "step": 10200 }, { "action_loss": 0.01286900881677866, "epoch": 9.172661870503598, "step": 10200 }, { "epoch": 9.172661870503598, "step": 10200, "torque_loss": 0.12781967222690582 }, { "epoch": 9.181654676258994, "grad_norm": 0.4024120569229126, "learning_rate": 9.610489610521024e-05, "loss": 0.121, "step": 10210 }, { "action_loss": 0.01241304725408554, "epoch": 9.181654676258994, "step": 10210 }, { "epoch": 9.181654676258994, "step": 10210, "torque_loss": 0.15376000106334686 }, { "epoch": 9.190647482014388, "grad_norm": 0.3622828423976898, "learning_rate": 9.609422541829187e-05, "loss": 0.111, "step": 10220 }, { "action_loss": 0.011708460748195648, "epoch": 9.190647482014388, "step": 10220 }, { "epoch": 9.190647482014388, "step": 10220, "torque_loss": 0.1598835289478302 }, { "epoch": 9.199640287769784, "grad_norm": 0.39876818656921387, "learning_rate": 9.608354072916486e-05, "loss": 0.1377, "step": 10230 }, { "action_loss": 0.01567254029214382, "epoch": 9.199640287769784, "step": 10230 }, { "epoch": 9.199640287769784, "step": 10230, "torque_loss": 0.14702501893043518 }, { "epoch": 9.20863309352518, "grad_norm": 0.46605321764945984, "learning_rate": 9.607284204107493e-05, "loss": 0.1223, "step": 10240 }, { "action_loss": 0.007795598823577166, "epoch": 9.20863309352518, "step": 10240 }, { "epoch": 9.20863309352518, "step": 10240, "torque_loss": 0.1384429931640625 }, { "epoch": 9.217625899280575, "grad_norm": 0.38883015513420105, "learning_rate": 9.606212935727208e-05, "loss": 0.0995, "step": 10250 }, { "action_loss": 0.01129920780658722, "epoch": 9.217625899280575, "step": 10250 }, { "epoch": 9.217625899280575, "step": 10250, "torque_loss": 0.1296224147081375 }, { "epoch": 9.226618705035971, "grad_norm": 0.38882729411125183, "learning_rate": 9.605140268101052e-05, "loss": 0.1204, "step": 10260 }, { "action_loss": 0.010438837110996246, "epoch": 9.226618705035971, "step": 10260 }, { "epoch": 9.226618705035971, "step": 10260, "torque_loss": 0.2066735476255417 }, { "epoch": 9.235611510791367, "grad_norm": 0.5403329133987427, "learning_rate": 9.604066201554875e-05, "loss": 0.102, "step": 10270 }, { "action_loss": 0.017967617139220238, "epoch": 9.235611510791367, "step": 10270 }, { "epoch": 9.235611510791367, "step": 10270, "torque_loss": 0.19721542298793793 }, { "epoch": 9.244604316546763, "grad_norm": 0.5895339846611023, "learning_rate": 9.60299073641495e-05, "loss": 0.1217, "step": 10280 }, { "action_loss": 0.012312489561736584, "epoch": 9.244604316546763, "step": 10280 }, { "epoch": 9.244604316546763, "step": 10280, "torque_loss": 0.15330977737903595 }, { "epoch": 9.253597122302159, "grad_norm": 0.39698243141174316, "learning_rate": 9.601913873007974e-05, "loss": 0.101, "step": 10290 }, { "action_loss": 0.006775203626602888, "epoch": 9.253597122302159, "step": 10290 }, { "epoch": 9.253597122302159, "step": 10290, "torque_loss": 0.19417977333068848 }, { "epoch": 9.262589928057555, "grad_norm": 0.3686746656894684, "learning_rate": 9.60083561166107e-05, "loss": 0.1317, "step": 10300 }, { "action_loss": 0.004742360208183527, "epoch": 9.262589928057555, "step": 10300 }, { "epoch": 9.262589928057555, "step": 10300, "torque_loss": 0.11661761999130249 }, { "epoch": 9.27158273381295, "grad_norm": 0.39148205518722534, "learning_rate": 9.599755952701783e-05, "loss": 0.1201, "step": 10310 }, { "action_loss": 0.013503228314220905, "epoch": 9.27158273381295, "step": 10310 }, { "epoch": 9.27158273381295, "step": 10310, "torque_loss": 0.12959325313568115 }, { "epoch": 9.280575539568344, "grad_norm": 0.488797128200531, "learning_rate": 9.598674896458089e-05, "loss": 0.127, "step": 10320 }, { "action_loss": 0.011336886323988438, "epoch": 9.280575539568344, "step": 10320 }, { "epoch": 9.280575539568344, "step": 10320, "torque_loss": 0.1454296112060547 }, { "epoch": 9.28956834532374, "grad_norm": 0.3732556104660034, "learning_rate": 9.597592443258383e-05, "loss": 0.0956, "step": 10330 }, { "action_loss": 0.006973492447286844, "epoch": 9.28956834532374, "step": 10330 }, { "epoch": 9.28956834532374, "step": 10330, "torque_loss": 0.13057196140289307 }, { "epoch": 9.298561151079136, "grad_norm": 0.3887004852294922, "learning_rate": 9.596508593431483e-05, "loss": 0.1044, "step": 10340 }, { "action_loss": 0.006784887984395027, "epoch": 9.298561151079136, "step": 10340 }, { "epoch": 9.298561151079136, "step": 10340, "torque_loss": 0.13222678005695343 }, { "epoch": 9.307553956834532, "grad_norm": 0.4096716642379761, "learning_rate": 9.59542334730664e-05, "loss": 0.1147, "step": 10350 }, { "action_loss": 0.012666639871895313, "epoch": 9.307553956834532, "step": 10350 }, { "epoch": 9.307553956834532, "step": 10350, "torque_loss": 0.14508189260959625 }, { "epoch": 9.316546762589928, "grad_norm": 0.4428849220275879, "learning_rate": 9.594336705213516e-05, "loss": 0.1222, "step": 10360 }, { "action_loss": 0.012222218327224255, "epoch": 9.316546762589928, "step": 10360 }, { "epoch": 9.316546762589928, "step": 10360, "torque_loss": 0.15212887525558472 }, { "epoch": 9.325539568345324, "grad_norm": 0.4320310950279236, "learning_rate": 9.593248667482208e-05, "loss": 0.1089, "step": 10370 }, { "action_loss": 0.0160736832767725, "epoch": 9.325539568345324, "step": 10370 }, { "epoch": 9.325539568345324, "step": 10370, "torque_loss": 0.1898362785577774 }, { "epoch": 9.33453237410072, "grad_norm": 0.4612119495868683, "learning_rate": 9.592159234443233e-05, "loss": 0.1332, "step": 10380 }, { "action_loss": 0.015429223887622356, "epoch": 9.33453237410072, "step": 10380 }, { "epoch": 9.33453237410072, "step": 10380, "torque_loss": 0.18011480569839478 }, { "epoch": 9.343525179856115, "grad_norm": 0.5455573201179504, "learning_rate": 9.59106840642753e-05, "loss": 0.1278, "step": 10390 }, { "action_loss": 0.012183435261249542, "epoch": 9.343525179856115, "step": 10390 }, { "epoch": 9.343525179856115, "step": 10390, "torque_loss": 0.1658240109682083 }, { "epoch": 9.352517985611511, "grad_norm": 0.3891197443008423, "learning_rate": 9.589976183766467e-05, "loss": 0.1224, "step": 10400 }, { "action_loss": 0.010045948438346386, "epoch": 9.352517985611511, "step": 10400 }, { "epoch": 9.352517985611511, "step": 10400, "torque_loss": 0.14568746089935303 }, { "epoch": 9.361510791366907, "grad_norm": 0.4236734211444855, "learning_rate": 9.58888256679183e-05, "loss": 0.1237, "step": 10410 }, { "action_loss": 0.011900666169822216, "epoch": 9.361510791366907, "step": 10410 }, { "epoch": 9.361510791366907, "step": 10410, "torque_loss": 0.15799115598201752 }, { "epoch": 9.370503597122303, "grad_norm": 0.4308495819568634, "learning_rate": 9.587787555835832e-05, "loss": 0.1237, "step": 10420 }, { "action_loss": 0.02041323482990265, "epoch": 9.370503597122303, "step": 10420 }, { "epoch": 9.370503597122303, "step": 10420, "torque_loss": 0.28718218207359314 }, { "epoch": 9.379496402877697, "grad_norm": 0.3325156569480896, "learning_rate": 9.586691151231107e-05, "loss": 0.1225, "step": 10430 }, { "action_loss": 0.006188590079545975, "epoch": 9.379496402877697, "step": 10430 }, { "epoch": 9.379496402877697, "step": 10430, "torque_loss": 0.1431676596403122 }, { "epoch": 9.388489208633093, "grad_norm": 0.5286041498184204, "learning_rate": 9.585593353310715e-05, "loss": 0.122, "step": 10440 }, { "action_loss": 0.015894973650574684, "epoch": 9.388489208633093, "step": 10440 }, { "epoch": 9.388489208633093, "step": 10440, "torque_loss": 0.19211876392364502 }, { "epoch": 9.397482014388489, "grad_norm": 0.43236854672431946, "learning_rate": 9.58449416240814e-05, "loss": 0.1234, "step": 10450 }, { "action_loss": 0.006170891225337982, "epoch": 9.397482014388489, "step": 10450 }, { "epoch": 9.397482014388489, "step": 10450, "torque_loss": 0.09730010479688644 }, { "epoch": 9.406474820143885, "grad_norm": 0.4636145234107971, "learning_rate": 9.583393578857283e-05, "loss": 0.1099, "step": 10460 }, { "action_loss": 0.007795941550284624, "epoch": 9.406474820143885, "step": 10460 }, { "epoch": 9.406474820143885, "step": 10460, "torque_loss": 0.15551157295703888 }, { "epoch": 9.41546762589928, "grad_norm": 0.44389286637306213, "learning_rate": 9.582291602992474e-05, "loss": 0.1108, "step": 10470 }, { "action_loss": 0.00852722767740488, "epoch": 9.41546762589928, "step": 10470 }, { "epoch": 9.41546762589928, "step": 10470, "torque_loss": 0.12749874591827393 }, { "epoch": 9.424460431654676, "grad_norm": 0.3975944221019745, "learning_rate": 9.581188235148466e-05, "loss": 0.1129, "step": 10480 }, { "action_loss": 0.007510806899517775, "epoch": 9.424460431654676, "step": 10480 }, { "epoch": 9.424460431654676, "step": 10480, "torque_loss": 0.1251915842294693 }, { "epoch": 9.433453237410072, "grad_norm": 0.4014056622982025, "learning_rate": 9.58008347566043e-05, "loss": 0.1103, "step": 10490 }, { "action_loss": 0.02062220685184002, "epoch": 9.433453237410072, "step": 10490 }, { "epoch": 9.433453237410072, "step": 10490, "torque_loss": 0.22930943965911865 }, { "epoch": 9.442446043165468, "grad_norm": 0.5479212403297424, "learning_rate": 9.578977324863965e-05, "loss": 0.1166, "step": 10500 }, { "action_loss": 0.007942024618387222, "epoch": 9.442446043165468, "step": 10500 }, { "epoch": 9.442446043165468, "step": 10500, "torque_loss": 0.1157667338848114 }, { "epoch": 9.451438848920864, "grad_norm": 0.4521387219429016, "learning_rate": 9.577869783095089e-05, "loss": 0.1182, "step": 10510 }, { "action_loss": 0.015225696377456188, "epoch": 9.451438848920864, "step": 10510 }, { "epoch": 9.451438848920864, "step": 10510, "torque_loss": 0.18623702228069305 }, { "epoch": 9.46043165467626, "grad_norm": 0.4178721606731415, "learning_rate": 9.576760850690245e-05, "loss": 0.1236, "step": 10520 }, { "action_loss": 0.0179018285125494, "epoch": 9.46043165467626, "step": 10520 }, { "epoch": 9.46043165467626, "step": 10520, "torque_loss": 0.21819214522838593 }, { "epoch": 9.469424460431656, "grad_norm": 0.5002855062484741, "learning_rate": 9.575650527986298e-05, "loss": 0.1407, "step": 10530 }, { "action_loss": 0.031024187803268433, "epoch": 9.469424460431656, "step": 10530 }, { "epoch": 9.469424460431656, "step": 10530, "torque_loss": 0.1943134218454361 }, { "epoch": 9.47841726618705, "grad_norm": 0.4415045380592346, "learning_rate": 9.574538815320531e-05, "loss": 0.1094, "step": 10540 }, { "action_loss": 0.00886058434844017, "epoch": 9.47841726618705, "step": 10540 }, { "epoch": 9.47841726618705, "step": 10540, "torque_loss": 0.1865425556898117 }, { "epoch": 9.487410071942445, "grad_norm": 0.45111358165740967, "learning_rate": 9.573425713030656e-05, "loss": 0.1391, "step": 10550 }, { "action_loss": 0.008967091329395771, "epoch": 9.487410071942445, "step": 10550 }, { "epoch": 9.487410071942445, "step": 10550, "torque_loss": 0.14315815269947052 }, { "epoch": 9.496402877697841, "grad_norm": 0.43296390771865845, "learning_rate": 9.572311221454806e-05, "loss": 0.1188, "step": 10560 }, { "action_loss": 0.014532707631587982, "epoch": 9.496402877697841, "step": 10560 }, { "epoch": 9.496402877697841, "step": 10560, "torque_loss": 0.18893830478191376 }, { "epoch": 9.505395683453237, "grad_norm": 0.5187705755233765, "learning_rate": 9.57119534093153e-05, "loss": 0.1253, "step": 10570 }, { "action_loss": 0.012953825294971466, "epoch": 9.505395683453237, "step": 10570 }, { "epoch": 9.505395683453237, "step": 10570, "torque_loss": 0.20446623861789703 }, { "epoch": 9.514388489208633, "grad_norm": 0.4633350670337677, "learning_rate": 9.570078071799806e-05, "loss": 0.1207, "step": 10580 }, { "action_loss": 0.022781671956181526, "epoch": 9.514388489208633, "step": 10580 }, { "epoch": 9.514388489208633, "step": 10580, "torque_loss": 0.21337063610553741 }, { "epoch": 9.523381294964029, "grad_norm": 0.4769333302974701, "learning_rate": 9.568959414399028e-05, "loss": 0.1149, "step": 10590 }, { "action_loss": 0.0286931823939085, "epoch": 9.523381294964029, "step": 10590 }, { "epoch": 9.523381294964029, "step": 10590, "torque_loss": 0.23307399451732635 }, { "epoch": 9.532374100719425, "grad_norm": 0.5013886094093323, "learning_rate": 9.567839369069018e-05, "loss": 0.1147, "step": 10600 }, { "action_loss": 0.009938006289303303, "epoch": 9.532374100719425, "step": 10600 }, { "epoch": 9.532374100719425, "step": 10600, "torque_loss": 0.21343670785427094 }, { "epoch": 9.54136690647482, "grad_norm": 0.4396341145038605, "learning_rate": 9.566717936150013e-05, "loss": 0.1302, "step": 10610 }, { "action_loss": 0.017901169136166573, "epoch": 9.54136690647482, "step": 10610 }, { "epoch": 9.54136690647482, "step": 10610, "torque_loss": 0.26512691378593445 }, { "epoch": 9.550359712230216, "grad_norm": 0.4563892185688019, "learning_rate": 9.565595115982678e-05, "loss": 0.1144, "step": 10620 }, { "action_loss": 0.017686130478978157, "epoch": 9.550359712230216, "step": 10620 }, { "epoch": 9.550359712230216, "step": 10620, "torque_loss": 0.17202945053577423 }, { "epoch": 9.559352517985612, "grad_norm": 0.47548532485961914, "learning_rate": 9.564470908908094e-05, "loss": 0.1226, "step": 10630 }, { "action_loss": 0.0398649163544178, "epoch": 9.559352517985612, "step": 10630 }, { "epoch": 9.559352517985612, "step": 10630, "torque_loss": 0.20512175559997559 }, { "epoch": 9.568345323741006, "grad_norm": 0.4287552237510681, "learning_rate": 9.563345315267764e-05, "loss": 0.1233, "step": 10640 }, { "action_loss": 0.015983788296580315, "epoch": 9.568345323741006, "step": 10640 }, { "epoch": 9.568345323741006, "step": 10640, "torque_loss": 0.18146848678588867 }, { "epoch": 9.577338129496402, "grad_norm": 0.38936862349510193, "learning_rate": 9.562218335403616e-05, "loss": 0.1199, "step": 10650 }, { "action_loss": 0.011569775640964508, "epoch": 9.577338129496402, "step": 10650 }, { "epoch": 9.577338129496402, "step": 10650, "torque_loss": 0.22917167842388153 }, { "epoch": 9.586330935251798, "grad_norm": 0.3312101662158966, "learning_rate": 9.561089969657999e-05, "loss": 0.1114, "step": 10660 }, { "action_loss": 0.009815412573516369, "epoch": 9.586330935251798, "step": 10660 }, { "epoch": 9.586330935251798, "step": 10660, "torque_loss": 0.16851703822612762 }, { "epoch": 9.595323741007194, "grad_norm": 0.33612415194511414, "learning_rate": 9.559960218373673e-05, "loss": 0.1242, "step": 10670 }, { "action_loss": 0.020311878994107246, "epoch": 9.595323741007194, "step": 10670 }, { "epoch": 9.595323741007194, "step": 10670, "torque_loss": 0.22468866407871246 }, { "epoch": 9.60431654676259, "grad_norm": 0.4007485806941986, "learning_rate": 9.558829081893836e-05, "loss": 0.1187, "step": 10680 }, { "action_loss": 0.015653548762202263, "epoch": 9.60431654676259, "step": 10680 }, { "epoch": 9.60431654676259, "step": 10680, "torque_loss": 0.20843245089054108 }, { "epoch": 9.613309352517986, "grad_norm": 0.3916700482368469, "learning_rate": 9.55769656056209e-05, "loss": 0.114, "step": 10690 }, { "action_loss": 0.01343062985688448, "epoch": 9.613309352517986, "step": 10690 }, { "epoch": 9.613309352517986, "step": 10690, "torque_loss": 0.16765160858631134 }, { "epoch": 9.622302158273381, "grad_norm": 0.3627735674381256, "learning_rate": 9.556562654722469e-05, "loss": 0.1212, "step": 10700 }, { "action_loss": 0.013604196719825268, "epoch": 9.622302158273381, "step": 10700 }, { "epoch": 9.622302158273381, "step": 10700, "torque_loss": 0.16541288793087006 }, { "epoch": 9.631294964028777, "grad_norm": 0.4162858724594116, "learning_rate": 9.555427364719422e-05, "loss": 0.131, "step": 10710 }, { "action_loss": 0.016601277515292168, "epoch": 9.631294964028777, "step": 10710 }, { "epoch": 9.631294964028777, "step": 10710, "torque_loss": 0.17261725664138794 }, { "epoch": 9.640287769784173, "grad_norm": 0.4748736023902893, "learning_rate": 9.55429069089782e-05, "loss": 0.116, "step": 10720 }, { "action_loss": 0.020180905237793922, "epoch": 9.640287769784173, "step": 10720 }, { "epoch": 9.640287769784173, "step": 10720, "torque_loss": 0.27699998021125793 }, { "epoch": 9.649280575539569, "grad_norm": 0.4802723526954651, "learning_rate": 9.553152633602956e-05, "loss": 0.1277, "step": 10730 }, { "action_loss": 0.007940305396914482, "epoch": 9.649280575539569, "step": 10730 }, { "epoch": 9.649280575539569, "step": 10730, "torque_loss": 0.14973972737789154 }, { "epoch": 9.658273381294965, "grad_norm": 0.5407938361167908, "learning_rate": 9.552013193180543e-05, "loss": 0.1322, "step": 10740 }, { "action_loss": 0.00907642487436533, "epoch": 9.658273381294965, "step": 10740 }, { "epoch": 9.658273381294965, "step": 10740, "torque_loss": 0.13996782898902893 }, { "epoch": 9.667266187050359, "grad_norm": 0.5634610056877136, "learning_rate": 9.550872369976707e-05, "loss": 0.1174, "step": 10750 }, { "action_loss": 0.01424665842205286, "epoch": 9.667266187050359, "step": 10750 }, { "epoch": 9.667266187050359, "step": 10750, "torque_loss": 0.17465107142925262 }, { "epoch": 9.676258992805755, "grad_norm": 0.36569586396217346, "learning_rate": 9.549730164338007e-05, "loss": 0.1114, "step": 10760 }, { "action_loss": 0.010541354306042194, "epoch": 9.676258992805755, "step": 10760 }, { "epoch": 9.676258992805755, "step": 10760, "torque_loss": 0.16252197325229645 }, { "epoch": 9.68525179856115, "grad_norm": 0.467753142118454, "learning_rate": 9.548586576611408e-05, "loss": 0.1195, "step": 10770 }, { "action_loss": 0.01184803619980812, "epoch": 9.68525179856115, "step": 10770 }, { "epoch": 9.68525179856115, "step": 10770, "torque_loss": 0.14695985615253448 }, { "epoch": 9.694244604316546, "grad_norm": 0.43553048372268677, "learning_rate": 9.54744160714431e-05, "loss": 0.1175, "step": 10780 }, { "action_loss": 0.009985874406993389, "epoch": 9.694244604316546, "step": 10780 }, { "epoch": 9.694244604316546, "step": 10780, "torque_loss": 0.1408587098121643 }, { "epoch": 9.703237410071942, "grad_norm": 0.4851857125759125, "learning_rate": 9.546295256284516e-05, "loss": 0.1113, "step": 10790 }, { "action_loss": 0.017346108332276344, "epoch": 9.703237410071942, "step": 10790 }, { "epoch": 9.703237410071942, "step": 10790, "torque_loss": 0.1642860323190689 }, { "epoch": 9.712230215827338, "grad_norm": 0.4416370689868927, "learning_rate": 9.545147524380265e-05, "loss": 0.1262, "step": 10800 }, { "action_loss": 0.010199497453868389, "epoch": 9.712230215827338, "step": 10800 }, { "epoch": 9.712230215827338, "step": 10800, "torque_loss": 0.15783293545246124 }, { "epoch": 9.721223021582734, "grad_norm": 0.4128723740577698, "learning_rate": 9.543998411780201e-05, "loss": 0.1208, "step": 10810 }, { "action_loss": 0.027729006484150887, "epoch": 9.721223021582734, "step": 10810 }, { "epoch": 9.721223021582734, "step": 10810, "torque_loss": 0.2614153325557709 }, { "epoch": 9.73021582733813, "grad_norm": 0.4916072189807892, "learning_rate": 9.542847918833397e-05, "loss": 0.1223, "step": 10820 }, { "action_loss": 0.01723526604473591, "epoch": 9.73021582733813, "step": 10820 }, { "epoch": 9.73021582733813, "step": 10820, "torque_loss": 0.16769365966320038 }, { "epoch": 9.739208633093526, "grad_norm": 0.36067137122154236, "learning_rate": 9.541696045889343e-05, "loss": 0.1171, "step": 10830 }, { "action_loss": 0.008188421837985516, "epoch": 9.739208633093526, "step": 10830 }, { "epoch": 9.739208633093526, "step": 10830, "torque_loss": 0.14641334116458893 }, { "epoch": 9.748201438848922, "grad_norm": 0.3919582962989807, "learning_rate": 9.540542793297947e-05, "loss": 0.1117, "step": 10840 }, { "action_loss": 0.009781158529222012, "epoch": 9.748201438848922, "step": 10840 }, { "epoch": 9.748201438848922, "step": 10840, "torque_loss": 0.1643802523612976 }, { "epoch": 9.757194244604317, "grad_norm": 0.47023308277130127, "learning_rate": 9.539388161409537e-05, "loss": 0.1187, "step": 10850 }, { "action_loss": 0.01529895979911089, "epoch": 9.757194244604317, "step": 10850 }, { "epoch": 9.757194244604317, "step": 10850, "torque_loss": 0.22691567242145538 }, { "epoch": 9.766187050359711, "grad_norm": 0.47174572944641113, "learning_rate": 9.538232150574857e-05, "loss": 0.1258, "step": 10860 }, { "action_loss": 0.012517054565250874, "epoch": 9.766187050359711, "step": 10860 }, { "epoch": 9.766187050359711, "step": 10860, "torque_loss": 0.18338818848133087 }, { "epoch": 9.775179856115107, "grad_norm": 0.43484508991241455, "learning_rate": 9.537074761145076e-05, "loss": 0.105, "step": 10870 }, { "action_loss": 0.00767097482457757, "epoch": 9.775179856115107, "step": 10870 }, { "epoch": 9.775179856115107, "step": 10870, "torque_loss": 0.1360224038362503 }, { "epoch": 9.784172661870503, "grad_norm": 0.459564745426178, "learning_rate": 9.535915993471778e-05, "loss": 0.1192, "step": 10880 }, { "action_loss": 0.020286548882722855, "epoch": 9.784172661870503, "step": 10880 }, { "epoch": 9.784172661870503, "step": 10880, "torque_loss": 0.17632757127285004 }, { "epoch": 9.793165467625899, "grad_norm": 0.42417672276496887, "learning_rate": 9.534755847906964e-05, "loss": 0.1335, "step": 10890 }, { "action_loss": 0.0062254928052425385, "epoch": 9.793165467625899, "step": 10890 }, { "epoch": 9.793165467625899, "step": 10890, "torque_loss": 0.16011063754558563 }, { "epoch": 9.802158273381295, "grad_norm": 0.4650164246559143, "learning_rate": 9.533594324803057e-05, "loss": 0.118, "step": 10900 }, { "action_loss": 0.016921982169151306, "epoch": 9.802158273381295, "step": 10900 }, { "epoch": 9.802158273381295, "step": 10900, "torque_loss": 0.20585380494594574 }, { "epoch": 9.81115107913669, "grad_norm": 0.39154401421546936, "learning_rate": 9.532431424512895e-05, "loss": 0.0996, "step": 10910 }, { "action_loss": 0.015417508780956268, "epoch": 9.81115107913669, "step": 10910 }, { "epoch": 9.81115107913669, "step": 10910, "torque_loss": 0.20499368011951447 }, { "epoch": 9.820143884892087, "grad_norm": 0.3775922954082489, "learning_rate": 9.531267147389741e-05, "loss": 0.1239, "step": 10920 }, { "action_loss": 0.020342597737908363, "epoch": 9.820143884892087, "step": 10920 }, { "epoch": 9.820143884892087, "step": 10920, "torque_loss": 0.2121037244796753 }, { "epoch": 9.829136690647482, "grad_norm": 0.4551807940006256, "learning_rate": 9.530101493787266e-05, "loss": 0.1159, "step": 10930 }, { "action_loss": 0.028634563088417053, "epoch": 9.829136690647482, "step": 10930 }, { "epoch": 9.829136690647482, "step": 10930, "torque_loss": 0.20401625335216522 }, { "epoch": 9.838129496402878, "grad_norm": 0.3627903461456299, "learning_rate": 9.528934464059571e-05, "loss": 0.1101, "step": 10940 }, { "action_loss": 0.010639202781021595, "epoch": 9.838129496402878, "step": 10940 }, { "epoch": 9.838129496402878, "step": 10940, "torque_loss": 0.16497176885604858 }, { "epoch": 9.847122302158274, "grad_norm": 0.37337803840637207, "learning_rate": 9.527766058561163e-05, "loss": 0.0996, "step": 10950 }, { "action_loss": 0.013725177384912968, "epoch": 9.847122302158274, "step": 10950 }, { "epoch": 9.847122302158274, "step": 10950, "torque_loss": 0.1829308122396469 }, { "epoch": 9.85611510791367, "grad_norm": 0.617634117603302, "learning_rate": 9.526596277646976e-05, "loss": 0.1177, "step": 10960 }, { "action_loss": 0.012305743992328644, "epoch": 9.85611510791367, "step": 10960 }, { "epoch": 9.85611510791367, "step": 10960, "torque_loss": 0.19790787994861603 }, { "epoch": 9.865107913669064, "grad_norm": 0.49848252534866333, "learning_rate": 9.525425121672358e-05, "loss": 0.1127, "step": 10970 }, { "action_loss": 0.005612219218164682, "epoch": 9.865107913669064, "step": 10970 }, { "epoch": 9.865107913669064, "step": 10970, "torque_loss": 0.1076635792851448 }, { "epoch": 9.87410071942446, "grad_norm": 0.43154188990592957, "learning_rate": 9.524252590993074e-05, "loss": 0.0973, "step": 10980 }, { "action_loss": 0.0059467521496117115, "epoch": 9.87410071942446, "step": 10980 }, { "epoch": 9.87410071942446, "step": 10980, "torque_loss": 0.10684707015752792 }, { "epoch": 9.883093525179856, "grad_norm": 0.41167205572128296, "learning_rate": 9.523078685965309e-05, "loss": 0.1134, "step": 10990 }, { "action_loss": 0.0031226191204041243, "epoch": 9.883093525179856, "step": 10990 }, { "epoch": 9.883093525179856, "step": 10990, "torque_loss": 0.10739047080278397 }, { "epoch": 9.892086330935252, "grad_norm": 0.39739030599594116, "learning_rate": 9.521903406945664e-05, "loss": 0.1054, "step": 11000 }, { "action_loss": 0.013171547092497349, "epoch": 9.892086330935252, "step": 11000 }, { "epoch": 9.892086330935252, "step": 11000, "torque_loss": 0.15527032315731049 }, { "epoch": 9.901079136690647, "grad_norm": 0.4235251843929291, "learning_rate": 9.520726754291158e-05, "loss": 0.1132, "step": 11010 }, { "action_loss": 0.02542049251496792, "epoch": 9.901079136690647, "step": 11010 }, { "epoch": 9.901079136690647, "step": 11010, "torque_loss": 0.2346246838569641 }, { "epoch": 9.910071942446043, "grad_norm": 0.4670312702655792, "learning_rate": 9.519548728359227e-05, "loss": 0.1126, "step": 11020 }, { "action_loss": 0.01600041054189205, "epoch": 9.910071942446043, "step": 11020 }, { "epoch": 9.910071942446043, "step": 11020, "torque_loss": 0.14871762692928314 }, { "epoch": 9.91906474820144, "grad_norm": 0.39703741669654846, "learning_rate": 9.518369329507726e-05, "loss": 0.1057, "step": 11030 }, { "action_loss": 0.00796330627053976, "epoch": 9.91906474820144, "step": 11030 }, { "epoch": 9.91906474820144, "step": 11030, "torque_loss": 0.12082862854003906 }, { "epoch": 9.928057553956835, "grad_norm": 0.366755872964859, "learning_rate": 9.51718855809492e-05, "loss": 0.0962, "step": 11040 }, { "action_loss": 0.011855732649564743, "epoch": 9.928057553956835, "step": 11040 }, { "epoch": 9.928057553956835, "step": 11040, "torque_loss": 0.22653628885746002 }, { "epoch": 9.93705035971223, "grad_norm": 0.4651262164115906, "learning_rate": 9.516006414479502e-05, "loss": 0.1309, "step": 11050 }, { "action_loss": 0.013441686518490314, "epoch": 9.93705035971223, "step": 11050 }, { "epoch": 9.93705035971223, "step": 11050, "torque_loss": 0.1825680136680603 }, { "epoch": 9.946043165467627, "grad_norm": 0.3609393835067749, "learning_rate": 9.514822899020572e-05, "loss": 0.1285, "step": 11060 }, { "action_loss": 0.012861977331340313, "epoch": 9.946043165467627, "step": 11060 }, { "epoch": 9.946043165467627, "step": 11060, "torque_loss": 0.1762094348669052 }, { "epoch": 9.95503597122302, "grad_norm": 0.5797876715660095, "learning_rate": 9.513638012077654e-05, "loss": 0.1172, "step": 11070 }, { "action_loss": 0.011646567843854427, "epoch": 9.95503597122302, "step": 11070 }, { "epoch": 9.95503597122302, "step": 11070, "torque_loss": 0.13155405223369598 }, { "epoch": 9.964028776978417, "grad_norm": 0.3779428005218506, "learning_rate": 9.512451754010683e-05, "loss": 0.1006, "step": 11080 }, { "action_loss": 0.013887849636375904, "epoch": 9.964028776978417, "step": 11080 }, { "epoch": 9.964028776978417, "step": 11080, "torque_loss": 0.13003331422805786 }, { "epoch": 9.973021582733812, "grad_norm": 0.4945477545261383, "learning_rate": 9.511264125180013e-05, "loss": 0.105, "step": 11090 }, { "action_loss": 0.009799965657293797, "epoch": 9.973021582733812, "step": 11090 }, { "epoch": 9.973021582733812, "step": 11090, "torque_loss": 0.18069450557231903 }, { "epoch": 9.982014388489208, "grad_norm": 0.4194795787334442, "learning_rate": 9.510075125946414e-05, "loss": 0.1098, "step": 11100 }, { "action_loss": 0.01284098532050848, "epoch": 9.982014388489208, "step": 11100 }, { "epoch": 9.982014388489208, "step": 11100, "torque_loss": 0.2525228261947632 }, { "epoch": 9.991007194244604, "grad_norm": 0.3696177005767822, "learning_rate": 9.508884756671075e-05, "loss": 0.1277, "step": 11110 }, { "action_loss": 0.00635553291067481, "epoch": 9.991007194244604, "step": 11110 }, { "epoch": 9.991007194244604, "step": 11110, "torque_loss": 0.1358669549226761 }, { "epoch": 10.0, "grad_norm": 0.37853068113327026, "learning_rate": 9.507693017715596e-05, "loss": 0.1165, "step": 11120 }, { "action_loss": 0.013865509070456028, "epoch": 10.0, "step": 11120 }, { "epoch": 10.0, "step": 11120, "torque_loss": 0.13654758036136627 }, { "epoch": 10.008992805755396, "grad_norm": 0.6184044480323792, "learning_rate": 9.506499909441997e-05, "loss": 0.1088, "step": 11130 }, { "action_loss": 0.007178646046668291, "epoch": 10.008992805755396, "step": 11130 }, { "epoch": 10.008992805755396, "step": 11130, "torque_loss": 0.1326734572649002 }, { "epoch": 10.017985611510792, "grad_norm": 0.49695175886154175, "learning_rate": 9.505305432212713e-05, "loss": 0.1071, "step": 11140 }, { "action_loss": 0.010901889763772488, "epoch": 10.017985611510792, "step": 11140 }, { "epoch": 10.017985611510792, "step": 11140, "torque_loss": 0.14910487830638885 }, { "epoch": 10.026978417266188, "grad_norm": 0.414789080619812, "learning_rate": 9.504109586390595e-05, "loss": 0.1096, "step": 11150 }, { "action_loss": 0.004220140632241964, "epoch": 10.026978417266188, "step": 11150 }, { "epoch": 10.026978417266188, "step": 11150, "torque_loss": 0.12153306603431702 }, { "epoch": 10.035971223021583, "grad_norm": 0.4252563416957855, "learning_rate": 9.502912372338908e-05, "loss": 0.1224, "step": 11160 }, { "action_loss": 0.006833745166659355, "epoch": 10.035971223021583, "step": 11160 }, { "epoch": 10.035971223021583, "step": 11160, "torque_loss": 0.1078549399971962 }, { "epoch": 10.04496402877698, "grad_norm": 0.5421680212020874, "learning_rate": 9.501713790421335e-05, "loss": 0.1077, "step": 11170 }, { "action_loss": 0.010509095154702663, "epoch": 10.04496402877698, "step": 11170 }, { "epoch": 10.04496402877698, "step": 11170, "torque_loss": 0.20156173408031464 }, { "epoch": 10.053956834532373, "grad_norm": 0.47107699513435364, "learning_rate": 9.500513841001974e-05, "loss": 0.1138, "step": 11180 }, { "action_loss": 0.01314498484134674, "epoch": 10.053956834532373, "step": 11180 }, { "epoch": 10.053956834532373, "step": 11180, "torque_loss": 0.17120890319347382 }, { "epoch": 10.06294964028777, "grad_norm": 0.40525567531585693, "learning_rate": 9.499312524445336e-05, "loss": 0.1241, "step": 11190 }, { "action_loss": 0.00504910247400403, "epoch": 10.06294964028777, "step": 11190 }, { "epoch": 10.06294964028777, "step": 11190, "torque_loss": 0.09226226806640625 }, { "epoch": 10.071942446043165, "grad_norm": 0.4240349531173706, "learning_rate": 9.498109841116351e-05, "loss": 0.1049, "step": 11200 }, { "action_loss": 0.011887435801327229, "epoch": 10.071942446043165, "step": 11200 }, { "epoch": 10.071942446043165, "step": 11200, "torque_loss": 0.18520860373973846 }, { "epoch": 10.08093525179856, "grad_norm": 0.379129022359848, "learning_rate": 9.496905791380363e-05, "loss": 0.138, "step": 11210 }, { "action_loss": 0.01125404518097639, "epoch": 10.08093525179856, "step": 11210 }, { "epoch": 10.08093525179856, "step": 11210, "torque_loss": 0.16519606113433838 }, { "epoch": 10.089928057553957, "grad_norm": 0.4633525609970093, "learning_rate": 9.495700375603129e-05, "loss": 0.1115, "step": 11220 }, { "action_loss": 0.013229689560830593, "epoch": 10.089928057553957, "step": 11220 }, { "epoch": 10.089928057553957, "step": 11220, "torque_loss": 0.20241932570934296 }, { "epoch": 10.098920863309353, "grad_norm": 0.40727055072784424, "learning_rate": 9.494493594150822e-05, "loss": 0.1293, "step": 11230 }, { "action_loss": 0.009024374186992645, "epoch": 10.098920863309353, "step": 11230 }, { "epoch": 10.098920863309353, "step": 11230, "torque_loss": 0.13510091602802277 }, { "epoch": 10.107913669064748, "grad_norm": 0.45314815640449524, "learning_rate": 9.493285447390032e-05, "loss": 0.1129, "step": 11240 }, { "action_loss": 0.008312353864312172, "epoch": 10.107913669064748, "step": 11240 }, { "epoch": 10.107913669064748, "step": 11240, "torque_loss": 0.17303061485290527 }, { "epoch": 10.116906474820144, "grad_norm": 0.38505619764328003, "learning_rate": 9.492075935687761e-05, "loss": 0.1132, "step": 11250 }, { "action_loss": 0.014116744510829449, "epoch": 10.116906474820144, "step": 11250 }, { "epoch": 10.116906474820144, "step": 11250, "torque_loss": 0.11046939343214035 }, { "epoch": 10.12589928057554, "grad_norm": 0.38250061869621277, "learning_rate": 9.490865059411427e-05, "loss": 0.1043, "step": 11260 }, { "action_loss": 0.027295542880892754, "epoch": 10.12589928057554, "step": 11260 }, { "epoch": 10.12589928057554, "step": 11260, "torque_loss": 0.18060730397701263 }, { "epoch": 10.134892086330936, "grad_norm": 0.4286685883998871, "learning_rate": 9.489652818928863e-05, "loss": 0.1112, "step": 11270 }, { "action_loss": 0.014908332377672195, "epoch": 10.134892086330936, "step": 11270 }, { "epoch": 10.134892086330936, "step": 11270, "torque_loss": 0.16146476566791534 }, { "epoch": 10.14388489208633, "grad_norm": 0.3940505087375641, "learning_rate": 9.488439214608315e-05, "loss": 0.1285, "step": 11280 }, { "action_loss": 0.010736345313489437, "epoch": 10.14388489208633, "step": 11280 }, { "epoch": 10.14388489208633, "step": 11280, "torque_loss": 0.11727681010961533 }, { "epoch": 10.152877697841726, "grad_norm": 0.4422975480556488, "learning_rate": 9.487224246818444e-05, "loss": 0.1006, "step": 11290 }, { "action_loss": 0.008214837871491909, "epoch": 10.152877697841726, "step": 11290 }, { "epoch": 10.152877697841726, "step": 11290, "torque_loss": 0.13614879548549652 }, { "epoch": 10.161870503597122, "grad_norm": 0.339615136384964, "learning_rate": 9.486007915928325e-05, "loss": 0.1033, "step": 11300 }, { "action_loss": 0.01055938471108675, "epoch": 10.161870503597122, "step": 11300 }, { "epoch": 10.161870503597122, "step": 11300, "torque_loss": 0.13843385875225067 }, { "epoch": 10.170863309352518, "grad_norm": 0.4245370030403137, "learning_rate": 9.484790222307448e-05, "loss": 0.1065, "step": 11310 }, { "action_loss": 0.004904836881905794, "epoch": 10.170863309352518, "step": 11310 }, { "epoch": 10.170863309352518, "step": 11310, "torque_loss": 0.09560194611549377 }, { "epoch": 10.179856115107913, "grad_norm": 0.4172523319721222, "learning_rate": 9.483571166325716e-05, "loss": 0.1264, "step": 11320 }, { "action_loss": 0.011145000346004963, "epoch": 10.179856115107913, "step": 11320 }, { "epoch": 10.179856115107913, "step": 11320, "torque_loss": 0.18090248107910156 }, { "epoch": 10.18884892086331, "grad_norm": 0.4236672818660736, "learning_rate": 9.482350748353444e-05, "loss": 0.1174, "step": 11330 }, { "action_loss": 0.006943728309124708, "epoch": 10.18884892086331, "step": 11330 }, { "epoch": 10.18884892086331, "step": 11330, "torque_loss": 0.13001160323619843 }, { "epoch": 10.197841726618705, "grad_norm": 0.5115694403648376, "learning_rate": 9.481128968761363e-05, "loss": 0.1087, "step": 11340 }, { "action_loss": 0.01021267008036375, "epoch": 10.197841726618705, "step": 11340 }, { "epoch": 10.197841726618705, "step": 11340, "torque_loss": 0.17132651805877686 }, { "epoch": 10.206834532374101, "grad_norm": 0.3671814203262329, "learning_rate": 9.479905827920621e-05, "loss": 0.107, "step": 11350 }, { "action_loss": 0.013433519750833511, "epoch": 10.206834532374101, "step": 11350 }, { "epoch": 10.206834532374101, "step": 11350, "torque_loss": 0.15662677586078644 }, { "epoch": 10.215827338129497, "grad_norm": 0.41157636046409607, "learning_rate": 9.478681326202773e-05, "loss": 0.1063, "step": 11360 }, { "action_loss": 0.006918551400303841, "epoch": 10.215827338129497, "step": 11360 }, { "epoch": 10.215827338129497, "step": 11360, "torque_loss": 0.11628631502389908 }, { "epoch": 10.224820143884893, "grad_norm": 0.4242388904094696, "learning_rate": 9.477455463979791e-05, "loss": 0.1083, "step": 11370 }, { "action_loss": 0.008338212966918945, "epoch": 10.224820143884893, "step": 11370 }, { "epoch": 10.224820143884893, "step": 11370, "torque_loss": 0.1748950332403183 }, { "epoch": 10.233812949640289, "grad_norm": 0.43159809708595276, "learning_rate": 9.476228241624059e-05, "loss": 0.14, "step": 11380 }, { "action_loss": 0.0046361587010324, "epoch": 10.233812949640289, "step": 11380 }, { "epoch": 10.233812949640289, "step": 11380, "torque_loss": 0.12008175998926163 }, { "epoch": 10.242805755395683, "grad_norm": 0.43166467547416687, "learning_rate": 9.474999659508374e-05, "loss": 0.1054, "step": 11390 }, { "action_loss": 0.005073340144008398, "epoch": 10.242805755395683, "step": 11390 }, { "epoch": 10.242805755395683, "step": 11390, "torque_loss": 0.094424307346344 }, { "epoch": 10.251798561151078, "grad_norm": 0.421082079410553, "learning_rate": 9.47376971800595e-05, "loss": 0.0885, "step": 11400 }, { "action_loss": 0.007130660582333803, "epoch": 10.251798561151078, "step": 11400 }, { "epoch": 10.251798561151078, "step": 11400, "torque_loss": 0.13319337368011475 }, { "epoch": 10.260791366906474, "grad_norm": 0.44825488328933716, "learning_rate": 9.472538417490409e-05, "loss": 0.1356, "step": 11410 }, { "action_loss": 0.010179034434258938, "epoch": 10.260791366906474, "step": 11410 }, { "epoch": 10.260791366906474, "step": 11410, "torque_loss": 0.11394151300191879 }, { "epoch": 10.26978417266187, "grad_norm": 0.37029802799224854, "learning_rate": 9.471305758335784e-05, "loss": 0.1219, "step": 11420 }, { "action_loss": 0.005914326757192612, "epoch": 10.26978417266187, "step": 11420 }, { "epoch": 10.26978417266187, "step": 11420, "torque_loss": 0.1157878115773201 }, { "epoch": 10.278776978417266, "grad_norm": 0.4549577236175537, "learning_rate": 9.47007174091653e-05, "loss": 0.1199, "step": 11430 }, { "action_loss": 0.015923386439681053, "epoch": 10.278776978417266, "step": 11430 }, { "epoch": 10.278776978417266, "step": 11430, "torque_loss": 0.1767834573984146 }, { "epoch": 10.287769784172662, "grad_norm": 0.38981103897094727, "learning_rate": 9.468836365607507e-05, "loss": 0.1273, "step": 11440 }, { "action_loss": 0.017903879284858704, "epoch": 10.287769784172662, "step": 11440 }, { "epoch": 10.287769784172662, "step": 11440, "torque_loss": 0.2394515872001648 }, { "epoch": 10.296762589928058, "grad_norm": 0.42782488465309143, "learning_rate": 9.467599632783988e-05, "loss": 0.1339, "step": 11450 }, { "action_loss": 0.020548762753605843, "epoch": 10.296762589928058, "step": 11450 }, { "epoch": 10.296762589928058, "step": 11450, "torque_loss": 0.2260865420103073 }, { "epoch": 10.305755395683454, "grad_norm": 0.4975396394729614, "learning_rate": 9.466361542821662e-05, "loss": 0.1235, "step": 11460 }, { "action_loss": 0.012284447439014912, "epoch": 10.305755395683454, "step": 11460 }, { "epoch": 10.305755395683454, "step": 11460, "torque_loss": 0.1687134951353073 }, { "epoch": 10.31474820143885, "grad_norm": 0.38878852128982544, "learning_rate": 9.465122096096625e-05, "loss": 0.1069, "step": 11470 }, { "action_loss": 0.019595570862293243, "epoch": 10.31474820143885, "step": 11470 }, { "epoch": 10.31474820143885, "step": 11470, "torque_loss": 0.2236972600221634 }, { "epoch": 10.323741007194245, "grad_norm": 0.48489466309547424, "learning_rate": 9.463881292985391e-05, "loss": 0.1167, "step": 11480 }, { "action_loss": 0.009330249391496181, "epoch": 10.323741007194245, "step": 11480 }, { "epoch": 10.323741007194245, "step": 11480, "torque_loss": 0.13676400482654572 }, { "epoch": 10.332733812949641, "grad_norm": 0.48091158270835876, "learning_rate": 9.462639133864881e-05, "loss": 0.1091, "step": 11490 }, { "action_loss": 0.010631154291331768, "epoch": 10.332733812949641, "step": 11490 }, { "epoch": 10.332733812949641, "step": 11490, "torque_loss": 0.17410922050476074 }, { "epoch": 10.341726618705035, "grad_norm": 0.4690338671207428, "learning_rate": 9.461395619112432e-05, "loss": 0.1124, "step": 11500 }, { "action_loss": 0.01626242883503437, "epoch": 10.341726618705035, "step": 11500 }, { "epoch": 10.341726618705035, "step": 11500, "torque_loss": 0.20676034688949585 }, { "epoch": 10.350719424460431, "grad_norm": 0.39653533697128296, "learning_rate": 9.460150749105791e-05, "loss": 0.1071, "step": 11510 }, { "action_loss": 0.014826349914073944, "epoch": 10.350719424460431, "step": 11510 }, { "epoch": 10.350719424460431, "step": 11510, "torque_loss": 0.17516189813613892 }, { "epoch": 10.359712230215827, "grad_norm": 0.4074593782424927, "learning_rate": 9.458904524223116e-05, "loss": 0.1192, "step": 11520 }, { "action_loss": 0.014886931516230106, "epoch": 10.359712230215827, "step": 11520 }, { "epoch": 10.359712230215827, "step": 11520, "torque_loss": 0.14786802232265472 }, { "epoch": 10.368705035971223, "grad_norm": 0.5796221494674683, "learning_rate": 9.457656944842976e-05, "loss": 0.1231, "step": 11530 }, { "action_loss": 0.005159068387001753, "epoch": 10.368705035971223, "step": 11530 }, { "epoch": 10.368705035971223, "step": 11530, "torque_loss": 0.1597146838903427 }, { "epoch": 10.377697841726619, "grad_norm": 0.4193874001502991, "learning_rate": 9.456408011344353e-05, "loss": 0.1053, "step": 11540 }, { "action_loss": 0.012248420156538486, "epoch": 10.377697841726619, "step": 11540 }, { "epoch": 10.377697841726619, "step": 11540, "torque_loss": 0.174677312374115 }, { "epoch": 10.386690647482014, "grad_norm": 0.4046534597873688, "learning_rate": 9.455157724106643e-05, "loss": 0.1157, "step": 11550 }, { "action_loss": 0.042509838938713074, "epoch": 10.386690647482014, "step": 11550 }, { "epoch": 10.386690647482014, "step": 11550, "torque_loss": 0.2524425685405731 }, { "epoch": 10.39568345323741, "grad_norm": 0.41306039690971375, "learning_rate": 9.453906083509647e-05, "loss": 0.1265, "step": 11560 }, { "action_loss": 0.012440727092325687, "epoch": 10.39568345323741, "step": 11560 }, { "epoch": 10.39568345323741, "step": 11560, "torque_loss": 0.13152071833610535 }, { "epoch": 10.404676258992806, "grad_norm": 0.3784874677658081, "learning_rate": 9.45265308993358e-05, "loss": 0.1006, "step": 11570 }, { "action_loss": 0.07787508517503738, "epoch": 10.404676258992806, "step": 11570 }, { "epoch": 10.404676258992806, "step": 11570, "torque_loss": 0.16182902455329895 }, { "epoch": 10.413669064748202, "grad_norm": 0.32229185104370117, "learning_rate": 9.451398743759071e-05, "loss": 0.1171, "step": 11580 }, { "action_loss": 0.012853114865720272, "epoch": 10.413669064748202, "step": 11580 }, { "epoch": 10.413669064748202, "step": 11580, "torque_loss": 0.1336233764886856 }, { "epoch": 10.422661870503598, "grad_norm": 0.5065287947654724, "learning_rate": 9.450143045367156e-05, "loss": 0.1036, "step": 11590 }, { "action_loss": 0.026823721826076508, "epoch": 10.422661870503598, "step": 11590 }, { "epoch": 10.422661870503598, "step": 11590, "torque_loss": 0.2719709575176239 }, { "epoch": 10.431654676258994, "grad_norm": 0.44592955708503723, "learning_rate": 9.448885995139283e-05, "loss": 0.1182, "step": 11600 }, { "action_loss": 0.007752627599984407, "epoch": 10.431654676258994, "step": 11600 }, { "epoch": 10.431654676258994, "step": 11600, "torque_loss": 0.15701059997081757 }, { "epoch": 10.440647482014388, "grad_norm": 0.4249378740787506, "learning_rate": 9.44762759345731e-05, "loss": 0.1125, "step": 11610 }, { "action_loss": 0.008521725423634052, "epoch": 10.440647482014388, "step": 11610 }, { "epoch": 10.440647482014388, "step": 11610, "torque_loss": 0.13627715408802032 }, { "epoch": 10.449640287769784, "grad_norm": 0.36493808031082153, "learning_rate": 9.446367840703509e-05, "loss": 0.1107, "step": 11620 }, { "action_loss": 0.013029615394771099, "epoch": 10.449640287769784, "step": 11620 }, { "epoch": 10.449640287769784, "step": 11620, "torque_loss": 0.15075932443141937 }, { "epoch": 10.45863309352518, "grad_norm": 0.5408989787101746, "learning_rate": 9.445106737260556e-05, "loss": 0.0962, "step": 11630 }, { "action_loss": 0.0070760915987193584, "epoch": 10.45863309352518, "step": 11630 }, { "epoch": 10.45863309352518, "step": 11630, "torque_loss": 0.13156674802303314 }, { "epoch": 10.467625899280575, "grad_norm": 0.5224066376686096, "learning_rate": 9.443844283511543e-05, "loss": 0.11, "step": 11640 }, { "action_loss": 0.011660720221698284, "epoch": 10.467625899280575, "step": 11640 }, { "epoch": 10.467625899280575, "step": 11640, "torque_loss": 0.16473840177059174 }, { "epoch": 10.476618705035971, "grad_norm": 0.4161374270915985, "learning_rate": 9.442580479839968e-05, "loss": 0.1195, "step": 11650 }, { "action_loss": 0.011962111108005047, "epoch": 10.476618705035971, "step": 11650 }, { "epoch": 10.476618705035971, "step": 11650, "torque_loss": 0.14480404555797577 }, { "epoch": 10.485611510791367, "grad_norm": 0.478974312543869, "learning_rate": 9.441315326629745e-05, "loss": 0.1256, "step": 11660 }, { "action_loss": 0.012803718447685242, "epoch": 10.485611510791367, "step": 11660 }, { "epoch": 10.485611510791367, "step": 11660, "torque_loss": 0.12810947000980377 }, { "epoch": 10.494604316546763, "grad_norm": 0.3862878084182739, "learning_rate": 9.44004882426519e-05, "loss": 0.1253, "step": 11670 }, { "action_loss": 0.010327567346394062, "epoch": 10.494604316546763, "step": 11670 }, { "epoch": 10.494604316546763, "step": 11670, "torque_loss": 0.12858988344669342 }, { "epoch": 10.503597122302159, "grad_norm": 0.39229559898376465, "learning_rate": 9.438780973131037e-05, "loss": 0.1122, "step": 11680 }, { "action_loss": 0.021430322900414467, "epoch": 10.503597122302159, "step": 11680 }, { "epoch": 10.503597122302159, "step": 11680, "torque_loss": 0.23578500747680664 }, { "epoch": 10.512589928057555, "grad_norm": 0.39281177520751953, "learning_rate": 9.437511773612423e-05, "loss": 0.125, "step": 11690 }, { "action_loss": 0.018551791086792946, "epoch": 10.512589928057555, "step": 11690 }, { "epoch": 10.512589928057555, "step": 11690, "torque_loss": 0.21035011112689972 }, { "epoch": 10.52158273381295, "grad_norm": 0.4370034635066986, "learning_rate": 9.436241226094896e-05, "loss": 0.1164, "step": 11700 }, { "action_loss": 0.038226619362831116, "epoch": 10.52158273381295, "step": 11700 }, { "epoch": 10.52158273381295, "step": 11700, "torque_loss": 0.18033349514007568 }, { "epoch": 10.530575539568346, "grad_norm": 0.31283193826675415, "learning_rate": 9.434969330964418e-05, "loss": 0.1166, "step": 11710 }, { "action_loss": 0.01760747842490673, "epoch": 10.530575539568346, "step": 11710 }, { "epoch": 10.530575539568346, "step": 11710, "torque_loss": 0.23716647922992706 }, { "epoch": 10.53956834532374, "grad_norm": 0.389626145362854, "learning_rate": 9.433696088607356e-05, "loss": 0.1192, "step": 11720 }, { "action_loss": 0.014245313592255116, "epoch": 10.53956834532374, "step": 11720 }, { "epoch": 10.53956834532374, "step": 11720, "torque_loss": 0.182697594165802 }, { "epoch": 10.548561151079136, "grad_norm": 0.4303554594516754, "learning_rate": 9.432421499410486e-05, "loss": 0.1169, "step": 11730 }, { "action_loss": 0.012453754432499409, "epoch": 10.548561151079136, "step": 11730 }, { "epoch": 10.548561151079136, "step": 11730, "torque_loss": 0.1589936912059784 }, { "epoch": 10.557553956834532, "grad_norm": 0.3711388111114502, "learning_rate": 9.431145563760998e-05, "loss": 0.1178, "step": 11740 }, { "action_loss": 0.02038663439452648, "epoch": 10.557553956834532, "step": 11740 }, { "epoch": 10.557553956834532, "step": 11740, "torque_loss": 0.1838403195142746 }, { "epoch": 10.566546762589928, "grad_norm": 0.41129517555236816, "learning_rate": 9.429868282046484e-05, "loss": 0.1207, "step": 11750 }, { "action_loss": 0.011253689415752888, "epoch": 10.566546762589928, "step": 11750 }, { "epoch": 10.566546762589928, "step": 11750, "torque_loss": 0.20372872054576874 }, { "epoch": 10.575539568345324, "grad_norm": 0.4005832076072693, "learning_rate": 9.428589654654951e-05, "loss": 0.1362, "step": 11760 }, { "action_loss": 0.011947485618293285, "epoch": 10.575539568345324, "step": 11760 }, { "epoch": 10.575539568345324, "step": 11760, "torque_loss": 0.19003109633922577 }, { "epoch": 10.58453237410072, "grad_norm": 0.3995225727558136, "learning_rate": 9.42730968197481e-05, "loss": 0.1174, "step": 11770 }, { "action_loss": 0.005846414249390364, "epoch": 10.58453237410072, "step": 11770 }, { "epoch": 10.58453237410072, "step": 11770, "torque_loss": 0.18414391577243805 }, { "epoch": 10.593525179856115, "grad_norm": 0.32686078548431396, "learning_rate": 9.426028364394883e-05, "loss": 0.1179, "step": 11780 }, { "action_loss": 0.012629766948521137, "epoch": 10.593525179856115, "step": 11780 }, { "epoch": 10.593525179856115, "step": 11780, "torque_loss": 0.13638119399547577 }, { "epoch": 10.602517985611511, "grad_norm": 0.37114188075065613, "learning_rate": 9.424745702304402e-05, "loss": 0.1058, "step": 11790 }, { "action_loss": 0.017872633412480354, "epoch": 10.602517985611511, "step": 11790 }, { "epoch": 10.602517985611511, "step": 11790, "torque_loss": 0.1956215500831604 }, { "epoch": 10.611510791366907, "grad_norm": 0.38400518894195557, "learning_rate": 9.423461696093006e-05, "loss": 0.1224, "step": 11800 }, { "action_loss": 0.033508624881505966, "epoch": 10.611510791366907, "step": 11800 }, { "epoch": 10.611510791366907, "step": 11800, "torque_loss": 0.2575549781322479 }, { "epoch": 10.620503597122303, "grad_norm": 0.3715147078037262, "learning_rate": 9.422176346150741e-05, "loss": 0.1126, "step": 11810 }, { "action_loss": 0.008948138915002346, "epoch": 10.620503597122303, "step": 11810 }, { "epoch": 10.620503597122303, "step": 11810, "torque_loss": 0.13786013424396515 }, { "epoch": 10.629496402877697, "grad_norm": 0.4093463122844696, "learning_rate": 9.420889652868063e-05, "loss": 0.1136, "step": 11820 }, { "action_loss": 0.007357212249189615, "epoch": 10.629496402877697, "step": 11820 }, { "epoch": 10.629496402877697, "step": 11820, "torque_loss": 0.12073444575071335 }, { "epoch": 10.638489208633093, "grad_norm": 0.3984494209289551, "learning_rate": 9.419601616635836e-05, "loss": 0.1207, "step": 11830 }, { "action_loss": 0.014607141725718975, "epoch": 10.638489208633093, "step": 11830 }, { "epoch": 10.638489208633093, "step": 11830, "torque_loss": 0.20309583842754364 }, { "epoch": 10.647482014388489, "grad_norm": 0.5558615326881409, "learning_rate": 9.418312237845331e-05, "loss": 0.1163, "step": 11840 }, { "action_loss": 0.007574313785880804, "epoch": 10.647482014388489, "step": 11840 }, { "epoch": 10.647482014388489, "step": 11840, "torque_loss": 0.112340547144413 }, { "epoch": 10.656474820143885, "grad_norm": 0.48251718282699585, "learning_rate": 9.417021516888225e-05, "loss": 0.1137, "step": 11850 }, { "action_loss": 0.018359676003456116, "epoch": 10.656474820143885, "step": 11850 }, { "epoch": 10.656474820143885, "step": 11850, "torque_loss": 0.2098560780286789 }, { "epoch": 10.66546762589928, "grad_norm": 0.39162176847457886, "learning_rate": 9.415729454156608e-05, "loss": 0.1084, "step": 11860 }, { "action_loss": 0.01909749023616314, "epoch": 10.66546762589928, "step": 11860 }, { "epoch": 10.66546762589928, "step": 11860, "torque_loss": 0.18094998598098755 }, { "epoch": 10.674460431654676, "grad_norm": 0.43748950958251953, "learning_rate": 9.414436050042973e-05, "loss": 0.1172, "step": 11870 }, { "action_loss": 0.013409525156021118, "epoch": 10.674460431654676, "step": 11870 }, { "epoch": 10.674460431654676, "step": 11870, "torque_loss": 0.20683293044567108 }, { "epoch": 10.683453237410072, "grad_norm": 0.43854907155036926, "learning_rate": 9.413141304940223e-05, "loss": 0.1134, "step": 11880 }, { "action_loss": 0.011780482716858387, "epoch": 10.683453237410072, "step": 11880 }, { "epoch": 10.683453237410072, "step": 11880, "torque_loss": 0.1550305336713791 }, { "epoch": 10.692446043165468, "grad_norm": 0.5432202816009521, "learning_rate": 9.411845219241666e-05, "loss": 0.1112, "step": 11890 }, { "action_loss": 0.0045369830913841724, "epoch": 10.692446043165468, "step": 11890 }, { "epoch": 10.692446043165468, "step": 11890, "torque_loss": 0.11352745443582535 }, { "epoch": 10.701438848920864, "grad_norm": 0.3416752517223358, "learning_rate": 9.410547793341021e-05, "loss": 0.1128, "step": 11900 }, { "action_loss": 0.006953095551580191, "epoch": 10.701438848920864, "step": 11900 }, { "epoch": 10.701438848920864, "step": 11900, "torque_loss": 0.14506815373897552 }, { "epoch": 10.71043165467626, "grad_norm": 0.3608955144882202, "learning_rate": 9.409249027632408e-05, "loss": 0.108, "step": 11910 }, { "action_loss": 0.009203321300446987, "epoch": 10.71043165467626, "step": 11910 }, { "epoch": 10.71043165467626, "step": 11910, "torque_loss": 0.1668577939271927 }, { "epoch": 10.719424460431654, "grad_norm": 0.4063766300678253, "learning_rate": 9.407948922510362e-05, "loss": 0.1137, "step": 11920 }, { "action_loss": 0.006331393960863352, "epoch": 10.719424460431654, "step": 11920 }, { "epoch": 10.719424460431654, "step": 11920, "torque_loss": 0.1245064064860344 }, { "epoch": 10.72841726618705, "grad_norm": 0.3298187851905823, "learning_rate": 9.406647478369817e-05, "loss": 0.1041, "step": 11930 }, { "action_loss": 0.01763427071273327, "epoch": 10.72841726618705, "step": 11930 }, { "epoch": 10.72841726618705, "step": 11930, "torque_loss": 0.22905601561069489 }, { "epoch": 10.737410071942445, "grad_norm": 0.38050422072410583, "learning_rate": 9.405344695606118e-05, "loss": 0.1087, "step": 11940 }, { "action_loss": 0.006521489471197128, "epoch": 10.737410071942445, "step": 11940 }, { "epoch": 10.737410071942445, "step": 11940, "torque_loss": 0.11351009458303452 }, { "epoch": 10.746402877697841, "grad_norm": 0.4065426290035248, "learning_rate": 9.404040574615018e-05, "loss": 0.0985, "step": 11950 }, { "action_loss": 0.01919683814048767, "epoch": 10.746402877697841, "step": 11950 }, { "epoch": 10.746402877697841, "step": 11950, "torque_loss": 0.20840446650981903 }, { "epoch": 10.755395683453237, "grad_norm": 0.4768826365470886, "learning_rate": 9.402735115792674e-05, "loss": 0.1239, "step": 11960 }, { "action_loss": 0.007047960069030523, "epoch": 10.755395683453237, "step": 11960 }, { "epoch": 10.755395683453237, "step": 11960, "torque_loss": 0.10793310403823853 }, { "epoch": 10.764388489208633, "grad_norm": 0.3955623209476471, "learning_rate": 9.401428319535649e-05, "loss": 0.0992, "step": 11970 }, { "action_loss": 0.008952253498136997, "epoch": 10.764388489208633, "step": 11970 }, { "epoch": 10.764388489208633, "step": 11970, "torque_loss": 0.11608162522315979 }, { "epoch": 10.773381294964029, "grad_norm": 0.5454248785972595, "learning_rate": 9.400120186240912e-05, "loss": 0.1118, "step": 11980 }, { "action_loss": 0.007933505810797215, "epoch": 10.773381294964029, "step": 11980 }, { "epoch": 10.773381294964029, "step": 11980, "torque_loss": 0.1470978856086731 }, { "epoch": 10.782374100719425, "grad_norm": 0.5594480633735657, "learning_rate": 9.398810716305844e-05, "loss": 0.1076, "step": 11990 }, { "action_loss": 0.01591290533542633, "epoch": 10.782374100719425, "step": 11990 }, { "epoch": 10.782374100719425, "step": 11990, "torque_loss": 0.16556905210018158 }, { "epoch": 10.79136690647482, "grad_norm": 0.43754297494888306, "learning_rate": 9.397499910128222e-05, "loss": 0.1231, "step": 12000 }, { "action_loss": 0.004819341469556093, "epoch": 10.79136690647482, "step": 12000 }, { "epoch": 10.79136690647482, "step": 12000, "torque_loss": 0.12929703295230865 }, { "epoch": 10.800359712230216, "grad_norm": 0.3453734815120697, "learning_rate": 9.396187768106237e-05, "loss": 0.101, "step": 12010 }, { "action_loss": 0.009307256899774075, "epoch": 10.800359712230216, "step": 12010 }, { "epoch": 10.800359712230216, "step": 12010, "torque_loss": 0.12324056774377823 }, { "epoch": 10.809352517985612, "grad_norm": 0.40327173471450806, "learning_rate": 9.394874290638482e-05, "loss": 0.1088, "step": 12020 }, { "action_loss": 0.014982105232775211, "epoch": 10.809352517985612, "step": 12020 }, { "epoch": 10.809352517985612, "step": 12020, "torque_loss": 0.21463747322559357 }, { "epoch": 10.818345323741006, "grad_norm": 0.3815365731716156, "learning_rate": 9.393559478123959e-05, "loss": 0.1162, "step": 12030 }, { "action_loss": 0.013519818894565105, "epoch": 10.818345323741006, "step": 12030 }, { "epoch": 10.818345323741006, "step": 12030, "torque_loss": 0.17441244423389435 }, { "epoch": 10.827338129496402, "grad_norm": 0.33466339111328125, "learning_rate": 9.39224333096207e-05, "loss": 0.1078, "step": 12040 }, { "action_loss": 0.0064761582762002945, "epoch": 10.827338129496402, "step": 12040 }, { "epoch": 10.827338129496402, "step": 12040, "torque_loss": 0.11750663071870804 }, { "epoch": 10.836330935251798, "grad_norm": 0.4640343487262726, "learning_rate": 9.390925849552629e-05, "loss": 0.1059, "step": 12050 }, { "action_loss": 0.015023522078990936, "epoch": 10.836330935251798, "step": 12050 }, { "epoch": 10.836330935251798, "step": 12050, "torque_loss": 0.20450849831104279 }, { "epoch": 10.845323741007194, "grad_norm": 0.36262184381484985, "learning_rate": 9.389607034295849e-05, "loss": 0.0982, "step": 12060 }, { "action_loss": 0.015136025846004486, "epoch": 10.845323741007194, "step": 12060 }, { "epoch": 10.845323741007194, "step": 12060, "torque_loss": 0.16120043396949768 }, { "epoch": 10.85431654676259, "grad_norm": 0.31175267696380615, "learning_rate": 9.388286885592355e-05, "loss": 0.1207, "step": 12070 }, { "action_loss": 0.0070376028306782246, "epoch": 10.85431654676259, "step": 12070 }, { "epoch": 10.85431654676259, "step": 12070, "torque_loss": 0.11276517063379288 }, { "epoch": 10.863309352517986, "grad_norm": 0.4116092920303345, "learning_rate": 9.386965403843168e-05, "loss": 0.1026, "step": 12080 }, { "action_loss": 0.004916139412671328, "epoch": 10.863309352517986, "step": 12080 }, { "epoch": 10.863309352517986, "step": 12080, "torque_loss": 0.15866976976394653 }, { "epoch": 10.872302158273381, "grad_norm": 0.320354163646698, "learning_rate": 9.385642589449726e-05, "loss": 0.1041, "step": 12090 }, { "action_loss": 0.004888240713626146, "epoch": 10.872302158273381, "step": 12090 }, { "epoch": 10.872302158273381, "step": 12090, "torque_loss": 0.09292802214622498 }, { "epoch": 10.881294964028777, "grad_norm": 0.41464272141456604, "learning_rate": 9.38431844281386e-05, "loss": 0.1027, "step": 12100 }, { "action_loss": 0.006762102246284485, "epoch": 10.881294964028777, "step": 12100 }, { "epoch": 10.881294964028777, "step": 12100, "torque_loss": 0.15301160514354706 }, { "epoch": 10.890287769784173, "grad_norm": 0.3395727872848511, "learning_rate": 9.38299296433781e-05, "loss": 0.1069, "step": 12110 }, { "action_loss": 0.01299334317445755, "epoch": 10.890287769784173, "step": 12110 }, { "epoch": 10.890287769784173, "step": 12110, "torque_loss": 0.20794379711151123 }, { "epoch": 10.899280575539569, "grad_norm": 0.3141842782497406, "learning_rate": 9.381666154424226e-05, "loss": 0.1123, "step": 12120 }, { "action_loss": 0.013389497064054012, "epoch": 10.899280575539569, "step": 12120 }, { "epoch": 10.899280575539569, "step": 12120, "torque_loss": 0.15907275676727295 }, { "epoch": 10.908273381294965, "grad_norm": 0.4012972414493561, "learning_rate": 9.380338013476157e-05, "loss": 0.111, "step": 12130 }, { "action_loss": 0.0233529731631279, "epoch": 10.908273381294965, "step": 12130 }, { "epoch": 10.908273381294965, "step": 12130, "torque_loss": 0.28530529141426086 }, { "epoch": 10.917266187050359, "grad_norm": 0.40122514963150024, "learning_rate": 9.379008541897054e-05, "loss": 0.1169, "step": 12140 }, { "action_loss": 0.0060343355871737, "epoch": 10.917266187050359, "step": 12140 }, { "epoch": 10.917266187050359, "step": 12140, "torque_loss": 0.11256318539381027 }, { "epoch": 10.926258992805755, "grad_norm": 0.38536080718040466, "learning_rate": 9.377677740090777e-05, "loss": 0.1231, "step": 12150 }, { "action_loss": 0.009819163009524345, "epoch": 10.926258992805755, "step": 12150 }, { "epoch": 10.926258992805755, "step": 12150, "torque_loss": 0.14750619232654572 }, { "epoch": 10.93525179856115, "grad_norm": 0.4263201057910919, "learning_rate": 9.376345608461588e-05, "loss": 0.1086, "step": 12160 }, { "action_loss": 0.011092737317085266, "epoch": 10.93525179856115, "step": 12160 }, { "epoch": 10.93525179856115, "step": 12160, "torque_loss": 0.21191461384296417 }, { "epoch": 10.944244604316546, "grad_norm": 0.30255556106567383, "learning_rate": 9.375012147414155e-05, "loss": 0.1052, "step": 12170 }, { "action_loss": 0.030785495415329933, "epoch": 10.944244604316546, "step": 12170 }, { "epoch": 10.944244604316546, "step": 12170, "torque_loss": 0.21552489697933197 }, { "epoch": 10.953237410071942, "grad_norm": 0.40294280648231506, "learning_rate": 9.373677357353545e-05, "loss": 0.118, "step": 12180 }, { "action_loss": 0.02157929539680481, "epoch": 10.953237410071942, "step": 12180 }, { "epoch": 10.953237410071942, "step": 12180, "torque_loss": 0.17733341455459595 }, { "epoch": 10.962230215827338, "grad_norm": 0.42937085032463074, "learning_rate": 9.372341238685237e-05, "loss": 0.0956, "step": 12190 }, { "action_loss": 0.008841034956276417, "epoch": 10.962230215827338, "step": 12190 }, { "epoch": 10.962230215827338, "step": 12190, "torque_loss": 0.15922944247722626 }, { "epoch": 10.971223021582734, "grad_norm": 0.5055394172668457, "learning_rate": 9.371003791815102e-05, "loss": 0.1078, "step": 12200 }, { "action_loss": 0.0049613662995398045, "epoch": 10.971223021582734, "step": 12200 }, { "epoch": 10.971223021582734, "step": 12200, "torque_loss": 0.10087230801582336 }, { "epoch": 10.98021582733813, "grad_norm": 0.4643513262271881, "learning_rate": 9.369665017149429e-05, "loss": 0.1089, "step": 12210 }, { "action_loss": 0.016138141974806786, "epoch": 10.98021582733813, "step": 12210 }, { "epoch": 10.98021582733813, "step": 12210, "torque_loss": 0.23927541077136993 }, { "epoch": 10.989208633093526, "grad_norm": 0.3108353614807129, "learning_rate": 9.368324915094895e-05, "loss": 0.104, "step": 12220 }, { "action_loss": 0.018562888726592064, "epoch": 10.989208633093526, "step": 12220 }, { "epoch": 10.989208633093526, "step": 12220, "torque_loss": 0.24087516963481903 }, { "epoch": 10.998201438848922, "grad_norm": 0.3043265640735626, "learning_rate": 9.366983486058591e-05, "loss": 0.1142, "step": 12230 }, { "action_loss": 0.0194129329174757, "epoch": 10.998201438848922, "step": 12230 }, { "epoch": 10.998201438848922, "step": 12230, "torque_loss": 0.1817152053117752 }, { "epoch": 11.007194244604317, "grad_norm": 0.38603490591049194, "learning_rate": 9.365640730448009e-05, "loss": 0.1123, "step": 12240 }, { "action_loss": 0.010160601697862148, "epoch": 11.007194244604317, "step": 12240 }, { "epoch": 11.007194244604317, "step": 12240, "torque_loss": 0.15367372334003448 }, { "epoch": 11.016187050359711, "grad_norm": 0.39843466877937317, "learning_rate": 9.36429664867104e-05, "loss": 0.1227, "step": 12250 }, { "action_loss": 0.007507903967052698, "epoch": 11.016187050359711, "step": 12250 }, { "epoch": 11.016187050359711, "step": 12250, "torque_loss": 0.10499797016382217 }, { "epoch": 11.025179856115107, "grad_norm": 0.4729006588459015, "learning_rate": 9.362951241135982e-05, "loss": 0.114, "step": 12260 }, { "action_loss": 0.01116007287055254, "epoch": 11.025179856115107, "step": 12260 }, { "epoch": 11.025179856115107, "step": 12260, "torque_loss": 0.1442543864250183 }, { "epoch": 11.034172661870503, "grad_norm": 0.34096893668174744, "learning_rate": 9.361604508251534e-05, "loss": 0.1045, "step": 12270 }, { "action_loss": 0.009201928973197937, "epoch": 11.034172661870503, "step": 12270 }, { "epoch": 11.034172661870503, "step": 12270, "torque_loss": 0.13916097581386566 }, { "epoch": 11.043165467625899, "grad_norm": 0.3484081029891968, "learning_rate": 9.360256450426799e-05, "loss": 0.1045, "step": 12280 }, { "action_loss": 0.011155943386256695, "epoch": 11.043165467625899, "step": 12280 }, { "epoch": 11.043165467625899, "step": 12280, "torque_loss": 0.1680763214826584 }, { "epoch": 11.052158273381295, "grad_norm": 0.43329930305480957, "learning_rate": 9.358907068071279e-05, "loss": 0.1019, "step": 12290 }, { "action_loss": 0.007945106364786625, "epoch": 11.052158273381295, "step": 12290 }, { "epoch": 11.052158273381295, "step": 12290, "torque_loss": 0.14647483825683594 }, { "epoch": 11.06115107913669, "grad_norm": 0.44887036085128784, "learning_rate": 9.357556361594882e-05, "loss": 0.0973, "step": 12300 }, { "action_loss": 0.004879587795585394, "epoch": 11.06115107913669, "step": 12300 }, { "epoch": 11.06115107913669, "step": 12300, "torque_loss": 0.13303129374980927 }, { "epoch": 11.070143884892087, "grad_norm": 0.3929116427898407, "learning_rate": 9.356204331407917e-05, "loss": 0.108, "step": 12310 }, { "action_loss": 0.04355092719197273, "epoch": 11.070143884892087, "step": 12310 }, { "epoch": 11.070143884892087, "step": 12310, "torque_loss": 0.20367105305194855 }, { "epoch": 11.079136690647482, "grad_norm": 0.4804379940032959, "learning_rate": 9.354850977921094e-05, "loss": 0.1191, "step": 12320 }, { "action_loss": 0.01586235500872135, "epoch": 11.079136690647482, "step": 12320 }, { "epoch": 11.079136690647482, "step": 12320, "torque_loss": 0.22494055330753326 }, { "epoch": 11.088129496402878, "grad_norm": 0.42232081294059753, "learning_rate": 9.353496301545529e-05, "loss": 0.1089, "step": 12330 }, { "action_loss": 0.010212600231170654, "epoch": 11.088129496402878, "step": 12330 }, { "epoch": 11.088129496402878, "step": 12330, "torque_loss": 0.13543416559696198 }, { "epoch": 11.097122302158274, "grad_norm": 0.3917171359062195, "learning_rate": 9.352140302692733e-05, "loss": 0.116, "step": 12340 }, { "action_loss": 0.004295202903449535, "epoch": 11.097122302158274, "step": 12340 }, { "epoch": 11.097122302158274, "step": 12340, "torque_loss": 0.15110550820827484 }, { "epoch": 11.10611510791367, "grad_norm": 0.33796438574790955, "learning_rate": 9.350782981774627e-05, "loss": 0.1212, "step": 12350 }, { "action_loss": 0.008950927294790745, "epoch": 11.10611510791367, "step": 12350 }, { "epoch": 11.10611510791367, "step": 12350, "torque_loss": 0.17905886471271515 }, { "epoch": 11.115107913669064, "grad_norm": 0.2986343204975128, "learning_rate": 9.349424339203526e-05, "loss": 0.1103, "step": 12360 }, { "action_loss": 0.008160252124071121, "epoch": 11.115107913669064, "step": 12360 }, { "epoch": 11.115107913669064, "step": 12360, "torque_loss": 0.15714053809642792 }, { "epoch": 11.12410071942446, "grad_norm": 0.42033544182777405, "learning_rate": 9.34806437539215e-05, "loss": 0.1192, "step": 12370 }, { "action_loss": 0.018645435571670532, "epoch": 11.12410071942446, "step": 12370 }, { "epoch": 11.12410071942446, "step": 12370, "torque_loss": 0.21605515480041504 }, { "epoch": 11.133093525179856, "grad_norm": 0.44932079315185547, "learning_rate": 9.346703090753622e-05, "loss": 0.1228, "step": 12380 }, { "action_loss": 0.01674940623342991, "epoch": 11.133093525179856, "step": 12380 }, { "epoch": 11.133093525179856, "step": 12380, "torque_loss": 0.24358922243118286 }, { "epoch": 11.142086330935252, "grad_norm": 0.46710729598999023, "learning_rate": 9.345340485701461e-05, "loss": 0.1249, "step": 12390 }, { "action_loss": 0.007737803738564253, "epoch": 11.142086330935252, "step": 12390 }, { "epoch": 11.142086330935252, "step": 12390, "torque_loss": 0.12708760797977448 }, { "epoch": 11.151079136690647, "grad_norm": 0.41607579588890076, "learning_rate": 9.343976560649595e-05, "loss": 0.0965, "step": 12400 }, { "action_loss": 0.011030447669327259, "epoch": 11.151079136690647, "step": 12400 }, { "epoch": 11.151079136690647, "step": 12400, "torque_loss": 0.17988626658916473 }, { "epoch": 11.160071942446043, "grad_norm": 0.44170358777046204, "learning_rate": 9.342611316012344e-05, "loss": 0.1166, "step": 12410 }, { "action_loss": 0.012968729250133038, "epoch": 11.160071942446043, "step": 12410 }, { "epoch": 11.160071942446043, "step": 12410, "torque_loss": 0.17077122628688812 }, { "epoch": 11.16906474820144, "grad_norm": 0.39914873242378235, "learning_rate": 9.341244752204437e-05, "loss": 0.1126, "step": 12420 }, { "action_loss": 0.017025453969836235, "epoch": 11.16906474820144, "step": 12420 }, { "epoch": 11.16906474820144, "step": 12420, "torque_loss": 0.22695167362689972 }, { "epoch": 11.178057553956835, "grad_norm": 0.31779414415359497, "learning_rate": 9.339876869640995e-05, "loss": 0.0965, "step": 12430 }, { "action_loss": 0.014369063079357147, "epoch": 11.178057553956835, "step": 12430 }, { "epoch": 11.178057553956835, "step": 12430, "torque_loss": 0.16788524389266968 }, { "epoch": 11.18705035971223, "grad_norm": 0.46011295914649963, "learning_rate": 9.33850766873755e-05, "loss": 0.1173, "step": 12440 }, { "action_loss": 0.022275755181908607, "epoch": 11.18705035971223, "step": 12440 }, { "epoch": 11.18705035971223, "step": 12440, "torque_loss": 0.2186921387910843 }, { "epoch": 11.196043165467627, "grad_norm": 0.4466851055622101, "learning_rate": 9.337137149910028e-05, "loss": 0.1226, "step": 12450 }, { "action_loss": 0.007632728666067123, "epoch": 11.196043165467627, "step": 12450 }, { "epoch": 11.196043165467627, "step": 12450, "torque_loss": 0.18941982090473175 }, { "epoch": 11.20503597122302, "grad_norm": 0.4530165493488312, "learning_rate": 9.335765313574753e-05, "loss": 0.1033, "step": 12460 }, { "action_loss": 0.006102666258811951, "epoch": 11.20503597122302, "step": 12460 }, { "epoch": 11.20503597122302, "step": 12460, "torque_loss": 0.13523827493190765 }, { "epoch": 11.214028776978417, "grad_norm": 0.5213270783424377, "learning_rate": 9.334392160148457e-05, "loss": 0.1106, "step": 12470 }, { "action_loss": 0.013256910257041454, "epoch": 11.214028776978417, "step": 12470 }, { "epoch": 11.214028776978417, "step": 12470, "torque_loss": 0.1138504147529602 }, { "epoch": 11.223021582733812, "grad_norm": 0.42035943269729614, "learning_rate": 9.333017690048264e-05, "loss": 0.1112, "step": 12480 }, { "action_loss": 0.011321738362312317, "epoch": 11.223021582733812, "step": 12480 }, { "epoch": 11.223021582733812, "step": 12480, "torque_loss": 0.13656871020793915 }, { "epoch": 11.232014388489208, "grad_norm": 0.48891371488571167, "learning_rate": 9.331641903691706e-05, "loss": 0.1073, "step": 12490 }, { "action_loss": 0.019788969308137894, "epoch": 11.232014388489208, "step": 12490 }, { "epoch": 11.232014388489208, "step": 12490, "torque_loss": 0.16368038952350616 }, { "epoch": 11.241007194244604, "grad_norm": 0.4247366786003113, "learning_rate": 9.330264801496707e-05, "loss": 0.1231, "step": 12500 }, { "action_loss": 0.022669097408652306, "epoch": 11.241007194244604, "step": 12500 }, { "epoch": 11.241007194244604, "step": 12500, "torque_loss": 0.16941845417022705 }, { "epoch": 11.25, "grad_norm": 0.4516794979572296, "learning_rate": 9.328886383881594e-05, "loss": 0.109, "step": 12510 }, { "action_loss": 0.01119727548211813, "epoch": 11.25, "step": 12510 }, { "epoch": 11.25, "step": 12510, "torque_loss": 0.18840591609477997 }, { "epoch": 11.258992805755396, "grad_norm": 0.37602388858795166, "learning_rate": 9.327506651265095e-05, "loss": 0.1131, "step": 12520 }, { "action_loss": 0.01385349128395319, "epoch": 11.258992805755396, "step": 12520 }, { "epoch": 11.258992805755396, "step": 12520, "torque_loss": 0.12305303663015366 }, { "epoch": 11.267985611510792, "grad_norm": 0.3607887029647827, "learning_rate": 9.326125604066338e-05, "loss": 0.1164, "step": 12530 }, { "action_loss": 0.008885464631021023, "epoch": 11.267985611510792, "step": 12530 }, { "epoch": 11.267985611510792, "step": 12530, "torque_loss": 0.10658135265111923 }, { "epoch": 11.276978417266188, "grad_norm": 0.48246851563453674, "learning_rate": 9.324743242704847e-05, "loss": 0.1054, "step": 12540 }, { "action_loss": 0.007764542009681463, "epoch": 11.276978417266188, "step": 12540 }, { "epoch": 11.276978417266188, "step": 12540, "torque_loss": 0.1888502985239029 }, { "epoch": 11.285971223021583, "grad_norm": 0.38522082567214966, "learning_rate": 9.323359567600546e-05, "loss": 0.1213, "step": 12550 }, { "action_loss": 0.027151718735694885, "epoch": 11.285971223021583, "step": 12550 }, { "epoch": 11.285971223021583, "step": 12550, "torque_loss": 0.26857686042785645 }, { "epoch": 11.29496402877698, "grad_norm": 0.3726217448711395, "learning_rate": 9.321974579173761e-05, "loss": 0.1112, "step": 12560 }, { "action_loss": 0.006826154887676239, "epoch": 11.29496402877698, "step": 12560 }, { "epoch": 11.29496402877698, "step": 12560, "torque_loss": 0.17074353992938995 }, { "epoch": 11.303956834532373, "grad_norm": 0.4542442858219147, "learning_rate": 9.320588277845213e-05, "loss": 0.1279, "step": 12570 }, { "action_loss": 0.010699531994760036, "epoch": 11.303956834532373, "step": 12570 }, { "epoch": 11.303956834532373, "step": 12570, "torque_loss": 0.2061590552330017 }, { "epoch": 11.31294964028777, "grad_norm": 0.4085211455821991, "learning_rate": 9.319200664036026e-05, "loss": 0.1209, "step": 12580 }, { "action_loss": 0.004822729155421257, "epoch": 11.31294964028777, "step": 12580 }, { "epoch": 11.31294964028777, "step": 12580, "torque_loss": 0.11634737253189087 }, { "epoch": 11.321942446043165, "grad_norm": 0.38698211312294006, "learning_rate": 9.31781173816772e-05, "loss": 0.102, "step": 12590 }, { "action_loss": 0.027933945879340172, "epoch": 11.321942446043165, "step": 12590 }, { "epoch": 11.321942446043165, "step": 12590, "torque_loss": 0.21804653108119965 }, { "epoch": 11.33093525179856, "grad_norm": 0.38194540143013, "learning_rate": 9.316421500662212e-05, "loss": 0.1083, "step": 12600 }, { "action_loss": 0.005700069013983011, "epoch": 11.33093525179856, "step": 12600 }, { "epoch": 11.33093525179856, "step": 12600, "torque_loss": 0.12123183161020279 }, { "epoch": 11.339928057553957, "grad_norm": 0.37116432189941406, "learning_rate": 9.31502995194182e-05, "loss": 0.1039, "step": 12610 }, { "action_loss": 0.009019696153700352, "epoch": 11.339928057553957, "step": 12610 }, { "epoch": 11.339928057553957, "step": 12610, "torque_loss": 0.14506185054779053 }, { "epoch": 11.348920863309353, "grad_norm": 0.3690848648548126, "learning_rate": 9.31363709242926e-05, "loss": 0.1148, "step": 12620 }, { "action_loss": 0.015487599186599255, "epoch": 11.348920863309353, "step": 12620 }, { "epoch": 11.348920863309353, "step": 12620, "torque_loss": 0.1482282429933548 }, { "epoch": 11.357913669064748, "grad_norm": 0.42809584736824036, "learning_rate": 9.312242922547647e-05, "loss": 0.0943, "step": 12630 }, { "action_loss": 0.007798964623361826, "epoch": 11.357913669064748, "step": 12630 }, { "epoch": 11.357913669064748, "step": 12630, "torque_loss": 0.13521064817905426 }, { "epoch": 11.366906474820144, "grad_norm": 0.3594772517681122, "learning_rate": 9.310847442720492e-05, "loss": 0.1144, "step": 12640 }, { "action_loss": 0.010774056427180767, "epoch": 11.366906474820144, "step": 12640 }, { "epoch": 11.366906474820144, "step": 12640, "torque_loss": 0.1707981377840042 }, { "epoch": 11.37589928057554, "grad_norm": 0.515690267086029, "learning_rate": 9.309450653371706e-05, "loss": 0.1182, "step": 12650 }, { "action_loss": 0.008993786759674549, "epoch": 11.37589928057554, "step": 12650 }, { "epoch": 11.37589928057554, "step": 12650, "torque_loss": 0.15421843528747559 }, { "epoch": 11.384892086330936, "grad_norm": 0.4990899860858917, "learning_rate": 9.308052554925595e-05, "loss": 0.1051, "step": 12660 }, { "action_loss": 0.0321982242166996, "epoch": 11.384892086330936, "step": 12660 }, { "epoch": 11.384892086330936, "step": 12660, "torque_loss": 0.2673574388027191 }, { "epoch": 11.39388489208633, "grad_norm": 0.5130634307861328, "learning_rate": 9.306653147806867e-05, "loss": 0.1149, "step": 12670 }, { "action_loss": 0.0034845878835767508, "epoch": 11.39388489208633, "step": 12670 }, { "epoch": 11.39388489208633, "step": 12670, "torque_loss": 0.0838836133480072 }, { "epoch": 11.402877697841726, "grad_norm": 0.4505019783973694, "learning_rate": 9.305252432440622e-05, "loss": 0.1031, "step": 12680 }, { "action_loss": 0.014127927832305431, "epoch": 11.402877697841726, "step": 12680 }, { "epoch": 11.402877697841726, "step": 12680, "torque_loss": 0.13621191680431366 }, { "epoch": 11.411870503597122, "grad_norm": 0.3653222918510437, "learning_rate": 9.303850409252361e-05, "loss": 0.1026, "step": 12690 }, { "action_loss": 0.006141627207398415, "epoch": 11.411870503597122, "step": 12690 }, { "epoch": 11.411870503597122, "step": 12690, "torque_loss": 0.1120925173163414 }, { "epoch": 11.420863309352518, "grad_norm": 0.46190834045410156, "learning_rate": 9.302447078667985e-05, "loss": 0.1021, "step": 12700 }, { "action_loss": 0.010177903808653355, "epoch": 11.420863309352518, "step": 12700 }, { "epoch": 11.420863309352518, "step": 12700, "torque_loss": 0.12034652382135391 }, { "epoch": 11.429856115107913, "grad_norm": 0.44701215624809265, "learning_rate": 9.301042441113783e-05, "loss": 0.1028, "step": 12710 }, { "action_loss": 0.010191609151661396, "epoch": 11.429856115107913, "step": 12710 }, { "epoch": 11.429856115107913, "step": 12710, "torque_loss": 0.15025947988033295 }, { "epoch": 11.43884892086331, "grad_norm": 0.4128844141960144, "learning_rate": 9.299636497016451e-05, "loss": 0.1271, "step": 12720 }, { "action_loss": 0.027339249849319458, "epoch": 11.43884892086331, "step": 12720 }, { "epoch": 11.43884892086331, "step": 12720, "torque_loss": 0.178923562169075 }, { "epoch": 11.447841726618705, "grad_norm": 0.37876731157302856, "learning_rate": 9.298229246803076e-05, "loss": 0.1062, "step": 12730 }, { "action_loss": 0.0072293970733881, "epoch": 11.447841726618705, "step": 12730 }, { "epoch": 11.447841726618705, "step": 12730, "torque_loss": 0.06536024808883667 }, { "epoch": 11.456834532374101, "grad_norm": 0.3688599169254303, "learning_rate": 9.296820690901144e-05, "loss": 0.1075, "step": 12740 }, { "action_loss": 0.005924989935010672, "epoch": 11.456834532374101, "step": 12740 }, { "epoch": 11.456834532374101, "step": 12740, "torque_loss": 0.10512054711580276 }, { "epoch": 11.465827338129497, "grad_norm": 0.36449211835861206, "learning_rate": 9.295410829738539e-05, "loss": 0.1046, "step": 12750 }, { "action_loss": 0.009004532359540462, "epoch": 11.465827338129497, "step": 12750 }, { "epoch": 11.465827338129497, "step": 12750, "torque_loss": 0.16159281134605408 }, { "epoch": 11.474820143884893, "grad_norm": 0.4591819941997528, "learning_rate": 9.293999663743535e-05, "loss": 0.1073, "step": 12760 }, { "action_loss": 0.008635745383799076, "epoch": 11.474820143884893, "step": 12760 }, { "epoch": 11.474820143884893, "step": 12760, "torque_loss": 0.16683833301067352 }, { "epoch": 11.483812949640289, "grad_norm": 0.4141153395175934, "learning_rate": 9.292587193344813e-05, "loss": 0.1254, "step": 12770 }, { "action_loss": 0.005731187295168638, "epoch": 11.483812949640289, "step": 12770 }, { "epoch": 11.483812949640289, "step": 12770, "torque_loss": 0.1302594393491745 }, { "epoch": 11.492805755395683, "grad_norm": 0.37042227387428284, "learning_rate": 9.291173418971437e-05, "loss": 0.1134, "step": 12780 }, { "action_loss": 0.021688150241971016, "epoch": 11.492805755395683, "step": 12780 }, { "epoch": 11.492805755395683, "step": 12780, "torque_loss": 0.22554732859134674 }, { "epoch": 11.501798561151078, "grad_norm": 0.4141480326652527, "learning_rate": 9.28975834105288e-05, "loss": 0.1187, "step": 12790 }, { "action_loss": 0.02651931345462799, "epoch": 11.501798561151078, "step": 12790 }, { "epoch": 11.501798561151078, "step": 12790, "torque_loss": 0.17125873267650604 }, { "epoch": 11.510791366906474, "grad_norm": 0.3550852835178375, "learning_rate": 9.288341960019004e-05, "loss": 0.1092, "step": 12800 }, { "action_loss": 0.006437332835048437, "epoch": 11.510791366906474, "step": 12800 }, { "epoch": 11.510791366906474, "step": 12800, "torque_loss": 0.12267782539129257 }, { "epoch": 11.51978417266187, "grad_norm": 0.4396991431713104, "learning_rate": 9.286924276300067e-05, "loss": 0.1194, "step": 12810 }, { "action_loss": 0.00859337393194437, "epoch": 11.51978417266187, "step": 12810 }, { "epoch": 11.51978417266187, "step": 12810, "torque_loss": 0.10264722257852554 }, { "epoch": 11.528776978417266, "grad_norm": 0.38368287682533264, "learning_rate": 9.285505290326726e-05, "loss": 0.1137, "step": 12820 }, { "action_loss": 0.00760575570166111, "epoch": 11.528776978417266, "step": 12820 }, { "epoch": 11.528776978417266, "step": 12820, "torque_loss": 0.14459870755672455 }, { "epoch": 11.537769784172662, "grad_norm": 0.4441986680030823, "learning_rate": 9.284085002530027e-05, "loss": 0.1017, "step": 12830 }, { "action_loss": 0.012198589742183685, "epoch": 11.537769784172662, "step": 12830 }, { "epoch": 11.537769784172662, "step": 12830, "torque_loss": 0.16016210615634918 }, { "epoch": 11.546762589928058, "grad_norm": 0.35259589552879333, "learning_rate": 9.282663413341422e-05, "loss": 0.1168, "step": 12840 }, { "action_loss": 0.006873064208775759, "epoch": 11.546762589928058, "step": 12840 }, { "epoch": 11.546762589928058, "step": 12840, "torque_loss": 0.11040753871202469 }, { "epoch": 11.555755395683454, "grad_norm": 0.35317742824554443, "learning_rate": 9.281240523192747e-05, "loss": 0.0971, "step": 12850 }, { "action_loss": 0.012133684940636158, "epoch": 11.555755395683454, "step": 12850 }, { "epoch": 11.555755395683454, "step": 12850, "torque_loss": 0.17225635051727295 }, { "epoch": 11.56474820143885, "grad_norm": 0.353922963142395, "learning_rate": 9.279816332516242e-05, "loss": 0.1033, "step": 12860 }, { "action_loss": 0.01457205880433321, "epoch": 11.56474820143885, "step": 12860 }, { "epoch": 11.56474820143885, "step": 12860, "torque_loss": 0.21061939001083374 }, { "epoch": 11.573741007194245, "grad_norm": 0.4369911253452301, "learning_rate": 9.278390841744536e-05, "loss": 0.1163, "step": 12870 }, { "action_loss": 0.004854412283748388, "epoch": 11.573741007194245, "step": 12870 }, { "epoch": 11.573741007194245, "step": 12870, "torque_loss": 0.11018449068069458 }, { "epoch": 11.582733812949641, "grad_norm": 0.3858620524406433, "learning_rate": 9.276964051310658e-05, "loss": 0.1082, "step": 12880 }, { "action_loss": 0.037249237298965454, "epoch": 11.582733812949641, "step": 12880 }, { "epoch": 11.582733812949641, "step": 12880, "torque_loss": 0.23440112173557281 }, { "epoch": 11.591726618705035, "grad_norm": 0.409488320350647, "learning_rate": 9.275535961648027e-05, "loss": 0.1021, "step": 12890 }, { "action_loss": 0.00843051914125681, "epoch": 11.591726618705035, "step": 12890 }, { "epoch": 11.591726618705035, "step": 12890, "torque_loss": 0.16114747524261475 }, { "epoch": 11.600719424460431, "grad_norm": 0.3282390832901001, "learning_rate": 9.274106573190459e-05, "loss": 0.0988, "step": 12900 }, { "action_loss": 0.008396480232477188, "epoch": 11.600719424460431, "step": 12900 }, { "epoch": 11.600719424460431, "step": 12900, "torque_loss": 0.17895351350307465 }, { "epoch": 11.609712230215827, "grad_norm": 0.40967661142349243, "learning_rate": 9.272675886372168e-05, "loss": 0.0984, "step": 12910 }, { "action_loss": 0.017837511375546455, "epoch": 11.609712230215827, "step": 12910 }, { "epoch": 11.609712230215827, "step": 12910, "torque_loss": 0.17380225658416748 }, { "epoch": 11.618705035971223, "grad_norm": 0.3148472011089325, "learning_rate": 9.271243901627754e-05, "loss": 0.1107, "step": 12920 }, { "action_loss": 0.011723697185516357, "epoch": 11.618705035971223, "step": 12920 }, { "epoch": 11.618705035971223, "step": 12920, "torque_loss": 0.15232382714748383 }, { "epoch": 11.627697841726619, "grad_norm": 0.40876856446266174, "learning_rate": 9.269810619392219e-05, "loss": 0.109, "step": 12930 }, { "action_loss": 0.00807799119502306, "epoch": 11.627697841726619, "step": 12930 }, { "epoch": 11.627697841726619, "step": 12930, "torque_loss": 0.14925016462802887 }, { "epoch": 11.636690647482014, "grad_norm": 0.45151853561401367, "learning_rate": 9.268376040100955e-05, "loss": 0.1309, "step": 12940 }, { "action_loss": 0.03714769706130028, "epoch": 11.636690647482014, "step": 12940 }, { "epoch": 11.636690647482014, "step": 12940, "torque_loss": 0.2569461166858673 }, { "epoch": 11.64568345323741, "grad_norm": 0.37096720933914185, "learning_rate": 9.266940164189752e-05, "loss": 0.1011, "step": 12950 }, { "action_loss": 0.02473945915699005, "epoch": 11.64568345323741, "step": 12950 }, { "epoch": 11.64568345323741, "step": 12950, "torque_loss": 0.21114139258861542 }, { "epoch": 11.654676258992806, "grad_norm": 0.5034787654876709, "learning_rate": 9.265502992094787e-05, "loss": 0.1087, "step": 12960 }, { "action_loss": 0.008163467980921268, "epoch": 11.654676258992806, "step": 12960 }, { "epoch": 11.654676258992806, "step": 12960, "torque_loss": 0.09464794397354126 }, { "epoch": 11.663669064748202, "grad_norm": 0.43404248356819153, "learning_rate": 9.264064524252638e-05, "loss": 0.099, "step": 12970 }, { "action_loss": 0.0054490044713020325, "epoch": 11.663669064748202, "step": 12970 }, { "epoch": 11.663669064748202, "step": 12970, "torque_loss": 0.10242611914873123 }, { "epoch": 11.672661870503598, "grad_norm": 0.4689120054244995, "learning_rate": 9.262624761100271e-05, "loss": 0.1212, "step": 12980 }, { "action_loss": 0.005631657782942057, "epoch": 11.672661870503598, "step": 12980 }, { "epoch": 11.672661870503598, "step": 12980, "torque_loss": 0.11022619158029556 }, { "epoch": 11.681654676258994, "grad_norm": 0.3883650600910187, "learning_rate": 9.261183703075051e-05, "loss": 0.1099, "step": 12990 }, { "action_loss": 0.014119702391326427, "epoch": 11.681654676258994, "step": 12990 }, { "epoch": 11.681654676258994, "step": 12990, "torque_loss": 0.22288958728313446 }, { "epoch": 11.690647482014388, "grad_norm": 0.4912433624267578, "learning_rate": 9.259741350614733e-05, "loss": 0.1085, "step": 13000 }, { "action_loss": 0.010921262204647064, "epoch": 11.690647482014388, "step": 13000 }, { "epoch": 11.690647482014388, "step": 13000, "torque_loss": 0.13160043954849243 }, { "epoch": 11.699640287769784, "grad_norm": 0.4571765065193176, "learning_rate": 9.258297704157464e-05, "loss": 0.1091, "step": 13010 }, { "action_loss": 0.011001679114997387, "epoch": 11.699640287769784, "step": 13010 }, { "epoch": 11.699640287769784, "step": 13010, "torque_loss": 0.13242368400096893 }, { "epoch": 11.70863309352518, "grad_norm": 0.4380281865596771, "learning_rate": 9.256852764141786e-05, "loss": 0.1076, "step": 13020 }, { "action_loss": 0.015060718171298504, "epoch": 11.70863309352518, "step": 13020 }, { "epoch": 11.70863309352518, "step": 13020, "torque_loss": 0.15804831683635712 }, { "epoch": 11.717625899280575, "grad_norm": 0.40683406591415405, "learning_rate": 9.255406531006634e-05, "loss": 0.104, "step": 13030 }, { "action_loss": 0.013033241033554077, "epoch": 11.717625899280575, "step": 13030 }, { "epoch": 11.717625899280575, "step": 13030, "torque_loss": 0.1614794135093689 }, { "epoch": 11.726618705035971, "grad_norm": 0.39107194542884827, "learning_rate": 9.253959005191335e-05, "loss": 0.1229, "step": 13040 }, { "action_loss": 0.008003235794603825, "epoch": 11.726618705035971, "step": 13040 }, { "epoch": 11.726618705035971, "step": 13040, "torque_loss": 0.1297776848077774 }, { "epoch": 11.735611510791367, "grad_norm": 0.33881163597106934, "learning_rate": 9.25251018713561e-05, "loss": 0.128, "step": 13050 }, { "action_loss": 0.01289613451808691, "epoch": 11.735611510791367, "step": 13050 }, { "epoch": 11.735611510791367, "step": 13050, "torque_loss": 0.13932208716869354 }, { "epoch": 11.744604316546763, "grad_norm": 0.3848223090171814, "learning_rate": 9.251060077279571e-05, "loss": 0.1019, "step": 13060 }, { "action_loss": 0.01030165795236826, "epoch": 11.744604316546763, "step": 13060 }, { "epoch": 11.744604316546763, "step": 13060, "torque_loss": 0.1561269909143448 }, { "epoch": 11.753597122302159, "grad_norm": 0.35218220949172974, "learning_rate": 9.249608676063724e-05, "loss": 0.1039, "step": 13070 }, { "action_loss": 0.05885636433959007, "epoch": 11.753597122302159, "step": 13070 }, { "epoch": 11.753597122302159, "step": 13070, "torque_loss": 0.2544872462749481 }, { "epoch": 11.762589928057555, "grad_norm": 0.2985812723636627, "learning_rate": 9.248155983928964e-05, "loss": 0.1045, "step": 13080 }, { "action_loss": 0.010588998906314373, "epoch": 11.762589928057555, "step": 13080 }, { "epoch": 11.762589928057555, "step": 13080, "torque_loss": 0.16224221885204315 }, { "epoch": 11.77158273381295, "grad_norm": 0.45961621403694153, "learning_rate": 9.246702001316583e-05, "loss": 0.1067, "step": 13090 }, { "action_loss": 0.015937460586428642, "epoch": 11.77158273381295, "step": 13090 }, { "epoch": 11.77158273381295, "step": 13090, "torque_loss": 0.2715968191623688 }, { "epoch": 11.780575539568346, "grad_norm": 0.3751336634159088, "learning_rate": 9.245246728668262e-05, "loss": 0.1124, "step": 13100 }, { "action_loss": 0.010064843110740185, "epoch": 11.780575539568346, "step": 13100 }, { "epoch": 11.780575539568346, "step": 13100, "torque_loss": 0.1527451127767563 }, { "epoch": 11.78956834532374, "grad_norm": 0.4811212420463562, "learning_rate": 9.243790166426073e-05, "loss": 0.1219, "step": 13110 }, { "action_loss": 0.01264519989490509, "epoch": 11.78956834532374, "step": 13110 }, { "epoch": 11.78956834532374, "step": 13110, "torque_loss": 0.263843297958374 }, { "epoch": 11.798561151079136, "grad_norm": 0.36064058542251587, "learning_rate": 9.242332315032484e-05, "loss": 0.115, "step": 13120 }, { "action_loss": 0.00492585264146328, "epoch": 11.798561151079136, "step": 13120 }, { "epoch": 11.798561151079136, "step": 13120, "torque_loss": 0.12714914977550507 }, { "epoch": 11.807553956834532, "grad_norm": 0.33608582615852356, "learning_rate": 9.240873174930349e-05, "loss": 0.1195, "step": 13130 }, { "action_loss": 0.01022909302264452, "epoch": 11.807553956834532, "step": 13130 }, { "epoch": 11.807553956834532, "step": 13130, "torque_loss": 0.17195749282836914 }, { "epoch": 11.816546762589928, "grad_norm": 0.46106600761413574, "learning_rate": 9.239412746562917e-05, "loss": 0.1156, "step": 13140 }, { "action_loss": 0.033431436866521835, "epoch": 11.816546762589928, "step": 13140 }, { "epoch": 11.816546762589928, "step": 13140, "torque_loss": 0.23422743380069733 }, { "epoch": 11.825539568345324, "grad_norm": 0.36120936274528503, "learning_rate": 9.237951030373828e-05, "loss": 0.1338, "step": 13150 }, { "action_loss": 0.02830669842660427, "epoch": 11.825539568345324, "step": 13150 }, { "epoch": 11.825539568345324, "step": 13150, "torque_loss": 0.21959693729877472 }, { "epoch": 11.83453237410072, "grad_norm": 0.4969213902950287, "learning_rate": 9.236488026807113e-05, "loss": 0.1201, "step": 13160 }, { "action_loss": 0.006733129266649485, "epoch": 11.83453237410072, "step": 13160 }, { "epoch": 11.83453237410072, "step": 13160, "torque_loss": 0.15205828845500946 }, { "epoch": 11.843525179856115, "grad_norm": 0.38987505435943604, "learning_rate": 9.235023736307193e-05, "loss": 0.1037, "step": 13170 }, { "action_loss": 0.009309175424277782, "epoch": 11.843525179856115, "step": 13170 }, { "epoch": 11.843525179856115, "step": 13170, "torque_loss": 0.1934005469083786 }, { "epoch": 11.852517985611511, "grad_norm": 0.4461324214935303, "learning_rate": 9.233558159318881e-05, "loss": 0.1118, "step": 13180 }, { "action_loss": 0.006781779229640961, "epoch": 11.852517985611511, "step": 13180 }, { "epoch": 11.852517985611511, "step": 13180, "torque_loss": 0.1434466391801834 }, { "epoch": 11.861510791366907, "grad_norm": 0.35636192560195923, "learning_rate": 9.232091296287382e-05, "loss": 0.0986, "step": 13190 }, { "action_loss": 0.00925401784479618, "epoch": 11.861510791366907, "step": 13190 }, { "epoch": 11.861510791366907, "step": 13190, "torque_loss": 0.16820192337036133 }, { "epoch": 11.870503597122303, "grad_norm": 0.4113359749317169, "learning_rate": 9.230623147658288e-05, "loss": 0.1048, "step": 13200 }, { "action_loss": 0.007255423814058304, "epoch": 11.870503597122303, "step": 13200 }, { "epoch": 11.870503597122303, "step": 13200, "torque_loss": 0.1499251425266266 }, { "epoch": 11.879496402877697, "grad_norm": 0.37488916516304016, "learning_rate": 9.229153713877586e-05, "loss": 0.1106, "step": 13210 }, { "action_loss": 0.01770542934536934, "epoch": 11.879496402877697, "step": 13210 }, { "epoch": 11.879496402877697, "step": 13210, "torque_loss": 0.20701496303081512 }, { "epoch": 11.888489208633093, "grad_norm": 0.4186451733112335, "learning_rate": 9.227682995391649e-05, "loss": 0.1232, "step": 13220 }, { "action_loss": 0.01340485643595457, "epoch": 11.888489208633093, "step": 13220 }, { "epoch": 11.888489208633093, "step": 13220, "torque_loss": 0.17191822826862335 }, { "epoch": 11.897482014388489, "grad_norm": 0.41453829407691956, "learning_rate": 9.226210992647243e-05, "loss": 0.108, "step": 13230 }, { "action_loss": 0.00812480691820383, "epoch": 11.897482014388489, "step": 13230 }, { "epoch": 11.897482014388489, "step": 13230, "torque_loss": 0.14346615970134735 }, { "epoch": 11.906474820143885, "grad_norm": 0.3896946609020233, "learning_rate": 9.224737706091525e-05, "loss": 0.0962, "step": 13240 }, { "action_loss": 0.01801156997680664, "epoch": 11.906474820143885, "step": 13240 }, { "epoch": 11.906474820143885, "step": 13240, "torque_loss": 0.18050134181976318 }, { "epoch": 11.91546762589928, "grad_norm": 0.31534990668296814, "learning_rate": 9.223263136172039e-05, "loss": 0.1174, "step": 13250 }, { "action_loss": 0.05346041917800903, "epoch": 11.91546762589928, "step": 13250 }, { "epoch": 11.91546762589928, "step": 13250, "torque_loss": 0.3176436722278595 }, { "epoch": 11.924460431654676, "grad_norm": 0.36567234992980957, "learning_rate": 9.22178728333672e-05, "loss": 0.1184, "step": 13260 }, { "action_loss": 0.011561363935470581, "epoch": 11.924460431654676, "step": 13260 }, { "epoch": 11.924460431654676, "step": 13260, "torque_loss": 0.14012835919857025 }, { "epoch": 11.933453237410072, "grad_norm": 0.424947053194046, "learning_rate": 9.220310148033897e-05, "loss": 0.1142, "step": 13270 }, { "action_loss": 0.010669194161891937, "epoch": 11.933453237410072, "step": 13270 }, { "epoch": 11.933453237410072, "step": 13270, "torque_loss": 0.14899586141109467 }, { "epoch": 11.942446043165468, "grad_norm": 0.602649450302124, "learning_rate": 9.21883173071228e-05, "loss": 0.1181, "step": 13280 }, { "action_loss": 0.008407934568822384, "epoch": 11.942446043165468, "step": 13280 }, { "epoch": 11.942446043165468, "step": 13280, "torque_loss": 0.12711377441883087 }, { "epoch": 11.951438848920864, "grad_norm": 0.3854684829711914, "learning_rate": 9.217352031820976e-05, "loss": 0.0953, "step": 13290 }, { "action_loss": 0.024353429675102234, "epoch": 11.951438848920864, "step": 13290 }, { "epoch": 11.951438848920864, "step": 13290, "torque_loss": 0.1851019412279129 }, { "epoch": 11.96043165467626, "grad_norm": 0.32448631525039673, "learning_rate": 9.215871051809477e-05, "loss": 0.1058, "step": 13300 }, { "action_loss": 0.004233511630445719, "epoch": 11.96043165467626, "step": 13300 }, { "epoch": 11.96043165467626, "step": 13300, "torque_loss": 0.09265085309743881 }, { "epoch": 11.969424460431654, "grad_norm": 0.4133296310901642, "learning_rate": 9.214388791127666e-05, "loss": 0.1082, "step": 13310 }, { "action_loss": 0.01821417175233364, "epoch": 11.969424460431654, "step": 13310 }, { "epoch": 11.969424460431654, "step": 13310, "torque_loss": 0.2308603972196579 }, { "epoch": 11.97841726618705, "grad_norm": 0.43141302466392517, "learning_rate": 9.212905250225814e-05, "loss": 0.1139, "step": 13320 }, { "action_loss": 0.009147741831839085, "epoch": 11.97841726618705, "step": 13320 }, { "epoch": 11.97841726618705, "step": 13320, "torque_loss": 0.19793976843357086 }, { "epoch": 11.987410071942445, "grad_norm": 0.40775689482688904, "learning_rate": 9.211420429554583e-05, "loss": 0.1053, "step": 13330 }, { "action_loss": 0.004075754899531603, "epoch": 11.987410071942445, "step": 13330 }, { "epoch": 11.987410071942445, "step": 13330, "torque_loss": 0.16331888735294342 }, { "epoch": 11.996402877697841, "grad_norm": 0.4662085175514221, "learning_rate": 9.209934329565022e-05, "loss": 0.1046, "step": 13340 }, { "action_loss": 0.004639522638171911, "epoch": 11.996402877697841, "step": 13340 }, { "epoch": 11.996402877697841, "step": 13340, "torque_loss": 0.1220267042517662 }, { "epoch": 12.005395683453237, "grad_norm": 0.5066155791282654, "learning_rate": 9.208446950708568e-05, "loss": 0.1185, "step": 13350 }, { "action_loss": 0.03159011900424957, "epoch": 12.005395683453237, "step": 13350 }, { "epoch": 12.005395683453237, "step": 13350, "torque_loss": 0.2801496982574463 }, { "epoch": 12.014388489208633, "grad_norm": 0.3473855257034302, "learning_rate": 9.20695829343705e-05, "loss": 0.112, "step": 13360 }, { "action_loss": 0.004833701532334089, "epoch": 12.014388489208633, "step": 13360 }, { "epoch": 12.014388489208633, "step": 13360, "torque_loss": 0.11269679665565491 }, { "epoch": 12.023381294964029, "grad_norm": 0.4006245732307434, "learning_rate": 9.205468358202678e-05, "loss": 0.1002, "step": 13370 }, { "action_loss": 0.009451953694224358, "epoch": 12.023381294964029, "step": 13370 }, { "epoch": 12.023381294964029, "step": 13370, "torque_loss": 0.20983581244945526 }, { "epoch": 12.032374100719425, "grad_norm": 0.4443390667438507, "learning_rate": 9.203977145458059e-05, "loss": 0.1193, "step": 13380 }, { "action_loss": 0.010707306675612926, "epoch": 12.032374100719425, "step": 13380 }, { "epoch": 12.032374100719425, "step": 13380, "torque_loss": 0.17431731522083282 }, { "epoch": 12.04136690647482, "grad_norm": 0.3956846296787262, "learning_rate": 9.202484655656182e-05, "loss": 0.1086, "step": 13390 }, { "action_loss": 0.01642967201769352, "epoch": 12.04136690647482, "step": 13390 }, { "epoch": 12.04136690647482, "step": 13390, "torque_loss": 0.1930420994758606 }, { "epoch": 12.050359712230216, "grad_norm": 0.45187908411026, "learning_rate": 9.200990889250427e-05, "loss": 0.1131, "step": 13400 }, { "action_loss": 0.006866173353046179, "epoch": 12.050359712230216, "step": 13400 }, { "epoch": 12.050359712230216, "step": 13400, "torque_loss": 0.11307547241449356 }, { "epoch": 12.059352517985612, "grad_norm": 0.38341575860977173, "learning_rate": 9.19949584669456e-05, "loss": 0.1005, "step": 13410 }, { "action_loss": 0.004947460722178221, "epoch": 12.059352517985612, "step": 13410 }, { "epoch": 12.059352517985612, "step": 13410, "torque_loss": 0.08927647024393082 }, { "epoch": 12.068345323741006, "grad_norm": 0.41467899084091187, "learning_rate": 9.197999528442738e-05, "loss": 0.1146, "step": 13420 }, { "action_loss": 0.01249802578240633, "epoch": 12.068345323741006, "step": 13420 }, { "epoch": 12.068345323741006, "step": 13420, "torque_loss": 0.18192319571971893 }, { "epoch": 12.077338129496402, "grad_norm": 0.37632372975349426, "learning_rate": 9.196501934949499e-05, "loss": 0.1012, "step": 13430 }, { "action_loss": 0.005015449598431587, "epoch": 12.077338129496402, "step": 13430 }, { "epoch": 12.077338129496402, "step": 13430, "torque_loss": 0.08849620074033737 }, { "epoch": 12.086330935251798, "grad_norm": 0.4869759976863861, "learning_rate": 9.195003066669776e-05, "loss": 0.111, "step": 13440 }, { "action_loss": 0.00786516536027193, "epoch": 12.086330935251798, "step": 13440 }, { "epoch": 12.086330935251798, "step": 13440, "torque_loss": 0.12679223716259003 }, { "epoch": 12.095323741007194, "grad_norm": 0.44385889172554016, "learning_rate": 9.193502924058884e-05, "loss": 0.1003, "step": 13450 }, { "action_loss": 0.007155591156333685, "epoch": 12.095323741007194, "step": 13450 }, { "epoch": 12.095323741007194, "step": 13450, "torque_loss": 0.14914992451667786 }, { "epoch": 12.10431654676259, "grad_norm": 0.41840100288391113, "learning_rate": 9.192001507572526e-05, "loss": 0.1198, "step": 13460 }, { "action_loss": 0.011995404958724976, "epoch": 12.10431654676259, "step": 13460 }, { "epoch": 12.10431654676259, "step": 13460, "torque_loss": 0.1783633977174759 }, { "epoch": 12.113309352517986, "grad_norm": 0.45512405037879944, "learning_rate": 9.190498817666793e-05, "loss": 0.1247, "step": 13470 }, { "action_loss": 0.02753998152911663, "epoch": 12.113309352517986, "step": 13470 }, { "epoch": 12.113309352517986, "step": 13470, "torque_loss": 0.1522333174943924 }, { "epoch": 12.122302158273381, "grad_norm": 0.33539631962776184, "learning_rate": 9.188994854798163e-05, "loss": 0.1033, "step": 13480 }, { "action_loss": 0.008416508324444294, "epoch": 12.122302158273381, "step": 13480 }, { "epoch": 12.122302158273381, "step": 13480, "torque_loss": 0.11012550443410873 }, { "epoch": 12.131294964028777, "grad_norm": 0.4018109440803528, "learning_rate": 9.187489619423499e-05, "loss": 0.1026, "step": 13490 }, { "action_loss": 0.00604885583743453, "epoch": 12.131294964028777, "step": 13490 }, { "epoch": 12.131294964028777, "step": 13490, "torque_loss": 0.20811688899993896 }, { "epoch": 12.140287769784173, "grad_norm": 0.40109366178512573, "learning_rate": 9.185983112000056e-05, "loss": 0.1032, "step": 13500 }, { "action_loss": 0.005584574770182371, "epoch": 12.140287769784173, "step": 13500 }, { "epoch": 12.140287769784173, "step": 13500, "torque_loss": 0.1187206506729126 }, { "epoch": 12.149280575539569, "grad_norm": 0.3770250082015991, "learning_rate": 9.184475332985464e-05, "loss": 0.0936, "step": 13510 }, { "action_loss": 0.009071278385818005, "epoch": 12.149280575539569, "step": 13510 }, { "epoch": 12.149280575539569, "step": 13510, "torque_loss": 0.13773350417613983 }, { "epoch": 12.158273381294965, "grad_norm": 0.5062207579612732, "learning_rate": 9.182966282837754e-05, "loss": 0.1073, "step": 13520 }, { "action_loss": 0.005589688662439585, "epoch": 12.158273381294965, "step": 13520 }, { "epoch": 12.158273381294965, "step": 13520, "torque_loss": 0.09653843194246292 }, { "epoch": 12.167266187050359, "grad_norm": 0.26466891169548035, "learning_rate": 9.18145596201533e-05, "loss": 0.0874, "step": 13530 }, { "action_loss": 0.010893593542277813, "epoch": 12.167266187050359, "step": 13530 }, { "epoch": 12.167266187050359, "step": 13530, "torque_loss": 0.14229877293109894 }, { "epoch": 12.176258992805755, "grad_norm": 0.3825680613517761, "learning_rate": 9.179944370976991e-05, "loss": 0.1002, "step": 13540 }, { "action_loss": 0.010137076489627361, "epoch": 12.176258992805755, "step": 13540 }, { "epoch": 12.176258992805755, "step": 13540, "torque_loss": 0.12406217306852341 }, { "epoch": 12.18525179856115, "grad_norm": 0.44355759024620056, "learning_rate": 9.178431510181918e-05, "loss": 0.096, "step": 13550 }, { "action_loss": 0.007061606738716364, "epoch": 12.18525179856115, "step": 13550 }, { "epoch": 12.18525179856115, "step": 13550, "torque_loss": 0.09511342644691467 }, { "epoch": 12.194244604316546, "grad_norm": 0.5274161696434021, "learning_rate": 9.176917380089675e-05, "loss": 0.113, "step": 13560 }, { "action_loss": 0.006521039176732302, "epoch": 12.194244604316546, "step": 13560 }, { "epoch": 12.194244604316546, "step": 13560, "torque_loss": 0.12964092195034027 }, { "epoch": 12.203237410071942, "grad_norm": 0.43157875537872314, "learning_rate": 9.175401981160219e-05, "loss": 0.1138, "step": 13570 }, { "action_loss": 0.0046125841327011585, "epoch": 12.203237410071942, "step": 13570 }, { "epoch": 12.203237410071942, "step": 13570, "torque_loss": 0.12993855774402618 }, { "epoch": 12.212230215827338, "grad_norm": 0.4191450774669647, "learning_rate": 9.173885313853885e-05, "loss": 0.1038, "step": 13580 }, { "action_loss": 0.007361177355051041, "epoch": 12.212230215827338, "step": 13580 }, { "epoch": 12.212230215827338, "step": 13580, "torque_loss": 0.1550344079732895 }, { "epoch": 12.221223021582734, "grad_norm": 0.3998585343360901, "learning_rate": 9.172367378631398e-05, "loss": 0.1195, "step": 13590 }, { "action_loss": 0.008687102235853672, "epoch": 12.221223021582734, "step": 13590 }, { "epoch": 12.221223021582734, "step": 13590, "torque_loss": 0.15718941390514374 }, { "epoch": 12.23021582733813, "grad_norm": 0.4136744737625122, "learning_rate": 9.170848175953866e-05, "loss": 0.1275, "step": 13600 }, { "action_loss": 0.005638373550027609, "epoch": 12.23021582733813, "step": 13600 }, { "epoch": 12.23021582733813, "step": 13600, "torque_loss": 0.0863790512084961 }, { "epoch": 12.239208633093526, "grad_norm": 0.4197591245174408, "learning_rate": 9.169327706282784e-05, "loss": 0.1006, "step": 13610 }, { "action_loss": 0.016453469172120094, "epoch": 12.239208633093526, "step": 13610 }, { "epoch": 12.239208633093526, "step": 13610, "torque_loss": 0.10407397896051407 }, { "epoch": 12.248201438848922, "grad_norm": 0.35983118414878845, "learning_rate": 9.167805970080029e-05, "loss": 0.0924, "step": 13620 }, { "action_loss": 0.014325063675642014, "epoch": 12.248201438848922, "step": 13620 }, { "epoch": 12.248201438848922, "step": 13620, "torque_loss": 0.16014626622200012 }, { "epoch": 12.257194244604317, "grad_norm": 0.4464864730834961, "learning_rate": 9.166282967807864e-05, "loss": 0.0964, "step": 13630 }, { "action_loss": 0.008398124016821384, "epoch": 12.257194244604317, "step": 13630 }, { "epoch": 12.257194244604317, "step": 13630, "torque_loss": 0.14030537009239197 }, { "epoch": 12.266187050359711, "grad_norm": 0.39826732873916626, "learning_rate": 9.16475869992894e-05, "loss": 0.0963, "step": 13640 }, { "action_loss": 0.01236219983547926, "epoch": 12.266187050359711, "step": 13640 }, { "epoch": 12.266187050359711, "step": 13640, "torque_loss": 0.13732625544071198 }, { "epoch": 12.275179856115107, "grad_norm": 0.4519980251789093, "learning_rate": 9.163233166906284e-05, "loss": 0.0883, "step": 13650 }, { "action_loss": 0.005311809480190277, "epoch": 12.275179856115107, "step": 13650 }, { "epoch": 12.275179856115107, "step": 13650, "torque_loss": 0.12961183488368988 }, { "epoch": 12.284172661870503, "grad_norm": 0.4518680274486542, "learning_rate": 9.161706369203317e-05, "loss": 0.1064, "step": 13660 }, { "action_loss": 0.007456807419657707, "epoch": 12.284172661870503, "step": 13660 }, { "epoch": 12.284172661870503, "step": 13660, "torque_loss": 0.11728910356760025 }, { "epoch": 12.293165467625899, "grad_norm": 0.32101282477378845, "learning_rate": 9.16017830728384e-05, "loss": 0.0851, "step": 13670 }, { "action_loss": 0.0030245843809098005, "epoch": 12.293165467625899, "step": 13670 }, { "epoch": 12.293165467625899, "step": 13670, "torque_loss": 0.1021738275885582 }, { "epoch": 12.302158273381295, "grad_norm": 0.30741551518440247, "learning_rate": 9.158648981612035e-05, "loss": 0.1038, "step": 13680 }, { "action_loss": 0.007121507078409195, "epoch": 12.302158273381295, "step": 13680 }, { "epoch": 12.302158273381295, "step": 13680, "torque_loss": 0.11723025888204575 }, { "epoch": 12.31115107913669, "grad_norm": 0.3379853665828705, "learning_rate": 9.157118392652472e-05, "loss": 0.1025, "step": 13690 }, { "action_loss": 0.01275882963091135, "epoch": 12.31115107913669, "step": 13690 }, { "epoch": 12.31115107913669, "step": 13690, "torque_loss": 0.17723500728607178 }, { "epoch": 12.320143884892087, "grad_norm": 0.3156889081001282, "learning_rate": 9.155586540870104e-05, "loss": 0.0962, "step": 13700 }, { "action_loss": 0.008435900323092937, "epoch": 12.320143884892087, "step": 13700 }, { "epoch": 12.320143884892087, "step": 13700, "torque_loss": 0.20227597653865814 }, { "epoch": 12.329136690647482, "grad_norm": 0.38922879099845886, "learning_rate": 9.154053426730267e-05, "loss": 0.119, "step": 13710 }, { "action_loss": 0.006492586340755224, "epoch": 12.329136690647482, "step": 13710 }, { "epoch": 12.329136690647482, "step": 13710, "torque_loss": 0.12665657699108124 }, { "epoch": 12.338129496402878, "grad_norm": 0.3842127323150635, "learning_rate": 9.15251905069868e-05, "loss": 0.1132, "step": 13720 }, { "action_loss": 0.01365450769662857, "epoch": 12.338129496402878, "step": 13720 }, { "epoch": 12.338129496402878, "step": 13720, "torque_loss": 0.15628018975257874 }, { "epoch": 12.347122302158274, "grad_norm": 0.34049561619758606, "learning_rate": 9.150983413241446e-05, "loss": 0.106, "step": 13730 }, { "action_loss": 0.011676195077598095, "epoch": 12.347122302158274, "step": 13730 }, { "epoch": 12.347122302158274, "step": 13730, "torque_loss": 0.19686593115329742 }, { "epoch": 12.35611510791367, "grad_norm": 0.36406445503234863, "learning_rate": 9.149446514825051e-05, "loss": 0.1135, "step": 13740 }, { "action_loss": 0.0050596497021615505, "epoch": 12.35611510791367, "step": 13740 }, { "epoch": 12.35611510791367, "step": 13740, "torque_loss": 0.10782036930322647 }, { "epoch": 12.365107913669064, "grad_norm": 0.4768945872783661, "learning_rate": 9.147908355916365e-05, "loss": 0.1089, "step": 13750 }, { "action_loss": 0.030362049117684364, "epoch": 12.365107913669064, "step": 13750 }, { "epoch": 12.365107913669064, "step": 13750, "torque_loss": 0.26576361060142517 }, { "epoch": 12.37410071942446, "grad_norm": 0.34296873211860657, "learning_rate": 9.146368936982642e-05, "loss": 0.1268, "step": 13760 }, { "action_loss": 0.01621066965162754, "epoch": 12.37410071942446, "step": 13760 }, { "epoch": 12.37410071942446, "step": 13760, "torque_loss": 0.1846676617860794 }, { "epoch": 12.383093525179856, "grad_norm": 0.3790304362773895, "learning_rate": 9.144828258491511e-05, "loss": 0.1141, "step": 13770 }, { "action_loss": 0.01917366124689579, "epoch": 12.383093525179856, "step": 13770 }, { "epoch": 12.383093525179856, "step": 13770, "torque_loss": 0.17616307735443115 }, { "epoch": 12.392086330935252, "grad_norm": 0.30347830057144165, "learning_rate": 9.143286320910996e-05, "loss": 0.1088, "step": 13780 }, { "action_loss": 0.010384579189121723, "epoch": 12.392086330935252, "step": 13780 }, { "epoch": 12.392086330935252, "step": 13780, "torque_loss": 0.1691257804632187 }, { "epoch": 12.401079136690647, "grad_norm": 0.4326944947242737, "learning_rate": 9.141743124709491e-05, "loss": 0.1055, "step": 13790 }, { "action_loss": 0.01159775722771883, "epoch": 12.401079136690647, "step": 13790 }, { "epoch": 12.401079136690647, "step": 13790, "torque_loss": 0.15483635663986206 }, { "epoch": 12.410071942446043, "grad_norm": 0.35701784491539, "learning_rate": 9.140198670355784e-05, "loss": 0.1067, "step": 13800 }, { "action_loss": 0.016893818974494934, "epoch": 12.410071942446043, "step": 13800 }, { "epoch": 12.410071942446043, "step": 13800, "torque_loss": 0.17725062370300293 }, { "epoch": 12.41906474820144, "grad_norm": 0.45055660605430603, "learning_rate": 9.138652958319034e-05, "loss": 0.1232, "step": 13810 }, { "action_loss": 0.0034735326189547777, "epoch": 12.41906474820144, "step": 13810 }, { "epoch": 12.41906474820144, "step": 13810, "torque_loss": 0.07723396271467209 }, { "epoch": 12.428057553956835, "grad_norm": 0.424854576587677, "learning_rate": 9.137105989068791e-05, "loss": 0.0982, "step": 13820 }, { "action_loss": 0.011551124043762684, "epoch": 12.428057553956835, "step": 13820 }, { "epoch": 12.428057553956835, "step": 13820, "torque_loss": 0.13590838015079498 }, { "epoch": 12.43705035971223, "grad_norm": 0.45835086703300476, "learning_rate": 9.135557763074983e-05, "loss": 0.1239, "step": 13830 }, { "action_loss": 0.010770599357783794, "epoch": 12.43705035971223, "step": 13830 }, { "epoch": 12.43705035971223, "step": 13830, "torque_loss": 0.09761040657758713 }, { "epoch": 12.446043165467627, "grad_norm": 0.4234763979911804, "learning_rate": 9.13400828080792e-05, "loss": 0.1185, "step": 13840 }, { "action_loss": 0.0034988902043551207, "epoch": 12.446043165467627, "step": 13840 }, { "epoch": 12.446043165467627, "step": 13840, "torque_loss": 0.10685151070356369 }, { "epoch": 12.45503597122302, "grad_norm": 0.42096906900405884, "learning_rate": 9.132457542738292e-05, "loss": 0.112, "step": 13850 }, { "action_loss": 0.007995285093784332, "epoch": 12.45503597122302, "step": 13850 }, { "epoch": 12.45503597122302, "step": 13850, "torque_loss": 0.15573103725910187 }, { "epoch": 12.464028776978417, "grad_norm": 0.3045656681060791, "learning_rate": 9.130905549337174e-05, "loss": 0.0949, "step": 13860 }, { "action_loss": 0.018871570006012917, "epoch": 12.464028776978417, "step": 13860 }, { "epoch": 12.464028776978417, "step": 13860, "torque_loss": 0.20040583610534668 }, { "epoch": 12.473021582733812, "grad_norm": 0.4004391133785248, "learning_rate": 9.129352301076021e-05, "loss": 0.1078, "step": 13870 }, { "action_loss": 0.012521225959062576, "epoch": 12.473021582733812, "step": 13870 }, { "epoch": 12.473021582733812, "step": 13870, "torque_loss": 0.13685722649097443 }, { "epoch": 12.482014388489208, "grad_norm": 0.41746994853019714, "learning_rate": 9.127797798426668e-05, "loss": 0.105, "step": 13880 }, { "action_loss": 0.04758928343653679, "epoch": 12.482014388489208, "step": 13880 }, { "epoch": 12.482014388489208, "step": 13880, "torque_loss": 0.22075168788433075 }, { "epoch": 12.491007194244604, "grad_norm": 0.4285392165184021, "learning_rate": 9.126242041861333e-05, "loss": 0.1204, "step": 13890 }, { "action_loss": 0.007161339744925499, "epoch": 12.491007194244604, "step": 13890 }, { "epoch": 12.491007194244604, "step": 13890, "torque_loss": 0.15356218814849854 }, { "epoch": 12.5, "grad_norm": 0.30466026067733765, "learning_rate": 9.124685031852611e-05, "loss": 0.1179, "step": 13900 }, { "action_loss": 0.00781466905027628, "epoch": 12.5, "step": 13900 }, { "epoch": 12.5, "step": 13900, "torque_loss": 0.15983699262142181 }, { "epoch": 12.508992805755396, "grad_norm": 0.4275083541870117, "learning_rate": 9.123126768873482e-05, "loss": 0.1061, "step": 13910 }, { "action_loss": 0.010210486128926277, "epoch": 12.508992805755396, "step": 13910 }, { "epoch": 12.508992805755396, "step": 13910, "torque_loss": 0.12797068059444427 }, { "epoch": 12.517985611510792, "grad_norm": 0.48988449573516846, "learning_rate": 9.121567253397308e-05, "loss": 0.1131, "step": 13920 }, { "action_loss": 0.005685979034751654, "epoch": 12.517985611510792, "step": 13920 }, { "epoch": 12.517985611510792, "step": 13920, "torque_loss": 0.11434347182512283 }, { "epoch": 12.526978417266188, "grad_norm": 0.3298438489437103, "learning_rate": 9.120006485897824e-05, "loss": 0.0891, "step": 13930 }, { "action_loss": 0.004828887991607189, "epoch": 12.526978417266188, "step": 13930 }, { "epoch": 12.526978417266188, "step": 13930, "torque_loss": 0.08533108234405518 }, { "epoch": 12.535971223021583, "grad_norm": 0.4211437404155731, "learning_rate": 9.118444466849152e-05, "loss": 0.109, "step": 13940 }, { "action_loss": 0.016402969136834145, "epoch": 12.535971223021583, "step": 13940 }, { "epoch": 12.535971223021583, "step": 13940, "torque_loss": 0.19601745903491974 }, { "epoch": 12.54496402877698, "grad_norm": 0.3691006898880005, "learning_rate": 9.116881196725793e-05, "loss": 0.1073, "step": 13950 }, { "action_loss": 0.005944435950368643, "epoch": 12.54496402877698, "step": 13950 }, { "epoch": 12.54496402877698, "step": 13950, "torque_loss": 0.14756914973258972 }, { "epoch": 12.553956834532373, "grad_norm": 0.3652347922325134, "learning_rate": 9.115316676002627e-05, "loss": 0.1048, "step": 13960 }, { "action_loss": 0.013061843812465668, "epoch": 12.553956834532373, "step": 13960 }, { "epoch": 12.553956834532373, "step": 13960, "torque_loss": 0.189495250582695 }, { "epoch": 12.56294964028777, "grad_norm": 0.2989960312843323, "learning_rate": 9.113750905154911e-05, "loss": 0.1129, "step": 13970 }, { "action_loss": 0.006243010982871056, "epoch": 12.56294964028777, "step": 13970 }, { "epoch": 12.56294964028777, "step": 13970, "torque_loss": 0.1582658737897873 }, { "epoch": 12.571942446043165, "grad_norm": 0.31469887495040894, "learning_rate": 9.112183884658289e-05, "loss": 0.1057, "step": 13980 }, { "action_loss": 0.006454195827245712, "epoch": 12.571942446043165, "step": 13980 }, { "epoch": 12.571942446043165, "step": 13980, "torque_loss": 0.19796304404735565 }, { "epoch": 12.58093525179856, "grad_norm": 0.3647690415382385, "learning_rate": 9.11061561498878e-05, "loss": 0.0958, "step": 13990 }, { "action_loss": 0.016766507178544998, "epoch": 12.58093525179856, "step": 13990 }, { "epoch": 12.58093525179856, "step": 13990, "torque_loss": 0.20852820575237274 }, { "epoch": 12.589928057553957, "grad_norm": 0.43522903323173523, "learning_rate": 9.109046096622779e-05, "loss": 0.115, "step": 14000 }, { "action_loss": 0.010585474781692028, "epoch": 12.589928057553957, "step": 14000 }, { "epoch": 12.589928057553957, "step": 14000, "torque_loss": 0.18030387163162231 }, { "epoch": 12.598920863309353, "grad_norm": 0.3813692331314087, "learning_rate": 9.107475330037069e-05, "loss": 0.1081, "step": 14010 }, { "action_loss": 0.013573809526860714, "epoch": 12.598920863309353, "step": 14010 }, { "epoch": 12.598920863309353, "step": 14010, "torque_loss": 0.18007366359233856 }, { "epoch": 12.607913669064748, "grad_norm": 0.44029632210731506, "learning_rate": 9.105903315708806e-05, "loss": 0.1105, "step": 14020 }, { "action_loss": 0.0051213838160037994, "epoch": 12.607913669064748, "step": 14020 }, { "epoch": 12.607913669064748, "step": 14020, "torque_loss": 0.12385204434394836 }, { "epoch": 12.616906474820144, "grad_norm": 0.2927245795726776, "learning_rate": 9.104330054115524e-05, "loss": 0.0977, "step": 14030 }, { "action_loss": 0.0073269749991595745, "epoch": 12.616906474820144, "step": 14030 }, { "epoch": 12.616906474820144, "step": 14030, "torque_loss": 0.12850256264209747 }, { "epoch": 12.62589928057554, "grad_norm": 0.297442764043808, "learning_rate": 9.102755545735141e-05, "loss": 0.0928, "step": 14040 }, { "action_loss": 0.01463345531374216, "epoch": 12.62589928057554, "step": 14040 }, { "epoch": 12.62589928057554, "step": 14040, "torque_loss": 0.11287399381399155 }, { "epoch": 12.634892086330936, "grad_norm": 0.45556074380874634, "learning_rate": 9.10117979104595e-05, "loss": 0.1111, "step": 14050 }, { "action_loss": 0.009838846512138844, "epoch": 12.634892086330936, "step": 14050 }, { "epoch": 12.634892086330936, "step": 14050, "torque_loss": 0.1606372743844986 }, { "epoch": 12.64388489208633, "grad_norm": 0.35406389832496643, "learning_rate": 9.099602790526624e-05, "loss": 0.1187, "step": 14060 }, { "action_loss": 0.009599012322723866, "epoch": 12.64388489208633, "step": 14060 }, { "epoch": 12.64388489208633, "step": 14060, "torque_loss": 0.13188011944293976 }, { "epoch": 12.652877697841726, "grad_norm": 0.521521806716919, "learning_rate": 9.098024544656212e-05, "loss": 0.0948, "step": 14070 }, { "action_loss": 0.00755680026486516, "epoch": 12.652877697841726, "step": 14070 }, { "epoch": 12.652877697841726, "step": 14070, "torque_loss": 0.091680146753788 }, { "epoch": 12.661870503597122, "grad_norm": 0.33781930804252625, "learning_rate": 9.096445053914148e-05, "loss": 0.117, "step": 14080 }, { "action_loss": 0.005317926872521639, "epoch": 12.661870503597122, "step": 14080 }, { "epoch": 12.661870503597122, "step": 14080, "torque_loss": 0.10184154659509659 }, { "epoch": 12.670863309352518, "grad_norm": 0.3563917279243469, "learning_rate": 9.094864318780236e-05, "loss": 0.1033, "step": 14090 }, { "action_loss": 0.06357032805681229, "epoch": 12.670863309352518, "step": 14090 }, { "epoch": 12.670863309352518, "step": 14090, "torque_loss": 0.24274718761444092 }, { "epoch": 12.679856115107913, "grad_norm": 0.4391920268535614, "learning_rate": 9.093282339734663e-05, "loss": 0.1279, "step": 14100 }, { "action_loss": 0.007325336802750826, "epoch": 12.679856115107913, "step": 14100 }, { "epoch": 12.679856115107913, "step": 14100, "torque_loss": 0.1385778933763504 }, { "epoch": 12.68884892086331, "grad_norm": 0.35689982771873474, "learning_rate": 9.091699117257992e-05, "loss": 0.0975, "step": 14110 }, { "action_loss": 0.00585678406059742, "epoch": 12.68884892086331, "step": 14110 }, { "epoch": 12.68884892086331, "step": 14110, "torque_loss": 0.2116440385580063 }, { "epoch": 12.697841726618705, "grad_norm": 0.37784355878829956, "learning_rate": 9.090114651831163e-05, "loss": 0.1069, "step": 14120 }, { "action_loss": 0.037030380219221115, "epoch": 12.697841726618705, "step": 14120 }, { "epoch": 12.697841726618705, "step": 14120, "torque_loss": 0.23420560359954834 }, { "epoch": 12.706834532374101, "grad_norm": 0.5456299185752869, "learning_rate": 9.088528943935497e-05, "loss": 0.1218, "step": 14130 }, { "action_loss": 0.008900395594537258, "epoch": 12.706834532374101, "step": 14130 }, { "epoch": 12.706834532374101, "step": 14130, "torque_loss": 0.1915833204984665 }, { "epoch": 12.715827338129497, "grad_norm": 0.31296631693840027, "learning_rate": 9.086941994052689e-05, "loss": 0.0959, "step": 14140 }, { "action_loss": 0.006943372543901205, "epoch": 12.715827338129497, "step": 14140 }, { "epoch": 12.715827338129497, "step": 14140, "torque_loss": 0.18336112797260284 }, { "epoch": 12.724820143884893, "grad_norm": 0.3012479841709137, "learning_rate": 9.085353802664813e-05, "loss": 0.0987, "step": 14150 }, { "action_loss": 0.011925623752176762, "epoch": 12.724820143884893, "step": 14150 }, { "epoch": 12.724820143884893, "step": 14150, "torque_loss": 0.1589939147233963 }, { "epoch": 12.733812949640289, "grad_norm": 0.34559521079063416, "learning_rate": 9.08376437025432e-05, "loss": 0.0994, "step": 14160 }, { "action_loss": 0.008967154659330845, "epoch": 12.733812949640289, "step": 14160 }, { "epoch": 12.733812949640289, "step": 14160, "torque_loss": 0.10832832008600235 }, { "epoch": 12.742805755395683, "grad_norm": 0.3575417399406433, "learning_rate": 9.082173697304035e-05, "loss": 0.1061, "step": 14170 }, { "action_loss": 0.011344097554683685, "epoch": 12.742805755395683, "step": 14170 }, { "epoch": 12.742805755395683, "step": 14170, "torque_loss": 0.1587107628583908 }, { "epoch": 12.751798561151078, "grad_norm": 0.4510779082775116, "learning_rate": 9.080581784297166e-05, "loss": 0.097, "step": 14180 }, { "action_loss": 0.030903711915016174, "epoch": 12.751798561151078, "step": 14180 }, { "epoch": 12.751798561151078, "step": 14180, "torque_loss": 0.2451292723417282 }, { "epoch": 12.760791366906474, "grad_norm": 0.3908237814903259, "learning_rate": 9.078988631717291e-05, "loss": 0.1093, "step": 14190 }, { "action_loss": 0.006999563425779343, "epoch": 12.760791366906474, "step": 14190 }, { "epoch": 12.760791366906474, "step": 14190, "torque_loss": 0.11780768632888794 }, { "epoch": 12.76978417266187, "grad_norm": 0.5284469127655029, "learning_rate": 9.077394240048369e-05, "loss": 0.1071, "step": 14200 }, { "action_loss": 0.0061003281734883785, "epoch": 12.76978417266187, "step": 14200 }, { "epoch": 12.76978417266187, "step": 14200, "torque_loss": 0.1307838261127472 }, { "epoch": 12.778776978417266, "grad_norm": 0.3542144000530243, "learning_rate": 9.075798609774736e-05, "loss": 0.106, "step": 14210 }, { "action_loss": 0.0031883178744465113, "epoch": 12.778776978417266, "step": 14210 }, { "epoch": 12.778776978417266, "step": 14210, "torque_loss": 0.12591728568077087 }, { "epoch": 12.787769784172662, "grad_norm": 0.4110490381717682, "learning_rate": 9.0742017413811e-05, "loss": 0.1056, "step": 14220 }, { "action_loss": 0.010366720147430897, "epoch": 12.787769784172662, "step": 14220 }, { "epoch": 12.787769784172662, "step": 14220, "torque_loss": 0.17762111127376556 }, { "epoch": 12.796762589928058, "grad_norm": 0.3216153085231781, "learning_rate": 9.072603635352548e-05, "loss": 0.1016, "step": 14230 }, { "action_loss": 0.006993390619754791, "epoch": 12.796762589928058, "step": 14230 }, { "epoch": 12.796762589928058, "step": 14230, "torque_loss": 0.113095723092556 }, { "epoch": 12.805755395683454, "grad_norm": 0.4541802704334259, "learning_rate": 9.071004292174541e-05, "loss": 0.1154, "step": 14240 }, { "action_loss": 0.010651317425072193, "epoch": 12.805755395683454, "step": 14240 }, { "epoch": 12.805755395683454, "step": 14240, "torque_loss": 0.1441020965576172 }, { "epoch": 12.81474820143885, "grad_norm": 0.3201122581958771, "learning_rate": 9.06940371233292e-05, "loss": 0.0967, "step": 14250 }, { "action_loss": 0.0077957105822861195, "epoch": 12.81474820143885, "step": 14250 }, { "epoch": 12.81474820143885, "step": 14250, "torque_loss": 0.12375827878713608 }, { "epoch": 12.823741007194245, "grad_norm": 0.342883825302124, "learning_rate": 9.067801896313898e-05, "loss": 0.109, "step": 14260 }, { "action_loss": 0.005485020577907562, "epoch": 12.823741007194245, "step": 14260 }, { "epoch": 12.823741007194245, "step": 14260, "torque_loss": 0.10162317752838135 }, { "epoch": 12.832733812949641, "grad_norm": 0.2864418029785156, "learning_rate": 9.066198844604064e-05, "loss": 0.104, "step": 14270 }, { "action_loss": 0.008682268671691418, "epoch": 12.832733812949641, "step": 14270 }, { "epoch": 12.832733812949641, "step": 14270, "torque_loss": 0.11859146505594254 }, { "epoch": 12.841726618705035, "grad_norm": 0.3803676962852478, "learning_rate": 9.06459455769038e-05, "loss": 0.1166, "step": 14280 }, { "action_loss": 0.013613875024020672, "epoch": 12.841726618705035, "step": 14280 }, { "epoch": 12.841726618705035, "step": 14280, "torque_loss": 0.18979227542877197 }, { "epoch": 12.850719424460431, "grad_norm": 0.3836610019207001, "learning_rate": 9.062989036060193e-05, "loss": 0.1201, "step": 14290 }, { "action_loss": 0.01423904299736023, "epoch": 12.850719424460431, "step": 14290 }, { "epoch": 12.850719424460431, "step": 14290, "torque_loss": 0.17500704526901245 }, { "epoch": 12.859712230215827, "grad_norm": 0.3714728057384491, "learning_rate": 9.061382280201212e-05, "loss": 0.1002, "step": 14300 }, { "action_loss": 0.05906395986676216, "epoch": 12.859712230215827, "step": 14300 }, { "epoch": 12.859712230215827, "step": 14300, "torque_loss": 0.19580243527889252 }, { "epoch": 12.868705035971223, "grad_norm": 0.36100631952285767, "learning_rate": 9.059774290601528e-05, "loss": 0.1155, "step": 14310 }, { "action_loss": 0.008257499895989895, "epoch": 12.868705035971223, "step": 14310 }, { "epoch": 12.868705035971223, "step": 14310, "torque_loss": 0.14549921452999115 }, { "epoch": 12.877697841726619, "grad_norm": 0.3106866180896759, "learning_rate": 9.058165067749606e-05, "loss": 0.1092, "step": 14320 }, { "action_loss": 0.010313655249774456, "epoch": 12.877697841726619, "step": 14320 }, { "epoch": 12.877697841726619, "step": 14320, "torque_loss": 0.17278647422790527 }, { "epoch": 12.886690647482014, "grad_norm": 0.45104819536209106, "learning_rate": 9.056554612134288e-05, "loss": 0.1215, "step": 14330 }, { "action_loss": 0.0073168035596609116, "epoch": 12.886690647482014, "step": 14330 }, { "epoch": 12.886690647482014, "step": 14330, "torque_loss": 0.14434035122394562 }, { "epoch": 12.89568345323741, "grad_norm": 0.4279404580593109, "learning_rate": 9.054942924244785e-05, "loss": 0.1029, "step": 14340 }, { "action_loss": 0.013302527368068695, "epoch": 12.89568345323741, "step": 14340 }, { "epoch": 12.89568345323741, "step": 14340, "torque_loss": 0.1418752670288086 }, { "epoch": 12.904676258992806, "grad_norm": 0.32262855768203735, "learning_rate": 9.053330004570686e-05, "loss": 0.1043, "step": 14350 }, { "action_loss": 0.010780063457787037, "epoch": 12.904676258992806, "step": 14350 }, { "epoch": 12.904676258992806, "step": 14350, "torque_loss": 0.14188896119594574 }, { "epoch": 12.913669064748202, "grad_norm": 0.4063054919242859, "learning_rate": 9.051715853601955e-05, "loss": 0.1066, "step": 14360 }, { "action_loss": 0.021219976246356964, "epoch": 12.913669064748202, "step": 14360 }, { "epoch": 12.913669064748202, "step": 14360, "torque_loss": 0.1953555792570114 }, { "epoch": 12.922661870503598, "grad_norm": 0.3171723783016205, "learning_rate": 9.050100471828926e-05, "loss": 0.1176, "step": 14370 }, { "action_loss": 0.009804205037653446, "epoch": 12.922661870503598, "step": 14370 }, { "epoch": 12.922661870503598, "step": 14370, "torque_loss": 0.15574060380458832 }, { "epoch": 12.931654676258994, "grad_norm": 0.39826542139053345, "learning_rate": 9.048483859742311e-05, "loss": 0.1121, "step": 14380 }, { "action_loss": 0.01603126712143421, "epoch": 12.931654676258994, "step": 14380 }, { "epoch": 12.931654676258994, "step": 14380, "torque_loss": 0.17881284654140472 }, { "epoch": 12.940647482014388, "grad_norm": 0.3743031322956085, "learning_rate": 9.046866017833193e-05, "loss": 0.0967, "step": 14390 }, { "action_loss": 0.01241978257894516, "epoch": 12.940647482014388, "step": 14390 }, { "epoch": 12.940647482014388, "step": 14390, "torque_loss": 0.20477350056171417 }, { "epoch": 12.949640287769784, "grad_norm": 0.4035113751888275, "learning_rate": 9.045246946593029e-05, "loss": 0.1086, "step": 14400 }, { "action_loss": 0.00527745857834816, "epoch": 12.949640287769784, "step": 14400 }, { "epoch": 12.949640287769784, "step": 14400, "torque_loss": 0.1330045610666275 }, { "epoch": 12.95863309352518, "grad_norm": 0.40178412199020386, "learning_rate": 9.043626646513652e-05, "loss": 0.0942, "step": 14410 }, { "action_loss": 0.0064101978205144405, "epoch": 12.95863309352518, "step": 14410 }, { "epoch": 12.95863309352518, "step": 14410, "torque_loss": 0.14932440221309662 }, { "epoch": 12.967625899280575, "grad_norm": 0.37128105759620667, "learning_rate": 9.042005118087267e-05, "loss": 0.1011, "step": 14420 }, { "action_loss": 0.00498120067641139, "epoch": 12.967625899280575, "step": 14420 }, { "epoch": 12.967625899280575, "step": 14420, "torque_loss": 0.09514778852462769 }, { "epoch": 12.976618705035971, "grad_norm": 0.3359939157962799, "learning_rate": 9.040382361806448e-05, "loss": 0.1171, "step": 14430 }, { "action_loss": 0.007495593279600143, "epoch": 12.976618705035971, "step": 14430 }, { "epoch": 12.976618705035971, "step": 14430, "torque_loss": 0.13899052143096924 }, { "epoch": 12.985611510791367, "grad_norm": 0.37269678711891174, "learning_rate": 9.038758378164148e-05, "loss": 0.1103, "step": 14440 }, { "action_loss": 0.014134020544588566, "epoch": 12.985611510791367, "step": 14440 }, { "epoch": 12.985611510791367, "step": 14440, "torque_loss": 0.13599105179309845 }, { "epoch": 12.994604316546763, "grad_norm": 0.44775310158729553, "learning_rate": 9.037133167653691e-05, "loss": 0.1049, "step": 14450 }, { "action_loss": 0.014548596926033497, "epoch": 12.994604316546763, "step": 14450 }, { "epoch": 12.994604316546763, "step": 14450, "torque_loss": 0.18924307823181152 }, { "epoch": 13.003597122302159, "grad_norm": 0.45160579681396484, "learning_rate": 9.035506730768771e-05, "loss": 0.1026, "step": 14460 }, { "action_loss": 0.006385754328221083, "epoch": 13.003597122302159, "step": 14460 }, { "epoch": 13.003597122302159, "step": 14460, "torque_loss": 0.12292822450399399 }, { "epoch": 13.012589928057555, "grad_norm": 0.3943355977535248, "learning_rate": 9.033879068003458e-05, "loss": 0.1018, "step": 14470 }, { "action_loss": 0.017038783058524132, "epoch": 13.012589928057555, "step": 14470 }, { "epoch": 13.012589928057555, "step": 14470, "torque_loss": 0.19361726939678192 }, { "epoch": 13.02158273381295, "grad_norm": 0.47578391432762146, "learning_rate": 9.032250179852193e-05, "loss": 0.1086, "step": 14480 }, { "action_loss": 0.014076241292059422, "epoch": 13.02158273381295, "step": 14480 }, { "epoch": 13.02158273381295, "step": 14480, "torque_loss": 0.1653028279542923 }, { "epoch": 13.030575539568344, "grad_norm": 0.35089805722236633, "learning_rate": 9.030620066809787e-05, "loss": 0.1003, "step": 14490 }, { "action_loss": 0.00765876704826951, "epoch": 13.030575539568344, "step": 14490 }, { "epoch": 13.030575539568344, "step": 14490, "torque_loss": 0.1064244881272316 }, { "epoch": 13.03956834532374, "grad_norm": 0.3240269422531128, "learning_rate": 9.028988729371428e-05, "loss": 0.1051, "step": 14500 }, { "action_loss": 0.012872668914496899, "epoch": 13.03956834532374, "step": 14500 }, { "epoch": 13.03956834532374, "step": 14500, "torque_loss": 0.17470954358577728 }, { "epoch": 13.048561151079136, "grad_norm": 0.40996888279914856, "learning_rate": 9.027356168032673e-05, "loss": 0.1081, "step": 14510 }, { "action_loss": 0.0049940538592636585, "epoch": 13.048561151079136, "step": 14510 }, { "epoch": 13.048561151079136, "step": 14510, "torque_loss": 0.09098265320062637 }, { "epoch": 13.057553956834532, "grad_norm": 0.5484997034072876, "learning_rate": 9.02572238328945e-05, "loss": 0.1047, "step": 14520 }, { "action_loss": 0.008847068063914776, "epoch": 13.057553956834532, "step": 14520 }, { "epoch": 13.057553956834532, "step": 14520, "torque_loss": 0.12879258394241333 }, { "epoch": 13.066546762589928, "grad_norm": 0.45438525080680847, "learning_rate": 9.02408737563806e-05, "loss": 0.116, "step": 14530 }, { "action_loss": 0.00545492535457015, "epoch": 13.066546762589928, "step": 14530 }, { "epoch": 13.066546762589928, "step": 14530, "torque_loss": 0.11275821924209595 }, { "epoch": 13.075539568345324, "grad_norm": 0.49823641777038574, "learning_rate": 9.022451145575174e-05, "loss": 0.1003, "step": 14540 }, { "action_loss": 0.0057326615788042545, "epoch": 13.075539568345324, "step": 14540 }, { "epoch": 13.075539568345324, "step": 14540, "torque_loss": 0.10784851759672165 }, { "epoch": 13.08453237410072, "grad_norm": 0.3947699964046478, "learning_rate": 9.02081369359784e-05, "loss": 0.1026, "step": 14550 }, { "action_loss": 0.006676745135337114, "epoch": 13.08453237410072, "step": 14550 }, { "epoch": 13.08453237410072, "step": 14550, "torque_loss": 0.12740634381771088 }, { "epoch": 13.093525179856115, "grad_norm": 0.5101856589317322, "learning_rate": 9.019175020203465e-05, "loss": 0.0933, "step": 14560 }, { "action_loss": 0.006209537386894226, "epoch": 13.093525179856115, "step": 14560 }, { "epoch": 13.093525179856115, "step": 14560, "torque_loss": 0.13323885202407837 }, { "epoch": 13.102517985611511, "grad_norm": 0.46962815523147583, "learning_rate": 9.017535125889842e-05, "loss": 0.0973, "step": 14570 }, { "action_loss": 0.015150885097682476, "epoch": 13.102517985611511, "step": 14570 }, { "epoch": 13.102517985611511, "step": 14570, "torque_loss": 0.196849063038826 }, { "epoch": 13.111510791366907, "grad_norm": 0.3733869791030884, "learning_rate": 9.015894011155124e-05, "loss": 0.1153, "step": 14580 }, { "action_loss": 0.014550524763762951, "epoch": 13.111510791366907, "step": 14580 }, { "epoch": 13.111510791366907, "step": 14580, "torque_loss": 0.16003794968128204 }, { "epoch": 13.120503597122303, "grad_norm": 0.3903230130672455, "learning_rate": 9.014251676497838e-05, "loss": 0.1238, "step": 14590 }, { "action_loss": 0.009255914948880672, "epoch": 13.120503597122303, "step": 14590 }, { "epoch": 13.120503597122303, "step": 14590, "torque_loss": 0.10306206345558167 }, { "epoch": 13.129496402877697, "grad_norm": 0.3272424340248108, "learning_rate": 9.012608122416884e-05, "loss": 0.0958, "step": 14600 }, { "action_loss": 0.007004739250987768, "epoch": 13.129496402877697, "step": 14600 }, { "epoch": 13.129496402877697, "step": 14600, "torque_loss": 0.1138436421751976 }, { "epoch": 13.138489208633093, "grad_norm": 0.3441503643989563, "learning_rate": 9.010963349411529e-05, "loss": 0.1065, "step": 14610 }, { "action_loss": 0.026353759691119194, "epoch": 13.138489208633093, "step": 14610 }, { "epoch": 13.138489208633093, "step": 14610, "torque_loss": 0.18803830444812775 }, { "epoch": 13.147482014388489, "grad_norm": 0.4338296353816986, "learning_rate": 9.00931735798141e-05, "loss": 0.1058, "step": 14620 }, { "action_loss": 0.005909452680498362, "epoch": 13.147482014388489, "step": 14620 }, { "epoch": 13.147482014388489, "step": 14620, "torque_loss": 0.11276718974113464 }, { "epoch": 13.156474820143885, "grad_norm": 0.41025686264038086, "learning_rate": 9.00767014862654e-05, "loss": 0.0964, "step": 14630 }, { "action_loss": 0.006740873213857412, "epoch": 13.156474820143885, "step": 14630 }, { "epoch": 13.156474820143885, "step": 14630, "torque_loss": 0.13376189768314362 }, { "epoch": 13.16546762589928, "grad_norm": 0.38603270053863525, "learning_rate": 9.006021721847295e-05, "loss": 0.1029, "step": 14640 }, { "action_loss": 0.013796635903418064, "epoch": 13.16546762589928, "step": 14640 }, { "epoch": 13.16546762589928, "step": 14640, "torque_loss": 0.2133830338716507 }, { "epoch": 13.174460431654676, "grad_norm": 0.42901134490966797, "learning_rate": 9.004372078144423e-05, "loss": 0.1085, "step": 14650 }, { "action_loss": 0.026003146544098854, "epoch": 13.174460431654676, "step": 14650 }, { "epoch": 13.174460431654676, "step": 14650, "torque_loss": 0.19505847990512848 }, { "epoch": 13.183453237410072, "grad_norm": 0.3895491659641266, "learning_rate": 9.002721218019043e-05, "loss": 0.1131, "step": 14660 }, { "action_loss": 0.010850015096366405, "epoch": 13.183453237410072, "step": 14660 }, { "epoch": 13.183453237410072, "step": 14660, "torque_loss": 0.08932337909936905 }, { "epoch": 13.192446043165468, "grad_norm": 0.44240352511405945, "learning_rate": 9.001069141972642e-05, "loss": 0.0942, "step": 14670 }, { "action_loss": 0.010034195147454739, "epoch": 13.192446043165468, "step": 14670 }, { "epoch": 13.192446043165468, "step": 14670, "torque_loss": 0.16603262722492218 }, { "epoch": 13.201438848920864, "grad_norm": 0.4168548882007599, "learning_rate": 8.99941585050708e-05, "loss": 0.1053, "step": 14680 }, { "action_loss": 0.01498199999332428, "epoch": 13.201438848920864, "step": 14680 }, { "epoch": 13.201438848920864, "step": 14680, "torque_loss": 0.13445629179477692 }, { "epoch": 13.21043165467626, "grad_norm": 0.33392488956451416, "learning_rate": 8.997761344124578e-05, "loss": 0.1023, "step": 14690 }, { "action_loss": 0.0036083159502595663, "epoch": 13.21043165467626, "step": 14690 }, { "epoch": 13.21043165467626, "step": 14690, "torque_loss": 0.13683240115642548 }, { "epoch": 13.219424460431656, "grad_norm": 0.3167121112346649, "learning_rate": 8.996105623327737e-05, "loss": 0.0939, "step": 14700 }, { "action_loss": 0.006987469736486673, "epoch": 13.219424460431656, "step": 14700 }, { "epoch": 13.219424460431656, "step": 14700, "torque_loss": 0.16074104607105255 }, { "epoch": 13.22841726618705, "grad_norm": 0.36476659774780273, "learning_rate": 8.994448688619517e-05, "loss": 0.0967, "step": 14710 }, { "action_loss": 0.00589502090588212, "epoch": 13.22841726618705, "step": 14710 }, { "epoch": 13.22841726618705, "step": 14710, "torque_loss": 0.14435510337352753 }, { "epoch": 13.237410071942445, "grad_norm": 0.2991262674331665, "learning_rate": 8.992790540503253e-05, "loss": 0.0978, "step": 14720 }, { "action_loss": 0.007733085658401251, "epoch": 13.237410071942445, "step": 14720 }, { "epoch": 13.237410071942445, "step": 14720, "torque_loss": 0.1685691922903061 }, { "epoch": 13.246402877697841, "grad_norm": 0.31075552105903625, "learning_rate": 8.991131179482648e-05, "loss": 0.0936, "step": 14730 }, { "action_loss": 0.010728859342634678, "epoch": 13.246402877697841, "step": 14730 }, { "epoch": 13.246402877697841, "step": 14730, "torque_loss": 0.14244361221790314 }, { "epoch": 13.255395683453237, "grad_norm": 0.4537180960178375, "learning_rate": 8.989470606061768e-05, "loss": 0.1047, "step": 14740 }, { "action_loss": 0.012867026031017303, "epoch": 13.255395683453237, "step": 14740 }, { "epoch": 13.255395683453237, "step": 14740, "torque_loss": 0.21498002111911774 }, { "epoch": 13.264388489208633, "grad_norm": 0.43302249908447266, "learning_rate": 8.987808820745056e-05, "loss": 0.1268, "step": 14750 }, { "action_loss": 0.011909343302249908, "epoch": 13.264388489208633, "step": 14750 }, { "epoch": 13.264388489208633, "step": 14750, "torque_loss": 0.18326139450073242 }, { "epoch": 13.273381294964029, "grad_norm": 0.3875921368598938, "learning_rate": 8.986145824037315e-05, "loss": 0.0996, "step": 14760 }, { "action_loss": 0.020334122702479362, "epoch": 13.273381294964029, "step": 14760 }, { "epoch": 13.273381294964029, "step": 14760, "torque_loss": 0.18757747113704681 }, { "epoch": 13.282374100719425, "grad_norm": 0.3426775634288788, "learning_rate": 8.984481616443721e-05, "loss": 0.115, "step": 14770 }, { "action_loss": 0.010531100444495678, "epoch": 13.282374100719425, "step": 14770 }, { "epoch": 13.282374100719425, "step": 14770, "torque_loss": 0.16898393630981445 }, { "epoch": 13.29136690647482, "grad_norm": 0.342147558927536, "learning_rate": 8.982816198469815e-05, "loss": 0.1002, "step": 14780 }, { "action_loss": 0.006017437670379877, "epoch": 13.29136690647482, "step": 14780 }, { "epoch": 13.29136690647482, "step": 14780, "torque_loss": 0.11973943561315536 }, { "epoch": 13.300359712230216, "grad_norm": 0.34855806827545166, "learning_rate": 8.98114957062151e-05, "loss": 0.113, "step": 14790 }, { "action_loss": 0.008468960411846638, "epoch": 13.300359712230216, "step": 14790 }, { "epoch": 13.300359712230216, "step": 14790, "torque_loss": 0.17752213776111603 }, { "epoch": 13.309352517985612, "grad_norm": 0.2709070146083832, "learning_rate": 8.97948173340508e-05, "loss": 0.0961, "step": 14800 }, { "action_loss": 0.005811998154968023, "epoch": 13.309352517985612, "step": 14800 }, { "epoch": 13.309352517985612, "step": 14800, "torque_loss": 0.1095500960946083 }, { "epoch": 13.318345323741006, "grad_norm": 0.3373732268810272, "learning_rate": 8.977812687327172e-05, "loss": 0.083, "step": 14810 }, { "action_loss": 0.024485496804118156, "epoch": 13.318345323741006, "step": 14810 }, { "epoch": 13.318345323741006, "step": 14810, "torque_loss": 0.20036081969738007 }, { "epoch": 13.327338129496402, "grad_norm": 0.3925272226333618, "learning_rate": 8.976142432894798e-05, "loss": 0.1024, "step": 14820 }, { "action_loss": 0.008410933427512646, "epoch": 13.327338129496402, "step": 14820 }, { "epoch": 13.327338129496402, "step": 14820, "torque_loss": 0.09117937833070755 }, { "epoch": 13.336330935251798, "grad_norm": 0.35227975249290466, "learning_rate": 8.974470970615336e-05, "loss": 0.1023, "step": 14830 }, { "action_loss": 0.019687699154019356, "epoch": 13.336330935251798, "step": 14830 }, { "epoch": 13.336330935251798, "step": 14830, "torque_loss": 0.22084297239780426 }, { "epoch": 13.345323741007194, "grad_norm": 0.40748316049575806, "learning_rate": 8.972798300996534e-05, "loss": 0.1119, "step": 14840 }, { "action_loss": 0.005342511925846338, "epoch": 13.345323741007194, "step": 14840 }, { "epoch": 13.345323741007194, "step": 14840, "torque_loss": 0.15396977961063385 }, { "epoch": 13.35431654676259, "grad_norm": 0.3632405698299408, "learning_rate": 8.971124424546504e-05, "loss": 0.1015, "step": 14850 }, { "action_loss": 0.005124018993228674, "epoch": 13.35431654676259, "step": 14850 }, { "epoch": 13.35431654676259, "step": 14850, "torque_loss": 0.12694664299488068 }, { "epoch": 13.363309352517986, "grad_norm": 0.340794175863266, "learning_rate": 8.969449341773724e-05, "loss": 0.1072, "step": 14860 }, { "action_loss": 0.014057368040084839, "epoch": 13.363309352517986, "step": 14860 }, { "epoch": 13.363309352517986, "step": 14860, "torque_loss": 0.2004082351922989 }, { "epoch": 13.372302158273381, "grad_norm": 0.34328505396842957, "learning_rate": 8.967773053187042e-05, "loss": 0.1089, "step": 14870 }, { "action_loss": 0.009717666544020176, "epoch": 13.372302158273381, "step": 14870 }, { "epoch": 13.372302158273381, "step": 14870, "torque_loss": 0.20508356392383575 }, { "epoch": 13.381294964028777, "grad_norm": 0.3298250138759613, "learning_rate": 8.966095559295668e-05, "loss": 0.0958, "step": 14880 }, { "action_loss": 0.005752595141530037, "epoch": 13.381294964028777, "step": 14880 }, { "epoch": 13.381294964028777, "step": 14880, "torque_loss": 0.1628730148077011 }, { "epoch": 13.390287769784173, "grad_norm": 0.2918006479740143, "learning_rate": 8.964416860609184e-05, "loss": 0.107, "step": 14890 }, { "action_loss": 0.006823386996984482, "epoch": 13.390287769784173, "step": 14890 }, { "epoch": 13.390287769784173, "step": 14890, "torque_loss": 0.13974183797836304 }, { "epoch": 13.399280575539569, "grad_norm": 0.4696875512599945, "learning_rate": 8.962736957637532e-05, "loss": 0.0863, "step": 14900 }, { "action_loss": 0.002696485025808215, "epoch": 13.399280575539569, "step": 14900 }, { "epoch": 13.399280575539569, "step": 14900, "torque_loss": 0.058612924069166183 }, { "epoch": 13.408273381294965, "grad_norm": 0.22332137823104858, "learning_rate": 8.96105585089102e-05, "loss": 0.0835, "step": 14910 }, { "action_loss": 0.031110597774386406, "epoch": 13.408273381294965, "step": 14910 }, { "epoch": 13.408273381294965, "step": 14910, "torque_loss": 0.25062957406044006 }, { "epoch": 13.417266187050359, "grad_norm": 0.3786676526069641, "learning_rate": 8.959373540880329e-05, "loss": 0.1209, "step": 14920 }, { "action_loss": 0.008535356260836124, "epoch": 13.417266187050359, "step": 14920 }, { "epoch": 13.417266187050359, "step": 14920, "torque_loss": 0.18830521404743195 }, { "epoch": 13.426258992805755, "grad_norm": 0.39088767766952515, "learning_rate": 8.957690028116495e-05, "loss": 0.1101, "step": 14930 }, { "action_loss": 0.009160280227661133, "epoch": 13.426258992805755, "step": 14930 }, { "epoch": 13.426258992805755, "step": 14930, "torque_loss": 0.1301240772008896 }, { "epoch": 13.43525179856115, "grad_norm": 0.42072001099586487, "learning_rate": 8.956005313110928e-05, "loss": 0.0979, "step": 14940 }, { "action_loss": 0.004664577078074217, "epoch": 13.43525179856115, "step": 14940 }, { "epoch": 13.43525179856115, "step": 14940, "torque_loss": 0.1186104342341423 }, { "epoch": 13.444244604316546, "grad_norm": 0.31642258167266846, "learning_rate": 8.9543193963754e-05, "loss": 0.1071, "step": 14950 }, { "action_loss": 0.013976727612316608, "epoch": 13.444244604316546, "step": 14950 }, { "epoch": 13.444244604316546, "step": 14950, "torque_loss": 0.15124191343784332 }, { "epoch": 13.453237410071942, "grad_norm": 0.3525657057762146, "learning_rate": 8.952632278422048e-05, "loss": 0.1012, "step": 14960 }, { "action_loss": 0.010922759771347046, "epoch": 13.453237410071942, "step": 14960 }, { "epoch": 13.453237410071942, "step": 14960, "torque_loss": 0.12142697721719742 }, { "epoch": 13.462230215827338, "grad_norm": 0.2811211049556732, "learning_rate": 8.95094395976337e-05, "loss": 0.1171, "step": 14970 }, { "action_loss": 0.013425435870885849, "epoch": 13.462230215827338, "step": 14970 }, { "epoch": 13.462230215827338, "step": 14970, "torque_loss": 0.15674345195293427 }, { "epoch": 13.471223021582734, "grad_norm": 0.47518420219421387, "learning_rate": 8.949254440912239e-05, "loss": 0.1122, "step": 14980 }, { "action_loss": 0.012995225377380848, "epoch": 13.471223021582734, "step": 14980 }, { "epoch": 13.471223021582734, "step": 14980, "torque_loss": 0.17132842540740967 }, { "epoch": 13.48021582733813, "grad_norm": 0.4819885790348053, "learning_rate": 8.94756372238188e-05, "loss": 0.1145, "step": 14990 }, { "action_loss": 0.01034363079816103, "epoch": 13.48021582733813, "step": 14990 }, { "epoch": 13.48021582733813, "step": 14990, "torque_loss": 0.19474440813064575 }, { "epoch": 13.489208633093526, "grad_norm": 0.40925076603889465, "learning_rate": 8.945871804685892e-05, "loss": 0.1101, "step": 15000 }, { "action_loss": 0.013249757699668407, "epoch": 13.489208633093526, "step": 15000 }, { "epoch": 13.489208633093526, "step": 15000, "torque_loss": 0.2040855437517166 }, { "epoch": 13.498201438848922, "grad_norm": 0.3855281472206116, "learning_rate": 8.944178688338236e-05, "loss": 0.102, "step": 15010 }, { "action_loss": 0.014144686050713062, "epoch": 13.498201438848922, "step": 15010 }, { "epoch": 13.498201438848922, "step": 15010, "torque_loss": 0.1519353836774826 }, { "epoch": 13.507194244604317, "grad_norm": 0.40580034255981445, "learning_rate": 8.942484373853233e-05, "loss": 0.0911, "step": 15020 }, { "action_loss": 0.006665655877441168, "epoch": 13.507194244604317, "step": 15020 }, { "epoch": 13.507194244604317, "step": 15020, "torque_loss": 0.1455800086259842 }, { "epoch": 13.516187050359711, "grad_norm": 0.4046599864959717, "learning_rate": 8.940788861745572e-05, "loss": 0.1068, "step": 15030 }, { "action_loss": 0.006026893388479948, "epoch": 13.516187050359711, "step": 15030 }, { "epoch": 13.516187050359711, "step": 15030, "torque_loss": 0.0997546836733818 }, { "epoch": 13.525179856115107, "grad_norm": 0.4450966417789459, "learning_rate": 8.939092152530308e-05, "loss": 0.1074, "step": 15040 }, { "action_loss": 0.006014792714267969, "epoch": 13.525179856115107, "step": 15040 }, { "epoch": 13.525179856115107, "step": 15040, "torque_loss": 0.1792060285806656 }, { "epoch": 13.534172661870503, "grad_norm": 0.40511250495910645, "learning_rate": 8.937394246722853e-05, "loss": 0.0945, "step": 15050 }, { "action_loss": 0.01570763625204563, "epoch": 13.534172661870503, "step": 15050 }, { "epoch": 13.534172661870503, "step": 15050, "torque_loss": 0.2077374905347824 }, { "epoch": 13.543165467625899, "grad_norm": 0.36414268612861633, "learning_rate": 8.935695144838984e-05, "loss": 0.1248, "step": 15060 }, { "action_loss": 0.01124497503042221, "epoch": 13.543165467625899, "step": 15060 }, { "epoch": 13.543165467625899, "step": 15060, "torque_loss": 0.1690623164176941 }, { "epoch": 13.552158273381295, "grad_norm": 0.47231295704841614, "learning_rate": 8.933994847394849e-05, "loss": 0.1004, "step": 15070 }, { "action_loss": 0.009982513263821602, "epoch": 13.552158273381295, "step": 15070 }, { "epoch": 13.552158273381295, "step": 15070, "torque_loss": 0.10282698273658752 }, { "epoch": 13.56115107913669, "grad_norm": 0.33728116750717163, "learning_rate": 8.932293354906949e-05, "loss": 0.0982, "step": 15080 }, { "action_loss": 0.010545991361141205, "epoch": 13.56115107913669, "step": 15080 }, { "epoch": 13.56115107913669, "step": 15080, "torque_loss": 0.19568507373332977 }, { "epoch": 13.570143884892087, "grad_norm": 0.3970257639884949, "learning_rate": 8.930590667892153e-05, "loss": 0.1032, "step": 15090 }, { "action_loss": 0.006797583773732185, "epoch": 13.570143884892087, "step": 15090 }, { "epoch": 13.570143884892087, "step": 15090, "torque_loss": 0.13381870090961456 }, { "epoch": 13.579136690647482, "grad_norm": 0.4486251473426819, "learning_rate": 8.928886786867696e-05, "loss": 0.096, "step": 15100 }, { "action_loss": 0.010687577538192272, "epoch": 13.579136690647482, "step": 15100 }, { "epoch": 13.579136690647482, "step": 15100, "torque_loss": 0.1540205329656601 }, { "epoch": 13.588129496402878, "grad_norm": 0.4335087239742279, "learning_rate": 8.927181712351168e-05, "loss": 0.0988, "step": 15110 }, { "action_loss": 0.006235411856323481, "epoch": 13.588129496402878, "step": 15110 }, { "epoch": 13.588129496402878, "step": 15110, "torque_loss": 0.13017983734607697 }, { "epoch": 13.597122302158274, "grad_norm": 0.32623207569122314, "learning_rate": 8.925475444860527e-05, "loss": 0.1036, "step": 15120 }, { "action_loss": 0.007360244635492563, "epoch": 13.597122302158274, "step": 15120 }, { "epoch": 13.597122302158274, "step": 15120, "torque_loss": 0.13869468867778778 }, { "epoch": 13.60611510791367, "grad_norm": 0.28519752621650696, "learning_rate": 8.923767984914092e-05, "loss": 0.0873, "step": 15130 }, { "action_loss": 0.0056544519029557705, "epoch": 13.60611510791367, "step": 15130 }, { "epoch": 13.60611510791367, "step": 15130, "torque_loss": 0.12282692641019821 }, { "epoch": 13.615107913669064, "grad_norm": 0.4258524477481842, "learning_rate": 8.922059333030545e-05, "loss": 0.1138, "step": 15140 }, { "action_loss": 0.004255636129528284, "epoch": 13.615107913669064, "step": 15140 }, { "epoch": 13.615107913669064, "step": 15140, "torque_loss": 0.09332910925149918 }, { "epoch": 13.62410071942446, "grad_norm": 0.3799521327018738, "learning_rate": 8.920349489728928e-05, "loss": 0.1018, "step": 15150 }, { "action_loss": 0.008471070788800716, "epoch": 13.62410071942446, "step": 15150 }, { "epoch": 13.62410071942446, "step": 15150, "torque_loss": 0.1187812089920044 }, { "epoch": 13.633093525179856, "grad_norm": 0.3627682328224182, "learning_rate": 8.918638455528646e-05, "loss": 0.1203, "step": 15160 }, { "action_loss": 0.0056570786982774734, "epoch": 13.633093525179856, "step": 15160 }, { "epoch": 13.633093525179856, "step": 15160, "torque_loss": 0.1053895354270935 }, { "epoch": 13.642086330935252, "grad_norm": 0.3650306761264801, "learning_rate": 8.916926230949468e-05, "loss": 0.1054, "step": 15170 }, { "action_loss": 0.007066891994327307, "epoch": 13.642086330935252, "step": 15170 }, { "epoch": 13.642086330935252, "step": 15170, "torque_loss": 0.09597349166870117 }, { "epoch": 13.651079136690647, "grad_norm": 0.3653104901313782, "learning_rate": 8.915212816511522e-05, "loss": 0.1041, "step": 15180 }, { "action_loss": 0.005716394633054733, "epoch": 13.651079136690647, "step": 15180 }, { "epoch": 13.651079136690647, "step": 15180, "torque_loss": 0.15317925810813904 }, { "epoch": 13.660071942446043, "grad_norm": 0.4305107295513153, "learning_rate": 8.913498212735296e-05, "loss": 0.1131, "step": 15190 }, { "action_loss": 0.012665173970162868, "epoch": 13.660071942446043, "step": 15190 }, { "epoch": 13.660071942446043, "step": 15190, "torque_loss": 0.16459859907627106 }, { "epoch": 13.66906474820144, "grad_norm": 0.35554561018943787, "learning_rate": 8.911782420141643e-05, "loss": 0.1097, "step": 15200 }, { "action_loss": 0.006959854159504175, "epoch": 13.66906474820144, "step": 15200 }, { "epoch": 13.66906474820144, "step": 15200, "torque_loss": 0.10349518060684204 }, { "epoch": 13.678057553956835, "grad_norm": 0.4026699364185333, "learning_rate": 8.910065439251775e-05, "loss": 0.0929, "step": 15210 }, { "action_loss": 0.007404783274978399, "epoch": 13.678057553956835, "step": 15210 }, { "epoch": 13.678057553956835, "step": 15210, "torque_loss": 0.11773854494094849 }, { "epoch": 13.68705035971223, "grad_norm": 0.34208640456199646, "learning_rate": 8.908347270587268e-05, "loss": 0.1012, "step": 15220 }, { "action_loss": 0.004042238462716341, "epoch": 13.68705035971223, "step": 15220 }, { "epoch": 13.68705035971223, "step": 15220, "torque_loss": 0.09256688505411148 }, { "epoch": 13.696043165467627, "grad_norm": 0.3817637860774994, "learning_rate": 8.906627914670054e-05, "loss": 0.0984, "step": 15230 }, { "action_loss": 0.010202039033174515, "epoch": 13.696043165467627, "step": 15230 }, { "epoch": 13.696043165467627, "step": 15230, "torque_loss": 0.18544864654541016 }, { "epoch": 13.70503597122302, "grad_norm": 0.4140496253967285, "learning_rate": 8.904907372022427e-05, "loss": 0.0888, "step": 15240 }, { "action_loss": 0.007313715759664774, "epoch": 13.70503597122302, "step": 15240 }, { "epoch": 13.70503597122302, "step": 15240, "torque_loss": 0.15839847922325134 }, { "epoch": 13.714028776978417, "grad_norm": 0.3832662105560303, "learning_rate": 8.903185643167042e-05, "loss": 0.1101, "step": 15250 }, { "action_loss": 0.004533042665570974, "epoch": 13.714028776978417, "step": 15250 }, { "epoch": 13.714028776978417, "step": 15250, "torque_loss": 0.07208351045846939 }, { "epoch": 13.723021582733812, "grad_norm": 0.4429500699043274, "learning_rate": 8.901462728626919e-05, "loss": 0.0986, "step": 15260 }, { "action_loss": 0.015450051985681057, "epoch": 13.723021582733812, "step": 15260 }, { "epoch": 13.723021582733812, "step": 15260, "torque_loss": 0.21165437996387482 }, { "epoch": 13.732014388489208, "grad_norm": 0.345967173576355, "learning_rate": 8.899738628925429e-05, "loss": 0.1074, "step": 15270 }, { "action_loss": 0.006821915507316589, "epoch": 13.732014388489208, "step": 15270 }, { "epoch": 13.732014388489208, "step": 15270, "torque_loss": 0.13759344816207886 }, { "epoch": 13.741007194244604, "grad_norm": 0.3119393289089203, "learning_rate": 8.898013344586312e-05, "loss": 0.091, "step": 15280 }, { "action_loss": 0.04024333134293556, "epoch": 13.741007194244604, "step": 15280 }, { "epoch": 13.741007194244604, "step": 15280, "torque_loss": 0.16754303872585297 }, { "epoch": 13.75, "grad_norm": 0.3639608323574066, "learning_rate": 8.896286876133661e-05, "loss": 0.1084, "step": 15290 }, { "action_loss": 0.007879647426307201, "epoch": 13.75, "step": 15290 }, { "epoch": 13.75, "step": 15290, "torque_loss": 0.11671767383813858 }, { "epoch": 13.758992805755396, "grad_norm": 0.3273724913597107, "learning_rate": 8.894559224091933e-05, "loss": 0.098, "step": 15300 }, { "action_loss": 0.019101768732070923, "epoch": 13.758992805755396, "step": 15300 }, { "epoch": 13.758992805755396, "step": 15300, "torque_loss": 0.18468992412090302 }, { "epoch": 13.767985611510792, "grad_norm": 0.3995179235935211, "learning_rate": 8.892830388985942e-05, "loss": 0.1026, "step": 15310 }, { "action_loss": 0.009449951350688934, "epoch": 13.767985611510792, "step": 15310 }, { "epoch": 13.767985611510792, "step": 15310, "torque_loss": 0.1698269098997116 }, { "epoch": 13.776978417266188, "grad_norm": 0.28491637110710144, "learning_rate": 8.891100371340864e-05, "loss": 0.0934, "step": 15320 }, { "action_loss": 0.020397571846842766, "epoch": 13.776978417266188, "step": 15320 }, { "epoch": 13.776978417266188, "step": 15320, "torque_loss": 0.13106226921081543 }, { "epoch": 13.785971223021583, "grad_norm": 0.4183328151702881, "learning_rate": 8.889369171682231e-05, "loss": 0.1038, "step": 15330 }, { "action_loss": 0.005776323843747377, "epoch": 13.785971223021583, "step": 15330 }, { "epoch": 13.785971223021583, "step": 15330, "torque_loss": 0.14874833822250366 }, { "epoch": 13.79496402877698, "grad_norm": 0.3636402189731598, "learning_rate": 8.887636790535936e-05, "loss": 0.097, "step": 15340 }, { "action_loss": 0.004986191634088755, "epoch": 13.79496402877698, "step": 15340 }, { "epoch": 13.79496402877698, "step": 15340, "torque_loss": 0.15190333127975464 }, { "epoch": 13.803956834532373, "grad_norm": 0.376457154750824, "learning_rate": 8.885903228428231e-05, "loss": 0.097, "step": 15350 }, { "action_loss": 0.006763773504644632, "epoch": 13.803956834532373, "step": 15350 }, { "epoch": 13.803956834532373, "step": 15350, "torque_loss": 0.1348973661661148 }, { "epoch": 13.81294964028777, "grad_norm": 0.2958364188671112, "learning_rate": 8.884168485885727e-05, "loss": 0.0994, "step": 15360 }, { "action_loss": 0.009168591350317001, "epoch": 13.81294964028777, "step": 15360 }, { "epoch": 13.81294964028777, "step": 15360, "torque_loss": 0.13945506513118744 }, { "epoch": 13.821942446043165, "grad_norm": 0.34850117564201355, "learning_rate": 8.882432563435393e-05, "loss": 0.1016, "step": 15370 }, { "action_loss": 0.011580941267311573, "epoch": 13.821942446043165, "step": 15370 }, { "epoch": 13.821942446043165, "step": 15370, "torque_loss": 0.20623314380645752 }, { "epoch": 13.83093525179856, "grad_norm": 0.5291422605514526, "learning_rate": 8.880695461604556e-05, "loss": 0.0939, "step": 15380 }, { "action_loss": 0.0028210189193487167, "epoch": 13.83093525179856, "step": 15380 }, { "epoch": 13.83093525179856, "step": 15380, "torque_loss": 0.07412248849868774 }, { "epoch": 13.839928057553957, "grad_norm": 0.35959339141845703, "learning_rate": 8.878957180920901e-05, "loss": 0.0946, "step": 15390 }, { "action_loss": 0.004638336598873138, "epoch": 13.839928057553957, "step": 15390 }, { "epoch": 13.839928057553957, "step": 15390, "torque_loss": 0.1251736879348755 }, { "epoch": 13.848920863309353, "grad_norm": 0.4298434257507324, "learning_rate": 8.877217721912473e-05, "loss": 0.0961, "step": 15400 }, { "action_loss": 0.005642494186758995, "epoch": 13.848920863309353, "step": 15400 }, { "epoch": 13.848920863309353, "step": 15400, "torque_loss": 0.12086091190576553 }, { "epoch": 13.857913669064748, "grad_norm": 0.2707573473453522, "learning_rate": 8.875477085107673e-05, "loss": 0.0994, "step": 15410 }, { "action_loss": 0.006282422225922346, "epoch": 13.857913669064748, "step": 15410 }, { "epoch": 13.857913669064748, "step": 15410, "torque_loss": 0.16171306371688843 }, { "epoch": 13.866906474820144, "grad_norm": 0.49417969584465027, "learning_rate": 8.87373527103526e-05, "loss": 0.1077, "step": 15420 }, { "action_loss": 0.004750643856823444, "epoch": 13.866906474820144, "step": 15420 }, { "epoch": 13.866906474820144, "step": 15420, "torque_loss": 0.10800755769014359 }, { "epoch": 13.87589928057554, "grad_norm": 0.42208871245384216, "learning_rate": 8.871992280224353e-05, "loss": 0.09, "step": 15430 }, { "action_loss": 0.007511306554079056, "epoch": 13.87589928057554, "step": 15430 }, { "epoch": 13.87589928057554, "step": 15430, "torque_loss": 0.14088332653045654 }, { "epoch": 13.884892086330936, "grad_norm": 0.32927629351615906, "learning_rate": 8.870248113204422e-05, "loss": 0.0951, "step": 15440 }, { "action_loss": 0.012304895557463169, "epoch": 13.884892086330936, "step": 15440 }, { "epoch": 13.884892086330936, "step": 15440, "torque_loss": 0.12533466517925262 }, { "epoch": 13.89388489208633, "grad_norm": 0.4271271228790283, "learning_rate": 8.868502770505306e-05, "loss": 0.1096, "step": 15450 }, { "action_loss": 0.011863067746162415, "epoch": 13.89388489208633, "step": 15450 }, { "epoch": 13.89388489208633, "step": 15450, "torque_loss": 0.16096071898937225 }, { "epoch": 13.902877697841726, "grad_norm": 0.2662155330181122, "learning_rate": 8.86675625265719e-05, "loss": 0.1161, "step": 15460 }, { "action_loss": 0.007180606480687857, "epoch": 13.902877697841726, "step": 15460 }, { "epoch": 13.902877697841726, "step": 15460, "torque_loss": 0.1279718428850174 }, { "epoch": 13.911870503597122, "grad_norm": 0.4029465615749359, "learning_rate": 8.865008560190618e-05, "loss": 0.0872, "step": 15470 }, { "action_loss": 0.033058565109968185, "epoch": 13.911870503597122, "step": 15470 }, { "epoch": 13.911870503597122, "step": 15470, "torque_loss": 0.22554413974285126 }, { "epoch": 13.920863309352518, "grad_norm": 0.27823179960250854, "learning_rate": 8.863259693636496e-05, "loss": 0.1017, "step": 15480 }, { "action_loss": 0.014243279583752155, "epoch": 13.920863309352518, "step": 15480 }, { "epoch": 13.920863309352518, "step": 15480, "torque_loss": 0.20121192932128906 }, { "epoch": 13.929856115107913, "grad_norm": 0.5353947877883911, "learning_rate": 8.861509653526083e-05, "loss": 0.0946, "step": 15490 }, { "action_loss": 0.00950809195637703, "epoch": 13.929856115107913, "step": 15490 }, { "epoch": 13.929856115107913, "step": 15490, "torque_loss": 0.19027023017406464 }, { "epoch": 13.93884892086331, "grad_norm": 0.32313770055770874, "learning_rate": 8.859758440390993e-05, "loss": 0.0989, "step": 15500 }, { "action_loss": 0.006383985280990601, "epoch": 13.93884892086331, "step": 15500 }, { "epoch": 13.93884892086331, "step": 15500, "torque_loss": 0.13644050061702728 }, { "epoch": 13.947841726618705, "grad_norm": 0.29291248321533203, "learning_rate": 8.858006054763202e-05, "loss": 0.0868, "step": 15510 }, { "action_loss": 0.011302344501018524, "epoch": 13.947841726618705, "step": 15510 }, { "epoch": 13.947841726618705, "step": 15510, "torque_loss": 0.13566477596759796 }, { "epoch": 13.956834532374101, "grad_norm": 0.3656037449836731, "learning_rate": 8.856252497175035e-05, "loss": 0.0965, "step": 15520 }, { "action_loss": 0.006202705204486847, "epoch": 13.956834532374101, "step": 15520 }, { "epoch": 13.956834532374101, "step": 15520, "torque_loss": 0.12817369401454926 }, { "epoch": 13.965827338129497, "grad_norm": 0.24268648028373718, "learning_rate": 8.854497768159178e-05, "loss": 0.0799, "step": 15530 }, { "action_loss": 0.011520583182573318, "epoch": 13.965827338129497, "step": 15530 }, { "epoch": 13.965827338129497, "step": 15530, "torque_loss": 0.23511582612991333 }, { "epoch": 13.974820143884893, "grad_norm": 0.3687359690666199, "learning_rate": 8.852741868248671e-05, "loss": 0.101, "step": 15540 }, { "action_loss": 0.011341535486280918, "epoch": 13.974820143884893, "step": 15540 }, { "epoch": 13.974820143884893, "step": 15540, "torque_loss": 0.19707565009593964 }, { "epoch": 13.983812949640289, "grad_norm": 0.37861159443855286, "learning_rate": 8.85098479797691e-05, "loss": 0.1077, "step": 15550 }, { "action_loss": 0.006480847951024771, "epoch": 13.983812949640289, "step": 15550 }, { "epoch": 13.983812949640289, "step": 15550, "torque_loss": 0.13638730347156525 }, { "epoch": 13.992805755395683, "grad_norm": 0.43241995573043823, "learning_rate": 8.849226557877646e-05, "loss": 0.1054, "step": 15560 }, { "action_loss": 0.010784911923110485, "epoch": 13.992805755395683, "step": 15560 }, { "epoch": 13.992805755395683, "step": 15560, "torque_loss": 0.16978096961975098 }, { "epoch": 14.001798561151078, "grad_norm": 0.39110419154167175, "learning_rate": 8.84746714848499e-05, "loss": 0.0946, "step": 15570 }, { "action_loss": 0.003966873046010733, "epoch": 14.001798561151078, "step": 15570 }, { "epoch": 14.001798561151078, "step": 15570, "torque_loss": 0.11104464530944824 }, { "epoch": 14.010791366906474, "grad_norm": 0.3242701590061188, "learning_rate": 8.845706570333397e-05, "loss": 0.0926, "step": 15580 }, { "action_loss": 0.011869095265865326, "epoch": 14.010791366906474, "step": 15580 }, { "epoch": 14.010791366906474, "step": 15580, "torque_loss": 0.15468595921993256 }, { "epoch": 14.01978417266187, "grad_norm": 0.32393068075180054, "learning_rate": 8.84394482395769e-05, "loss": 0.1181, "step": 15590 }, { "action_loss": 0.003557202173396945, "epoch": 14.01978417266187, "step": 15590 }, { "epoch": 14.01978417266187, "step": 15590, "torque_loss": 0.09707397222518921 }, { "epoch": 14.028776978417266, "grad_norm": 0.3222220540046692, "learning_rate": 8.842181909893038e-05, "loss": 0.0783, "step": 15600 }, { "action_loss": 0.011616534553468227, "epoch": 14.028776978417266, "step": 15600 }, { "epoch": 14.028776978417266, "step": 15600, "torque_loss": 0.15997318923473358 }, { "epoch": 14.037769784172662, "grad_norm": 0.29257190227508545, "learning_rate": 8.840417828674969e-05, "loss": 0.0972, "step": 15610 }, { "action_loss": 0.005510587245225906, "epoch": 14.037769784172662, "step": 15610 }, { "epoch": 14.037769784172662, "step": 15610, "torque_loss": 0.12881040573120117 }, { "epoch": 14.046762589928058, "grad_norm": 0.3145884573459625, "learning_rate": 8.838652580839364e-05, "loss": 0.1058, "step": 15620 }, { "action_loss": 0.007692456711083651, "epoch": 14.046762589928058, "step": 15620 }, { "epoch": 14.046762589928058, "step": 15620, "torque_loss": 0.16716258227825165 }, { "epoch": 14.055755395683454, "grad_norm": 0.4106309711933136, "learning_rate": 8.836886166922458e-05, "loss": 0.122, "step": 15630 }, { "action_loss": 0.004078971222043037, "epoch": 14.055755395683454, "step": 15630 }, { "epoch": 14.055755395683454, "step": 15630, "torque_loss": 0.09653834253549576 }, { "epoch": 14.06474820143885, "grad_norm": 0.44334039092063904, "learning_rate": 8.835118587460844e-05, "loss": 0.0936, "step": 15640 }, { "action_loss": 0.017906997352838516, "epoch": 14.06474820143885, "step": 15640 }, { "epoch": 14.06474820143885, "step": 15640, "torque_loss": 0.1526583880186081 }, { "epoch": 14.073741007194245, "grad_norm": 0.3533404767513275, "learning_rate": 8.83334984299146e-05, "loss": 0.1167, "step": 15650 }, { "action_loss": 0.015532489866018295, "epoch": 14.073741007194245, "step": 15650 }, { "epoch": 14.073741007194245, "step": 15650, "torque_loss": 0.18228793144226074 }, { "epoch": 14.082733812949641, "grad_norm": 0.38677898049354553, "learning_rate": 8.83157993405161e-05, "loss": 0.1144, "step": 15660 }, { "action_loss": 0.004853122401982546, "epoch": 14.082733812949641, "step": 15660 }, { "epoch": 14.082733812949641, "step": 15660, "torque_loss": 0.12120526283979416 }, { "epoch": 14.091726618705035, "grad_norm": 0.2987683117389679, "learning_rate": 8.829808861178943e-05, "loss": 0.1009, "step": 15670 }, { "action_loss": 0.009453163482248783, "epoch": 14.091726618705035, "step": 15670 }, { "epoch": 14.091726618705035, "step": 15670, "torque_loss": 0.14663831889629364 }, { "epoch": 14.100719424460431, "grad_norm": 0.47546398639678955, "learning_rate": 8.828036624911464e-05, "loss": 0.107, "step": 15680 }, { "action_loss": 0.009369158186018467, "epoch": 14.100719424460431, "step": 15680 }, { "epoch": 14.100719424460431, "step": 15680, "torque_loss": 0.12267792969942093 }, { "epoch": 14.109712230215827, "grad_norm": 0.4113294184207916, "learning_rate": 8.826263225787532e-05, "loss": 0.1047, "step": 15690 }, { "action_loss": 0.006245218217372894, "epoch": 14.109712230215827, "step": 15690 }, { "epoch": 14.109712230215827, "step": 15690, "torque_loss": 0.13502216339111328 }, { "epoch": 14.118705035971223, "grad_norm": 0.3628806173801422, "learning_rate": 8.824488664345858e-05, "loss": 0.0962, "step": 15700 }, { "action_loss": 0.017579661682248116, "epoch": 14.118705035971223, "step": 15700 }, { "epoch": 14.118705035971223, "step": 15700, "torque_loss": 0.17042593657970428 }, { "epoch": 14.127697841726619, "grad_norm": 0.32403847575187683, "learning_rate": 8.822712941125508e-05, "loss": 0.0873, "step": 15710 }, { "action_loss": 0.025139182806015015, "epoch": 14.127697841726619, "step": 15710 }, { "epoch": 14.127697841726619, "step": 15710, "torque_loss": 0.15520696341991425 }, { "epoch": 14.136690647482014, "grad_norm": 0.3198918402194977, "learning_rate": 8.820936056665898e-05, "loss": 0.0974, "step": 15720 }, { "action_loss": 0.009433376602828503, "epoch": 14.136690647482014, "step": 15720 }, { "epoch": 14.136690647482014, "step": 15720, "torque_loss": 0.15510280430316925 }, { "epoch": 14.14568345323741, "grad_norm": 0.32847434282302856, "learning_rate": 8.819158011506801e-05, "loss": 0.1036, "step": 15730 }, { "action_loss": 0.0046088905073702335, "epoch": 14.14568345323741, "step": 15730 }, { "epoch": 14.14568345323741, "step": 15730, "torque_loss": 0.12468767911195755 }, { "epoch": 14.154676258992806, "grad_norm": 0.3100323975086212, "learning_rate": 8.81737880618834e-05, "loss": 0.0948, "step": 15740 }, { "action_loss": 0.00988579262048006, "epoch": 14.154676258992806, "step": 15740 }, { "epoch": 14.154676258992806, "step": 15740, "torque_loss": 0.1125861406326294 }, { "epoch": 14.163669064748202, "grad_norm": 0.3110508322715759, "learning_rate": 8.815598441250987e-05, "loss": 0.1072, "step": 15750 }, { "action_loss": 0.003947950899600983, "epoch": 14.163669064748202, "step": 15750 }, { "epoch": 14.163669064748202, "step": 15750, "torque_loss": 0.12910936772823334 }, { "epoch": 14.172661870503598, "grad_norm": 0.3288668096065521, "learning_rate": 8.813816917235576e-05, "loss": 0.1, "step": 15760 }, { "action_loss": 0.009892783127725124, "epoch": 14.172661870503598, "step": 15760 }, { "epoch": 14.172661870503598, "step": 15760, "torque_loss": 0.09043405205011368 }, { "epoch": 14.181654676258994, "grad_norm": 0.5031497478485107, "learning_rate": 8.812034234683282e-05, "loss": 0.111, "step": 15770 }, { "action_loss": 0.010652798227965832, "epoch": 14.181654676258994, "step": 15770 }, { "epoch": 14.181654676258994, "step": 15770, "torque_loss": 0.2048102617263794 }, { "epoch": 14.190647482014388, "grad_norm": 0.3416144549846649, "learning_rate": 8.810250394135637e-05, "loss": 0.1057, "step": 15780 }, { "action_loss": 0.012292228639125824, "epoch": 14.190647482014388, "step": 15780 }, { "epoch": 14.190647482014388, "step": 15780, "torque_loss": 0.10782796889543533 }, { "epoch": 14.199640287769784, "grad_norm": 0.3178115785121918, "learning_rate": 8.808465396134529e-05, "loss": 0.1003, "step": 15790 }, { "action_loss": 0.010268418118357658, "epoch": 14.199640287769784, "step": 15790 }, { "epoch": 14.199640287769784, "step": 15790, "torque_loss": 0.14756865799427032 }, { "epoch": 14.20863309352518, "grad_norm": 0.3348466157913208, "learning_rate": 8.806679241222189e-05, "loss": 0.0905, "step": 15800 }, { "action_loss": 0.009678087197244167, "epoch": 14.20863309352518, "step": 15800 }, { "epoch": 14.20863309352518, "step": 15800, "torque_loss": 0.1442890167236328 }, { "epoch": 14.217625899280575, "grad_norm": 0.39434903860092163, "learning_rate": 8.804891929941203e-05, "loss": 0.117, "step": 15810 }, { "action_loss": 0.00910879671573639, "epoch": 14.217625899280575, "step": 15810 }, { "epoch": 14.217625899280575, "step": 15810, "torque_loss": 0.15456606447696686 }, { "epoch": 14.226618705035971, "grad_norm": 0.38231804966926575, "learning_rate": 8.803103462834514e-05, "loss": 0.1084, "step": 15820 }, { "action_loss": 0.009278311394155025, "epoch": 14.226618705035971, "step": 15820 }, { "epoch": 14.226618705035971, "step": 15820, "torque_loss": 0.22612391412258148 }, { "epoch": 14.235611510791367, "grad_norm": 0.4902573227882385, "learning_rate": 8.801313840445408e-05, "loss": 0.1038, "step": 15830 }, { "action_loss": 0.007949190214276314, "epoch": 14.235611510791367, "step": 15830 }, { "epoch": 14.235611510791367, "step": 15830, "torque_loss": 0.13691820204257965 }, { "epoch": 14.244604316546763, "grad_norm": 0.37435007095336914, "learning_rate": 8.799523063317524e-05, "loss": 0.0941, "step": 15840 }, { "action_loss": 0.004163408186286688, "epoch": 14.244604316546763, "step": 15840 }, { "epoch": 14.244604316546763, "step": 15840, "torque_loss": 0.14640207588672638 }, { "epoch": 14.253597122302159, "grad_norm": 0.36145442724227905, "learning_rate": 8.797731131994854e-05, "loss": 0.0878, "step": 15850 }, { "action_loss": 0.017791690304875374, "epoch": 14.253597122302159, "step": 15850 }, { "epoch": 14.253597122302159, "step": 15850, "torque_loss": 0.15581248700618744 }, { "epoch": 14.262589928057555, "grad_norm": 0.33520352840423584, "learning_rate": 8.795938047021739e-05, "loss": 0.1187, "step": 15860 }, { "action_loss": 0.007380913943052292, "epoch": 14.262589928057555, "step": 15860 }, { "epoch": 14.262589928057555, "step": 15860, "torque_loss": 0.13358597457408905 }, { "epoch": 14.27158273381295, "grad_norm": 0.32886067032814026, "learning_rate": 8.794143808942872e-05, "loss": 0.101, "step": 15870 }, { "action_loss": 0.014064491726458073, "epoch": 14.27158273381295, "step": 15870 }, { "epoch": 14.27158273381295, "step": 15870, "torque_loss": 0.1280210167169571 }, { "epoch": 14.280575539568344, "grad_norm": 0.40381401777267456, "learning_rate": 8.792348418303296e-05, "loss": 0.1046, "step": 15880 }, { "action_loss": 0.03831922635436058, "epoch": 14.280575539568344, "step": 15880 }, { "epoch": 14.280575539568344, "step": 15880, "torque_loss": 0.23013055324554443 }, { "epoch": 14.28956834532374, "grad_norm": 0.3360077440738678, "learning_rate": 8.790551875648398e-05, "loss": 0.1201, "step": 15890 }, { "action_loss": 0.014500629156827927, "epoch": 14.28956834532374, "step": 15890 }, { "epoch": 14.28956834532374, "step": 15890, "torque_loss": 0.20914708077907562 }, { "epoch": 14.298561151079136, "grad_norm": 0.4140723645687103, "learning_rate": 8.788754181523926e-05, "loss": 0.1089, "step": 15900 }, { "action_loss": 0.05754619464278221, "epoch": 14.298561151079136, "step": 15900 }, { "epoch": 14.298561151079136, "step": 15900, "torque_loss": 0.18693991005420685 }, { "epoch": 14.307553956834532, "grad_norm": 0.42490047216415405, "learning_rate": 8.78695533647597e-05, "loss": 0.112, "step": 15910 }, { "action_loss": 0.010451079346239567, "epoch": 14.307553956834532, "step": 15910 }, { "epoch": 14.307553956834532, "step": 15910, "torque_loss": 0.18652699887752533 }, { "epoch": 14.316546762589928, "grad_norm": 0.3158261477947235, "learning_rate": 8.785155341050972e-05, "loss": 0.1303, "step": 15920 }, { "action_loss": 0.011491361074149609, "epoch": 14.316546762589928, "step": 15920 }, { "epoch": 14.316546762589928, "step": 15920, "torque_loss": 0.1817120760679245 }, { "epoch": 14.325539568345324, "grad_norm": 0.3406376242637634, "learning_rate": 8.783354195795721e-05, "loss": 0.1073, "step": 15930 }, { "action_loss": 0.017405016347765923, "epoch": 14.325539568345324, "step": 15930 }, { "epoch": 14.325539568345324, "step": 15930, "torque_loss": 0.15738923847675323 }, { "epoch": 14.33453237410072, "grad_norm": 0.4781692326068878, "learning_rate": 8.78155190125736e-05, "loss": 0.1098, "step": 15940 }, { "action_loss": 0.011054151691496372, "epoch": 14.33453237410072, "step": 15940 }, { "epoch": 14.33453237410072, "step": 15940, "torque_loss": 0.16823923587799072 }, { "epoch": 14.343525179856115, "grad_norm": 0.3312503397464752, "learning_rate": 8.779748457983378e-05, "loss": 0.107, "step": 15950 }, { "action_loss": 0.007727675139904022, "epoch": 14.343525179856115, "step": 15950 }, { "epoch": 14.343525179856115, "step": 15950, "torque_loss": 0.16712750494480133 }, { "epoch": 14.352517985611511, "grad_norm": 0.27242329716682434, "learning_rate": 8.777943866521612e-05, "loss": 0.1222, "step": 15960 }, { "action_loss": 0.007482344750314951, "epoch": 14.352517985611511, "step": 15960 }, { "epoch": 14.352517985611511, "step": 15960, "torque_loss": 0.14020191133022308 }, { "epoch": 14.361510791366907, "grad_norm": 0.3940402865409851, "learning_rate": 8.77613812742025e-05, "loss": 0.0947, "step": 15970 }, { "action_loss": 0.010685905814170837, "epoch": 14.361510791366907, "step": 15970 }, { "epoch": 14.361510791366907, "step": 15970, "torque_loss": 0.11686911433935165 }, { "epoch": 14.370503597122303, "grad_norm": 0.3424079120159149, "learning_rate": 8.774331241227829e-05, "loss": 0.1114, "step": 15980 }, { "action_loss": 0.006816735025495291, "epoch": 14.370503597122303, "step": 15980 }, { "epoch": 14.370503597122303, "step": 15980, "torque_loss": 0.17284375429153442 }, { "epoch": 14.379496402877697, "grad_norm": 0.33198896050453186, "learning_rate": 8.772523208493232e-05, "loss": 0.0901, "step": 15990 }, { "action_loss": 0.013146092183887959, "epoch": 14.379496402877697, "step": 15990 }, { "epoch": 14.379496402877697, "step": 15990, "torque_loss": 0.1650153547525406 }, { "epoch": 14.388489208633093, "grad_norm": 0.3534037172794342, "learning_rate": 8.770714029765692e-05, "loss": 0.1087, "step": 16000 }, { "action_loss": 0.026327578350901604, "epoch": 14.388489208633093, "step": 16000 }, { "epoch": 14.388489208633093, "step": 16000, "torque_loss": 0.24358637630939484 }, { "epoch": 14.397482014388489, "grad_norm": 0.3472265601158142, "learning_rate": 8.768903705594789e-05, "loss": 0.1088, "step": 16010 }, { "action_loss": 0.021918192505836487, "epoch": 14.397482014388489, "step": 16010 }, { "epoch": 14.397482014388489, "step": 16010, "torque_loss": 0.1487094610929489 }, { "epoch": 14.406474820143885, "grad_norm": 0.4230922758579254, "learning_rate": 8.767092236530453e-05, "loss": 0.1015, "step": 16020 }, { "action_loss": 0.023799918591976166, "epoch": 14.406474820143885, "step": 16020 }, { "epoch": 14.406474820143885, "step": 16020, "torque_loss": 0.24184198677539825 }, { "epoch": 14.41546762589928, "grad_norm": 0.3893493413925171, "learning_rate": 8.76527962312296e-05, "loss": 0.116, "step": 16030 }, { "action_loss": 0.004574760794639587, "epoch": 14.41546762589928, "step": 16030 }, { "epoch": 14.41546762589928, "step": 16030, "torque_loss": 0.12591269612312317 }, { "epoch": 14.424460431654676, "grad_norm": 0.2967185080051422, "learning_rate": 8.763465865922934e-05, "loss": 0.0918, "step": 16040 }, { "action_loss": 0.03525706008076668, "epoch": 14.424460431654676, "step": 16040 }, { "epoch": 14.424460431654676, "step": 16040, "torque_loss": 0.19342410564422607 }, { "epoch": 14.433453237410072, "grad_norm": 0.4543176293373108, "learning_rate": 8.761650965481347e-05, "loss": 0.1092, "step": 16050 }, { "action_loss": 0.008904307149350643, "epoch": 14.433453237410072, "step": 16050 }, { "epoch": 14.433453237410072, "step": 16050, "torque_loss": 0.1366354078054428 }, { "epoch": 14.442446043165468, "grad_norm": 0.2993740141391754, "learning_rate": 8.759834922349516e-05, "loss": 0.101, "step": 16060 }, { "action_loss": 0.006066605448722839, "epoch": 14.442446043165468, "step": 16060 }, { "epoch": 14.442446043165468, "step": 16060, "torque_loss": 0.11405076831579208 }, { "epoch": 14.451438848920864, "grad_norm": 0.3236567974090576, "learning_rate": 8.758017737079108e-05, "loss": 0.104, "step": 16070 }, { "action_loss": 0.011426863260567188, "epoch": 14.451438848920864, "step": 16070 }, { "epoch": 14.451438848920864, "step": 16070, "torque_loss": 0.1430741846561432 }, { "epoch": 14.46043165467626, "grad_norm": 0.3245724141597748, "learning_rate": 8.756199410222137e-05, "loss": 0.0879, "step": 16080 }, { "action_loss": 0.007247494533658028, "epoch": 14.46043165467626, "step": 16080 }, { "epoch": 14.46043165467626, "step": 16080, "torque_loss": 0.1500893086194992 }, { "epoch": 14.469424460431656, "grad_norm": 0.3511224389076233, "learning_rate": 8.754379942330963e-05, "loss": 0.1043, "step": 16090 }, { "action_loss": 0.006345722824335098, "epoch": 14.469424460431656, "step": 16090 }, { "epoch": 14.469424460431656, "step": 16090, "torque_loss": 0.11746034026145935 }, { "epoch": 14.47841726618705, "grad_norm": 0.3114928603172302, "learning_rate": 8.75255933395829e-05, "loss": 0.0957, "step": 16100 }, { "action_loss": 0.008826692588627338, "epoch": 14.47841726618705, "step": 16100 }, { "epoch": 14.47841726618705, "step": 16100, "torque_loss": 0.23313641548156738 }, { "epoch": 14.487410071942445, "grad_norm": 0.41592586040496826, "learning_rate": 8.750737585657171e-05, "loss": 0.11, "step": 16110 }, { "action_loss": 0.00617488706484437, "epoch": 14.487410071942445, "step": 16110 }, { "epoch": 14.487410071942445, "step": 16110, "torque_loss": 0.15072931349277496 }, { "epoch": 14.496402877697841, "grad_norm": 0.2984028458595276, "learning_rate": 8.748914697981008e-05, "loss": 0.103, "step": 16120 }, { "action_loss": 0.011826661415398121, "epoch": 14.496402877697841, "step": 16120 }, { "epoch": 14.496402877697841, "step": 16120, "torque_loss": 0.19371740520000458 }, { "epoch": 14.505395683453237, "grad_norm": 0.3400440514087677, "learning_rate": 8.747090671483542e-05, "loss": 0.105, "step": 16130 }, { "action_loss": 0.00353887933306396, "epoch": 14.505395683453237, "step": 16130 }, { "epoch": 14.505395683453237, "step": 16130, "torque_loss": 0.09752616286277771 }, { "epoch": 14.514388489208633, "grad_norm": 0.35145509243011475, "learning_rate": 8.745265506718869e-05, "loss": 0.0829, "step": 16140 }, { "action_loss": 0.004835423082113266, "epoch": 14.514388489208633, "step": 16140 }, { "epoch": 14.514388489208633, "step": 16140, "torque_loss": 0.09773268550634384 }, { "epoch": 14.523381294964029, "grad_norm": 0.33682581782341003, "learning_rate": 8.74343920424142e-05, "loss": 0.0938, "step": 16150 }, { "action_loss": 0.0132276751101017, "epoch": 14.523381294964029, "step": 16150 }, { "epoch": 14.523381294964029, "step": 16150, "torque_loss": 0.208689883351326 }, { "epoch": 14.532374100719425, "grad_norm": 0.37448474764823914, "learning_rate": 8.741611764605982e-05, "loss": 0.1008, "step": 16160 }, { "action_loss": 0.0057658725418150425, "epoch": 14.532374100719425, "step": 16160 }, { "epoch": 14.532374100719425, "step": 16160, "torque_loss": 0.16050265729427338 }, { "epoch": 14.54136690647482, "grad_norm": 0.2921804189682007, "learning_rate": 8.739783188367682e-05, "loss": 0.0995, "step": 16170 }, { "action_loss": 0.004367648158222437, "epoch": 14.54136690647482, "step": 16170 }, { "epoch": 14.54136690647482, "step": 16170, "torque_loss": 0.10662122815847397 }, { "epoch": 14.550359712230216, "grad_norm": 0.3797454833984375, "learning_rate": 8.737953476081991e-05, "loss": 0.086, "step": 16180 }, { "action_loss": 0.006111775059252977, "epoch": 14.550359712230216, "step": 16180 }, { "epoch": 14.550359712230216, "step": 16180, "torque_loss": 0.1019473746418953 }, { "epoch": 14.559352517985612, "grad_norm": 0.2977198660373688, "learning_rate": 8.73612262830473e-05, "loss": 0.1021, "step": 16190 }, { "action_loss": 0.0057187494821846485, "epoch": 14.559352517985612, "step": 16190 }, { "epoch": 14.559352517985612, "step": 16190, "torque_loss": 0.13038252294063568 }, { "epoch": 14.568345323741006, "grad_norm": 0.34788504242897034, "learning_rate": 8.734290645592061e-05, "loss": 0.0789, "step": 16200 }, { "action_loss": 0.007318049669265747, "epoch": 14.568345323741006, "step": 16200 }, { "epoch": 14.568345323741006, "step": 16200, "torque_loss": 0.16979633271694183 }, { "epoch": 14.577338129496402, "grad_norm": 0.42898207902908325, "learning_rate": 8.732457528500493e-05, "loss": 0.0964, "step": 16210 }, { "action_loss": 0.006104330066591501, "epoch": 14.577338129496402, "step": 16210 }, { "epoch": 14.577338129496402, "step": 16210, "torque_loss": 0.156680166721344 }, { "epoch": 14.586330935251798, "grad_norm": 0.32109272480010986, "learning_rate": 8.730623277586875e-05, "loss": 0.1006, "step": 16220 }, { "action_loss": 0.004607511218637228, "epoch": 14.586330935251798, "step": 16220 }, { "epoch": 14.586330935251798, "step": 16220, "torque_loss": 0.11158571392297745 }, { "epoch": 14.595323741007194, "grad_norm": 0.41224926710128784, "learning_rate": 8.72878789340841e-05, "loss": 0.0947, "step": 16230 }, { "action_loss": 0.003716791048645973, "epoch": 14.595323741007194, "step": 16230 }, { "epoch": 14.595323741007194, "step": 16230, "torque_loss": 0.08241839706897736 }, { "epoch": 14.60431654676259, "grad_norm": 0.31495556235313416, "learning_rate": 8.726951376522635e-05, "loss": 0.1102, "step": 16240 }, { "action_loss": 0.007466034963726997, "epoch": 14.60431654676259, "step": 16240 }, { "epoch": 14.60431654676259, "step": 16240, "torque_loss": 0.1352781504392624 }, { "epoch": 14.613309352517986, "grad_norm": 0.4089420735836029, "learning_rate": 8.725113727487435e-05, "loss": 0.0968, "step": 16250 }, { "action_loss": 0.011516149155795574, "epoch": 14.613309352517986, "step": 16250 }, { "epoch": 14.613309352517986, "step": 16250, "torque_loss": 0.18878109753131866 }, { "epoch": 14.622302158273381, "grad_norm": 0.3129039406776428, "learning_rate": 8.723274946861042e-05, "loss": 0.0886, "step": 16260 }, { "action_loss": 0.0057628341019153595, "epoch": 14.622302158273381, "step": 16260 }, { "epoch": 14.622302158273381, "step": 16260, "torque_loss": 0.11628720909357071 }, { "epoch": 14.631294964028777, "grad_norm": 0.39221903681755066, "learning_rate": 8.721435035202026e-05, "loss": 0.0903, "step": 16270 }, { "action_loss": 0.019228054210543633, "epoch": 14.631294964028777, "step": 16270 }, { "epoch": 14.631294964028777, "step": 16270, "torque_loss": 0.2366974800825119 }, { "epoch": 14.640287769784173, "grad_norm": 0.35268229246139526, "learning_rate": 8.719593993069306e-05, "loss": 0.1207, "step": 16280 }, { "action_loss": 0.0062630437314510345, "epoch": 14.640287769784173, "step": 16280 }, { "epoch": 14.640287769784173, "step": 16280, "torque_loss": 0.1632343977689743 }, { "epoch": 14.649280575539569, "grad_norm": 0.38539984822273254, "learning_rate": 8.717751821022139e-05, "loss": 0.1036, "step": 16290 }, { "action_loss": 0.010852274484932423, "epoch": 14.649280575539569, "step": 16290 }, { "epoch": 14.649280575539569, "step": 16290, "torque_loss": 0.1709936112165451 }, { "epoch": 14.658273381294965, "grad_norm": 0.34522828459739685, "learning_rate": 8.715908519620134e-05, "loss": 0.1042, "step": 16300 }, { "action_loss": 0.01922071911394596, "epoch": 14.658273381294965, "step": 16300 }, { "epoch": 14.658273381294965, "step": 16300, "torque_loss": 0.18489818274974823 }, { "epoch": 14.667266187050359, "grad_norm": 0.3561491370201111, "learning_rate": 8.71406408942323e-05, "loss": 0.1036, "step": 16310 }, { "action_loss": 0.011750482954084873, "epoch": 14.667266187050359, "step": 16310 }, { "epoch": 14.667266187050359, "step": 16310, "torque_loss": 0.15445232391357422 }, { "epoch": 14.676258992805755, "grad_norm": 0.32276034355163574, "learning_rate": 8.712218530991723e-05, "loss": 0.0889, "step": 16320 }, { "action_loss": 0.009452980943024158, "epoch": 14.676258992805755, "step": 16320 }, { "epoch": 14.676258992805755, "step": 16320, "torque_loss": 0.15728643536567688 }, { "epoch": 14.68525179856115, "grad_norm": 0.38490965962409973, "learning_rate": 8.710371844886241e-05, "loss": 0.096, "step": 16330 }, { "action_loss": 0.025822937488555908, "epoch": 14.68525179856115, "step": 16330 }, { "epoch": 14.68525179856115, "step": 16330, "torque_loss": 0.22127707302570343 }, { "epoch": 14.694244604316546, "grad_norm": 0.35878226161003113, "learning_rate": 8.708524031667758e-05, "loss": 0.1007, "step": 16340 }, { "action_loss": 0.017210310325026512, "epoch": 14.694244604316546, "step": 16340 }, { "epoch": 14.694244604316546, "step": 16340, "torque_loss": 0.18911810219287872 }, { "epoch": 14.703237410071942, "grad_norm": 0.3367069363594055, "learning_rate": 8.706675091897592e-05, "loss": 0.1042, "step": 16350 }, { "action_loss": 0.009303580038249493, "epoch": 14.703237410071942, "step": 16350 }, { "epoch": 14.703237410071942, "step": 16350, "torque_loss": 0.15205711126327515 }, { "epoch": 14.712230215827338, "grad_norm": 0.3488940894603729, "learning_rate": 8.704825026137404e-05, "loss": 0.0958, "step": 16360 }, { "action_loss": 0.009793036617338657, "epoch": 14.712230215827338, "step": 16360 }, { "epoch": 14.712230215827338, "step": 16360, "torque_loss": 0.17189399898052216 }, { "epoch": 14.721223021582734, "grad_norm": 0.3429363965988159, "learning_rate": 8.702973834949192e-05, "loss": 0.1054, "step": 16370 }, { "action_loss": 0.007800595369189978, "epoch": 14.721223021582734, "step": 16370 }, { "epoch": 14.721223021582734, "step": 16370, "torque_loss": 0.13267454504966736 }, { "epoch": 14.73021582733813, "grad_norm": 0.2736717164516449, "learning_rate": 8.701121518895301e-05, "loss": 0.096, "step": 16380 }, { "action_loss": 0.013552725315093994, "epoch": 14.73021582733813, "step": 16380 }, { "epoch": 14.73021582733813, "step": 16380, "torque_loss": 0.18037302792072296 }, { "epoch": 14.739208633093526, "grad_norm": 0.3673611879348755, "learning_rate": 8.699268078538414e-05, "loss": 0.1059, "step": 16390 }, { "action_loss": 0.003346705110743642, "epoch": 14.739208633093526, "step": 16390 }, { "epoch": 14.739208633093526, "step": 16390, "torque_loss": 0.10038000345230103 }, { "epoch": 14.748201438848922, "grad_norm": 0.32022222876548767, "learning_rate": 8.69741351444156e-05, "loss": 0.0884, "step": 16400 }, { "action_loss": 0.01184286642819643, "epoch": 14.748201438848922, "step": 16400 }, { "epoch": 14.748201438848922, "step": 16400, "torque_loss": 0.16821861267089844 }, { "epoch": 14.757194244604317, "grad_norm": 0.3943643867969513, "learning_rate": 8.695557827168101e-05, "loss": 0.097, "step": 16410 }, { "action_loss": 0.006529305130243301, "epoch": 14.757194244604317, "step": 16410 }, { "epoch": 14.757194244604317, "step": 16410, "torque_loss": 0.15421628952026367 }, { "epoch": 14.766187050359711, "grad_norm": 0.32331007719039917, "learning_rate": 8.693701017281753e-05, "loss": 0.0939, "step": 16420 }, { "action_loss": 0.010346896015107632, "epoch": 14.766187050359711, "step": 16420 }, { "epoch": 14.766187050359711, "step": 16420, "torque_loss": 0.21222706139087677 }, { "epoch": 14.775179856115107, "grad_norm": 0.2932223677635193, "learning_rate": 8.691843085346563e-05, "loss": 0.1159, "step": 16430 }, { "action_loss": 0.008898457512259483, "epoch": 14.775179856115107, "step": 16430 }, { "epoch": 14.775179856115107, "step": 16430, "torque_loss": 0.14345715939998627 }, { "epoch": 14.784172661870503, "grad_norm": 0.31091591715812683, "learning_rate": 8.689984031926919e-05, "loss": 0.0944, "step": 16440 }, { "action_loss": 0.005393683444708586, "epoch": 14.784172661870503, "step": 16440 }, { "epoch": 14.784172661870503, "step": 16440, "torque_loss": 0.1417144387960434 }, { "epoch": 14.793165467625899, "grad_norm": 0.3828759789466858, "learning_rate": 8.688123857587555e-05, "loss": 0.0926, "step": 16450 }, { "action_loss": 0.004961395636200905, "epoch": 14.793165467625899, "step": 16450 }, { "epoch": 14.793165467625899, "step": 16450, "torque_loss": 0.1271476000547409 }, { "epoch": 14.802158273381295, "grad_norm": 0.36005303263664246, "learning_rate": 8.686262562893544e-05, "loss": 0.1139, "step": 16460 }, { "action_loss": 0.008170916698873043, "epoch": 14.802158273381295, "step": 16460 }, { "epoch": 14.802158273381295, "step": 16460, "torque_loss": 0.18481403589248657 }, { "epoch": 14.81115107913669, "grad_norm": 0.4209018051624298, "learning_rate": 8.684400148410294e-05, "loss": 0.0921, "step": 16470 }, { "action_loss": 0.003288567066192627, "epoch": 14.81115107913669, "step": 16470 }, { "epoch": 14.81115107913669, "step": 16470, "torque_loss": 0.1236434057354927 }, { "epoch": 14.820143884892087, "grad_norm": 0.3059574365615845, "learning_rate": 8.682536614703562e-05, "loss": 0.1001, "step": 16480 }, { "action_loss": 0.009214325807988644, "epoch": 14.820143884892087, "step": 16480 }, { "epoch": 14.820143884892087, "step": 16480, "torque_loss": 0.18706005811691284 }, { "epoch": 14.829136690647482, "grad_norm": 0.3221275508403778, "learning_rate": 8.680671962339437e-05, "loss": 0.0955, "step": 16490 }, { "action_loss": 0.00900799036026001, "epoch": 14.829136690647482, "step": 16490 }, { "epoch": 14.829136690647482, "step": 16490, "torque_loss": 0.17535729706287384 }, { "epoch": 14.838129496402878, "grad_norm": 0.32672640681266785, "learning_rate": 8.678806191884352e-05, "loss": 0.0983, "step": 16500 }, { "action_loss": 0.009570221416652203, "epoch": 14.838129496402878, "step": 16500 }, { "epoch": 14.838129496402878, "step": 16500, "torque_loss": 0.1564662903547287 }, { "epoch": 14.847122302158274, "grad_norm": 0.3704361021518707, "learning_rate": 8.67693930390508e-05, "loss": 0.0906, "step": 16510 }, { "action_loss": 0.012688924558460712, "epoch": 14.847122302158274, "step": 16510 }, { "epoch": 14.847122302158274, "step": 16510, "torque_loss": 0.1622074395418167 }, { "epoch": 14.85611510791367, "grad_norm": 0.29647892713546753, "learning_rate": 8.67507129896873e-05, "loss": 0.0939, "step": 16520 }, { "action_loss": 0.012904896400868893, "epoch": 14.85611510791367, "step": 16520 }, { "epoch": 14.85611510791367, "step": 16520, "torque_loss": 0.22992102801799774 }, { "epoch": 14.865107913669064, "grad_norm": 0.34846705198287964, "learning_rate": 8.673202177642757e-05, "loss": 0.1217, "step": 16530 }, { "action_loss": 0.01335290540009737, "epoch": 14.865107913669064, "step": 16530 }, { "epoch": 14.865107913669064, "step": 16530, "torque_loss": 0.20528407394886017 }, { "epoch": 14.87410071942446, "grad_norm": 0.36827266216278076, "learning_rate": 8.671331940494945e-05, "loss": 0.1082, "step": 16540 }, { "action_loss": 0.006424013990908861, "epoch": 14.87410071942446, "step": 16540 }, { "epoch": 14.87410071942446, "step": 16540, "torque_loss": 0.21329987049102783 }, { "epoch": 14.883093525179856, "grad_norm": 0.36213746666908264, "learning_rate": 8.669460588093427e-05, "loss": 0.1017, "step": 16550 }, { "action_loss": 0.006824204232543707, "epoch": 14.883093525179856, "step": 16550 }, { "epoch": 14.883093525179856, "step": 16550, "torque_loss": 0.17867867648601532 }, { "epoch": 14.892086330935252, "grad_norm": 0.28353041410446167, "learning_rate": 8.667588121006667e-05, "loss": 0.0892, "step": 16560 }, { "action_loss": 0.03262163698673248, "epoch": 14.892086330935252, "step": 16560 }, { "epoch": 14.892086330935252, "step": 16560, "torque_loss": 0.2330169677734375 }, { "epoch": 14.901079136690647, "grad_norm": 0.430093377828598, "learning_rate": 8.665714539803475e-05, "loss": 0.1061, "step": 16570 }, { "action_loss": 0.007505720015615225, "epoch": 14.901079136690647, "step": 16570 }, { "epoch": 14.901079136690647, "step": 16570, "torque_loss": 0.14936129748821259 }, { "epoch": 14.910071942446043, "grad_norm": 0.3379278779029846, "learning_rate": 8.663839845052993e-05, "loss": 0.0916, "step": 16580 }, { "action_loss": 0.008809168823063374, "epoch": 14.910071942446043, "step": 16580 }, { "epoch": 14.910071942446043, "step": 16580, "torque_loss": 0.1119552031159401 }, { "epoch": 14.91906474820144, "grad_norm": 0.4468376636505127, "learning_rate": 8.661964037324703e-05, "loss": 0.1064, "step": 16590 }, { "action_loss": 0.01657225750386715, "epoch": 14.91906474820144, "step": 16590 }, { "epoch": 14.91906474820144, "step": 16590, "torque_loss": 0.16763173043727875 }, { "epoch": 14.928057553956835, "grad_norm": 0.313776433467865, "learning_rate": 8.660087117188427e-05, "loss": 0.0758, "step": 16600 }, { "action_loss": 0.009560068137943745, "epoch": 14.928057553956835, "step": 16600 }, { "epoch": 14.928057553956835, "step": 16600, "torque_loss": 0.15997834503650665 }, { "epoch": 14.93705035971223, "grad_norm": 0.3691636323928833, "learning_rate": 8.658209085214325e-05, "loss": 0.1032, "step": 16610 }, { "action_loss": 0.0090734688565135, "epoch": 14.93705035971223, "step": 16610 }, { "epoch": 14.93705035971223, "step": 16610, "torque_loss": 0.15387053787708282 }, { "epoch": 14.946043165467627, "grad_norm": 0.3359665274620056, "learning_rate": 8.656329941972891e-05, "loss": 0.105, "step": 16620 }, { "action_loss": 0.006044917274266481, "epoch": 14.946043165467627, "step": 16620 }, { "epoch": 14.946043165467627, "step": 16620, "torque_loss": 0.1133415624499321 }, { "epoch": 14.95503597122302, "grad_norm": 0.26626914739608765, "learning_rate": 8.654449688034963e-05, "loss": 0.0996, "step": 16630 }, { "action_loss": 0.010913987644016743, "epoch": 14.95503597122302, "step": 16630 }, { "epoch": 14.95503597122302, "step": 16630, "torque_loss": 0.1623622179031372 }, { "epoch": 14.964028776978417, "grad_norm": 0.4118419587612152, "learning_rate": 8.652568323971706e-05, "loss": 0.1067, "step": 16640 }, { "action_loss": 0.0040929969400167465, "epoch": 14.964028776978417, "step": 16640 }, { "epoch": 14.964028776978417, "step": 16640, "torque_loss": 0.09651273488998413 }, { "epoch": 14.973021582733812, "grad_norm": 0.39644676446914673, "learning_rate": 8.650685850354636e-05, "loss": 0.0959, "step": 16650 }, { "action_loss": 0.009887688793241978, "epoch": 14.973021582733812, "step": 16650 }, { "epoch": 14.973021582733812, "step": 16650, "torque_loss": 0.17583268880844116 }, { "epoch": 14.982014388489208, "grad_norm": 0.45601171255111694, "learning_rate": 8.648802267755593e-05, "loss": 0.1101, "step": 16660 }, { "action_loss": 0.002986249513924122, "epoch": 14.982014388489208, "step": 16660 }, { "epoch": 14.982014388489208, "step": 16660, "torque_loss": 0.10640457272529602 }, { "epoch": 14.991007194244604, "grad_norm": 0.3600524961948395, "learning_rate": 8.646917576746764e-05, "loss": 0.0977, "step": 16670 }, { "action_loss": 0.007027377840131521, "epoch": 14.991007194244604, "step": 16670 }, { "epoch": 14.991007194244604, "step": 16670, "torque_loss": 0.11659836769104004 }, { "epoch": 15.0, "grad_norm": 0.37709102034568787, "learning_rate": 8.645031777900666e-05, "loss": 0.0919, "step": 16680 }, { "action_loss": 0.008418664336204529, "epoch": 15.0, "step": 16680 }, { "epoch": 15.0, "step": 16680, "torque_loss": 0.10247156769037247 }, { "epoch": 15.008992805755396, "grad_norm": 0.4617144763469696, "learning_rate": 8.643144871790154e-05, "loss": 0.1098, "step": 16690 }, { "action_loss": 0.009678434580564499, "epoch": 15.008992805755396, "step": 16690 }, { "epoch": 15.008992805755396, "step": 16690, "torque_loss": 0.16480308771133423 }, { "epoch": 15.017985611510792, "grad_norm": 0.2956034243106842, "learning_rate": 8.641256858988424e-05, "loss": 0.1016, "step": 16700 }, { "action_loss": 0.006030881777405739, "epoch": 15.017985611510792, "step": 16700 }, { "epoch": 15.017985611510792, "step": 16700, "torque_loss": 0.10424315929412842 }, { "epoch": 15.026978417266188, "grad_norm": 0.4319424033164978, "learning_rate": 8.639367740069e-05, "loss": 0.0944, "step": 16710 }, { "action_loss": 0.012660913169384003, "epoch": 15.026978417266188, "step": 16710 }, { "epoch": 15.026978417266188, "step": 16710, "torque_loss": 0.19839151203632355 }, { "epoch": 15.035971223021583, "grad_norm": 0.3626062870025635, "learning_rate": 8.63747751560575e-05, "loss": 0.1058, "step": 16720 }, { "action_loss": 0.004236577078700066, "epoch": 15.035971223021583, "step": 16720 }, { "epoch": 15.035971223021583, "step": 16720, "torque_loss": 0.08259477466344833 }, { "epoch": 15.04496402877698, "grad_norm": 0.35684850811958313, "learning_rate": 8.635586186172871e-05, "loss": 0.0853, "step": 16730 }, { "action_loss": 0.007022007834166288, "epoch": 15.04496402877698, "step": 16730 }, { "epoch": 15.04496402877698, "step": 16730, "torque_loss": 0.1338946670293808 }, { "epoch": 15.053956834532373, "grad_norm": 0.3560030162334442, "learning_rate": 8.633693752344902e-05, "loss": 0.0902, "step": 16740 }, { "action_loss": 0.009748854674398899, "epoch": 15.053956834532373, "step": 16740 }, { "epoch": 15.053956834532373, "step": 16740, "torque_loss": 0.092617928981781 }, { "epoch": 15.06294964028777, "grad_norm": 0.34956109523773193, "learning_rate": 8.631800214696713e-05, "loss": 0.0959, "step": 16750 }, { "action_loss": 0.007427711505442858, "epoch": 15.06294964028777, "step": 16750 }, { "epoch": 15.06294964028777, "step": 16750, "torque_loss": 0.1357688456773758 }, { "epoch": 15.071942446043165, "grad_norm": 0.44587424397468567, "learning_rate": 8.629905573803511e-05, "loss": 0.1023, "step": 16760 }, { "action_loss": 0.011581706814467907, "epoch": 15.071942446043165, "step": 16760 }, { "epoch": 15.071942446043165, "step": 16760, "torque_loss": 0.1512168049812317 }, { "epoch": 15.08093525179856, "grad_norm": 0.35843104124069214, "learning_rate": 8.628009830240839e-05, "loss": 0.1001, "step": 16770 }, { "action_loss": 0.004437328781932592, "epoch": 15.08093525179856, "step": 16770 }, { "epoch": 15.08093525179856, "step": 16770, "torque_loss": 0.11097625643014908 }, { "epoch": 15.089928057553957, "grad_norm": 0.5683722496032715, "learning_rate": 8.626112984584571e-05, "loss": 0.1011, "step": 16780 }, { "action_loss": 0.006302524358034134, "epoch": 15.089928057553957, "step": 16780 }, { "epoch": 15.089928057553957, "step": 16780, "torque_loss": 0.08632073551416397 }, { "epoch": 15.098920863309353, "grad_norm": 0.3474523723125458, "learning_rate": 8.62421503741092e-05, "loss": 0.0932, "step": 16790 }, { "action_loss": 0.010713701136410236, "epoch": 15.098920863309353, "step": 16790 }, { "epoch": 15.098920863309353, "step": 16790, "torque_loss": 0.09580516070127487 }, { "epoch": 15.107913669064748, "grad_norm": 0.23156312108039856, "learning_rate": 8.622315989296432e-05, "loss": 0.0767, "step": 16800 }, { "action_loss": 0.005144704598933458, "epoch": 15.107913669064748, "step": 16800 }, { "epoch": 15.107913669064748, "step": 16800, "torque_loss": 0.14324095845222473 }, { "epoch": 15.116906474820144, "grad_norm": 0.35685354471206665, "learning_rate": 8.62041584081799e-05, "loss": 0.0999, "step": 16810 }, { "action_loss": 0.012873713858425617, "epoch": 15.116906474820144, "step": 16810 }, { "epoch": 15.116906474820144, "step": 16810, "torque_loss": 0.13793039321899414 }, { "epoch": 15.12589928057554, "grad_norm": 0.35007357597351074, "learning_rate": 8.618514592552807e-05, "loss": 0.1001, "step": 16820 }, { "action_loss": 0.008408511988818645, "epoch": 15.12589928057554, "step": 16820 }, { "epoch": 15.12589928057554, "step": 16820, "torque_loss": 0.11470388621091843 }, { "epoch": 15.134892086330936, "grad_norm": 0.23976853489875793, "learning_rate": 8.616612245078431e-05, "loss": 0.089, "step": 16830 }, { "action_loss": 0.01178634911775589, "epoch": 15.134892086330936, "step": 16830 }, { "epoch": 15.134892086330936, "step": 16830, "torque_loss": 0.17482508718967438 }, { "epoch": 15.14388489208633, "grad_norm": 0.29722920060157776, "learning_rate": 8.614708798972746e-05, "loss": 0.0948, "step": 16840 }, { "action_loss": 0.010563231073319912, "epoch": 15.14388489208633, "step": 16840 }, { "epoch": 15.14388489208633, "step": 16840, "torque_loss": 0.1295812427997589 }, { "epoch": 15.152877697841726, "grad_norm": 0.3074696958065033, "learning_rate": 8.61280425481397e-05, "loss": 0.0897, "step": 16850 }, { "action_loss": 0.014150458388030529, "epoch": 15.152877697841726, "step": 16850 }, { "epoch": 15.152877697841726, "step": 16850, "torque_loss": 0.17635639011859894 }, { "epoch": 15.161870503597122, "grad_norm": 0.3947659432888031, "learning_rate": 8.61089861318065e-05, "loss": 0.0978, "step": 16860 }, { "action_loss": 0.008271283470094204, "epoch": 15.161870503597122, "step": 16860 }, { "epoch": 15.161870503597122, "step": 16860, "torque_loss": 0.09400469064712524 }, { "epoch": 15.170863309352518, "grad_norm": 0.2647205591201782, "learning_rate": 8.608991874651673e-05, "loss": 0.0804, "step": 16870 }, { "action_loss": 0.012582424096763134, "epoch": 15.170863309352518, "step": 16870 }, { "epoch": 15.170863309352518, "step": 16870, "torque_loss": 0.2144433856010437 }, { "epoch": 15.179856115107913, "grad_norm": 0.30887389183044434, "learning_rate": 8.607084039806255e-05, "loss": 0.0966, "step": 16880 }, { "action_loss": 0.020443463698029518, "epoch": 15.179856115107913, "step": 16880 }, { "epoch": 15.179856115107913, "step": 16880, "torque_loss": 0.15872685611248016 }, { "epoch": 15.18884892086331, "grad_norm": 0.32321780920028687, "learning_rate": 8.605175109223944e-05, "loss": 0.0914, "step": 16890 }, { "action_loss": 0.010436917655169964, "epoch": 15.18884892086331, "step": 16890 }, { "epoch": 15.18884892086331, "step": 16890, "torque_loss": 0.21394950151443481 }, { "epoch": 15.197841726618705, "grad_norm": 0.3124101459980011, "learning_rate": 8.603265083484624e-05, "loss": 0.0952, "step": 16900 }, { "action_loss": 0.005208803340792656, "epoch": 15.197841726618705, "step": 16900 }, { "epoch": 15.197841726618705, "step": 16900, "torque_loss": 0.14726336300373077 }, { "epoch": 15.206834532374101, "grad_norm": 0.3134666383266449, "learning_rate": 8.60135396316851e-05, "loss": 0.0838, "step": 16910 }, { "action_loss": 0.011061638593673706, "epoch": 15.206834532374101, "step": 16910 }, { "epoch": 15.206834532374101, "step": 16910, "torque_loss": 0.14032356441020966 }, { "epoch": 15.215827338129497, "grad_norm": 0.378640353679657, "learning_rate": 8.599441748856152e-05, "loss": 0.088, "step": 16920 }, { "action_loss": 0.00957395602017641, "epoch": 15.215827338129497, "step": 16920 }, { "epoch": 15.215827338129497, "step": 16920, "torque_loss": 0.15123815834522247 }, { "epoch": 15.224820143884893, "grad_norm": 0.34241896867752075, "learning_rate": 8.597528441128427e-05, "loss": 0.0966, "step": 16930 }, { "action_loss": 0.004911485593765974, "epoch": 15.224820143884893, "step": 16930 }, { "epoch": 15.224820143884893, "step": 16930, "torque_loss": 0.1278604418039322 }, { "epoch": 15.233812949640289, "grad_norm": 0.3356908857822418, "learning_rate": 8.595614040566549e-05, "loss": 0.0908, "step": 16940 }, { "action_loss": 0.004629700910300016, "epoch": 15.233812949640289, "step": 16940 }, { "epoch": 15.233812949640289, "step": 16940, "torque_loss": 0.08807014673948288 }, { "epoch": 15.242805755395683, "grad_norm": 0.3535546660423279, "learning_rate": 8.593698547752063e-05, "loss": 0.0882, "step": 16950 }, { "action_loss": 0.007486786227673292, "epoch": 15.242805755395683, "step": 16950 }, { "epoch": 15.242805755395683, "step": 16950, "torque_loss": 0.10626635700464249 }, { "epoch": 15.251798561151078, "grad_norm": 0.35496997833251953, "learning_rate": 8.591781963266843e-05, "loss": 0.1131, "step": 16960 }, { "action_loss": 0.00472363131120801, "epoch": 15.251798561151078, "step": 16960 }, { "epoch": 15.251798561151078, "step": 16960, "torque_loss": 0.07803675532341003 }, { "epoch": 15.260791366906474, "grad_norm": 0.4145192801952362, "learning_rate": 8.5898642876931e-05, "loss": 0.0884, "step": 16970 }, { "action_loss": 0.02105519361793995, "epoch": 15.260791366906474, "step": 16970 }, { "epoch": 15.260791366906474, "step": 16970, "torque_loss": 0.2264745682477951 }, { "epoch": 15.26978417266187, "grad_norm": 0.28091704845428467, "learning_rate": 8.587945521613369e-05, "loss": 0.1158, "step": 16980 }, { "action_loss": 0.005886354949325323, "epoch": 15.26978417266187, "step": 16980 }, { "epoch": 15.26978417266187, "step": 16980, "torque_loss": 0.10753240436315536 }, { "epoch": 15.278776978417266, "grad_norm": 0.3836565911769867, "learning_rate": 8.586025665610524e-05, "loss": 0.0857, "step": 16990 }, { "action_loss": 0.0065015689469873905, "epoch": 15.278776978417266, "step": 16990 }, { "epoch": 15.278776978417266, "step": 16990, "torque_loss": 0.10486254841089249 }, { "epoch": 15.287769784172662, "grad_norm": 0.3388010561466217, "learning_rate": 8.584104720267765e-05, "loss": 0.095, "step": 17000 }, { "action_loss": 0.010960171930491924, "epoch": 15.287769784172662, "step": 17000 }, { "epoch": 15.287769784172662, "step": 17000, "torque_loss": 0.1597200334072113 }, { "epoch": 15.296762589928058, "grad_norm": 0.5090630054473877, "learning_rate": 8.582182686168625e-05, "loss": 0.0967, "step": 17010 }, { "action_loss": 0.009077257476747036, "epoch": 15.296762589928058, "step": 17010 }, { "epoch": 15.296762589928058, "step": 17010, "torque_loss": 0.16234807670116425 }, { "epoch": 15.305755395683454, "grad_norm": 0.4219261109828949, "learning_rate": 8.580259563896967e-05, "loss": 0.1022, "step": 17020 }, { "action_loss": 0.015699056908488274, "epoch": 15.305755395683454, "step": 17020 }, { "epoch": 15.305755395683454, "step": 17020, "torque_loss": 0.1467193216085434 }, { "epoch": 15.31474820143885, "grad_norm": 0.4231925308704376, "learning_rate": 8.578335354036983e-05, "loss": 0.1003, "step": 17030 }, { "action_loss": 0.010872592218220234, "epoch": 15.31474820143885, "step": 17030 }, { "epoch": 15.31474820143885, "step": 17030, "torque_loss": 0.16839368641376495 }, { "epoch": 15.323741007194245, "grad_norm": 0.4794038236141205, "learning_rate": 8.576410057173201e-05, "loss": 0.1037, "step": 17040 }, { "action_loss": 0.007851450704038143, "epoch": 15.323741007194245, "step": 17040 }, { "epoch": 15.323741007194245, "step": 17040, "torque_loss": 0.15362977981567383 }, { "epoch": 15.332733812949641, "grad_norm": 0.3682393431663513, "learning_rate": 8.574483673890474e-05, "loss": 0.1016, "step": 17050 }, { "action_loss": 0.0038450185675174, "epoch": 15.332733812949641, "step": 17050 }, { "epoch": 15.332733812949641, "step": 17050, "torque_loss": 0.06453949958086014 }, { "epoch": 15.341726618705035, "grad_norm": 0.3681195080280304, "learning_rate": 8.572556204773983e-05, "loss": 0.0902, "step": 17060 }, { "action_loss": 0.006405230611562729, "epoch": 15.341726618705035, "step": 17060 }, { "epoch": 15.341726618705035, "step": 17060, "torque_loss": 0.14569583535194397 }, { "epoch": 15.350719424460431, "grad_norm": 0.4216742515563965, "learning_rate": 8.570627650409246e-05, "loss": 0.0876, "step": 17070 }, { "action_loss": 0.003397206775844097, "epoch": 15.350719424460431, "step": 17070 }, { "epoch": 15.350719424460431, "step": 17070, "torque_loss": 0.14243097603321075 }, { "epoch": 15.359712230215827, "grad_norm": 0.3340856432914734, "learning_rate": 8.568698011382107e-05, "loss": 0.0931, "step": 17080 }, { "action_loss": 0.031386882066726685, "epoch": 15.359712230215827, "step": 17080 }, { "epoch": 15.359712230215827, "step": 17080, "torque_loss": 0.23324166238307953 }, { "epoch": 15.368705035971223, "grad_norm": 0.3394729793071747, "learning_rate": 8.566767288278738e-05, "loss": 0.1141, "step": 17090 }, { "action_loss": 0.004667505621910095, "epoch": 15.368705035971223, "step": 17090 }, { "epoch": 15.368705035971223, "step": 17090, "torque_loss": 0.10764571279287338 }, { "epoch": 15.377697841726619, "grad_norm": 0.3297779858112335, "learning_rate": 8.56483548168564e-05, "loss": 0.0985, "step": 17100 }, { "action_loss": 0.00447835261002183, "epoch": 15.377697841726619, "step": 17100 }, { "epoch": 15.377697841726619, "step": 17100, "torque_loss": 0.10905016213655472 }, { "epoch": 15.386690647482014, "grad_norm": 0.3146417438983917, "learning_rate": 8.562902592189648e-05, "loss": 0.0971, "step": 17110 }, { "action_loss": 0.005171519238501787, "epoch": 15.386690647482014, "step": 17110 }, { "epoch": 15.386690647482014, "step": 17110, "torque_loss": 0.14502549171447754 }, { "epoch": 15.39568345323741, "grad_norm": 0.3093469440937042, "learning_rate": 8.560968620377921e-05, "loss": 0.0861, "step": 17120 }, { "action_loss": 0.02466140128672123, "epoch": 15.39568345323741, "step": 17120 }, { "epoch": 15.39568345323741, "step": 17120, "torque_loss": 0.17312543094158173 }, { "epoch": 15.404676258992806, "grad_norm": 0.41660961508750916, "learning_rate": 8.559033566837951e-05, "loss": 0.1026, "step": 17130 }, { "action_loss": 0.003588131396099925, "epoch": 15.404676258992806, "step": 17130 }, { "epoch": 15.404676258992806, "step": 17130, "torque_loss": 0.08617647737264633 }, { "epoch": 15.413669064748202, "grad_norm": 0.40588098764419556, "learning_rate": 8.557097432157551e-05, "loss": 0.1014, "step": 17140 }, { "action_loss": 0.01370866596698761, "epoch": 15.413669064748202, "step": 17140 }, { "epoch": 15.413669064748202, "step": 17140, "torque_loss": 0.136003777384758 }, { "epoch": 15.422661870503598, "grad_norm": 0.2757381796836853, "learning_rate": 8.555160216924872e-05, "loss": 0.0859, "step": 17150 }, { "action_loss": 0.01038939505815506, "epoch": 15.422661870503598, "step": 17150 }, { "epoch": 15.422661870503598, "step": 17150, "torque_loss": 0.1530652791261673 }, { "epoch": 15.431654676258994, "grad_norm": 0.3159751296043396, "learning_rate": 8.55322192172839e-05, "loss": 0.0963, "step": 17160 }, { "action_loss": 0.006056845188140869, "epoch": 15.431654676258994, "step": 17160 }, { "epoch": 15.431654676258994, "step": 17160, "torque_loss": 0.11791235208511353 }, { "epoch": 15.440647482014388, "grad_norm": 0.2977888286113739, "learning_rate": 8.551282547156902e-05, "loss": 0.0803, "step": 17170 }, { "action_loss": 0.01754733733832836, "epoch": 15.440647482014388, "step": 17170 }, { "epoch": 15.440647482014388, "step": 17170, "torque_loss": 0.15369121730327606 }, { "epoch": 15.449640287769784, "grad_norm": 0.40621012449264526, "learning_rate": 8.549342093799544e-05, "loss": 0.1073, "step": 17180 }, { "action_loss": 0.009433838538825512, "epoch": 15.449640287769784, "step": 17180 }, { "epoch": 15.449640287769784, "step": 17180, "torque_loss": 0.1279841512441635 }, { "epoch": 15.45863309352518, "grad_norm": 0.38290658593177795, "learning_rate": 8.547400562245773e-05, "loss": 0.1112, "step": 17190 }, { "action_loss": 0.019334616139531136, "epoch": 15.45863309352518, "step": 17190 }, { "epoch": 15.45863309352518, "step": 17190, "torque_loss": 0.181369349360466 }, { "epoch": 15.467625899280575, "grad_norm": 0.428832471370697, "learning_rate": 8.545457953085374e-05, "loss": 0.1, "step": 17200 }, { "action_loss": 0.007638821844011545, "epoch": 15.467625899280575, "step": 17200 }, { "epoch": 15.467625899280575, "step": 17200, "torque_loss": 0.11247783899307251 }, { "epoch": 15.476618705035971, "grad_norm": 0.30756887793540955, "learning_rate": 8.543514266908463e-05, "loss": 0.0938, "step": 17210 }, { "action_loss": 0.008212114684283733, "epoch": 15.476618705035971, "step": 17210 }, { "epoch": 15.476618705035971, "step": 17210, "torque_loss": 0.14361801743507385 }, { "epoch": 15.485611510791367, "grad_norm": 0.5021538734436035, "learning_rate": 8.541569504305478e-05, "loss": 0.1026, "step": 17220 }, { "action_loss": 0.009463152848184109, "epoch": 15.485611510791367, "step": 17220 }, { "epoch": 15.485611510791367, "step": 17220, "torque_loss": 0.1516999751329422 }, { "epoch": 15.494604316546763, "grad_norm": 0.33448097109794617, "learning_rate": 8.539623665867187e-05, "loss": 0.0996, "step": 17230 }, { "action_loss": 0.026900535449385643, "epoch": 15.494604316546763, "step": 17230 }, { "epoch": 15.494604316546763, "step": 17230, "torque_loss": 0.22403229773044586 }, { "epoch": 15.503597122302159, "grad_norm": 0.29953882098197937, "learning_rate": 8.537676752184685e-05, "loss": 0.1031, "step": 17240 }, { "action_loss": 0.0057909986935555935, "epoch": 15.503597122302159, "step": 17240 }, { "epoch": 15.503597122302159, "step": 17240, "torque_loss": 0.12709547579288483 }, { "epoch": 15.512589928057555, "grad_norm": 0.36035826802253723, "learning_rate": 8.53572876384939e-05, "loss": 0.0893, "step": 17250 }, { "action_loss": 0.006462485995143652, "epoch": 15.512589928057555, "step": 17250 }, { "epoch": 15.512589928057555, "step": 17250, "torque_loss": 0.14460064470767975 }, { "epoch": 15.52158273381295, "grad_norm": 0.3394506573677063, "learning_rate": 8.533779701453056e-05, "loss": 0.0968, "step": 17260 }, { "action_loss": 0.02828536182641983, "epoch": 15.52158273381295, "step": 17260 }, { "epoch": 15.52158273381295, "step": 17260, "torque_loss": 0.2179710865020752 }, { "epoch": 15.530575539568346, "grad_norm": 0.3534673750400543, "learning_rate": 8.53182956558775e-05, "loss": 0.1028, "step": 17270 }, { "action_loss": 0.0034122068900614977, "epoch": 15.530575539568346, "step": 17270 }, { "epoch": 15.530575539568346, "step": 17270, "torque_loss": 0.12828344106674194 }, { "epoch": 15.53956834532374, "grad_norm": 0.300591379404068, "learning_rate": 8.529878356845877e-05, "loss": 0.0884, "step": 17280 }, { "action_loss": 0.011678420007228851, "epoch": 15.53956834532374, "step": 17280 }, { "epoch": 15.53956834532374, "step": 17280, "torque_loss": 0.11121552437543869 }, { "epoch": 15.548561151079136, "grad_norm": 0.3693840503692627, "learning_rate": 8.527926075820158e-05, "loss": 0.1004, "step": 17290 }, { "action_loss": 0.00565572502091527, "epoch": 15.548561151079136, "step": 17290 }, { "epoch": 15.548561151079136, "step": 17290, "torque_loss": 0.08857652544975281 }, { "epoch": 15.557553956834532, "grad_norm": 0.34469032287597656, "learning_rate": 8.525972723103648e-05, "loss": 0.0952, "step": 17300 }, { "action_loss": 0.0064864023588597775, "epoch": 15.557553956834532, "step": 17300 }, { "epoch": 15.557553956834532, "step": 17300, "torque_loss": 0.1721632033586502 }, { "epoch": 15.566546762589928, "grad_norm": 0.3140944242477417, "learning_rate": 8.524018299289722e-05, "loss": 0.0953, "step": 17310 }, { "action_loss": 0.00639917841181159, "epoch": 15.566546762589928, "step": 17310 }, { "epoch": 15.566546762589928, "step": 17310, "torque_loss": 0.14545613527297974 }, { "epoch": 15.575539568345324, "grad_norm": 0.41602981090545654, "learning_rate": 8.522062804972083e-05, "loss": 0.1005, "step": 17320 }, { "action_loss": 0.012696191668510437, "epoch": 15.575539568345324, "step": 17320 }, { "epoch": 15.575539568345324, "step": 17320, "torque_loss": 0.1539711356163025 }, { "epoch": 15.58453237410072, "grad_norm": 0.33943530917167664, "learning_rate": 8.520106240744759e-05, "loss": 0.0977, "step": 17330 }, { "action_loss": 0.006824089214205742, "epoch": 15.58453237410072, "step": 17330 }, { "epoch": 15.58453237410072, "step": 17330, "torque_loss": 0.1434280276298523 }, { "epoch": 15.593525179856115, "grad_norm": 0.44847846031188965, "learning_rate": 8.518148607202102e-05, "loss": 0.1002, "step": 17340 }, { "action_loss": 0.009254305623471737, "epoch": 15.593525179856115, "step": 17340 }, { "epoch": 15.593525179856115, "step": 17340, "torque_loss": 0.15874765813350677 }, { "epoch": 15.602517985611511, "grad_norm": 0.5197000503540039, "learning_rate": 8.51618990493879e-05, "loss": 0.1091, "step": 17350 }, { "action_loss": 0.009011749178171158, "epoch": 15.602517985611511, "step": 17350 }, { "epoch": 15.602517985611511, "step": 17350, "torque_loss": 0.1789020150899887 }, { "epoch": 15.611510791366907, "grad_norm": 0.4246801733970642, "learning_rate": 8.514230134549823e-05, "loss": 0.1118, "step": 17360 }, { "action_loss": 0.013334120623767376, "epoch": 15.611510791366907, "step": 17360 }, { "epoch": 15.611510791366907, "step": 17360, "torque_loss": 0.11663220077753067 }, { "epoch": 15.620503597122303, "grad_norm": 0.3288423418998718, "learning_rate": 8.51226929663053e-05, "loss": 0.0943, "step": 17370 }, { "action_loss": 0.014312241226434708, "epoch": 15.620503597122303, "step": 17370 }, { "epoch": 15.620503597122303, "step": 17370, "torque_loss": 0.16795708239078522 }, { "epoch": 15.629496402877697, "grad_norm": 0.4081410765647888, "learning_rate": 8.51030739177656e-05, "loss": 0.0999, "step": 17380 }, { "action_loss": 0.003903564065694809, "epoch": 15.629496402877697, "step": 17380 }, { "epoch": 15.629496402877697, "step": 17380, "torque_loss": 0.08167019486427307 }, { "epoch": 15.638489208633093, "grad_norm": 0.2784653604030609, "learning_rate": 8.508344420583889e-05, "loss": 0.0916, "step": 17390 }, { "action_loss": 0.014224521815776825, "epoch": 15.638489208633093, "step": 17390 }, { "epoch": 15.638489208633093, "step": 17390, "torque_loss": 0.17326776683330536 }, { "epoch": 15.647482014388489, "grad_norm": 0.34766885638237, "learning_rate": 8.506380383648816e-05, "loss": 0.0907, "step": 17400 }, { "action_loss": 0.0044665527530014515, "epoch": 15.647482014388489, "step": 17400 }, { "epoch": 15.647482014388489, "step": 17400, "torque_loss": 0.12572580575942993 }, { "epoch": 15.656474820143885, "grad_norm": 0.32047951221466064, "learning_rate": 8.504415281567963e-05, "loss": 0.0942, "step": 17410 }, { "action_loss": 0.0048827072605490685, "epoch": 15.656474820143885, "step": 17410 }, { "epoch": 15.656474820143885, "step": 17410, "torque_loss": 0.10901688784360886 }, { "epoch": 15.66546762589928, "grad_norm": 0.3772222399711609, "learning_rate": 8.502449114938275e-05, "loss": 0.0835, "step": 17420 }, { "action_loss": 0.006632177159190178, "epoch": 15.66546762589928, "step": 17420 }, { "epoch": 15.66546762589928, "step": 17420, "torque_loss": 0.1323806345462799 }, { "epoch": 15.674460431654676, "grad_norm": 0.3562246859073639, "learning_rate": 8.500481884357025e-05, "loss": 0.1069, "step": 17430 }, { "action_loss": 0.007589863147586584, "epoch": 15.674460431654676, "step": 17430 }, { "epoch": 15.674460431654676, "step": 17430, "torque_loss": 0.19428987801074982 }, { "epoch": 15.683453237410072, "grad_norm": 0.4545918107032776, "learning_rate": 8.498513590421801e-05, "loss": 0.1034, "step": 17440 }, { "action_loss": 0.024545514956116676, "epoch": 15.683453237410072, "step": 17440 }, { "epoch": 15.683453237410072, "step": 17440, "torque_loss": 0.2399936467409134 }, { "epoch": 15.692446043165468, "grad_norm": 0.44484901428222656, "learning_rate": 8.496544233730522e-05, "loss": 0.1005, "step": 17450 }, { "action_loss": 0.006299895700067282, "epoch": 15.692446043165468, "step": 17450 }, { "epoch": 15.692446043165468, "step": 17450, "torque_loss": 0.11153271049261093 }, { "epoch": 15.701438848920864, "grad_norm": 0.4141848087310791, "learning_rate": 8.494573814881426e-05, "loss": 0.0978, "step": 17460 }, { "action_loss": 0.00630519213154912, "epoch": 15.701438848920864, "step": 17460 }, { "epoch": 15.701438848920864, "step": 17460, "torque_loss": 0.1419588178396225 }, { "epoch": 15.71043165467626, "grad_norm": 0.3328392207622528, "learning_rate": 8.492602334473074e-05, "loss": 0.0865, "step": 17470 }, { "action_loss": 0.012746465392410755, "epoch": 15.71043165467626, "step": 17470 }, { "epoch": 15.71043165467626, "step": 17470, "torque_loss": 0.20740240812301636 }, { "epoch": 15.719424460431654, "grad_norm": 0.30846017599105835, "learning_rate": 8.49062979310435e-05, "loss": 0.0999, "step": 17480 }, { "action_loss": 0.014571924693882465, "epoch": 15.719424460431654, "step": 17480 }, { "epoch": 15.719424460431654, "step": 17480, "torque_loss": 0.20490920543670654 }, { "epoch": 15.72841726618705, "grad_norm": 0.33950984477996826, "learning_rate": 8.488656191374458e-05, "loss": 0.1015, "step": 17490 }, { "action_loss": 0.0058424449525773525, "epoch": 15.72841726618705, "step": 17490 }, { "epoch": 15.72841726618705, "step": 17490, "torque_loss": 0.12296068668365479 }, { "epoch": 15.737410071942445, "grad_norm": 0.3972071409225464, "learning_rate": 8.48668152988293e-05, "loss": 0.0944, "step": 17500 }, { "action_loss": 0.008950144983828068, "epoch": 15.737410071942445, "step": 17500 }, { "epoch": 15.737410071942445, "step": 17500, "torque_loss": 0.13642388582229614 }, { "epoch": 15.746402877697841, "grad_norm": 0.41441911458969116, "learning_rate": 8.484705809229612e-05, "loss": 0.0946, "step": 17510 }, { "action_loss": 0.0163198821246624, "epoch": 15.746402877697841, "step": 17510 }, { "epoch": 15.746402877697841, "step": 17510, "torque_loss": 0.183784618973732 }, { "epoch": 15.755395683453237, "grad_norm": 0.4480056166648865, "learning_rate": 8.482729030014677e-05, "loss": 0.1201, "step": 17520 }, { "action_loss": 0.004679942037910223, "epoch": 15.755395683453237, "step": 17520 }, { "epoch": 15.755395683453237, "step": 17520, "torque_loss": 0.12232580035924911 }, { "epoch": 15.764388489208633, "grad_norm": 0.2748914361000061, "learning_rate": 8.48075119283862e-05, "loss": 0.0896, "step": 17530 }, { "action_loss": 0.016605472192168236, "epoch": 15.764388489208633, "step": 17530 }, { "epoch": 15.764388489208633, "step": 17530, "torque_loss": 0.18674075603485107 }, { "epoch": 15.773381294964029, "grad_norm": 0.41867733001708984, "learning_rate": 8.478772298302254e-05, "loss": 0.0974, "step": 17540 }, { "action_loss": 0.010260100476443768, "epoch": 15.773381294964029, "step": 17540 }, { "epoch": 15.773381294964029, "step": 17540, "torque_loss": 0.14670313894748688 }, { "epoch": 15.782374100719425, "grad_norm": 0.3592161238193512, "learning_rate": 8.476792347006716e-05, "loss": 0.0982, "step": 17550 }, { "action_loss": 0.007487920578569174, "epoch": 15.782374100719425, "step": 17550 }, { "epoch": 15.782374100719425, "step": 17550, "torque_loss": 0.10760293155908585 }, { "epoch": 15.79136690647482, "grad_norm": 0.3922692835330963, "learning_rate": 8.474811339553462e-05, "loss": 0.0994, "step": 17560 }, { "action_loss": 0.015025896020233631, "epoch": 15.79136690647482, "step": 17560 }, { "epoch": 15.79136690647482, "step": 17560, "torque_loss": 0.18037092685699463 }, { "epoch": 15.800359712230216, "grad_norm": 0.4309299886226654, "learning_rate": 8.47282927654427e-05, "loss": 0.1289, "step": 17570 }, { "action_loss": 0.00699011841788888, "epoch": 15.800359712230216, "step": 17570 }, { "epoch": 15.800359712230216, "step": 17570, "torque_loss": 0.09389978647232056 }, { "epoch": 15.809352517985612, "grad_norm": 0.36905744671821594, "learning_rate": 8.470846158581238e-05, "loss": 0.0868, "step": 17580 }, { "action_loss": 0.010537461377680302, "epoch": 15.809352517985612, "step": 17580 }, { "epoch": 15.809352517985612, "step": 17580, "torque_loss": 0.1319836527109146 }, { "epoch": 15.818345323741006, "grad_norm": 0.3359370529651642, "learning_rate": 8.468861986266787e-05, "loss": 0.0986, "step": 17590 }, { "action_loss": 0.01639142818748951, "epoch": 15.818345323741006, "step": 17590 }, { "epoch": 15.818345323741006, "step": 17590, "torque_loss": 0.2229854017496109 }, { "epoch": 15.827338129496402, "grad_norm": 0.2988719642162323, "learning_rate": 8.466876760203654e-05, "loss": 0.1029, "step": 17600 }, { "action_loss": 0.039678845554590225, "epoch": 15.827338129496402, "step": 17600 }, { "epoch": 15.827338129496402, "step": 17600, "torque_loss": 0.2448241114616394 }, { "epoch": 15.836330935251798, "grad_norm": 0.28869256377220154, "learning_rate": 8.464890480994898e-05, "loss": 0.1049, "step": 17610 }, { "action_loss": 0.005435654427856207, "epoch": 15.836330935251798, "step": 17610 }, { "epoch": 15.836330935251798, "step": 17610, "torque_loss": 0.10816409438848495 }, { "epoch": 15.845323741007194, "grad_norm": 0.2972653806209564, "learning_rate": 8.462903149243899e-05, "loss": 0.0883, "step": 17620 }, { "action_loss": 0.005647073034197092, "epoch": 15.845323741007194, "step": 17620 }, { "epoch": 15.845323741007194, "step": 17620, "torque_loss": 0.08204897493124008 }, { "epoch": 15.85431654676259, "grad_norm": 0.31907761096954346, "learning_rate": 8.460914765554357e-05, "loss": 0.0829, "step": 17630 }, { "action_loss": 0.006901080254465342, "epoch": 15.85431654676259, "step": 17630 }, { "epoch": 15.85431654676259, "step": 17630, "torque_loss": 0.11281061172485352 }, { "epoch": 15.863309352517986, "grad_norm": 0.4035785496234894, "learning_rate": 8.458925330530288e-05, "loss": 0.1177, "step": 17640 }, { "action_loss": 0.004263522569090128, "epoch": 15.863309352517986, "step": 17640 }, { "epoch": 15.863309352517986, "step": 17640, "torque_loss": 0.1445612758398056 }, { "epoch": 15.872302158273381, "grad_norm": 0.41036128997802734, "learning_rate": 8.456934844776032e-05, "loss": 0.0943, "step": 17650 }, { "action_loss": 0.00436511030420661, "epoch": 15.872302158273381, "step": 17650 }, { "epoch": 15.872302158273381, "step": 17650, "torque_loss": 0.13009817898273468 }, { "epoch": 15.881294964028777, "grad_norm": 0.45035088062286377, "learning_rate": 8.454943308896246e-05, "loss": 0.102, "step": 17660 }, { "action_loss": 0.013982038013637066, "epoch": 15.881294964028777, "step": 17660 }, { "epoch": 15.881294964028777, "step": 17660, "torque_loss": 0.16498751938343048 }, { "epoch": 15.890287769784173, "grad_norm": 0.308251291513443, "learning_rate": 8.452950723495905e-05, "loss": 0.0853, "step": 17670 }, { "action_loss": 0.007274392526596785, "epoch": 15.890287769784173, "step": 17670 }, { "epoch": 15.890287769784173, "step": 17670, "torque_loss": 0.12842053174972534 }, { "epoch": 15.899280575539569, "grad_norm": 0.3261122703552246, "learning_rate": 8.450957089180303e-05, "loss": 0.0926, "step": 17680 }, { "action_loss": 0.005913691595196724, "epoch": 15.899280575539569, "step": 17680 }, { "epoch": 15.899280575539569, "step": 17680, "torque_loss": 0.09765087813138962 }, { "epoch": 15.908273381294965, "grad_norm": 0.41977590322494507, "learning_rate": 8.448962406555055e-05, "loss": 0.0946, "step": 17690 }, { "action_loss": 0.025533527135849, "epoch": 15.908273381294965, "step": 17690 }, { "epoch": 15.908273381294965, "step": 17690, "torque_loss": 0.24942786991596222 }, { "epoch": 15.917266187050359, "grad_norm": 0.3132895529270172, "learning_rate": 8.446966676226093e-05, "loss": 0.1149, "step": 17700 }, { "action_loss": 0.0048650773242115974, "epoch": 15.917266187050359, "step": 17700 }, { "epoch": 15.917266187050359, "step": 17700, "torque_loss": 0.0931301936507225 }, { "epoch": 15.926258992805755, "grad_norm": 0.3724968731403351, "learning_rate": 8.444969898799667e-05, "loss": 0.0985, "step": 17710 }, { "action_loss": 0.020648518577218056, "epoch": 15.926258992805755, "step": 17710 }, { "epoch": 15.926258992805755, "step": 17710, "torque_loss": 0.22119300067424774 }, { "epoch": 15.93525179856115, "grad_norm": 0.39822879433631897, "learning_rate": 8.442972074882343e-05, "loss": 0.0995, "step": 17720 }, { "action_loss": 0.013469687663018703, "epoch": 15.93525179856115, "step": 17720 }, { "epoch": 15.93525179856115, "step": 17720, "torque_loss": 0.1665130853652954 }, { "epoch": 15.944244604316546, "grad_norm": 0.3785090446472168, "learning_rate": 8.44097320508101e-05, "loss": 0.0921, "step": 17730 }, { "action_loss": 0.004060944076627493, "epoch": 15.944244604316546, "step": 17730 }, { "epoch": 15.944244604316546, "step": 17730, "torque_loss": 0.11423647403717041 }, { "epoch": 15.953237410071942, "grad_norm": 0.40815627574920654, "learning_rate": 8.43897329000287e-05, "loss": 0.0845, "step": 17740 }, { "action_loss": 0.016530994325876236, "epoch": 15.953237410071942, "step": 17740 }, { "epoch": 15.953237410071942, "step": 17740, "torque_loss": 0.19000305235385895 }, { "epoch": 15.962230215827338, "grad_norm": 0.2875150740146637, "learning_rate": 8.436972330255448e-05, "loss": 0.0962, "step": 17750 }, { "action_loss": 0.0049575804732739925, "epoch": 15.962230215827338, "step": 17750 }, { "epoch": 15.962230215827338, "step": 17750, "torque_loss": 0.1272655427455902 }, { "epoch": 15.971223021582734, "grad_norm": 0.4205852150917053, "learning_rate": 8.434970326446579e-05, "loss": 0.0997, "step": 17760 }, { "action_loss": 0.006076211575418711, "epoch": 15.971223021582734, "step": 17760 }, { "epoch": 15.971223021582734, "step": 17760, "torque_loss": 0.09798302501440048 }, { "epoch": 15.98021582733813, "grad_norm": 0.32479608058929443, "learning_rate": 8.432967279184418e-05, "loss": 0.0854, "step": 17770 }, { "action_loss": 0.018761076033115387, "epoch": 15.98021582733813, "step": 17770 }, { "epoch": 15.98021582733813, "step": 17770, "torque_loss": 0.14194543659687042 }, { "epoch": 15.989208633093526, "grad_norm": 0.3370347321033478, "learning_rate": 8.430963189077441e-05, "loss": 0.0863, "step": 17780 }, { "action_loss": 0.008534695021808147, "epoch": 15.989208633093526, "step": 17780 }, { "epoch": 15.989208633093526, "step": 17780, "torque_loss": 0.14305712282657623 }, { "epoch": 15.998201438848922, "grad_norm": 0.44612738490104675, "learning_rate": 8.428958056734437e-05, "loss": 0.0866, "step": 17790 }, { "action_loss": 0.013854645192623138, "epoch": 15.998201438848922, "step": 17790 }, { "epoch": 15.998201438848922, "step": 17790, "torque_loss": 0.1990109235048294 }, { "epoch": 16.007194244604317, "grad_norm": 0.4139278829097748, "learning_rate": 8.426951882764513e-05, "loss": 0.1121, "step": 17800 }, { "action_loss": 0.009226600639522076, "epoch": 16.007194244604317, "step": 17800 }, { "epoch": 16.007194244604317, "step": 17800, "torque_loss": 0.15181122720241547 }, { "epoch": 16.01618705035971, "grad_norm": 0.2992665767669678, "learning_rate": 8.424944667777089e-05, "loss": 0.0865, "step": 17810 }, { "action_loss": 0.00768201844766736, "epoch": 16.01618705035971, "step": 17810 }, { "epoch": 16.01618705035971, "step": 17810, "torque_loss": 0.13850602507591248 }, { "epoch": 16.02517985611511, "grad_norm": 0.367180198431015, "learning_rate": 8.422936412381905e-05, "loss": 0.093, "step": 17820 }, { "action_loss": 0.007041756063699722, "epoch": 16.02517985611511, "step": 17820 }, { "epoch": 16.02517985611511, "step": 17820, "torque_loss": 0.13384781777858734 }, { "epoch": 16.034172661870503, "grad_norm": 0.42099225521087646, "learning_rate": 8.420927117189017e-05, "loss": 0.0906, "step": 17830 }, { "action_loss": 0.010989166796207428, "epoch": 16.034172661870503, "step": 17830 }, { "epoch": 16.034172661870503, "step": 17830, "torque_loss": 0.20905464887619019 }, { "epoch": 16.0431654676259, "grad_norm": 0.3996615707874298, "learning_rate": 8.418916782808795e-05, "loss": 0.0979, "step": 17840 }, { "action_loss": 0.006604921072721481, "epoch": 16.0431654676259, "step": 17840 }, { "epoch": 16.0431654676259, "step": 17840, "torque_loss": 0.09801877290010452 }, { "epoch": 16.052158273381295, "grad_norm": 0.3156903386116028, "learning_rate": 8.416905409851926e-05, "loss": 0.0967, "step": 17850 }, { "action_loss": 0.005740627646446228, "epoch": 16.052158273381295, "step": 17850 }, { "epoch": 16.052158273381295, "step": 17850, "torque_loss": 0.127268448472023 }, { "epoch": 16.06115107913669, "grad_norm": 0.32371652126312256, "learning_rate": 8.41489299892941e-05, "loss": 0.086, "step": 17860 }, { "action_loss": 0.007881676778197289, "epoch": 16.06115107913669, "step": 17860 }, { "epoch": 16.06115107913669, "step": 17860, "torque_loss": 0.149736225605011 }, { "epoch": 16.070143884892087, "grad_norm": 0.2641940414905548, "learning_rate": 8.412879550652566e-05, "loss": 0.0932, "step": 17870 }, { "action_loss": 0.005252351518720388, "epoch": 16.070143884892087, "step": 17870 }, { "epoch": 16.070143884892087, "step": 17870, "torque_loss": 0.11864670366048813 }, { "epoch": 16.07913669064748, "grad_norm": 0.36438101530075073, "learning_rate": 8.410865065633029e-05, "loss": 0.0932, "step": 17880 }, { "action_loss": 0.004631788935512304, "epoch": 16.07913669064748, "step": 17880 }, { "epoch": 16.07913669064748, "step": 17880, "torque_loss": 0.09466791152954102 }, { "epoch": 16.08812949640288, "grad_norm": 0.2735467851161957, "learning_rate": 8.408849544482742e-05, "loss": 0.0955, "step": 17890 }, { "action_loss": 0.009512831456959248, "epoch": 16.08812949640288, "step": 17890 }, { "epoch": 16.08812949640288, "step": 17890, "torque_loss": 0.18206918239593506 }, { "epoch": 16.097122302158272, "grad_norm": 0.3802896738052368, "learning_rate": 8.406832987813968e-05, "loss": 0.0903, "step": 17900 }, { "action_loss": 0.015821708366274834, "epoch": 16.097122302158272, "step": 17900 }, { "epoch": 16.097122302158272, "step": 17900, "torque_loss": 0.16947348415851593 }, { "epoch": 16.10611510791367, "grad_norm": 0.38643786311149597, "learning_rate": 8.404815396239286e-05, "loss": 0.1002, "step": 17910 }, { "action_loss": 0.0050856140442192554, "epoch": 16.10611510791367, "step": 17910 }, { "epoch": 16.10611510791367, "step": 17910, "torque_loss": 0.12238087505102158 }, { "epoch": 16.115107913669064, "grad_norm": 0.29942697286605835, "learning_rate": 8.402796770371587e-05, "loss": 0.1006, "step": 17920 }, { "action_loss": 0.011456146836280823, "epoch": 16.115107913669064, "step": 17920 }, { "epoch": 16.115107913669064, "step": 17920, "torque_loss": 0.13275407254695892 }, { "epoch": 16.12410071942446, "grad_norm": 0.3272879123687744, "learning_rate": 8.400777110824071e-05, "loss": 0.0885, "step": 17930 }, { "action_loss": 0.014925467781722546, "epoch": 16.12410071942446, "step": 17930 }, { "epoch": 16.12410071942446, "step": 17930, "torque_loss": 0.15928204357624054 }, { "epoch": 16.133093525179856, "grad_norm": 0.44141048192977905, "learning_rate": 8.398756418210263e-05, "loss": 0.0896, "step": 17940 }, { "action_loss": 0.0037104852963238955, "epoch": 16.133093525179856, "step": 17940 }, { "epoch": 16.133093525179856, "step": 17940, "torque_loss": 0.11771080642938614 }, { "epoch": 16.142086330935253, "grad_norm": 0.26648133993148804, "learning_rate": 8.396734693143993e-05, "loss": 0.0813, "step": 17950 }, { "action_loss": 0.005563363432884216, "epoch": 16.142086330935253, "step": 17950 }, { "epoch": 16.142086330935253, "step": 17950, "torque_loss": 0.12722048163414001 }, { "epoch": 16.151079136690647, "grad_norm": 0.35478466749191284, "learning_rate": 8.39471193623941e-05, "loss": 0.1059, "step": 17960 }, { "action_loss": 0.00519063463434577, "epoch": 16.151079136690647, "step": 17960 }, { "epoch": 16.151079136690647, "step": 17960, "torque_loss": 0.10997128486633301 }, { "epoch": 16.16007194244604, "grad_norm": 0.415224552154541, "learning_rate": 8.392688148110974e-05, "loss": 0.1006, "step": 17970 }, { "action_loss": 0.004177522379904985, "epoch": 16.16007194244604, "step": 17970 }, { "epoch": 16.16007194244604, "step": 17970, "torque_loss": 0.1026163101196289 }, { "epoch": 16.16906474820144, "grad_norm": 0.33521464467048645, "learning_rate": 8.390663329373456e-05, "loss": 0.0914, "step": 17980 }, { "action_loss": 0.00800811406224966, "epoch": 16.16906474820144, "step": 17980 }, { "epoch": 16.16906474820144, "step": 17980, "torque_loss": 0.1676151305437088 }, { "epoch": 16.178057553956833, "grad_norm": 0.41242578625679016, "learning_rate": 8.388637480641944e-05, "loss": 0.1094, "step": 17990 }, { "action_loss": 0.011945158243179321, "epoch": 16.178057553956833, "step": 17990 }, { "epoch": 16.178057553956833, "step": 17990, "torque_loss": 0.16041181981563568 }, { "epoch": 16.18705035971223, "grad_norm": 0.41370806097984314, "learning_rate": 8.386610602531837e-05, "loss": 0.0969, "step": 18000 }, { "action_loss": 0.00460845697671175, "epoch": 16.18705035971223, "step": 18000 }, { "epoch": 16.18705035971223, "step": 18000, "torque_loss": 0.06311570852994919 }, { "epoch": 16.196043165467625, "grad_norm": 0.3556494116783142, "learning_rate": 8.384582695658847e-05, "loss": 0.0861, "step": 18010 }, { "action_loss": 0.013544526882469654, "epoch": 16.196043165467625, "step": 18010 }, { "epoch": 16.196043165467625, "step": 18010, "torque_loss": 0.16535703837871552 }, { "epoch": 16.205035971223023, "grad_norm": 0.3411675691604614, "learning_rate": 8.382553760638999e-05, "loss": 0.0863, "step": 18020 }, { "action_loss": 0.006247160956263542, "epoch": 16.205035971223023, "step": 18020 }, { "epoch": 16.205035971223023, "step": 18020, "torque_loss": 0.13916908204555511 }, { "epoch": 16.214028776978417, "grad_norm": 0.24820518493652344, "learning_rate": 8.380523798088631e-05, "loss": 0.087, "step": 18030 }, { "action_loss": 0.008200527168810368, "epoch": 16.214028776978417, "step": 18030 }, { "epoch": 16.214028776978417, "step": 18030, "torque_loss": 0.09174296259880066 }, { "epoch": 16.223021582733814, "grad_norm": 0.31948190927505493, "learning_rate": 8.378492808624389e-05, "loss": 0.0851, "step": 18040 }, { "action_loss": 0.0068082963116467, "epoch": 16.223021582733814, "step": 18040 }, { "epoch": 16.223021582733814, "step": 18040, "torque_loss": 0.10544780641794205 }, { "epoch": 16.23201438848921, "grad_norm": 0.3088702857494354, "learning_rate": 8.376460792863237e-05, "loss": 0.0986, "step": 18050 }, { "action_loss": 0.006795309484004974, "epoch": 16.23201438848921, "step": 18050 }, { "epoch": 16.23201438848921, "step": 18050, "torque_loss": 0.12050601094961166 }, { "epoch": 16.241007194244606, "grad_norm": 0.2771526277065277, "learning_rate": 8.374427751422444e-05, "loss": 0.0765, "step": 18060 }, { "action_loss": 0.0055975839495658875, "epoch": 16.241007194244606, "step": 18060 }, { "epoch": 16.241007194244606, "step": 18060, "torque_loss": 0.1020035520195961 }, { "epoch": 16.25, "grad_norm": 0.35675695538520813, "learning_rate": 8.3723936849196e-05, "loss": 0.0811, "step": 18070 }, { "action_loss": 0.004781122785061598, "epoch": 16.25, "step": 18070 }, { "epoch": 16.25, "step": 18070, "torque_loss": 0.09758568555116653 }, { "epoch": 16.258992805755394, "grad_norm": 0.29327255487442017, "learning_rate": 8.370358593972595e-05, "loss": 0.0928, "step": 18080 }, { "action_loss": 0.0058839707635343075, "epoch": 16.258992805755394, "step": 18080 }, { "epoch": 16.258992805755394, "step": 18080, "torque_loss": 0.1031041368842125 }, { "epoch": 16.26798561151079, "grad_norm": 0.3263508081436157, "learning_rate": 8.36832247919964e-05, "loss": 0.0871, "step": 18090 }, { "action_loss": 0.01304086297750473, "epoch": 16.26798561151079, "step": 18090 }, { "epoch": 16.26798561151079, "step": 18090, "torque_loss": 0.18573622405529022 }, { "epoch": 16.276978417266186, "grad_norm": 0.25720900297164917, "learning_rate": 8.36628534121925e-05, "loss": 0.0925, "step": 18100 }, { "action_loss": 0.004341946914792061, "epoch": 16.276978417266186, "step": 18100 }, { "epoch": 16.276978417266186, "step": 18100, "torque_loss": 0.12842626869678497 }, { "epoch": 16.285971223021583, "grad_norm": 0.3039265275001526, "learning_rate": 8.364247180650254e-05, "loss": 0.1001, "step": 18110 }, { "action_loss": 0.014243493787944317, "epoch": 16.285971223021583, "step": 18110 }, { "epoch": 16.285971223021583, "step": 18110, "torque_loss": 0.12572996318340302 }, { "epoch": 16.294964028776977, "grad_norm": 0.32255956530570984, "learning_rate": 8.362207998111794e-05, "loss": 0.0798, "step": 18120 }, { "action_loss": 0.005961783230304718, "epoch": 16.294964028776977, "step": 18120 }, { "epoch": 16.294964028776977, "step": 18120, "torque_loss": 0.12394522875547409 }, { "epoch": 16.303956834532375, "grad_norm": 0.3294036388397217, "learning_rate": 8.360167794223318e-05, "loss": 0.0897, "step": 18130 }, { "action_loss": 0.012137487530708313, "epoch": 16.303956834532375, "step": 18130 }, { "epoch": 16.303956834532375, "step": 18130, "torque_loss": 0.13969051837921143 }, { "epoch": 16.31294964028777, "grad_norm": 0.407423198223114, "learning_rate": 8.358126569604586e-05, "loss": 0.098, "step": 18140 }, { "action_loss": 0.007341498509049416, "epoch": 16.31294964028777, "step": 18140 }, { "epoch": 16.31294964028777, "step": 18140, "torque_loss": 0.1728503257036209 }, { "epoch": 16.321942446043167, "grad_norm": 0.28298091888427734, "learning_rate": 8.356084324875668e-05, "loss": 0.0924, "step": 18150 }, { "action_loss": 0.004702113103121519, "epoch": 16.321942446043167, "step": 18150 }, { "epoch": 16.321942446043167, "step": 18150, "torque_loss": 0.10540681332349777 }, { "epoch": 16.33093525179856, "grad_norm": 0.36320990324020386, "learning_rate": 8.354041060656945e-05, "loss": 0.086, "step": 18160 }, { "action_loss": 0.0033031527418643236, "epoch": 16.33093525179856, "step": 18160 }, { "epoch": 16.33093525179856, "step": 18160, "torque_loss": 0.0866963341832161 }, { "epoch": 16.33992805755396, "grad_norm": 0.34959477186203003, "learning_rate": 8.351996777569106e-05, "loss": 0.0831, "step": 18170 }, { "action_loss": 0.010104202665388584, "epoch": 16.33992805755396, "step": 18170 }, { "epoch": 16.33992805755396, "step": 18170, "torque_loss": 0.12977272272109985 }, { "epoch": 16.348920863309353, "grad_norm": 0.2792492210865021, "learning_rate": 8.349951476233148e-05, "loss": 0.0962, "step": 18180 }, { "action_loss": 0.004765805788338184, "epoch": 16.348920863309353, "step": 18180 }, { "epoch": 16.348920863309353, "step": 18180, "torque_loss": 0.08637168258428574 }, { "epoch": 16.357913669064747, "grad_norm": 0.3248596787452698, "learning_rate": 8.347905157270386e-05, "loss": 0.0859, "step": 18190 }, { "action_loss": 0.007748740259557962, "epoch": 16.357913669064747, "step": 18190 }, { "epoch": 16.357913669064747, "step": 18190, "torque_loss": 0.12496425956487656 }, { "epoch": 16.366906474820144, "grad_norm": 0.30054327845573425, "learning_rate": 8.345857821302432e-05, "loss": 0.101, "step": 18200 }, { "action_loss": 0.018626496195793152, "epoch": 16.366906474820144, "step": 18200 }, { "epoch": 16.366906474820144, "step": 18200, "torque_loss": 0.1580430418252945 }, { "epoch": 16.37589928057554, "grad_norm": 0.4592098295688629, "learning_rate": 8.343809468951213e-05, "loss": 0.0918, "step": 18210 }, { "action_loss": 0.008309565484523773, "epoch": 16.37589928057554, "step": 18210 }, { "epoch": 16.37589928057554, "step": 18210, "torque_loss": 0.17001664638519287 }, { "epoch": 16.384892086330936, "grad_norm": 0.2830735743045807, "learning_rate": 8.341760100838965e-05, "loss": 0.1036, "step": 18220 }, { "action_loss": 0.011184140108525753, "epoch": 16.384892086330936, "step": 18220 }, { "epoch": 16.384892086330936, "step": 18220, "torque_loss": 0.15047185122966766 }, { "epoch": 16.39388489208633, "grad_norm": 0.27961817383766174, "learning_rate": 8.339709717588233e-05, "loss": 0.1054, "step": 18230 }, { "action_loss": 0.007056953385472298, "epoch": 16.39388489208633, "step": 18230 }, { "epoch": 16.39388489208633, "step": 18230, "torque_loss": 0.11564803123474121 }, { "epoch": 16.402877697841728, "grad_norm": 0.33805912733078003, "learning_rate": 8.33765831982187e-05, "loss": 0.0891, "step": 18240 }, { "action_loss": 0.003555254777893424, "epoch": 16.402877697841728, "step": 18240 }, { "epoch": 16.402877697841728, "step": 18240, "torque_loss": 0.07361964136362076 }, { "epoch": 16.41187050359712, "grad_norm": 0.2582588493824005, "learning_rate": 8.335605908163035e-05, "loss": 0.0737, "step": 18250 }, { "action_loss": 0.006395711097866297, "epoch": 16.41187050359712, "step": 18250 }, { "epoch": 16.41187050359712, "step": 18250, "torque_loss": 0.08220553398132324 }, { "epoch": 16.42086330935252, "grad_norm": 0.28352054953575134, "learning_rate": 8.333552483235196e-05, "loss": 0.0847, "step": 18260 }, { "action_loss": 0.006343957502394915, "epoch": 16.42086330935252, "step": 18260 }, { "epoch": 16.42086330935252, "step": 18260, "torque_loss": 0.11486522108316422 }, { "epoch": 16.429856115107913, "grad_norm": 0.26929110288619995, "learning_rate": 8.33149804566213e-05, "loss": 0.0943, "step": 18270 }, { "action_loss": 0.006826700177043676, "epoch": 16.429856115107913, "step": 18270 }, { "epoch": 16.429856115107913, "step": 18270, "torque_loss": 0.1602572500705719 }, { "epoch": 16.43884892086331, "grad_norm": 0.32828986644744873, "learning_rate": 8.329442596067921e-05, "loss": 0.0943, "step": 18280 }, { "action_loss": 0.0028912469279021025, "epoch": 16.43884892086331, "step": 18280 }, { "epoch": 16.43884892086331, "step": 18280, "torque_loss": 0.07243891805410385 }, { "epoch": 16.447841726618705, "grad_norm": 0.38155484199523926, "learning_rate": 8.32738613507696e-05, "loss": 0.0941, "step": 18290 }, { "action_loss": 0.011516444385051727, "epoch": 16.447841726618705, "step": 18290 }, { "epoch": 16.447841726618705, "step": 18290, "torque_loss": 0.11659073084592819 }, { "epoch": 16.4568345323741, "grad_norm": 0.4225389361381531, "learning_rate": 8.325328663313946e-05, "loss": 0.1079, "step": 18300 }, { "action_loss": 0.02584247849881649, "epoch": 16.4568345323741, "step": 18300 }, { "epoch": 16.4568345323741, "step": 18300, "torque_loss": 0.18380646407604218 }, { "epoch": 16.465827338129497, "grad_norm": 0.3077666163444519, "learning_rate": 8.323270181403884e-05, "loss": 0.0924, "step": 18310 }, { "action_loss": 0.018719471991062164, "epoch": 16.465827338129497, "step": 18310 }, { "epoch": 16.465827338129497, "step": 18310, "torque_loss": 0.1867436021566391 }, { "epoch": 16.47482014388489, "grad_norm": 0.26429757475852966, "learning_rate": 8.321210689972086e-05, "loss": 0.0917, "step": 18320 }, { "action_loss": 0.00503364996984601, "epoch": 16.47482014388489, "step": 18320 }, { "epoch": 16.47482014388489, "step": 18320, "torque_loss": 0.12744639813899994 }, { "epoch": 16.48381294964029, "grad_norm": 0.5156170725822449, "learning_rate": 8.319150189644174e-05, "loss": 0.0961, "step": 18330 }, { "action_loss": 0.004825991112738848, "epoch": 16.48381294964029, "step": 18330 }, { "epoch": 16.48381294964029, "step": 18330, "torque_loss": 0.10158950835466385 }, { "epoch": 16.492805755395683, "grad_norm": 0.3528153598308563, "learning_rate": 8.31708868104607e-05, "loss": 0.1127, "step": 18340 }, { "action_loss": 0.01287021767348051, "epoch": 16.492805755395683, "step": 18340 }, { "epoch": 16.492805755395683, "step": 18340, "torque_loss": 0.20455080270767212 }, { "epoch": 16.50179856115108, "grad_norm": 0.30509334802627563, "learning_rate": 8.315026164804007e-05, "loss": 0.0992, "step": 18350 }, { "action_loss": 0.005815593991428614, "epoch": 16.50179856115108, "step": 18350 }, { "epoch": 16.50179856115108, "step": 18350, "torque_loss": 0.09625547379255295 }, { "epoch": 16.510791366906474, "grad_norm": 0.33830979466438293, "learning_rate": 8.312962641544524e-05, "loss": 0.0977, "step": 18360 }, { "action_loss": 0.003334297798573971, "epoch": 16.510791366906474, "step": 18360 }, { "epoch": 16.510791366906474, "step": 18360, "torque_loss": 0.08579891920089722 }, { "epoch": 16.519784172661872, "grad_norm": 0.425844669342041, "learning_rate": 8.310898111894465e-05, "loss": 0.0872, "step": 18370 }, { "action_loss": 0.00670865410938859, "epoch": 16.519784172661872, "step": 18370 }, { "epoch": 16.519784172661872, "step": 18370, "torque_loss": 0.10543593764305115 }, { "epoch": 16.528776978417266, "grad_norm": 0.34772956371307373, "learning_rate": 8.308832576480977e-05, "loss": 0.0903, "step": 18380 }, { "action_loss": 0.007042527198791504, "epoch": 16.528776978417266, "step": 18380 }, { "epoch": 16.528776978417266, "step": 18380, "torque_loss": 0.11555717140436172 }, { "epoch": 16.53776978417266, "grad_norm": 0.3532050549983978, "learning_rate": 8.306766035931519e-05, "loss": 0.1048, "step": 18390 }, { "action_loss": 0.00897221639752388, "epoch": 16.53776978417266, "step": 18390 }, { "epoch": 16.53776978417266, "step": 18390, "torque_loss": 0.1686885505914688 }, { "epoch": 16.546762589928058, "grad_norm": 0.4252741038799286, "learning_rate": 8.304698490873847e-05, "loss": 0.0943, "step": 18400 }, { "action_loss": 0.005200957413762808, "epoch": 16.546762589928058, "step": 18400 }, { "epoch": 16.546762589928058, "step": 18400, "torque_loss": 0.0907365083694458 }, { "epoch": 16.555755395683452, "grad_norm": 0.33171865344047546, "learning_rate": 8.30262994193603e-05, "loss": 0.0968, "step": 18410 }, { "action_loss": 0.004447434563189745, "epoch": 16.555755395683452, "step": 18410 }, { "epoch": 16.555755395683452, "step": 18410, "torque_loss": 0.13956685364246368 }, { "epoch": 16.56474820143885, "grad_norm": 0.3467333912849426, "learning_rate": 8.300560389746438e-05, "loss": 0.0906, "step": 18420 }, { "action_loss": 0.015668241307139397, "epoch": 16.56474820143885, "step": 18420 }, { "epoch": 16.56474820143885, "step": 18420, "torque_loss": 0.19594687223434448 }, { "epoch": 16.573741007194243, "grad_norm": 0.46040078997612, "learning_rate": 8.298489834933745e-05, "loss": 0.102, "step": 18430 }, { "action_loss": 0.004506120458245277, "epoch": 16.573741007194243, "step": 18430 }, { "epoch": 16.573741007194243, "step": 18430, "torque_loss": 0.09221047908067703 }, { "epoch": 16.58273381294964, "grad_norm": 0.34456774592399597, "learning_rate": 8.296418278126934e-05, "loss": 0.0988, "step": 18440 }, { "action_loss": 0.0071984268724918365, "epoch": 16.58273381294964, "step": 18440 }, { "epoch": 16.58273381294964, "step": 18440, "torque_loss": 0.13600188493728638 }, { "epoch": 16.591726618705035, "grad_norm": 0.2986716330051422, "learning_rate": 8.294345719955284e-05, "loss": 0.0962, "step": 18450 }, { "action_loss": 0.0055312588810920715, "epoch": 16.591726618705035, "step": 18450 }, { "epoch": 16.591726618705035, "step": 18450, "torque_loss": 0.1270781010389328 }, { "epoch": 16.600719424460433, "grad_norm": 0.3801255524158478, "learning_rate": 8.29227216104839e-05, "loss": 0.0965, "step": 18460 }, { "action_loss": 0.010921536944806576, "epoch": 16.600719424460433, "step": 18460 }, { "epoch": 16.600719424460433, "step": 18460, "torque_loss": 0.19370703399181366 }, { "epoch": 16.609712230215827, "grad_norm": 0.4008491337299347, "learning_rate": 8.290197602036137e-05, "loss": 0.1024, "step": 18470 }, { "action_loss": 0.007822037674486637, "epoch": 16.609712230215827, "step": 18470 }, { "epoch": 16.609712230215827, "step": 18470, "torque_loss": 0.11778231710195541 }, { "epoch": 16.618705035971225, "grad_norm": 0.31159844994544983, "learning_rate": 8.288122043548725e-05, "loss": 0.0822, "step": 18480 }, { "action_loss": 0.006014909595251083, "epoch": 16.618705035971225, "step": 18480 }, { "epoch": 16.618705035971225, "step": 18480, "torque_loss": 0.12912572920322418 }, { "epoch": 16.62769784172662, "grad_norm": 0.234540656208992, "learning_rate": 8.286045486216657e-05, "loss": 0.0933, "step": 18490 }, { "action_loss": 0.014295271597802639, "epoch": 16.62769784172662, "step": 18490 }, { "epoch": 16.62769784172662, "step": 18490, "torque_loss": 0.18494713306427002 }, { "epoch": 16.636690647482013, "grad_norm": 0.34871283173561096, "learning_rate": 8.283967930670733e-05, "loss": 0.0941, "step": 18500 }, { "action_loss": 0.00898340716958046, "epoch": 16.636690647482013, "step": 18500 }, { "epoch": 16.636690647482013, "step": 18500, "torque_loss": 0.15200906991958618 }, { "epoch": 16.64568345323741, "grad_norm": 0.4130002558231354, "learning_rate": 8.281889377542058e-05, "loss": 0.1002, "step": 18510 }, { "action_loss": 0.007858695462346077, "epoch": 16.64568345323741, "step": 18510 }, { "epoch": 16.64568345323741, "step": 18510, "torque_loss": 0.14184652268886566 }, { "epoch": 16.654676258992804, "grad_norm": 0.41759517788887024, "learning_rate": 8.279809827462045e-05, "loss": 0.0868, "step": 18520 }, { "action_loss": 0.010234705172479153, "epoch": 16.654676258992804, "step": 18520 }, { "epoch": 16.654676258992804, "step": 18520, "torque_loss": 0.17384056746959686 }, { "epoch": 16.663669064748202, "grad_norm": 0.3166186511516571, "learning_rate": 8.277729281062402e-05, "loss": 0.1014, "step": 18530 }, { "action_loss": 0.004880594555288553, "epoch": 16.663669064748202, "step": 18530 }, { "epoch": 16.663669064748202, "step": 18530, "torque_loss": 0.14734791219234467 }, { "epoch": 16.672661870503596, "grad_norm": 0.3302207589149475, "learning_rate": 8.27564773897515e-05, "loss": 0.0916, "step": 18540 }, { "action_loss": 0.004790575709193945, "epoch": 16.672661870503596, "step": 18540 }, { "epoch": 16.672661870503596, "step": 18540, "torque_loss": 0.11587313562631607 }, { "epoch": 16.681654676258994, "grad_norm": 0.2461133897304535, "learning_rate": 8.273565201832602e-05, "loss": 0.0812, "step": 18550 }, { "action_loss": 0.009799282997846603, "epoch": 16.681654676258994, "step": 18550 }, { "epoch": 16.681654676258994, "step": 18550, "torque_loss": 0.13032270967960358 }, { "epoch": 16.690647482014388, "grad_norm": 0.29925569891929626, "learning_rate": 8.27148167026738e-05, "loss": 0.0847, "step": 18560 }, { "action_loss": 0.011447593569755554, "epoch": 16.690647482014388, "step": 18560 }, { "epoch": 16.690647482014388, "step": 18560, "torque_loss": 0.1553230732679367 }, { "epoch": 16.699640287769785, "grad_norm": 0.36445438861846924, "learning_rate": 8.269397144912405e-05, "loss": 0.082, "step": 18570 }, { "action_loss": 0.005456476006656885, "epoch": 16.699640287769785, "step": 18570 }, { "epoch": 16.699640287769785, "step": 18570, "torque_loss": 0.11568126082420349 }, { "epoch": 16.70863309352518, "grad_norm": 0.28724509477615356, "learning_rate": 8.267311626400899e-05, "loss": 0.0807, "step": 18580 }, { "action_loss": 0.010664845816791058, "epoch": 16.70863309352518, "step": 18580 }, { "epoch": 16.70863309352518, "step": 18580, "torque_loss": 0.09187781810760498 }, { "epoch": 16.717625899280577, "grad_norm": 0.46913978457450867, "learning_rate": 8.26522511536639e-05, "loss": 0.0951, "step": 18590 }, { "action_loss": 0.008519861847162247, "epoch": 16.717625899280577, "step": 18590 }, { "epoch": 16.717625899280577, "step": 18590, "torque_loss": 0.14667509496212006 }, { "epoch": 16.72661870503597, "grad_norm": 0.40362244844436646, "learning_rate": 8.263137612442706e-05, "loss": 0.0964, "step": 18600 }, { "action_loss": 0.003460899693891406, "epoch": 16.72661870503597, "step": 18600 }, { "epoch": 16.72661870503597, "step": 18600, "torque_loss": 0.11092972755432129 }, { "epoch": 16.735611510791365, "grad_norm": 0.43755078315734863, "learning_rate": 8.261049118263971e-05, "loss": 0.0906, "step": 18610 }, { "action_loss": 0.009538741782307625, "epoch": 16.735611510791365, "step": 18610 }, { "epoch": 16.735611510791365, "step": 18610, "torque_loss": 0.11047714203596115 }, { "epoch": 16.744604316546763, "grad_norm": 0.36895620822906494, "learning_rate": 8.258959633464619e-05, "loss": 0.093, "step": 18620 }, { "action_loss": 0.009296000935137272, "epoch": 16.744604316546763, "step": 18620 }, { "epoch": 16.744604316546763, "step": 18620, "torque_loss": 0.19096462428569794 }, { "epoch": 16.753597122302157, "grad_norm": 0.2758145034313202, "learning_rate": 8.256869158679377e-05, "loss": 0.0879, "step": 18630 }, { "action_loss": 0.014257240109145641, "epoch": 16.753597122302157, "step": 18630 }, { "epoch": 16.753597122302157, "step": 18630, "torque_loss": 0.1958245486021042 }, { "epoch": 16.762589928057555, "grad_norm": 0.3109160363674164, "learning_rate": 8.254777694543278e-05, "loss": 0.0998, "step": 18640 }, { "action_loss": 0.00482841394841671, "epoch": 16.762589928057555, "step": 18640 }, { "epoch": 16.762589928057555, "step": 18640, "torque_loss": 0.10115063190460205 }, { "epoch": 16.77158273381295, "grad_norm": 0.27707919478416443, "learning_rate": 8.252685241691651e-05, "loss": 0.095, "step": 18650 }, { "action_loss": 0.014678028412163258, "epoch": 16.77158273381295, "step": 18650 }, { "epoch": 16.77158273381295, "step": 18650, "torque_loss": 0.21142391860485077 }, { "epoch": 16.780575539568346, "grad_norm": 0.33761194348335266, "learning_rate": 8.250591800760133e-05, "loss": 0.1069, "step": 18660 }, { "action_loss": 0.006213450338691473, "epoch": 16.780575539568346, "step": 18660 }, { "epoch": 16.780575539568346, "step": 18660, "torque_loss": 0.15897031128406525 }, { "epoch": 16.78956834532374, "grad_norm": 0.3522542119026184, "learning_rate": 8.248497372384649e-05, "loss": 0.1002, "step": 18670 }, { "action_loss": 0.006999319419264793, "epoch": 16.78956834532374, "step": 18670 }, { "epoch": 16.78956834532374, "step": 18670, "torque_loss": 0.1309279054403305 }, { "epoch": 16.798561151079138, "grad_norm": 0.4184950292110443, "learning_rate": 8.246401957201437e-05, "loss": 0.0972, "step": 18680 }, { "action_loss": 0.008166112005710602, "epoch": 16.798561151079138, "step": 18680 }, { "epoch": 16.798561151079138, "step": 18680, "torque_loss": 0.15280510485172272 }, { "epoch": 16.807553956834532, "grad_norm": 0.3229549527168274, "learning_rate": 8.244305555847027e-05, "loss": 0.0907, "step": 18690 }, { "action_loss": 0.007556163240224123, "epoch": 16.807553956834532, "step": 18690 }, { "epoch": 16.807553956834532, "step": 18690, "torque_loss": 0.13939477503299713 }, { "epoch": 16.81654676258993, "grad_norm": 0.296256959438324, "learning_rate": 8.24220816895825e-05, "loss": 0.0844, "step": 18700 }, { "action_loss": 0.006434418261051178, "epoch": 16.81654676258993, "step": 18700 }, { "epoch": 16.81654676258993, "step": 18700, "torque_loss": 0.15063829720020294 }, { "epoch": 16.825539568345324, "grad_norm": 0.37369829416275024, "learning_rate": 8.240109797172237e-05, "loss": 0.102, "step": 18710 }, { "action_loss": 0.005189812276512384, "epoch": 16.825539568345324, "step": 18710 }, { "epoch": 16.825539568345324, "step": 18710, "torque_loss": 0.11415234953165054 }, { "epoch": 16.834532374100718, "grad_norm": 0.3405546545982361, "learning_rate": 8.238010441126416e-05, "loss": 0.1008, "step": 18720 }, { "action_loss": 0.015767572447657585, "epoch": 16.834532374100718, "step": 18720 }, { "epoch": 16.834532374100718, "step": 18720, "torque_loss": 0.20705978572368622 }, { "epoch": 16.843525179856115, "grad_norm": 0.33144518733024597, "learning_rate": 8.23591010145852e-05, "loss": 0.1052, "step": 18730 }, { "action_loss": 0.03451564908027649, "epoch": 16.843525179856115, "step": 18730 }, { "epoch": 16.843525179856115, "step": 18730, "torque_loss": 0.15612094104290009 }, { "epoch": 16.85251798561151, "grad_norm": 0.3603058159351349, "learning_rate": 8.233808778806571e-05, "loss": 0.0951, "step": 18740 }, { "action_loss": 0.006733340676873922, "epoch": 16.85251798561151, "step": 18740 }, { "epoch": 16.85251798561151, "step": 18740, "torque_loss": 0.14669476449489594 }, { "epoch": 16.861510791366907, "grad_norm": 0.3574373722076416, "learning_rate": 8.231706473808903e-05, "loss": 0.0956, "step": 18750 }, { "action_loss": 0.010843485593795776, "epoch": 16.861510791366907, "step": 18750 }, { "epoch": 16.861510791366907, "step": 18750, "torque_loss": 0.17918598651885986 }, { "epoch": 16.8705035971223, "grad_norm": 0.3176102042198181, "learning_rate": 8.229603187104133e-05, "loss": 0.1073, "step": 18760 }, { "action_loss": 0.006287971045821905, "epoch": 16.8705035971223, "step": 18760 }, { "epoch": 16.8705035971223, "step": 18760, "torque_loss": 0.0916064977645874 }, { "epoch": 16.8794964028777, "grad_norm": 0.439521849155426, "learning_rate": 8.22749891933119e-05, "loss": 0.099, "step": 18770 }, { "action_loss": 0.015393898822367191, "epoch": 16.8794964028777, "step": 18770 }, { "epoch": 16.8794964028777, "step": 18770, "torque_loss": 0.252413272857666 }, { "epoch": 16.888489208633093, "grad_norm": 0.4236300587654114, "learning_rate": 8.225393671129291e-05, "loss": 0.1018, "step": 18780 }, { "action_loss": 0.008082159794867039, "epoch": 16.888489208633093, "step": 18780 }, { "epoch": 16.888489208633093, "step": 18780, "torque_loss": 0.11927703768014908 }, { "epoch": 16.89748201438849, "grad_norm": 0.3892172873020172, "learning_rate": 8.223287443137957e-05, "loss": 0.0829, "step": 18790 }, { "action_loss": 0.005000781733542681, "epoch": 16.89748201438849, "step": 18790 }, { "epoch": 16.89748201438849, "step": 18790, "torque_loss": 0.10630923509597778 }, { "epoch": 16.906474820143885, "grad_norm": 0.340038001537323, "learning_rate": 8.221180235997004e-05, "loss": 0.0931, "step": 18800 }, { "action_loss": 0.0033716540783643723, "epoch": 16.906474820143885, "step": 18800 }, { "epoch": 16.906474820143885, "step": 18800, "torque_loss": 0.05509871616959572 }, { "epoch": 16.915467625899282, "grad_norm": 0.3372569978237152, "learning_rate": 8.219072050346544e-05, "loss": 0.0913, "step": 18810 }, { "action_loss": 0.01040157675743103, "epoch": 16.915467625899282, "step": 18810 }, { "epoch": 16.915467625899282, "step": 18810, "torque_loss": 0.12407534569501877 }, { "epoch": 16.924460431654676, "grad_norm": 0.3305010497570038, "learning_rate": 8.216962886826992e-05, "loss": 0.0895, "step": 18820 }, { "action_loss": 0.004118137992918491, "epoch": 16.924460431654676, "step": 18820 }, { "epoch": 16.924460431654676, "step": 18820, "torque_loss": 0.11785832792520523 }, { "epoch": 16.93345323741007, "grad_norm": 0.3817189335823059, "learning_rate": 8.214852746079054e-05, "loss": 0.0974, "step": 18830 }, { "action_loss": 0.003903757780790329, "epoch": 16.93345323741007, "step": 18830 }, { "epoch": 16.93345323741007, "step": 18830, "torque_loss": 0.14939971268177032 }, { "epoch": 16.942446043165468, "grad_norm": 0.3547206223011017, "learning_rate": 8.212741628743732e-05, "loss": 0.0875, "step": 18840 }, { "action_loss": 0.008934331126511097, "epoch": 16.942446043165468, "step": 18840 }, { "epoch": 16.942446043165468, "step": 18840, "torque_loss": 0.1326170265674591 }, { "epoch": 16.951438848920862, "grad_norm": 0.319401353597641, "learning_rate": 8.210629535462333e-05, "loss": 0.091, "step": 18850 }, { "action_loss": 0.010687566362321377, "epoch": 16.951438848920862, "step": 18850 }, { "epoch": 16.951438848920862, "step": 18850, "torque_loss": 0.09488046169281006 }, { "epoch": 16.96043165467626, "grad_norm": 0.3314138650894165, "learning_rate": 8.208516466876453e-05, "loss": 0.0911, "step": 18860 }, { "action_loss": 0.004067303147166967, "epoch": 16.96043165467626, "step": 18860 }, { "epoch": 16.96043165467626, "step": 18860, "torque_loss": 0.0831926092505455 }, { "epoch": 16.969424460431654, "grad_norm": 0.2941673696041107, "learning_rate": 8.206402423627986e-05, "loss": 0.0837, "step": 18870 }, { "action_loss": 0.013721436262130737, "epoch": 16.969424460431654, "step": 18870 }, { "epoch": 16.969424460431654, "step": 18870, "torque_loss": 0.17169417440891266 }, { "epoch": 16.97841726618705, "grad_norm": 0.3842267692089081, "learning_rate": 8.204287406359124e-05, "loss": 0.1179, "step": 18880 }, { "action_loss": 0.0098015321418643, "epoch": 16.97841726618705, "step": 18880 }, { "epoch": 16.97841726618705, "step": 18880, "torque_loss": 0.15388783812522888 }, { "epoch": 16.987410071942445, "grad_norm": 0.35111624002456665, "learning_rate": 8.20217141571235e-05, "loss": 0.0997, "step": 18890 }, { "action_loss": 0.003134012222290039, "epoch": 16.987410071942445, "step": 18890 }, { "epoch": 16.987410071942445, "step": 18890, "torque_loss": 0.09352785348892212 }, { "epoch": 16.996402877697843, "grad_norm": 0.3305180072784424, "learning_rate": 8.200054452330449e-05, "loss": 0.077, "step": 18900 }, { "action_loss": 0.0024418910034000874, "epoch": 16.996402877697843, "step": 18900 }, { "epoch": 16.996402877697843, "step": 18900, "torque_loss": 0.09824473410844803 }, { "epoch": 17.005395683453237, "grad_norm": 0.2825358808040619, "learning_rate": 8.197936516856499e-05, "loss": 0.0932, "step": 18910 }, { "action_loss": 0.009925737977027893, "epoch": 17.005395683453237, "step": 18910 }, { "epoch": 17.005395683453237, "step": 18910, "torque_loss": 0.1616065353155136 }, { "epoch": 17.014388489208635, "grad_norm": 0.3635106682777405, "learning_rate": 8.195817609933871e-05, "loss": 0.102, "step": 18920 }, { "action_loss": 0.012480598874390125, "epoch": 17.014388489208635, "step": 18920 }, { "epoch": 17.014388489208635, "step": 18920, "torque_loss": 0.14820611476898193 }, { "epoch": 17.02338129496403, "grad_norm": 0.26082003116607666, "learning_rate": 8.193697732206233e-05, "loss": 0.0922, "step": 18930 }, { "action_loss": 0.008543327450752258, "epoch": 17.02338129496403, "step": 18930 }, { "epoch": 17.02338129496403, "step": 18930, "torque_loss": 0.1328575164079666 }, { "epoch": 17.032374100719423, "grad_norm": 0.35608598589897156, "learning_rate": 8.19157688431755e-05, "loss": 0.098, "step": 18940 }, { "action_loss": 0.00532124936580658, "epoch": 17.032374100719423, "step": 18940 }, { "epoch": 17.032374100719423, "step": 18940, "torque_loss": 0.12249457091093063 }, { "epoch": 17.04136690647482, "grad_norm": 0.4343336820602417, "learning_rate": 8.189455066912077e-05, "loss": 0.0857, "step": 18950 }, { "action_loss": 0.008134562522172928, "epoch": 17.04136690647482, "step": 18950 }, { "epoch": 17.04136690647482, "step": 18950, "torque_loss": 0.11734088510274887 }, { "epoch": 17.050359712230215, "grad_norm": 0.31979554891586304, "learning_rate": 8.187332280634369e-05, "loss": 0.0954, "step": 18960 }, { "action_loss": 0.009659177623689175, "epoch": 17.050359712230215, "step": 18960 }, { "epoch": 17.050359712230215, "step": 18960, "torque_loss": 0.11195734888315201 }, { "epoch": 17.059352517985612, "grad_norm": 0.39833128452301025, "learning_rate": 8.18520852612927e-05, "loss": 0.1001, "step": 18970 }, { "action_loss": 0.008087622001767159, "epoch": 17.059352517985612, "step": 18970 }, { "epoch": 17.059352517985612, "step": 18970, "torque_loss": 0.12929822504520416 }, { "epoch": 17.068345323741006, "grad_norm": 0.30946075916290283, "learning_rate": 8.183083804041921e-05, "loss": 0.1081, "step": 18980 }, { "action_loss": 0.009280961006879807, "epoch": 17.068345323741006, "step": 18980 }, { "epoch": 17.068345323741006, "step": 18980, "torque_loss": 0.19781391322612762 }, { "epoch": 17.077338129496404, "grad_norm": 0.39323943853378296, "learning_rate": 8.180958115017757e-05, "loss": 0.0859, "step": 18990 }, { "action_loss": 0.0039904131554067135, "epoch": 17.077338129496404, "step": 18990 }, { "epoch": 17.077338129496404, "step": 18990, "torque_loss": 0.0768595039844513 }, { "epoch": 17.086330935251798, "grad_norm": 0.3982807397842407, "learning_rate": 8.178831459702505e-05, "loss": 0.1072, "step": 19000 }, { "action_loss": 0.00721932714805007, "epoch": 17.086330935251798, "step": 19000 }, { "epoch": 17.086330935251798, "step": 19000, "torque_loss": 0.14499418437480927 }, { "epoch": 17.095323741007196, "grad_norm": 0.45353373885154724, "learning_rate": 8.17670383874219e-05, "loss": 0.0913, "step": 19010 }, { "action_loss": 0.01701483130455017, "epoch": 17.095323741007196, "step": 19010 }, { "epoch": 17.095323741007196, "step": 19010, "torque_loss": 0.20476467907428741 }, { "epoch": 17.10431654676259, "grad_norm": 0.32641175389289856, "learning_rate": 8.174575252783124e-05, "loss": 0.0879, "step": 19020 }, { "action_loss": 0.038752008229494095, "epoch": 17.10431654676259, "step": 19020 }, { "epoch": 17.10431654676259, "step": 19020, "torque_loss": 0.24787098169326782 }, { "epoch": 17.113309352517987, "grad_norm": 0.37105593085289, "learning_rate": 8.172445702471914e-05, "loss": 0.106, "step": 19030 }, { "action_loss": 0.005097861867398024, "epoch": 17.113309352517987, "step": 19030 }, { "epoch": 17.113309352517987, "step": 19030, "torque_loss": 0.1478923112154007 }, { "epoch": 17.12230215827338, "grad_norm": 0.3860788643360138, "learning_rate": 8.170315188455466e-05, "loss": 0.0972, "step": 19040 }, { "action_loss": 0.006444490049034357, "epoch": 17.12230215827338, "step": 19040 }, { "epoch": 17.12230215827338, "step": 19040, "torque_loss": 0.11119109392166138 }, { "epoch": 17.131294964028775, "grad_norm": 0.3699122369289398, "learning_rate": 8.168183711380969e-05, "loss": 0.098, "step": 19050 }, { "action_loss": 0.010361974127590656, "epoch": 17.131294964028775, "step": 19050 }, { "epoch": 17.131294964028775, "step": 19050, "torque_loss": 0.14954866468906403 }, { "epoch": 17.140287769784173, "grad_norm": 0.39349180459976196, "learning_rate": 8.166051271895913e-05, "loss": 0.0993, "step": 19060 }, { "action_loss": 0.007250263821333647, "epoch": 17.140287769784173, "step": 19060 }, { "epoch": 17.140287769784173, "step": 19060, "torque_loss": 0.1274334043264389 }, { "epoch": 17.149280575539567, "grad_norm": 0.4296318292617798, "learning_rate": 8.163917870648075e-05, "loss": 0.1075, "step": 19070 }, { "action_loss": 0.005838725250214338, "epoch": 17.149280575539567, "step": 19070 }, { "epoch": 17.149280575539567, "step": 19070, "torque_loss": 0.09756787866353989 }, { "epoch": 17.158273381294965, "grad_norm": 0.4198109209537506, "learning_rate": 8.161783508285526e-05, "loss": 0.0883, "step": 19080 }, { "action_loss": 0.02141609601676464, "epoch": 17.158273381294965, "step": 19080 }, { "epoch": 17.158273381294965, "step": 19080, "torque_loss": 0.22172194719314575 }, { "epoch": 17.16726618705036, "grad_norm": 0.296267032623291, "learning_rate": 8.159648185456628e-05, "loss": 0.0982, "step": 19090 }, { "action_loss": 0.005052533000707626, "epoch": 17.16726618705036, "step": 19090 }, { "epoch": 17.16726618705036, "step": 19090, "torque_loss": 0.07580769807100296 }, { "epoch": 17.176258992805757, "grad_norm": 0.3629342317581177, "learning_rate": 8.157511902810038e-05, "loss": 0.0945, "step": 19100 }, { "action_loss": 0.010574179701507092, "epoch": 17.176258992805757, "step": 19100 }, { "epoch": 17.176258992805757, "step": 19100, "torque_loss": 0.14412136375904083 }, { "epoch": 17.18525179856115, "grad_norm": 0.4530353546142578, "learning_rate": 8.155374660994701e-05, "loss": 0.0932, "step": 19110 }, { "action_loss": 0.0046335854567587376, "epoch": 17.18525179856115, "step": 19110 }, { "epoch": 17.18525179856115, "step": 19110, "torque_loss": 0.14515122771263123 }, { "epoch": 17.194244604316548, "grad_norm": 0.3449672758579254, "learning_rate": 8.153236460659857e-05, "loss": 0.105, "step": 19120 }, { "action_loss": 0.023761719465255737, "epoch": 17.194244604316548, "step": 19120 }, { "epoch": 17.194244604316548, "step": 19120, "torque_loss": 0.19460546970367432 }, { "epoch": 17.203237410071942, "grad_norm": 0.32235172390937805, "learning_rate": 8.151097302455031e-05, "loss": 0.0905, "step": 19130 }, { "action_loss": 0.007760549429804087, "epoch": 17.203237410071942, "step": 19130 }, { "epoch": 17.203237410071942, "step": 19130, "torque_loss": 0.1402585357427597 }, { "epoch": 17.21223021582734, "grad_norm": 0.3863418400287628, "learning_rate": 8.148957187030044e-05, "loss": 0.1036, "step": 19140 }, { "action_loss": 0.0036656877491623163, "epoch": 17.21223021582734, "step": 19140 }, { "epoch": 17.21223021582734, "step": 19140, "torque_loss": 0.12575273215770721 }, { "epoch": 17.221223021582734, "grad_norm": 0.30928680300712585, "learning_rate": 8.146816115035006e-05, "loss": 0.092, "step": 19150 }, { "action_loss": 0.0028117650654166937, "epoch": 17.221223021582734, "step": 19150 }, { "epoch": 17.221223021582734, "step": 19150, "torque_loss": 0.06491108983755112 }, { "epoch": 17.230215827338128, "grad_norm": 0.35452407598495483, "learning_rate": 8.14467408712032e-05, "loss": 0.0979, "step": 19160 }, { "action_loss": 0.0037307133898139, "epoch": 17.230215827338128, "step": 19160 }, { "epoch": 17.230215827338128, "step": 19160, "torque_loss": 0.08371420949697495 }, { "epoch": 17.239208633093526, "grad_norm": 0.36187756061553955, "learning_rate": 8.142531103936678e-05, "loss": 0.0917, "step": 19170 }, { "action_loss": 0.00428331783041358, "epoch": 17.239208633093526, "step": 19170 }, { "epoch": 17.239208633093526, "step": 19170, "torque_loss": 0.08557287603616714 }, { "epoch": 17.24820143884892, "grad_norm": 0.28787821531295776, "learning_rate": 8.14038716613506e-05, "loss": 0.0647, "step": 19180 }, { "action_loss": 0.017450764775276184, "epoch": 17.24820143884892, "step": 19180 }, { "epoch": 17.24820143884892, "step": 19180, "torque_loss": 0.2421012669801712 }, { "epoch": 17.257194244604317, "grad_norm": 0.31794819235801697, "learning_rate": 8.138242274366736e-05, "loss": 0.0881, "step": 19190 }, { "action_loss": 0.004411648027598858, "epoch": 17.257194244604317, "step": 19190 }, { "epoch": 17.257194244604317, "step": 19190, "torque_loss": 0.13096360862255096 }, { "epoch": 17.26618705035971, "grad_norm": 0.33117401599884033, "learning_rate": 8.136096429283271e-05, "loss": 0.102, "step": 19200 }, { "action_loss": 0.008100184611976147, "epoch": 17.26618705035971, "step": 19200 }, { "epoch": 17.26618705035971, "step": 19200, "torque_loss": 0.1389503926038742 }, { "epoch": 17.27517985611511, "grad_norm": 0.4077320992946625, "learning_rate": 8.133949631536515e-05, "loss": 0.0997, "step": 19210 }, { "action_loss": 0.008981036953628063, "epoch": 17.27517985611511, "step": 19210 }, { "epoch": 17.27517985611511, "step": 19210, "torque_loss": 0.14940054714679718 }, { "epoch": 17.284172661870503, "grad_norm": 0.3168950378894806, "learning_rate": 8.131801881778607e-05, "loss": 0.1004, "step": 19220 }, { "action_loss": 0.00778372585773468, "epoch": 17.284172661870503, "step": 19220 }, { "epoch": 17.284172661870503, "step": 19220, "torque_loss": 0.10742604732513428 }, { "epoch": 17.2931654676259, "grad_norm": 0.3001580536365509, "learning_rate": 8.129653180661978e-05, "loss": 0.0937, "step": 19230 }, { "action_loss": 0.004914729855954647, "epoch": 17.2931654676259, "step": 19230 }, { "epoch": 17.2931654676259, "step": 19230, "torque_loss": 0.13700954616069794 }, { "epoch": 17.302158273381295, "grad_norm": 0.33876773715019226, "learning_rate": 8.127503528839346e-05, "loss": 0.1053, "step": 19240 }, { "action_loss": 0.015248182229697704, "epoch": 17.302158273381295, "step": 19240 }, { "epoch": 17.302158273381295, "step": 19240, "torque_loss": 0.19749273359775543 }, { "epoch": 17.31115107913669, "grad_norm": 0.4028593897819519, "learning_rate": 8.125352926963721e-05, "loss": 0.1081, "step": 19250 }, { "action_loss": 0.0039922562427818775, "epoch": 17.31115107913669, "step": 19250 }, { "epoch": 17.31115107913669, "step": 19250, "torque_loss": 0.08837287873029709 }, { "epoch": 17.320143884892087, "grad_norm": 0.45216435194015503, "learning_rate": 8.123201375688395e-05, "loss": 0.0892, "step": 19260 }, { "action_loss": 0.0033645706716924906, "epoch": 17.320143884892087, "step": 19260 }, { "epoch": 17.320143884892087, "step": 19260, "torque_loss": 0.10888403654098511 }, { "epoch": 17.32913669064748, "grad_norm": 0.3565727770328522, "learning_rate": 8.121048875666954e-05, "loss": 0.095, "step": 19270 }, { "action_loss": 0.005164742935448885, "epoch": 17.32913669064748, "step": 19270 }, { "epoch": 17.32913669064748, "step": 19270, "torque_loss": 0.13879333436489105 }, { "epoch": 17.33812949640288, "grad_norm": 0.31203195452690125, "learning_rate": 8.118895427553274e-05, "loss": 0.0801, "step": 19280 }, { "action_loss": 0.01104032713919878, "epoch": 17.33812949640288, "step": 19280 }, { "epoch": 17.33812949640288, "step": 19280, "torque_loss": 0.11941608041524887 }, { "epoch": 17.347122302158272, "grad_norm": 0.38015633821487427, "learning_rate": 8.116741032001511e-05, "loss": 0.1022, "step": 19290 }, { "action_loss": 0.008909923024475574, "epoch": 17.347122302158272, "step": 19290 }, { "epoch": 17.347122302158272, "step": 19290, "torque_loss": 0.12400727719068527 }, { "epoch": 17.35611510791367, "grad_norm": 0.32001057267189026, "learning_rate": 8.114585689666114e-05, "loss": 0.0879, "step": 19300 }, { "action_loss": 0.004736791364848614, "epoch": 17.35611510791367, "step": 19300 }, { "epoch": 17.35611510791367, "step": 19300, "torque_loss": 0.08458065986633301 }, { "epoch": 17.365107913669064, "grad_norm": 0.3006833791732788, "learning_rate": 8.112429401201821e-05, "loss": 0.0857, "step": 19310 }, { "action_loss": 0.03200085461139679, "epoch": 17.365107913669064, "step": 19310 }, { "epoch": 17.365107913669064, "step": 19310, "torque_loss": 0.26087486743927 }, { "epoch": 17.37410071942446, "grad_norm": 0.36812180280685425, "learning_rate": 8.110272167263656e-05, "loss": 0.105, "step": 19320 }, { "action_loss": 0.008146815933287144, "epoch": 17.37410071942446, "step": 19320 }, { "epoch": 17.37410071942446, "step": 19320, "torque_loss": 0.16138313710689545 }, { "epoch": 17.383093525179856, "grad_norm": 0.27767565846443176, "learning_rate": 8.108113988506929e-05, "loss": 0.0879, "step": 19330 }, { "action_loss": 0.01588883250951767, "epoch": 17.383093525179856, "step": 19330 }, { "epoch": 17.383093525179856, "step": 19330, "torque_loss": 0.1684533953666687 }, { "epoch": 17.392086330935253, "grad_norm": 0.35842499136924744, "learning_rate": 8.105954865587235e-05, "loss": 0.0933, "step": 19340 }, { "action_loss": 0.005347853992134333, "epoch": 17.392086330935253, "step": 19340 }, { "epoch": 17.392086330935253, "step": 19340, "torque_loss": 0.09402661770582199 }, { "epoch": 17.401079136690647, "grad_norm": 0.37663042545318604, "learning_rate": 8.103794799160463e-05, "loss": 0.0903, "step": 19350 }, { "action_loss": 0.02841835282742977, "epoch": 17.401079136690647, "step": 19350 }, { "epoch": 17.401079136690647, "step": 19350, "torque_loss": 0.2448616474866867 }, { "epoch": 17.41007194244604, "grad_norm": 0.5058532953262329, "learning_rate": 8.101633789882781e-05, "loss": 0.1027, "step": 19360 }, { "action_loss": 0.0050969249568879604, "epoch": 17.41007194244604, "step": 19360 }, { "epoch": 17.41007194244604, "step": 19360, "torque_loss": 0.10723957419395447 }, { "epoch": 17.41906474820144, "grad_norm": 0.3994798958301544, "learning_rate": 8.099471838410648e-05, "loss": 0.0871, "step": 19370 }, { "action_loss": 0.002799062756821513, "epoch": 17.41906474820144, "step": 19370 }, { "epoch": 17.41906474820144, "step": 19370, "torque_loss": 0.09022355824708939 }, { "epoch": 17.428057553956833, "grad_norm": 0.344701886177063, "learning_rate": 8.097308945400806e-05, "loss": 0.0911, "step": 19380 }, { "action_loss": 0.003097306238487363, "epoch": 17.428057553956833, "step": 19380 }, { "epoch": 17.428057553956833, "step": 19380, "torque_loss": 0.08858552575111389 }, { "epoch": 17.43705035971223, "grad_norm": 0.3632790446281433, "learning_rate": 8.095145111510288e-05, "loss": 0.0887, "step": 19390 }, { "action_loss": 0.004692563321441412, "epoch": 17.43705035971223, "step": 19390 }, { "epoch": 17.43705035971223, "step": 19390, "torque_loss": 0.11175358295440674 }, { "epoch": 17.446043165467625, "grad_norm": 0.45264682173728943, "learning_rate": 8.092980337396406e-05, "loss": 0.085, "step": 19400 }, { "action_loss": 0.014703297056257725, "epoch": 17.446043165467625, "step": 19400 }, { "epoch": 17.446043165467625, "step": 19400, "torque_loss": 0.18337933719158173 }, { "epoch": 17.455035971223023, "grad_norm": 0.32375097274780273, "learning_rate": 8.090814623716763e-05, "loss": 0.0964, "step": 19410 }, { "action_loss": 0.0031058266758918762, "epoch": 17.455035971223023, "step": 19410 }, { "epoch": 17.455035971223023, "step": 19410, "torque_loss": 0.10386929661035538 }, { "epoch": 17.464028776978417, "grad_norm": 0.2983385920524597, "learning_rate": 8.088647971129246e-05, "loss": 0.1012, "step": 19420 }, { "action_loss": 0.05181391164660454, "epoch": 17.464028776978417, "step": 19420 }, { "epoch": 17.464028776978417, "step": 19420, "torque_loss": 0.24733968079090118 }, { "epoch": 17.473021582733814, "grad_norm": 0.3059326708316803, "learning_rate": 8.086480380292026e-05, "loss": 0.0947, "step": 19430 }, { "action_loss": 0.0024895821698009968, "epoch": 17.473021582733814, "step": 19430 }, { "epoch": 17.473021582733814, "step": 19430, "torque_loss": 0.04770945385098457 }, { "epoch": 17.48201438848921, "grad_norm": 0.4225446581840515, "learning_rate": 8.084311851863562e-05, "loss": 0.0904, "step": 19440 }, { "action_loss": 0.007490409072488546, "epoch": 17.48201438848921, "step": 19440 }, { "epoch": 17.48201438848921, "step": 19440, "torque_loss": 0.15935692191123962 }, { "epoch": 17.491007194244606, "grad_norm": 0.37716227769851685, "learning_rate": 8.082142386502591e-05, "loss": 0.0905, "step": 19450 }, { "action_loss": 0.014852289110422134, "epoch": 17.491007194244606, "step": 19450 }, { "epoch": 17.491007194244606, "step": 19450, "torque_loss": 0.1385631412267685 }, { "epoch": 17.5, "grad_norm": 0.38378190994262695, "learning_rate": 8.079971984868145e-05, "loss": 0.1062, "step": 19460 }, { "action_loss": 0.007559523452073336, "epoch": 17.5, "step": 19460 }, { "epoch": 17.5, "step": 19460, "torque_loss": 0.1939244419336319 }, { "epoch": 17.508992805755394, "grad_norm": 0.37430134415626526, "learning_rate": 8.077800647619532e-05, "loss": 0.1022, "step": 19470 }, { "action_loss": 0.004405904095619917, "epoch": 17.508992805755394, "step": 19470 }, { "epoch": 17.508992805755394, "step": 19470, "torque_loss": 0.1598258912563324 }, { "epoch": 17.51798561151079, "grad_norm": 0.32629743218421936, "learning_rate": 8.075628375416345e-05, "loss": 0.0879, "step": 19480 }, { "action_loss": 0.004102904815226793, "epoch": 17.51798561151079, "step": 19480 }, { "epoch": 17.51798561151079, "step": 19480, "torque_loss": 0.08690357208251953 }, { "epoch": 17.526978417266186, "grad_norm": 0.4153362214565277, "learning_rate": 8.073455168918464e-05, "loss": 0.102, "step": 19490 }, { "action_loss": 0.006122452672570944, "epoch": 17.526978417266186, "step": 19490 }, { "epoch": 17.526978417266186, "step": 19490, "torque_loss": 0.131839781999588 }, { "epoch": 17.535971223021583, "grad_norm": 0.3780439794063568, "learning_rate": 8.071281028786055e-05, "loss": 0.0792, "step": 19500 }, { "action_loss": 0.004514506086707115, "epoch": 17.535971223021583, "step": 19500 }, { "epoch": 17.535971223021583, "step": 19500, "torque_loss": 0.10282047837972641 }, { "epoch": 17.544964028776977, "grad_norm": 0.3473834693431854, "learning_rate": 8.069105955679562e-05, "loss": 0.0906, "step": 19510 }, { "action_loss": 0.0069936602376401424, "epoch": 17.544964028776977, "step": 19510 }, { "epoch": 17.544964028776977, "step": 19510, "torque_loss": 0.12712521851062775 }, { "epoch": 17.553956834532375, "grad_norm": 0.37810441851615906, "learning_rate": 8.066929950259713e-05, "loss": 0.0922, "step": 19520 }, { "action_loss": 0.006585709750652313, "epoch": 17.553956834532375, "step": 19520 }, { "epoch": 17.553956834532375, "step": 19520, "torque_loss": 0.11320298165082932 }, { "epoch": 17.56294964028777, "grad_norm": 0.37585747241973877, "learning_rate": 8.064753013187522e-05, "loss": 0.0919, "step": 19530 }, { "action_loss": 0.013513338752090931, "epoch": 17.56294964028777, "step": 19530 }, { "epoch": 17.56294964028777, "step": 19530, "torque_loss": 0.22421269118785858 }, { "epoch": 17.571942446043167, "grad_norm": 0.3106481432914734, "learning_rate": 8.062575145124289e-05, "loss": 0.0917, "step": 19540 }, { "action_loss": 0.008815820328891277, "epoch": 17.571942446043167, "step": 19540 }, { "epoch": 17.571942446043167, "step": 19540, "torque_loss": 0.16833724081516266 }, { "epoch": 17.58093525179856, "grad_norm": 0.3630515933036804, "learning_rate": 8.060396346731587e-05, "loss": 0.0995, "step": 19550 }, { "action_loss": 0.00663907453417778, "epoch": 17.58093525179856, "step": 19550 }, { "epoch": 17.58093525179856, "step": 19550, "torque_loss": 0.1036745086312294 }, { "epoch": 17.58992805755396, "grad_norm": 0.4303417205810547, "learning_rate": 8.058216618671281e-05, "loss": 0.082, "step": 19560 }, { "action_loss": 0.009615506045520306, "epoch": 17.58992805755396, "step": 19560 }, { "epoch": 17.58992805755396, "step": 19560, "torque_loss": 0.14477753639221191 }, { "epoch": 17.598920863309353, "grad_norm": 0.28073129057884216, "learning_rate": 8.056035961605514e-05, "loss": 0.0799, "step": 19570 }, { "action_loss": 0.005156311672180891, "epoch": 17.598920863309353, "step": 19570 }, { "epoch": 17.598920863309353, "step": 19570, "torque_loss": 0.13076554238796234 }, { "epoch": 17.607913669064747, "grad_norm": 0.33303865790367126, "learning_rate": 8.05385437619671e-05, "loss": 0.1068, "step": 19580 }, { "action_loss": 0.0033197125885635614, "epoch": 17.607913669064747, "step": 19580 }, { "epoch": 17.607913669064747, "step": 19580, "torque_loss": 0.11598187685012817 }, { "epoch": 17.616906474820144, "grad_norm": 0.34044599533081055, "learning_rate": 8.05167186310758e-05, "loss": 0.0805, "step": 19590 }, { "action_loss": 0.008550050668418407, "epoch": 17.616906474820144, "step": 19590 }, { "epoch": 17.616906474820144, "step": 19590, "torque_loss": 0.16226492822170258 }, { "epoch": 17.62589928057554, "grad_norm": 0.3611937463283539, "learning_rate": 8.049488423001113e-05, "loss": 0.0909, "step": 19600 }, { "action_loss": 0.01603781431913376, "epoch": 17.62589928057554, "step": 19600 }, { "epoch": 17.62589928057554, "step": 19600, "torque_loss": 0.19707699120044708 }, { "epoch": 17.634892086330936, "grad_norm": 0.4195738732814789, "learning_rate": 8.047304056540581e-05, "loss": 0.0953, "step": 19610 }, { "action_loss": 0.012049809098243713, "epoch": 17.634892086330936, "step": 19610 }, { "epoch": 17.634892086330936, "step": 19610, "torque_loss": 0.15319861471652985 }, { "epoch": 17.64388489208633, "grad_norm": 0.4442208409309387, "learning_rate": 8.045118764389534e-05, "loss": 0.0919, "step": 19620 }, { "action_loss": 0.0041909911669790745, "epoch": 17.64388489208633, "step": 19620 }, { "epoch": 17.64388489208633, "step": 19620, "torque_loss": 0.09383255243301392 }, { "epoch": 17.652877697841728, "grad_norm": 0.35994261503219604, "learning_rate": 8.042932547211809e-05, "loss": 0.0739, "step": 19630 }, { "action_loss": 0.008252192288637161, "epoch": 17.652877697841728, "step": 19630 }, { "epoch": 17.652877697841728, "step": 19630, "torque_loss": 0.11902549117803574 }, { "epoch": 17.66187050359712, "grad_norm": 0.3356500566005707, "learning_rate": 8.04074540567152e-05, "loss": 0.0895, "step": 19640 }, { "action_loss": 0.003320985473692417, "epoch": 17.66187050359712, "step": 19640 }, { "epoch": 17.66187050359712, "step": 19640, "torque_loss": 0.09483834356069565 }, { "epoch": 17.67086330935252, "grad_norm": 0.3313612639904022, "learning_rate": 8.038557340433063e-05, "loss": 0.111, "step": 19650 }, { "action_loss": 0.007044665049761534, "epoch": 17.67086330935252, "step": 19650 }, { "epoch": 17.67086330935252, "step": 19650, "torque_loss": 0.16590319573879242 }, { "epoch": 17.679856115107913, "grad_norm": 0.29482007026672363, "learning_rate": 8.036368352161115e-05, "loss": 0.0901, "step": 19660 }, { "action_loss": 0.009659302420914173, "epoch": 17.679856115107913, "step": 19660 }, { "epoch": 17.679856115107913, "step": 19660, "torque_loss": 0.1583573818206787 }, { "epoch": 17.68884892086331, "grad_norm": 0.3646642565727234, "learning_rate": 8.034178441520633e-05, "loss": 0.0868, "step": 19670 }, { "action_loss": 0.00576857989653945, "epoch": 17.68884892086331, "step": 19670 }, { "epoch": 17.68884892086331, "step": 19670, "torque_loss": 0.1278574913740158 }, { "epoch": 17.697841726618705, "grad_norm": 0.5000727772712708, "learning_rate": 8.031987609176852e-05, "loss": 0.1028, "step": 19680 }, { "action_loss": 0.005689872428774834, "epoch": 17.697841726618705, "step": 19680 }, { "epoch": 17.697841726618705, "step": 19680, "torque_loss": 0.10470948368310928 }, { "epoch": 17.7068345323741, "grad_norm": 0.3728264570236206, "learning_rate": 8.02979585579529e-05, "loss": 0.0927, "step": 19690 }, { "action_loss": 0.013776046223938465, "epoch": 17.7068345323741, "step": 19690 }, { "epoch": 17.7068345323741, "step": 19690, "torque_loss": 0.13087902963161469 }, { "epoch": 17.715827338129497, "grad_norm": 0.3768354058265686, "learning_rate": 8.027603182041745e-05, "loss": 0.1058, "step": 19700 }, { "action_loss": 0.010727652348577976, "epoch": 17.715827338129497, "step": 19700 }, { "epoch": 17.715827338129497, "step": 19700, "torque_loss": 0.15342335402965546 }, { "epoch": 17.72482014388489, "grad_norm": 0.3165152668952942, "learning_rate": 8.025409588582292e-05, "loss": 0.0918, "step": 19710 }, { "action_loss": 0.005687573458999395, "epoch": 17.72482014388489, "step": 19710 }, { "epoch": 17.72482014388489, "step": 19710, "torque_loss": 0.11079571396112442 }, { "epoch": 17.73381294964029, "grad_norm": 0.37936314940452576, "learning_rate": 8.023215076083288e-05, "loss": 0.097, "step": 19720 }, { "action_loss": 0.0036943366285413504, "epoch": 17.73381294964029, "step": 19720 }, { "epoch": 17.73381294964029, "step": 19720, "torque_loss": 0.11604855209589005 }, { "epoch": 17.742805755395683, "grad_norm": 0.4132685661315918, "learning_rate": 8.021019645211367e-05, "loss": 0.083, "step": 19730 }, { "action_loss": 0.005630417261272669, "epoch": 17.742805755395683, "step": 19730 }, { "epoch": 17.742805755395683, "step": 19730, "torque_loss": 0.09445258975028992 }, { "epoch": 17.75179856115108, "grad_norm": 0.41758260130882263, "learning_rate": 8.018823296633441e-05, "loss": 0.0862, "step": 19740 }, { "action_loss": 0.012307391501963139, "epoch": 17.75179856115108, "step": 19740 }, { "epoch": 17.75179856115108, "step": 19740, "torque_loss": 0.14192336797714233 }, { "epoch": 17.760791366906474, "grad_norm": 0.28574860095977783, "learning_rate": 8.016626031016708e-05, "loss": 0.0901, "step": 19750 }, { "action_loss": 0.015658637508749962, "epoch": 17.760791366906474, "step": 19750 }, { "epoch": 17.760791366906474, "step": 19750, "torque_loss": 0.11076659709215164 }, { "epoch": 17.769784172661872, "grad_norm": 0.4122401773929596, "learning_rate": 8.014427849028636e-05, "loss": 0.0904, "step": 19760 }, { "action_loss": 0.01191349234431982, "epoch": 17.769784172661872, "step": 19760 }, { "epoch": 17.769784172661872, "step": 19760, "torque_loss": 0.13573789596557617 }, { "epoch": 17.778776978417266, "grad_norm": 0.2996594309806824, "learning_rate": 8.012228751336974e-05, "loss": 0.0941, "step": 19770 }, { "action_loss": 0.012760192155838013, "epoch": 17.778776978417266, "step": 19770 }, { "epoch": 17.778776978417266, "step": 19770, "torque_loss": 0.15685288608074188 }, { "epoch": 17.78776978417266, "grad_norm": 0.48701056838035583, "learning_rate": 8.01002873860975e-05, "loss": 0.1104, "step": 19780 }, { "action_loss": 0.008473562076687813, "epoch": 17.78776978417266, "step": 19780 }, { "epoch": 17.78776978417266, "step": 19780, "torque_loss": 0.08878927677869797 }, { "epoch": 17.796762589928058, "grad_norm": 0.31218376755714417, "learning_rate": 8.00782781151527e-05, "loss": 0.0836, "step": 19790 }, { "action_loss": 0.016625316813588142, "epoch": 17.796762589928058, "step": 19790 }, { "epoch": 17.796762589928058, "step": 19790, "torque_loss": 0.18339039385318756 }, { "epoch": 17.805755395683452, "grad_norm": 0.2878412902355194, "learning_rate": 8.005625970722119e-05, "loss": 0.096, "step": 19800 }, { "action_loss": 0.005255321506410837, "epoch": 17.805755395683452, "step": 19800 }, { "epoch": 17.805755395683452, "step": 19800, "torque_loss": 0.12222256511449814 }, { "epoch": 17.81474820143885, "grad_norm": 0.32905590534210205, "learning_rate": 8.003423216899158e-05, "loss": 0.0783, "step": 19810 }, { "action_loss": 0.0039056434761732817, "epoch": 17.81474820143885, "step": 19810 }, { "epoch": 17.81474820143885, "step": 19810, "torque_loss": 0.14268837869167328 }, { "epoch": 17.823741007194243, "grad_norm": 0.26302269101142883, "learning_rate": 8.001219550715522e-05, "loss": 0.0965, "step": 19820 }, { "action_loss": 0.006908537354320288, "epoch": 17.823741007194243, "step": 19820 }, { "epoch": 17.823741007194243, "step": 19820, "torque_loss": 0.13396961987018585 }, { "epoch": 17.83273381294964, "grad_norm": 0.310624897480011, "learning_rate": 7.999014972840632e-05, "loss": 0.0978, "step": 19830 }, { "action_loss": 0.009660162031650543, "epoch": 17.83273381294964, "step": 19830 }, { "epoch": 17.83273381294964, "step": 19830, "torque_loss": 0.14153601229190826 }, { "epoch": 17.841726618705035, "grad_norm": 0.3451830744743347, "learning_rate": 7.996809483944174e-05, "loss": 0.0877, "step": 19840 }, { "action_loss": 0.008144988678395748, "epoch": 17.841726618705035, "step": 19840 }, { "epoch": 17.841726618705035, "step": 19840, "torque_loss": 0.18431957066059113 }, { "epoch": 17.850719424460433, "grad_norm": 0.2722996175289154, "learning_rate": 7.994603084696124e-05, "loss": 0.0739, "step": 19850 }, { "action_loss": 0.023228874430060387, "epoch": 17.850719424460433, "step": 19850 }, { "epoch": 17.850719424460433, "step": 19850, "torque_loss": 0.20221151411533356 }, { "epoch": 17.859712230215827, "grad_norm": 0.26203039288520813, "learning_rate": 7.992395775766724e-05, "loss": 0.0805, "step": 19860 }, { "action_loss": 0.002843619091436267, "epoch": 17.859712230215827, "step": 19860 }, { "epoch": 17.859712230215827, "step": 19860, "torque_loss": 0.08673729747533798 }, { "epoch": 17.868705035971225, "grad_norm": 0.2887044847011566, "learning_rate": 7.990187557826497e-05, "loss": 0.1015, "step": 19870 }, { "action_loss": 0.014380061067640781, "epoch": 17.868705035971225, "step": 19870 }, { "epoch": 17.868705035971225, "step": 19870, "torque_loss": 0.22281794250011444 }, { "epoch": 17.87769784172662, "grad_norm": 0.30608922243118286, "learning_rate": 7.987978431546242e-05, "loss": 0.1023, "step": 19880 }, { "action_loss": 0.01224774494767189, "epoch": 17.87769784172662, "step": 19880 }, { "epoch": 17.87769784172662, "step": 19880, "torque_loss": 0.13207298517227173 }, { "epoch": 17.886690647482013, "grad_norm": 0.3196524679660797, "learning_rate": 7.985768397597031e-05, "loss": 0.0901, "step": 19890 }, { "action_loss": 0.011275782249867916, "epoch": 17.886690647482013, "step": 19890 }, { "epoch": 17.886690647482013, "step": 19890, "torque_loss": 0.12650947272777557 }, { "epoch": 17.89568345323741, "grad_norm": 0.40177902579307556, "learning_rate": 7.983557456650216e-05, "loss": 0.1075, "step": 19900 }, { "action_loss": 0.023243695497512817, "epoch": 17.89568345323741, "step": 19900 }, { "epoch": 17.89568345323741, "step": 19900, "torque_loss": 0.1571713238954544 }, { "epoch": 17.904676258992804, "grad_norm": 0.4371373653411865, "learning_rate": 7.981345609377422e-05, "loss": 0.0944, "step": 19910 }, { "action_loss": 0.019859490916132927, "epoch": 17.904676258992804, "step": 19910 }, { "epoch": 17.904676258992804, "step": 19910, "torque_loss": 0.18353426456451416 }, { "epoch": 17.913669064748202, "grad_norm": 0.3061910569667816, "learning_rate": 7.97913285645055e-05, "loss": 0.0905, "step": 19920 }, { "action_loss": 0.01988307572901249, "epoch": 17.913669064748202, "step": 19920 }, { "epoch": 17.913669064748202, "step": 19920, "torque_loss": 0.14316529035568237 }, { "epoch": 17.922661870503596, "grad_norm": 0.3646996319293976, "learning_rate": 7.976919198541776e-05, "loss": 0.0872, "step": 19930 }, { "action_loss": 0.006978800054639578, "epoch": 17.922661870503596, "step": 19930 }, { "epoch": 17.922661870503596, "step": 19930, "torque_loss": 0.10540437698364258 }, { "epoch": 17.931654676258994, "grad_norm": 0.46373701095581055, "learning_rate": 7.974704636323548e-05, "loss": 0.0928, "step": 19940 }, { "action_loss": 0.009498446248471737, "epoch": 17.931654676258994, "step": 19940 }, { "epoch": 17.931654676258994, "step": 19940, "torque_loss": 0.14043967425823212 }, { "epoch": 17.940647482014388, "grad_norm": 0.2612217962741852, "learning_rate": 7.972489170468597e-05, "loss": 0.0808, "step": 19950 }, { "action_loss": 0.005959701258689165, "epoch": 17.940647482014388, "step": 19950 }, { "epoch": 17.940647482014388, "step": 19950, "torque_loss": 0.11556798219680786 }, { "epoch": 17.949640287769785, "grad_norm": 0.38565194606781006, "learning_rate": 7.970272801649918e-05, "loss": 0.0987, "step": 19960 }, { "action_loss": 0.0041315690614283085, "epoch": 17.949640287769785, "step": 19960 }, { "epoch": 17.949640287769785, "step": 19960, "torque_loss": 0.16308243572711945 }, { "epoch": 17.95863309352518, "grad_norm": 0.3430868089199066, "learning_rate": 7.96805553054079e-05, "loss": 0.0941, "step": 19970 }, { "action_loss": 0.0031000401359051466, "epoch": 17.95863309352518, "step": 19970 }, { "epoch": 17.95863309352518, "step": 19970, "torque_loss": 0.15375414490699768 }, { "epoch": 17.967625899280577, "grad_norm": 0.4231738746166229, "learning_rate": 7.965837357814756e-05, "loss": 0.0929, "step": 19980 }, { "action_loss": 0.008858766406774521, "epoch": 17.967625899280577, "step": 19980 }, { "epoch": 17.967625899280577, "step": 19980, "torque_loss": 0.13122011721134186 }, { "epoch": 17.97661870503597, "grad_norm": 0.27838945388793945, "learning_rate": 7.963618284145643e-05, "loss": 0.0927, "step": 19990 }, { "action_loss": 0.009500158950686455, "epoch": 17.97661870503597, "step": 19990 }, { "epoch": 17.97661870503597, "step": 19990, "torque_loss": 0.15055203437805176 }, { "epoch": 17.985611510791365, "grad_norm": 0.33308935165405273, "learning_rate": 7.961398310207544e-05, "loss": 0.0964, "step": 20000 }, { "action_loss": 0.004794934764504433, "epoch": 17.985611510791365, "step": 20000 }, { "epoch": 17.985611510791365, "step": 20000, "torque_loss": 0.0931742712855339 }, { "epoch": 17.994604316546763, "grad_norm": 0.29071104526519775, "learning_rate": 7.95917743667483e-05, "loss": 0.0897, "step": 20010 }, { "action_loss": 0.004261153284460306, "epoch": 17.994604316546763, "step": 20010 }, { "epoch": 17.994604316546763, "step": 20010, "torque_loss": 0.07044173777103424 }, { "epoch": 18.003597122302157, "grad_norm": 0.37302374839782715, "learning_rate": 7.956955664222144e-05, "loss": 0.0886, "step": 20020 }, { "action_loss": 0.010334242135286331, "epoch": 18.003597122302157, "step": 20020 }, { "epoch": 18.003597122302157, "step": 20020, "torque_loss": 0.15601937472820282 }, { "epoch": 18.012589928057555, "grad_norm": 0.32923486828804016, "learning_rate": 7.954732993524399e-05, "loss": 0.0926, "step": 20030 }, { "action_loss": 0.01141029316931963, "epoch": 18.012589928057555, "step": 20030 }, { "epoch": 18.012589928057555, "step": 20030, "torque_loss": 0.1771666258573532 }, { "epoch": 18.02158273381295, "grad_norm": 0.25243210792541504, "learning_rate": 7.952509425256786e-05, "loss": 0.0979, "step": 20040 }, { "action_loss": 0.004316064994782209, "epoch": 18.02158273381295, "step": 20040 }, { "epoch": 18.02158273381295, "step": 20040, "torque_loss": 0.08589737862348557 }, { "epoch": 18.030575539568346, "grad_norm": 0.3003741502761841, "learning_rate": 7.950284960094767e-05, "loss": 0.0913, "step": 20050 }, { "action_loss": 0.0036277922336012125, "epoch": 18.030575539568346, "step": 20050 }, { "epoch": 18.030575539568346, "step": 20050, "torque_loss": 0.09777691960334778 }, { "epoch": 18.03956834532374, "grad_norm": 0.3907616138458252, "learning_rate": 7.948059598714076e-05, "loss": 0.0861, "step": 20060 }, { "action_loss": 0.007053365930914879, "epoch": 18.03956834532374, "step": 20060 }, { "epoch": 18.03956834532374, "step": 20060, "torque_loss": 0.10017911344766617 }, { "epoch": 18.048561151079138, "grad_norm": 0.434009313583374, "learning_rate": 7.945833341790717e-05, "loss": 0.1049, "step": 20070 }, { "action_loss": 0.01048863772302866, "epoch": 18.048561151079138, "step": 20070 }, { "epoch": 18.048561151079138, "step": 20070, "torque_loss": 0.13906532526016235 }, { "epoch": 18.057553956834532, "grad_norm": 0.4159417748451233, "learning_rate": 7.94360619000097e-05, "loss": 0.1071, "step": 20080 }, { "action_loss": 0.0061288680881261826, "epoch": 18.057553956834532, "step": 20080 }, { "epoch": 18.057553956834532, "step": 20080, "torque_loss": 0.11554042249917984 }, { "epoch": 18.06654676258993, "grad_norm": 0.3383695185184479, "learning_rate": 7.941378144021381e-05, "loss": 0.0849, "step": 20090 }, { "action_loss": 0.03550611808896065, "epoch": 18.06654676258993, "step": 20090 }, { "epoch": 18.06654676258993, "step": 20090, "torque_loss": 0.2477867156267166 }, { "epoch": 18.075539568345324, "grad_norm": 0.32440707087516785, "learning_rate": 7.939149204528777e-05, "loss": 0.0865, "step": 20100 }, { "action_loss": 0.013402573764324188, "epoch": 18.075539568345324, "step": 20100 }, { "epoch": 18.075539568345324, "step": 20100, "torque_loss": 0.162080779671669 }, { "epoch": 18.084532374100718, "grad_norm": 0.4153040647506714, "learning_rate": 7.936919372200246e-05, "loss": 0.1057, "step": 20110 }, { "action_loss": 0.006969774607568979, "epoch": 18.084532374100718, "step": 20110 }, { "epoch": 18.084532374100718, "step": 20110, "torque_loss": 0.15848326683044434 }, { "epoch": 18.093525179856115, "grad_norm": 0.4797092080116272, "learning_rate": 7.934688647713158e-05, "loss": 0.09, "step": 20120 }, { "action_loss": 0.004146389197558165, "epoch": 18.093525179856115, "step": 20120 }, { "epoch": 18.093525179856115, "step": 20120, "torque_loss": 0.09197720140218735 }, { "epoch": 18.10251798561151, "grad_norm": 0.4243015944957733, "learning_rate": 7.932457031745143e-05, "loss": 0.0863, "step": 20130 }, { "action_loss": 0.0798318088054657, "epoch": 18.10251798561151, "step": 20130 }, { "epoch": 18.10251798561151, "step": 20130, "torque_loss": 0.23761051893234253 }, { "epoch": 18.111510791366907, "grad_norm": 0.4914252758026123, "learning_rate": 7.930224524974108e-05, "loss": 0.0941, "step": 20140 }, { "action_loss": 0.017953379079699516, "epoch": 18.111510791366907, "step": 20140 }, { "epoch": 18.111510791366907, "step": 20140, "torque_loss": 0.1428392380475998 }, { "epoch": 18.1205035971223, "grad_norm": 0.2993495464324951, "learning_rate": 7.927991128078232e-05, "loss": 0.0881, "step": 20150 }, { "action_loss": 0.006562887225300074, "epoch": 18.1205035971223, "step": 20150 }, { "epoch": 18.1205035971223, "step": 20150, "torque_loss": 0.12421637773513794 }, { "epoch": 18.1294964028777, "grad_norm": 0.26096826791763306, "learning_rate": 7.925756841735958e-05, "loss": 0.0901, "step": 20160 }, { "action_loss": 0.011477905325591564, "epoch": 18.1294964028777, "step": 20160 }, { "epoch": 18.1294964028777, "step": 20160, "torque_loss": 0.1707438975572586 }, { "epoch": 18.138489208633093, "grad_norm": 0.39532703161239624, "learning_rate": 7.923521666626008e-05, "loss": 0.0906, "step": 20170 }, { "action_loss": 0.025368472561240196, "epoch": 18.138489208633093, "step": 20170 }, { "epoch": 18.138489208633093, "step": 20170, "torque_loss": 0.16804510354995728 }, { "epoch": 18.14748201438849, "grad_norm": 0.3432958126068115, "learning_rate": 7.921285603427366e-05, "loss": 0.0845, "step": 20180 }, { "action_loss": 0.006530022248625755, "epoch": 18.14748201438849, "step": 20180 }, { "epoch": 18.14748201438849, "step": 20180, "torque_loss": 0.09539274126291275 }, { "epoch": 18.156474820143885, "grad_norm": 0.37761393189430237, "learning_rate": 7.91904865281929e-05, "loss": 0.0922, "step": 20190 }, { "action_loss": 0.0035742304753512144, "epoch": 18.156474820143885, "step": 20190 }, { "epoch": 18.156474820143885, "step": 20190, "torque_loss": 0.10137352347373962 }, { "epoch": 18.165467625899282, "grad_norm": 0.34902527928352356, "learning_rate": 7.916810815481307e-05, "loss": 0.0824, "step": 20200 }, { "action_loss": 0.0029460135847330093, "epoch": 18.165467625899282, "step": 20200 }, { "epoch": 18.165467625899282, "step": 20200, "torque_loss": 0.10667084902524948 }, { "epoch": 18.174460431654676, "grad_norm": 0.3306768834590912, "learning_rate": 7.914572092093211e-05, "loss": 0.0905, "step": 20210 }, { "action_loss": 0.012392611242830753, "epoch": 18.174460431654676, "step": 20210 }, { "epoch": 18.174460431654676, "step": 20210, "torque_loss": 0.18387746810913086 }, { "epoch": 18.18345323741007, "grad_norm": 0.26667866110801697, "learning_rate": 7.912332483335068e-05, "loss": 0.098, "step": 20220 }, { "action_loss": 0.020430123433470726, "epoch": 18.18345323741007, "step": 20220 }, { "epoch": 18.18345323741007, "step": 20220, "torque_loss": 0.1305423378944397 }, { "epoch": 18.192446043165468, "grad_norm": 0.27521201968193054, "learning_rate": 7.910091989887213e-05, "loss": 0.0844, "step": 20230 }, { "action_loss": 0.010321510024368763, "epoch": 18.192446043165468, "step": 20230 }, { "epoch": 18.192446043165468, "step": 20230, "torque_loss": 0.16745305061340332 }, { "epoch": 18.201438848920862, "grad_norm": 0.2861417233943939, "learning_rate": 7.907850612430248e-05, "loss": 0.093, "step": 20240 }, { "action_loss": 0.007228687405586243, "epoch": 18.201438848920862, "step": 20240 }, { "epoch": 18.201438848920862, "step": 20240, "torque_loss": 0.11248091608285904 }, { "epoch": 18.21043165467626, "grad_norm": 0.3551650941371918, "learning_rate": 7.905608351645044e-05, "loss": 0.0789, "step": 20250 }, { "action_loss": 0.009192069061100483, "epoch": 18.21043165467626, "step": 20250 }, { "epoch": 18.21043165467626, "step": 20250, "torque_loss": 0.1607472151517868 }, { "epoch": 18.219424460431654, "grad_norm": 0.29420581459999084, "learning_rate": 7.90336520821274e-05, "loss": 0.092, "step": 20260 }, { "action_loss": 0.010020100511610508, "epoch": 18.219424460431654, "step": 20260 }, { "epoch": 18.219424460431654, "step": 20260, "torque_loss": 0.19834546744823456 }, { "epoch": 18.22841726618705, "grad_norm": 0.23881249129772186, "learning_rate": 7.901121182814746e-05, "loss": 0.0813, "step": 20270 }, { "action_loss": 0.002968003274872899, "epoch": 18.22841726618705, "step": 20270 }, { "epoch": 18.22841726618705, "step": 20270, "torque_loss": 0.11037391424179077 }, { "epoch": 18.237410071942445, "grad_norm": 0.33849653601646423, "learning_rate": 7.898876276132736e-05, "loss": 0.0911, "step": 20280 }, { "action_loss": 0.0051088943146169186, "epoch": 18.237410071942445, "step": 20280 }, { "epoch": 18.237410071942445, "step": 20280, "torque_loss": 0.1118229404091835 }, { "epoch": 18.246402877697843, "grad_norm": 0.23619309067726135, "learning_rate": 7.896630488848654e-05, "loss": 0.0844, "step": 20290 }, { "action_loss": 0.014661538414657116, "epoch": 18.246402877697843, "step": 20290 }, { "epoch": 18.246402877697843, "step": 20290, "torque_loss": 0.1544043868780136 }, { "epoch": 18.255395683453237, "grad_norm": 0.33206287026405334, "learning_rate": 7.89438382164471e-05, "loss": 0.0878, "step": 20300 }, { "action_loss": 0.008431370370090008, "epoch": 18.255395683453237, "step": 20300 }, { "epoch": 18.255395683453237, "step": 20300, "torque_loss": 0.19151920080184937 }, { "epoch": 18.264388489208635, "grad_norm": 0.3470510244369507, "learning_rate": 7.892136275203383e-05, "loss": 0.0887, "step": 20310 }, { "action_loss": 0.0034243997652083635, "epoch": 18.264388489208635, "step": 20310 }, { "epoch": 18.264388489208635, "step": 20310, "torque_loss": 0.08228632062673569 }, { "epoch": 18.27338129496403, "grad_norm": 0.3689459264278412, "learning_rate": 7.889887850207418e-05, "loss": 0.0883, "step": 20320 }, { "action_loss": 0.015285768546164036, "epoch": 18.27338129496403, "step": 20320 }, { "epoch": 18.27338129496403, "step": 20320, "torque_loss": 0.15286071598529816 }, { "epoch": 18.282374100719423, "grad_norm": 0.4065778851509094, "learning_rate": 7.887638547339827e-05, "loss": 0.0919, "step": 20330 }, { "action_loss": 0.008737246505916119, "epoch": 18.282374100719423, "step": 20330 }, { "epoch": 18.282374100719423, "step": 20330, "torque_loss": 0.1392488032579422 }, { "epoch": 18.29136690647482, "grad_norm": 0.280525267124176, "learning_rate": 7.885388367283891e-05, "loss": 0.088, "step": 20340 }, { "action_loss": 0.008524350821971893, "epoch": 18.29136690647482, "step": 20340 }, { "epoch": 18.29136690647482, "step": 20340, "torque_loss": 0.16194172203540802 }, { "epoch": 18.300359712230215, "grad_norm": 0.2741226255893707, "learning_rate": 7.88313731072315e-05, "loss": 0.0883, "step": 20350 }, { "action_loss": 0.006892851088196039, "epoch": 18.300359712230215, "step": 20350 }, { "epoch": 18.300359712230215, "step": 20350, "torque_loss": 0.17866134643554688 }, { "epoch": 18.309352517985612, "grad_norm": 0.3217521905899048, "learning_rate": 7.88088537834142e-05, "loss": 0.0906, "step": 20360 }, { "action_loss": 0.02357008494436741, "epoch": 18.309352517985612, "step": 20360 }, { "epoch": 18.309352517985612, "step": 20360, "torque_loss": 0.2126571089029312 }, { "epoch": 18.318345323741006, "grad_norm": 0.3152751624584198, "learning_rate": 7.878632570822778e-05, "loss": 0.0927, "step": 20370 }, { "action_loss": 0.014840342104434967, "epoch": 18.318345323741006, "step": 20370 }, { "epoch": 18.318345323741006, "step": 20370, "torque_loss": 0.1601990908384323 }, { "epoch": 18.327338129496404, "grad_norm": 0.27446600794792175, "learning_rate": 7.876378888851567e-05, "loss": 0.0927, "step": 20380 }, { "action_loss": 0.011783416382968426, "epoch": 18.327338129496404, "step": 20380 }, { "epoch": 18.327338129496404, "step": 20380, "torque_loss": 0.16756071150302887 }, { "epoch": 18.336330935251798, "grad_norm": 0.40008899569511414, "learning_rate": 7.874124333112396e-05, "loss": 0.0969, "step": 20390 }, { "action_loss": 0.019327357411384583, "epoch": 18.336330935251798, "step": 20390 }, { "epoch": 18.336330935251798, "step": 20390, "torque_loss": 0.13000868260860443 }, { "epoch": 18.345323741007196, "grad_norm": 0.2824147939682007, "learning_rate": 7.871868904290138e-05, "loss": 0.0801, "step": 20400 }, { "action_loss": 0.019577359780669212, "epoch": 18.345323741007196, "step": 20400 }, { "epoch": 18.345323741007196, "step": 20400, "torque_loss": 0.17533081769943237 }, { "epoch": 18.35431654676259, "grad_norm": 0.34522050619125366, "learning_rate": 7.869612603069935e-05, "loss": 0.0903, "step": 20410 }, { "action_loss": 0.005625308025628328, "epoch": 18.35431654676259, "step": 20410 }, { "epoch": 18.35431654676259, "step": 20410, "torque_loss": 0.14562438428401947 }, { "epoch": 18.363309352517987, "grad_norm": 0.37967944145202637, "learning_rate": 7.867355430137192e-05, "loss": 0.0918, "step": 20420 }, { "action_loss": 0.006653548683971167, "epoch": 18.363309352517987, "step": 20420 }, { "epoch": 18.363309352517987, "step": 20420, "torque_loss": 0.11382564157247543 }, { "epoch": 18.37230215827338, "grad_norm": 0.35371872782707214, "learning_rate": 7.865097386177577e-05, "loss": 0.0817, "step": 20430 }, { "action_loss": 0.030717618763446808, "epoch": 18.37230215827338, "step": 20430 }, { "epoch": 18.37230215827338, "step": 20430, "torque_loss": 0.21266181766986847 }, { "epoch": 18.381294964028775, "grad_norm": 0.37043026089668274, "learning_rate": 7.862838471877023e-05, "loss": 0.0928, "step": 20440 }, { "action_loss": 0.013953079469501972, "epoch": 18.381294964028775, "step": 20440 }, { "epoch": 18.381294964028775, "step": 20440, "torque_loss": 0.1975635290145874 }, { "epoch": 18.390287769784173, "grad_norm": 0.33602049946784973, "learning_rate": 7.860578687921731e-05, "loss": 0.0919, "step": 20450 }, { "action_loss": 0.009797925129532814, "epoch": 18.390287769784173, "step": 20450 }, { "epoch": 18.390287769784173, "step": 20450, "torque_loss": 0.16912288963794708 }, { "epoch": 18.399280575539567, "grad_norm": 0.4119470417499542, "learning_rate": 7.858318034998164e-05, "loss": 0.0906, "step": 20460 }, { "action_loss": 0.007784900721162558, "epoch": 18.399280575539567, "step": 20460 }, { "epoch": 18.399280575539567, "step": 20460, "torque_loss": 0.1450451761484146 }, { "epoch": 18.408273381294965, "grad_norm": 0.36244985461235046, "learning_rate": 7.856056513793046e-05, "loss": 0.0819, "step": 20470 }, { "action_loss": 0.011125430464744568, "epoch": 18.408273381294965, "step": 20470 }, { "epoch": 18.408273381294965, "step": 20470, "torque_loss": 0.1861252337694168 }, { "epoch": 18.41726618705036, "grad_norm": 0.3639461100101471, "learning_rate": 7.85379412499337e-05, "loss": 0.0888, "step": 20480 }, { "action_loss": 0.004766311962157488, "epoch": 18.41726618705036, "step": 20480 }, { "epoch": 18.41726618705036, "step": 20480, "torque_loss": 0.14437198638916016 }, { "epoch": 18.426258992805757, "grad_norm": 0.4008773863315582, "learning_rate": 7.851530869286389e-05, "loss": 0.0892, "step": 20490 }, { "action_loss": 0.0031289763282984495, "epoch": 18.426258992805757, "step": 20490 }, { "epoch": 18.426258992805757, "step": 20490, "torque_loss": 0.11062099784612656 }, { "epoch": 18.43525179856115, "grad_norm": 0.43058544397354126, "learning_rate": 7.849266747359619e-05, "loss": 0.0856, "step": 20500 }, { "action_loss": 0.003349529579281807, "epoch": 18.43525179856115, "step": 20500 }, { "epoch": 18.43525179856115, "step": 20500, "torque_loss": 0.11325079947710037 }, { "epoch": 18.444244604316548, "grad_norm": 0.3115875720977783, "learning_rate": 7.847001759900843e-05, "loss": 0.0758, "step": 20510 }, { "action_loss": 0.004663547966629267, "epoch": 18.444244604316548, "step": 20510 }, { "epoch": 18.444244604316548, "step": 20510, "torque_loss": 0.13742756843566895 }, { "epoch": 18.453237410071942, "grad_norm": 0.33529478311538696, "learning_rate": 7.844735907598102e-05, "loss": 0.1029, "step": 20520 }, { "action_loss": 0.024287035688757896, "epoch": 18.453237410071942, "step": 20520 }, { "epoch": 18.453237410071942, "step": 20520, "torque_loss": 0.15030251443386078 }, { "epoch": 18.46223021582734, "grad_norm": 0.23806600272655487, "learning_rate": 7.842469191139703e-05, "loss": 0.086, "step": 20530 }, { "action_loss": 0.004228703211992979, "epoch": 18.46223021582734, "step": 20530 }, { "epoch": 18.46223021582734, "step": 20530, "torque_loss": 0.08574181795120239 }, { "epoch": 18.471223021582734, "grad_norm": 0.308437317609787, "learning_rate": 7.840201611214215e-05, "loss": 0.09, "step": 20540 }, { "action_loss": 0.01800568960607052, "epoch": 18.471223021582734, "step": 20540 }, { "epoch": 18.471223021582734, "step": 20540, "torque_loss": 0.22592592239379883 }, { "epoch": 18.480215827338128, "grad_norm": 0.34360548853874207, "learning_rate": 7.837933168510469e-05, "loss": 0.1169, "step": 20550 }, { "action_loss": 0.006360757630318403, "epoch": 18.480215827338128, "step": 20550 }, { "epoch": 18.480215827338128, "step": 20550, "torque_loss": 0.11209902912378311 }, { "epoch": 18.489208633093526, "grad_norm": 0.3674125373363495, "learning_rate": 7.835663863717559e-05, "loss": 0.1, "step": 20560 }, { "action_loss": 0.01007586345076561, "epoch": 18.489208633093526, "step": 20560 }, { "epoch": 18.489208633093526, "step": 20560, "torque_loss": 0.17469941079616547 }, { "epoch": 18.49820143884892, "grad_norm": 0.3396100699901581, "learning_rate": 7.833393697524838e-05, "loss": 0.1067, "step": 20570 }, { "action_loss": 0.004044493660330772, "epoch": 18.49820143884892, "step": 20570 }, { "epoch": 18.49820143884892, "step": 20570, "torque_loss": 0.11442581564188004 }, { "epoch": 18.507194244604317, "grad_norm": 0.40982547402381897, "learning_rate": 7.831122670621922e-05, "loss": 0.0903, "step": 20580 }, { "action_loss": 0.004372125957161188, "epoch": 18.507194244604317, "step": 20580 }, { "epoch": 18.507194244604317, "step": 20580, "torque_loss": 0.11408340930938721 }, { "epoch": 18.51618705035971, "grad_norm": 0.3320446312427521, "learning_rate": 7.82885078369869e-05, "loss": 0.0894, "step": 20590 }, { "action_loss": 0.005709411110728979, "epoch": 18.51618705035971, "step": 20590 }, { "epoch": 18.51618705035971, "step": 20590, "torque_loss": 0.11314171552658081 }, { "epoch": 18.52517985611511, "grad_norm": 0.2698175311088562, "learning_rate": 7.826578037445283e-05, "loss": 0.0856, "step": 20600 }, { "action_loss": 0.00548244034871459, "epoch": 18.52517985611511, "step": 20600 }, { "epoch": 18.52517985611511, "step": 20600, "torque_loss": 0.1394394040107727 }, { "epoch": 18.534172661870503, "grad_norm": 0.3211716115474701, "learning_rate": 7.824304432552097e-05, "loss": 0.0759, "step": 20610 }, { "action_loss": 0.012035183608531952, "epoch": 18.534172661870503, "step": 20610 }, { "epoch": 18.534172661870503, "step": 20610, "torque_loss": 0.20766496658325195 }, { "epoch": 18.5431654676259, "grad_norm": 0.3117640018463135, "learning_rate": 7.822029969709798e-05, "loss": 0.1049, "step": 20620 }, { "action_loss": 0.009439777582883835, "epoch": 18.5431654676259, "step": 20620 }, { "epoch": 18.5431654676259, "step": 20620, "torque_loss": 0.1344345062971115 }, { "epoch": 18.552158273381295, "grad_norm": 0.2662261128425598, "learning_rate": 7.819754649609306e-05, "loss": 0.0936, "step": 20630 }, { "action_loss": 0.0057852440513670444, "epoch": 18.552158273381295, "step": 20630 }, { "epoch": 18.552158273381295, "step": 20630, "torque_loss": 0.09230363368988037 }, { "epoch": 18.56115107913669, "grad_norm": 0.3105960488319397, "learning_rate": 7.817478472941802e-05, "loss": 0.0942, "step": 20640 }, { "action_loss": 0.005330909043550491, "epoch": 18.56115107913669, "step": 20640 }, { "epoch": 18.56115107913669, "step": 20640, "torque_loss": 0.13083012402057648 }, { "epoch": 18.570143884892087, "grad_norm": 0.33755362033843994, "learning_rate": 7.815201440398727e-05, "loss": 0.0753, "step": 20650 }, { "action_loss": 0.02544517256319523, "epoch": 18.570143884892087, "step": 20650 }, { "epoch": 18.570143884892087, "step": 20650, "torque_loss": 0.20401962101459503 }, { "epoch": 18.57913669064748, "grad_norm": 0.3688476085662842, "learning_rate": 7.812923552671789e-05, "loss": 0.1017, "step": 20660 }, { "action_loss": 0.006949292030185461, "epoch": 18.57913669064748, "step": 20660 }, { "epoch": 18.57913669064748, "step": 20660, "torque_loss": 0.12326667457818985 }, { "epoch": 18.58812949640288, "grad_norm": 0.3348764181137085, "learning_rate": 7.810644810452945e-05, "loss": 0.0834, "step": 20670 }, { "action_loss": 0.005911390762776136, "epoch": 18.58812949640288, "step": 20670 }, { "epoch": 18.58812949640288, "step": 20670, "torque_loss": 0.10939383506774902 }, { "epoch": 18.597122302158272, "grad_norm": 0.330154150724411, "learning_rate": 7.808365214434417e-05, "loss": 0.0853, "step": 20680 }, { "action_loss": 0.0054360865615308285, "epoch": 18.597122302158272, "step": 20680 }, { "epoch": 18.597122302158272, "step": 20680, "torque_loss": 0.1317041516304016 }, { "epoch": 18.60611510791367, "grad_norm": 0.34525609016418457, "learning_rate": 7.80608476530869e-05, "loss": 0.0949, "step": 20690 }, { "action_loss": 0.0033745309337973595, "epoch": 18.60611510791367, "step": 20690 }, { "epoch": 18.60611510791367, "step": 20690, "torque_loss": 0.10936129093170166 }, { "epoch": 18.615107913669064, "grad_norm": 0.28118792176246643, "learning_rate": 7.8038034637685e-05, "loss": 0.0797, "step": 20700 }, { "action_loss": 0.01804095134139061, "epoch": 18.615107913669064, "step": 20700 }, { "epoch": 18.615107913669064, "step": 20700, "torque_loss": 0.1414554864168167 }, { "epoch": 18.62410071942446, "grad_norm": 0.3889027237892151, "learning_rate": 7.801521310506848e-05, "loss": 0.0796, "step": 20710 }, { "action_loss": 0.0076459795236587524, "epoch": 18.62410071942446, "step": 20710 }, { "epoch": 18.62410071942446, "step": 20710, "torque_loss": 0.09113819152116776 }, { "epoch": 18.633093525179856, "grad_norm": 0.3085611164569855, "learning_rate": 7.799238306216994e-05, "loss": 0.0755, "step": 20720 }, { "action_loss": 0.010990013368427753, "epoch": 18.633093525179856, "step": 20720 }, { "epoch": 18.633093525179856, "step": 20720, "torque_loss": 0.1654743254184723 }, { "epoch": 18.642086330935253, "grad_norm": 0.2626160681247711, "learning_rate": 7.796954451592448e-05, "loss": 0.1053, "step": 20730 }, { "action_loss": 0.005132336635142565, "epoch": 18.642086330935253, "step": 20730 }, { "epoch": 18.642086330935253, "step": 20730, "torque_loss": 0.08526179194450378 }, { "epoch": 18.651079136690647, "grad_norm": 0.43451249599456787, "learning_rate": 7.794669747326992e-05, "loss": 0.0904, "step": 20740 }, { "action_loss": 0.004867656622081995, "epoch": 18.651079136690647, "step": 20740 }, { "epoch": 18.651079136690647, "step": 20740, "torque_loss": 0.13249965012073517 }, { "epoch": 18.66007194244604, "grad_norm": 0.27113452553749084, "learning_rate": 7.792384194114654e-05, "loss": 0.0872, "step": 20750 }, { "action_loss": 0.012113518081605434, "epoch": 18.66007194244604, "step": 20750 }, { "epoch": 18.66007194244604, "step": 20750, "torque_loss": 0.15339362621307373 }, { "epoch": 18.66906474820144, "grad_norm": 0.4997711181640625, "learning_rate": 7.790097792649729e-05, "loss": 0.0947, "step": 20760 }, { "action_loss": 0.018266653642058372, "epoch": 18.66906474820144, "step": 20760 }, { "epoch": 18.66906474820144, "step": 20760, "torque_loss": 0.22849242389202118 }, { "epoch": 18.678057553956833, "grad_norm": 0.3216046392917633, "learning_rate": 7.787810543626762e-05, "loss": 0.0901, "step": 20770 }, { "action_loss": 0.011907284148037434, "epoch": 18.678057553956833, "step": 20770 }, { "epoch": 18.678057553956833, "step": 20770, "torque_loss": 0.15726976096630096 }, { "epoch": 18.68705035971223, "grad_norm": 0.37117135524749756, "learning_rate": 7.785522447740558e-05, "loss": 0.0834, "step": 20780 }, { "action_loss": 0.005625199992209673, "epoch": 18.68705035971223, "step": 20780 }, { "epoch": 18.68705035971223, "step": 20780, "torque_loss": 0.07111639529466629 }, { "epoch": 18.696043165467625, "grad_norm": 0.3306056559085846, "learning_rate": 7.783233505686182e-05, "loss": 0.0969, "step": 20790 }, { "action_loss": 0.0040064663626253605, "epoch": 18.696043165467625, "step": 20790 }, { "epoch": 18.696043165467625, "step": 20790, "torque_loss": 0.12390550225973129 }, { "epoch": 18.705035971223023, "grad_norm": 0.2887640595436096, "learning_rate": 7.780943718158955e-05, "loss": 0.076, "step": 20800 }, { "action_loss": 0.0052358247339725494, "epoch": 18.705035971223023, "step": 20800 }, { "epoch": 18.705035971223023, "step": 20800, "torque_loss": 0.11662524938583374 }, { "epoch": 18.714028776978417, "grad_norm": 0.39401963353157043, "learning_rate": 7.778653085854453e-05, "loss": 0.0902, "step": 20810 }, { "action_loss": 0.005045424215495586, "epoch": 18.714028776978417, "step": 20810 }, { "epoch": 18.714028776978417, "step": 20810, "torque_loss": 0.08717610687017441 }, { "epoch": 18.723021582733814, "grad_norm": 0.306953489780426, "learning_rate": 7.77636160946851e-05, "loss": 0.0784, "step": 20820 }, { "action_loss": 0.004268641117960215, "epoch": 18.723021582733814, "step": 20820 }, { "epoch": 18.723021582733814, "step": 20820, "torque_loss": 0.10843697935342789 }, { "epoch": 18.73201438848921, "grad_norm": 0.32188931107521057, "learning_rate": 7.774069289697215e-05, "loss": 0.0881, "step": 20830 }, { "action_loss": 0.0035931828897446394, "epoch": 18.73201438848921, "step": 20830 }, { "epoch": 18.73201438848921, "step": 20830, "torque_loss": 0.10706707090139389 }, { "epoch": 18.741007194244606, "grad_norm": 0.2996636927127838, "learning_rate": 7.771776127236913e-05, "loss": 0.0826, "step": 20840 }, { "action_loss": 0.007509245071560144, "epoch": 18.741007194244606, "step": 20840 }, { "epoch": 18.741007194244606, "step": 20840, "torque_loss": 0.13892124593257904 }, { "epoch": 18.75, "grad_norm": 0.35625821352005005, "learning_rate": 7.769482122784212e-05, "loss": 0.0838, "step": 20850 }, { "action_loss": 0.013491683639585972, "epoch": 18.75, "step": 20850 }, { "epoch": 18.75, "step": 20850, "torque_loss": 0.1762533187866211 }, { "epoch": 18.758992805755394, "grad_norm": 0.36469584703445435, "learning_rate": 7.767187277035963e-05, "loss": 0.086, "step": 20860 }, { "action_loss": 0.015297974459826946, "epoch": 18.758992805755394, "step": 20860 }, { "epoch": 18.758992805755394, "step": 20860, "torque_loss": 0.1701374500989914 }, { "epoch": 18.76798561151079, "grad_norm": 0.32738322019577026, "learning_rate": 7.764891590689285e-05, "loss": 0.0935, "step": 20870 }, { "action_loss": 0.006290167570114136, "epoch": 18.76798561151079, "step": 20870 }, { "epoch": 18.76798561151079, "step": 20870, "torque_loss": 0.1369890421628952 }, { "epoch": 18.776978417266186, "grad_norm": 0.3526645302772522, "learning_rate": 7.762595064441542e-05, "loss": 0.0917, "step": 20880 }, { "action_loss": 0.019061369821429253, "epoch": 18.776978417266186, "step": 20880 }, { "epoch": 18.776978417266186, "step": 20880, "torque_loss": 0.20999372005462646 }, { "epoch": 18.785971223021583, "grad_norm": 0.3046894669532776, "learning_rate": 7.760297698990362e-05, "loss": 0.086, "step": 20890 }, { "action_loss": 0.00468539958819747, "epoch": 18.785971223021583, "step": 20890 }, { "epoch": 18.785971223021583, "step": 20890, "torque_loss": 0.16612829267978668 }, { "epoch": 18.794964028776977, "grad_norm": 0.3383540213108063, "learning_rate": 7.757999495033623e-05, "loss": 0.0939, "step": 20900 }, { "action_loss": 0.008802071213722229, "epoch": 18.794964028776977, "step": 20900 }, { "epoch": 18.794964028776977, "step": 20900, "torque_loss": 0.15204884111881256 }, { "epoch": 18.803956834532375, "grad_norm": 0.24166397750377655, "learning_rate": 7.755700453269456e-05, "loss": 0.0759, "step": 20910 }, { "action_loss": 0.004556302912533283, "epoch": 18.803956834532375, "step": 20910 }, { "epoch": 18.803956834532375, "step": 20910, "torque_loss": 0.10659956187009811 }, { "epoch": 18.81294964028777, "grad_norm": 0.37699174880981445, "learning_rate": 7.753400574396254e-05, "loss": 0.0971, "step": 20920 }, { "action_loss": 0.0037072126287966967, "epoch": 18.81294964028777, "step": 20920 }, { "epoch": 18.81294964028777, "step": 20920, "torque_loss": 0.12164048105478287 }, { "epoch": 18.821942446043167, "grad_norm": 0.37478652596473694, "learning_rate": 7.751099859112655e-05, "loss": 0.0935, "step": 20930 }, { "action_loss": 0.009659129194915295, "epoch": 18.821942446043167, "step": 20930 }, { "epoch": 18.821942446043167, "step": 20930, "torque_loss": 0.1361812949180603 }, { "epoch": 18.83093525179856, "grad_norm": 0.2982809543609619, "learning_rate": 7.748798308117557e-05, "loss": 0.0756, "step": 20940 }, { "action_loss": 0.005542787257581949, "epoch": 18.83093525179856, "step": 20940 }, { "epoch": 18.83093525179856, "step": 20940, "torque_loss": 0.10907415300607681 }, { "epoch": 18.83992805755396, "grad_norm": 0.35812994837760925, "learning_rate": 7.746495922110112e-05, "loss": 0.0828, "step": 20950 }, { "action_loss": 0.009173542261123657, "epoch": 18.83992805755396, "step": 20950 }, { "epoch": 18.83992805755396, "step": 20950, "torque_loss": 0.14081765711307526 }, { "epoch": 18.848920863309353, "grad_norm": 0.2621867060661316, "learning_rate": 7.744192701789723e-05, "loss": 0.0805, "step": 20960 }, { "action_loss": 0.0044942270033061504, "epoch": 18.848920863309353, "step": 20960 }, { "epoch": 18.848920863309353, "step": 20960, "torque_loss": 0.11240822076797485 }, { "epoch": 18.857913669064747, "grad_norm": 0.3233477771282196, "learning_rate": 7.741888647856046e-05, "loss": 0.0809, "step": 20970 }, { "action_loss": 0.009895950555801392, "epoch": 18.857913669064747, "step": 20970 }, { "epoch": 18.857913669064747, "step": 20970, "torque_loss": 0.20391350984573364 }, { "epoch": 18.866906474820144, "grad_norm": 0.28563663363456726, "learning_rate": 7.739583761008994e-05, "loss": 0.0856, "step": 20980 }, { "action_loss": 0.024021288380026817, "epoch": 18.866906474820144, "step": 20980 }, { "epoch": 18.866906474820144, "step": 20980, "torque_loss": 0.2093208283185959 }, { "epoch": 18.87589928057554, "grad_norm": 0.35269778966903687, "learning_rate": 7.73727804194873e-05, "loss": 0.0885, "step": 20990 }, { "action_loss": 0.006313680205494165, "epoch": 18.87589928057554, "step": 20990 }, { "epoch": 18.87589928057554, "step": 20990, "torque_loss": 0.12992411851882935 }, { "epoch": 18.884892086330936, "grad_norm": 0.28904300928115845, "learning_rate": 7.734971491375671e-05, "loss": 0.0835, "step": 21000 }, { "action_loss": 0.004919752012938261, "epoch": 18.884892086330936, "step": 21000 }, { "epoch": 18.884892086330936, "step": 21000, "torque_loss": 0.07162654399871826 }, { "epoch": 18.89388489208633, "grad_norm": 0.33606380224227905, "learning_rate": 7.732664109990485e-05, "loss": 0.0824, "step": 21010 }, { "action_loss": 0.007574392016977072, "epoch": 18.89388489208633, "step": 21010 }, { "epoch": 18.89388489208633, "step": 21010, "torque_loss": 0.08991721272468567 }, { "epoch": 18.902877697841728, "grad_norm": 0.34083059430122375, "learning_rate": 7.730355898494095e-05, "loss": 0.1021, "step": 21020 }, { "action_loss": 0.017906926572322845, "epoch": 18.902877697841728, "step": 21020 }, { "epoch": 18.902877697841728, "step": 21020, "torque_loss": 0.14834095537662506 }, { "epoch": 18.91187050359712, "grad_norm": 0.33621716499328613, "learning_rate": 7.728046857587673e-05, "loss": 0.0982, "step": 21030 }, { "action_loss": 0.005607226863503456, "epoch": 18.91187050359712, "step": 21030 }, { "epoch": 18.91187050359712, "step": 21030, "torque_loss": 0.11342298239469528 }, { "epoch": 18.92086330935252, "grad_norm": 0.3365058898925781, "learning_rate": 7.725736987972647e-05, "loss": 0.0966, "step": 21040 }, { "action_loss": 0.009672005660831928, "epoch": 18.92086330935252, "step": 21040 }, { "epoch": 18.92086330935252, "step": 21040, "torque_loss": 0.1278279721736908 }, { "epoch": 18.929856115107913, "grad_norm": 0.36559250950813293, "learning_rate": 7.723426290350691e-05, "loss": 0.0914, "step": 21050 }, { "action_loss": 0.018150294199585915, "epoch": 18.929856115107913, "step": 21050 }, { "epoch": 18.929856115107913, "step": 21050, "torque_loss": 0.15295635163784027 }, { "epoch": 18.93884892086331, "grad_norm": 0.32514479756355286, "learning_rate": 7.721114765423736e-05, "loss": 0.0953, "step": 21060 }, { "action_loss": 0.007476967293769121, "epoch": 18.93884892086331, "step": 21060 }, { "epoch": 18.93884892086331, "step": 21060, "torque_loss": 0.13860861957073212 }, { "epoch": 18.947841726618705, "grad_norm": 0.3720276653766632, "learning_rate": 7.718802413893963e-05, "loss": 0.0846, "step": 21070 }, { "action_loss": 0.005878788884729147, "epoch": 18.947841726618705, "step": 21070 }, { "epoch": 18.947841726618705, "step": 21070, "torque_loss": 0.1321869045495987 }, { "epoch": 18.9568345323741, "grad_norm": 0.3216933608055115, "learning_rate": 7.716489236463802e-05, "loss": 0.0828, "step": 21080 }, { "action_loss": 0.009814455173909664, "epoch": 18.9568345323741, "step": 21080 }, { "epoch": 18.9568345323741, "step": 21080, "torque_loss": 0.1479647010564804 }, { "epoch": 18.965827338129497, "grad_norm": 0.29782286286354065, "learning_rate": 7.714175233835936e-05, "loss": 0.0784, "step": 21090 }, { "action_loss": 0.003600840689614415, "epoch": 18.965827338129497, "step": 21090 }, { "epoch": 18.965827338129497, "step": 21090, "torque_loss": 0.10395551472902298 }, { "epoch": 18.97482014388489, "grad_norm": 0.3007625937461853, "learning_rate": 7.711860406713299e-05, "loss": 0.0856, "step": 21100 }, { "action_loss": 0.013987655751407146, "epoch": 18.97482014388489, "step": 21100 }, { "epoch": 18.97482014388489, "step": 21100, "torque_loss": 0.1447070837020874 }, { "epoch": 18.98381294964029, "grad_norm": 0.3680391013622284, "learning_rate": 7.70954475579907e-05, "loss": 0.086, "step": 21110 }, { "action_loss": 0.007213123142719269, "epoch": 18.98381294964029, "step": 21110 }, { "epoch": 18.98381294964029, "step": 21110, "torque_loss": 0.10276845097541809 }, { "epoch": 18.992805755395683, "grad_norm": 0.43869954347610474, "learning_rate": 7.707228281796688e-05, "loss": 0.0938, "step": 21120 }, { "action_loss": 0.0038870859425514936, "epoch": 18.992805755395683, "step": 21120 }, { "epoch": 18.992805755395683, "step": 21120, "torque_loss": 0.12546424567699432 }, { "epoch": 19.00179856115108, "grad_norm": 0.4315304458141327, "learning_rate": 7.704910985409833e-05, "loss": 0.0898, "step": 21130 }, { "action_loss": 0.009156394749879837, "epoch": 19.00179856115108, "step": 21130 }, { "epoch": 19.00179856115108, "step": 21130, "torque_loss": 0.12788553535938263 }, { "epoch": 19.010791366906474, "grad_norm": 0.2540854811668396, "learning_rate": 7.702592867342439e-05, "loss": 0.0936, "step": 21140 }, { "action_loss": 0.005419764202088118, "epoch": 19.010791366906474, "step": 21140 }, { "epoch": 19.010791366906474, "step": 21140, "torque_loss": 0.10867869853973389 }, { "epoch": 19.019784172661872, "grad_norm": 0.3139370083808899, "learning_rate": 7.700273928298691e-05, "loss": 0.082, "step": 21150 }, { "action_loss": 0.008165564388036728, "epoch": 19.019784172661872, "step": 21150 }, { "epoch": 19.019784172661872, "step": 21150, "torque_loss": 0.13337290287017822 }, { "epoch": 19.028776978417266, "grad_norm": 0.3755112290382385, "learning_rate": 7.697954168983021e-05, "loss": 0.0853, "step": 21160 }, { "action_loss": 0.006630053278058767, "epoch": 19.028776978417266, "step": 21160 }, { "epoch": 19.028776978417266, "step": 21160, "torque_loss": 0.1254008561372757 }, { "epoch": 19.037769784172664, "grad_norm": 0.2830062210559845, "learning_rate": 7.695633590100109e-05, "loss": 0.0937, "step": 21170 }, { "action_loss": 0.004623245447874069, "epoch": 19.037769784172664, "step": 21170 }, { "epoch": 19.037769784172664, "step": 21170, "torque_loss": 0.1033250093460083 }, { "epoch": 19.046762589928058, "grad_norm": 0.371664434671402, "learning_rate": 7.693312192354886e-05, "loss": 0.0919, "step": 21180 }, { "action_loss": 0.007990953512489796, "epoch": 19.046762589928058, "step": 21180 }, { "epoch": 19.046762589928058, "step": 21180, "torque_loss": 0.1516697108745575 }, { "epoch": 19.055755395683452, "grad_norm": 0.23911835253238678, "learning_rate": 7.690989976452532e-05, "loss": 0.0859, "step": 21190 }, { "action_loss": 0.010153698734939098, "epoch": 19.055755395683452, "step": 21190 }, { "epoch": 19.055755395683452, "step": 21190, "torque_loss": 0.13807645440101624 }, { "epoch": 19.06474820143885, "grad_norm": 0.2707136273384094, "learning_rate": 7.688666943098475e-05, "loss": 0.085, "step": 21200 }, { "action_loss": 0.006891352590173483, "epoch": 19.06474820143885, "step": 21200 }, { "epoch": 19.06474820143885, "step": 21200, "torque_loss": 0.11087387800216675 }, { "epoch": 19.073741007194243, "grad_norm": 0.3532238304615021, "learning_rate": 7.686343092998389e-05, "loss": 0.0812, "step": 21210 }, { "action_loss": 0.004861170891672373, "epoch": 19.073741007194243, "step": 21210 }, { "epoch": 19.073741007194243, "step": 21210, "torque_loss": 0.09009512513875961 }, { "epoch": 19.08273381294964, "grad_norm": 0.29439711570739746, "learning_rate": 7.684018426858202e-05, "loss": 0.0899, "step": 21220 }, { "action_loss": 0.005626974161714315, "epoch": 19.08273381294964, "step": 21220 }, { "epoch": 19.08273381294964, "step": 21220, "torque_loss": 0.10477358847856522 }, { "epoch": 19.091726618705035, "grad_norm": 0.33503320813179016, "learning_rate": 7.681692945384084e-05, "loss": 0.0876, "step": 21230 }, { "action_loss": 0.008049283176660538, "epoch": 19.091726618705035, "step": 21230 }, { "epoch": 19.091726618705035, "step": 21230, "torque_loss": 0.14586633443832397 }, { "epoch": 19.100719424460433, "grad_norm": 0.3083992600440979, "learning_rate": 7.679366649282456e-05, "loss": 0.0793, "step": 21240 }, { "action_loss": 0.013890661299228668, "epoch": 19.100719424460433, "step": 21240 }, { "epoch": 19.100719424460433, "step": 21240, "torque_loss": 0.18905524909496307 }, { "epoch": 19.109712230215827, "grad_norm": 0.3809301555156708, "learning_rate": 7.677039539259983e-05, "loss": 0.0941, "step": 21250 }, { "action_loss": 0.006953440606594086, "epoch": 19.109712230215827, "step": 21250 }, { "epoch": 19.109712230215827, "step": 21250, "torque_loss": 0.20832546055316925 }, { "epoch": 19.118705035971225, "grad_norm": 0.2722795605659485, "learning_rate": 7.674711616023581e-05, "loss": 0.0925, "step": 21260 }, { "action_loss": 0.014207524247467518, "epoch": 19.118705035971225, "step": 21260 }, { "epoch": 19.118705035971225, "step": 21260, "torque_loss": 0.20717830955982208 }, { "epoch": 19.12769784172662, "grad_norm": 0.29257866740226746, "learning_rate": 7.672382880280413e-05, "loss": 0.0841, "step": 21270 }, { "action_loss": 0.00608113594353199, "epoch": 19.12769784172662, "step": 21270 }, { "epoch": 19.12769784172662, "step": 21270, "torque_loss": 0.10854760557413101 }, { "epoch": 19.136690647482013, "grad_norm": 0.31391772627830505, "learning_rate": 7.670053332737885e-05, "loss": 0.0846, "step": 21280 }, { "action_loss": 0.002386990934610367, "epoch": 19.136690647482013, "step": 21280 }, { "epoch": 19.136690647482013, "step": 21280, "torque_loss": 0.07284673303365707 }, { "epoch": 19.14568345323741, "grad_norm": 0.3749631643295288, "learning_rate": 7.667722974103654e-05, "loss": 0.0722, "step": 21290 }, { "action_loss": 0.0023603413719683886, "epoch": 19.14568345323741, "step": 21290 }, { "epoch": 19.14568345323741, "step": 21290, "torque_loss": 0.09032964706420898 }, { "epoch": 19.154676258992804, "grad_norm": 0.3490293025970459, "learning_rate": 7.66539180508562e-05, "loss": 0.0788, "step": 21300 }, { "action_loss": 0.008503551594913006, "epoch": 19.154676258992804, "step": 21300 }, { "epoch": 19.154676258992804, "step": 21300, "torque_loss": 0.1126624122262001 }, { "epoch": 19.163669064748202, "grad_norm": 0.32594311237335205, "learning_rate": 7.663059826391932e-05, "loss": 0.0899, "step": 21310 }, { "action_loss": 0.005015261936932802, "epoch": 19.163669064748202, "step": 21310 }, { "epoch": 19.163669064748202, "step": 21310, "torque_loss": 0.15691711008548737 }, { "epoch": 19.172661870503596, "grad_norm": 0.2214040905237198, "learning_rate": 7.660727038730981e-05, "loss": 0.0832, "step": 21320 }, { "action_loss": 0.019471358507871628, "epoch": 19.172661870503596, "step": 21320 }, { "epoch": 19.172661870503596, "step": 21320, "torque_loss": 0.20435458421707153 }, { "epoch": 19.181654676258994, "grad_norm": 0.2942233979701996, "learning_rate": 7.65839344281141e-05, "loss": 0.087, "step": 21330 }, { "action_loss": 0.006392946001142263, "epoch": 19.181654676258994, "step": 21330 }, { "epoch": 19.181654676258994, "step": 21330, "torque_loss": 0.1503145843744278 }, { "epoch": 19.190647482014388, "grad_norm": 0.3498871922492981, "learning_rate": 7.656059039342101e-05, "loss": 0.0761, "step": 21340 }, { "action_loss": 0.005644313991069794, "epoch": 19.190647482014388, "step": 21340 }, { "epoch": 19.190647482014388, "step": 21340, "torque_loss": 0.12282532453536987 }, { "epoch": 19.199640287769785, "grad_norm": 0.3465186357498169, "learning_rate": 7.653723829032187e-05, "loss": 0.0826, "step": 21350 }, { "action_loss": 0.00486223166808486, "epoch": 19.199640287769785, "step": 21350 }, { "epoch": 19.199640287769785, "step": 21350, "torque_loss": 0.09322068840265274 }, { "epoch": 19.20863309352518, "grad_norm": 0.27490103244781494, "learning_rate": 7.65138781259104e-05, "loss": 0.091, "step": 21360 }, { "action_loss": 0.005319376941770315, "epoch": 19.20863309352518, "step": 21360 }, { "epoch": 19.20863309352518, "step": 21360, "torque_loss": 0.10616327077150345 }, { "epoch": 19.217625899280577, "grad_norm": 0.3809833228588104, "learning_rate": 7.649050990728279e-05, "loss": 0.076, "step": 21370 }, { "action_loss": 0.007011007983237505, "epoch": 19.217625899280577, "step": 21370 }, { "epoch": 19.217625899280577, "step": 21370, "torque_loss": 0.15126079320907593 }, { "epoch": 19.22661870503597, "grad_norm": 0.265543669462204, "learning_rate": 7.646713364153774e-05, "loss": 0.09, "step": 21380 }, { "action_loss": 0.005021360237151384, "epoch": 19.22661870503597, "step": 21380 }, { "epoch": 19.22661870503597, "step": 21380, "torque_loss": 0.12882132828235626 }, { "epoch": 19.235611510791365, "grad_norm": 0.3470516800880432, "learning_rate": 7.64437493357763e-05, "loss": 0.0858, "step": 21390 }, { "action_loss": 0.0044388906098902225, "epoch": 19.235611510791365, "step": 21390 }, { "epoch": 19.235611510791365, "step": 21390, "torque_loss": 0.12814992666244507 }, { "epoch": 19.244604316546763, "grad_norm": 0.24240125715732574, "learning_rate": 7.642035699710202e-05, "loss": 0.0858, "step": 21400 }, { "action_loss": 0.005060770083218813, "epoch": 19.244604316546763, "step": 21400 }, { "epoch": 19.244604316546763, "step": 21400, "torque_loss": 0.14997337758541107 }, { "epoch": 19.253597122302157, "grad_norm": 0.34621381759643555, "learning_rate": 7.639695663262089e-05, "loss": 0.0761, "step": 21410 }, { "action_loss": 0.0154527323320508, "epoch": 19.253597122302157, "step": 21410 }, { "epoch": 19.253597122302157, "step": 21410, "torque_loss": 0.23642800748348236 }, { "epoch": 19.262589928057555, "grad_norm": 0.34965914487838745, "learning_rate": 7.637354824944128e-05, "loss": 0.1076, "step": 21420 }, { "action_loss": 0.007791346404701471, "epoch": 19.262589928057555, "step": 21420 }, { "epoch": 19.262589928057555, "step": 21420, "torque_loss": 0.11899403482675552 }, { "epoch": 19.27158273381295, "grad_norm": 0.36335092782974243, "learning_rate": 7.635013185467408e-05, "loss": 0.0752, "step": 21430 }, { "action_loss": 0.008760087192058563, "epoch": 19.27158273381295, "step": 21430 }, { "epoch": 19.27158273381295, "step": 21430, "torque_loss": 0.07692018896341324 }, { "epoch": 19.280575539568346, "grad_norm": 0.29379919171333313, "learning_rate": 7.632670745543256e-05, "loss": 0.0814, "step": 21440 }, { "action_loss": 0.0035040013026446104, "epoch": 19.280575539568346, "step": 21440 }, { "epoch": 19.280575539568346, "step": 21440, "torque_loss": 0.11028388887643814 }, { "epoch": 19.28956834532374, "grad_norm": 0.31609058380126953, "learning_rate": 7.630327505883242e-05, "loss": 0.0887, "step": 21450 }, { "action_loss": 0.0025894565042108297, "epoch": 19.28956834532374, "step": 21450 }, { "epoch": 19.28956834532374, "step": 21450, "torque_loss": 0.1014619842171669 }, { "epoch": 19.298561151079138, "grad_norm": 0.32694903016090393, "learning_rate": 7.627983467199182e-05, "loss": 0.0741, "step": 21460 }, { "action_loss": 0.006947105750441551, "epoch": 19.298561151079138, "step": 21460 }, { "epoch": 19.298561151079138, "step": 21460, "torque_loss": 0.1653868556022644 }, { "epoch": 19.307553956834532, "grad_norm": 0.33320823311805725, "learning_rate": 7.625638630203132e-05, "loss": 0.0893, "step": 21470 }, { "action_loss": 0.005132623482495546, "epoch": 19.307553956834532, "step": 21470 }, { "epoch": 19.307553956834532, "step": 21470, "torque_loss": 0.1401345580816269 }, { "epoch": 19.31654676258993, "grad_norm": 0.2939514219760895, "learning_rate": 7.623292995607394e-05, "loss": 0.0903, "step": 21480 }, { "action_loss": 0.01321441400796175, "epoch": 19.31654676258993, "step": 21480 }, { "epoch": 19.31654676258993, "step": 21480, "torque_loss": 0.13753251731395721 }, { "epoch": 19.325539568345324, "grad_norm": 0.3531478941440582, "learning_rate": 7.620946564124507e-05, "loss": 0.0833, "step": 21490 }, { "action_loss": 0.009166485629975796, "epoch": 19.325539568345324, "step": 21490 }, { "epoch": 19.325539568345324, "step": 21490, "torque_loss": 0.134939506649971 }, { "epoch": 19.334532374100718, "grad_norm": 0.2762276530265808, "learning_rate": 7.618599336467256e-05, "loss": 0.0829, "step": 21500 }, { "action_loss": 0.006043755915015936, "epoch": 19.334532374100718, "step": 21500 }, { "epoch": 19.334532374100718, "step": 21500, "torque_loss": 0.12349296361207962 }, { "epoch": 19.343525179856115, "grad_norm": 0.34418484568595886, "learning_rate": 7.616251313348666e-05, "loss": 0.0779, "step": 21510 }, { "action_loss": 0.009373911656439304, "epoch": 19.343525179856115, "step": 21510 }, { "epoch": 19.343525179856115, "step": 21510, "torque_loss": 0.1316349357366562 }, { "epoch": 19.35251798561151, "grad_norm": 0.3786344528198242, "learning_rate": 7.613902495482005e-05, "loss": 0.1028, "step": 21520 }, { "action_loss": 0.00546052074059844, "epoch": 19.35251798561151, "step": 21520 }, { "epoch": 19.35251798561151, "step": 21520, "torque_loss": 0.13003768026828766 }, { "epoch": 19.361510791366907, "grad_norm": 0.27373987436294556, "learning_rate": 7.611552883580784e-05, "loss": 0.0841, "step": 21530 }, { "action_loss": 0.007895777933299541, "epoch": 19.361510791366907, "step": 21530 }, { "epoch": 19.361510791366907, "step": 21530, "torque_loss": 0.1629680097103119 }, { "epoch": 19.3705035971223, "grad_norm": 0.29474735260009766, "learning_rate": 7.609202478358748e-05, "loss": 0.0964, "step": 21540 }, { "action_loss": 0.010754077695310116, "epoch": 19.3705035971223, "step": 21540 }, { "epoch": 19.3705035971223, "step": 21540, "torque_loss": 0.16979451477527618 }, { "epoch": 19.3794964028777, "grad_norm": 0.32554757595062256, "learning_rate": 7.606851280529895e-05, "loss": 0.0902, "step": 21550 }, { "action_loss": 0.006549505982547998, "epoch": 19.3794964028777, "step": 21550 }, { "epoch": 19.3794964028777, "step": 21550, "torque_loss": 0.10955312103033066 }, { "epoch": 19.388489208633093, "grad_norm": 0.2937321960926056, "learning_rate": 7.604499290808449e-05, "loss": 0.085, "step": 21560 }, { "action_loss": 0.01642548479139805, "epoch": 19.388489208633093, "step": 21560 }, { "epoch": 19.388489208633093, "step": 21560, "torque_loss": 0.18307726085186005 }, { "epoch": 19.39748201438849, "grad_norm": 0.35253044962882996, "learning_rate": 7.602146509908888e-05, "loss": 0.0875, "step": 21570 }, { "action_loss": 0.005283757578581572, "epoch": 19.39748201438849, "step": 21570 }, { "epoch": 19.39748201438849, "step": 21570, "torque_loss": 0.0652749165892601 }, { "epoch": 19.406474820143885, "grad_norm": 0.27742812037467957, "learning_rate": 7.599792938545921e-05, "loss": 0.0919, "step": 21580 }, { "action_loss": 0.007782362401485443, "epoch": 19.406474820143885, "step": 21580 }, { "epoch": 19.406474820143885, "step": 21580, "torque_loss": 0.10008737444877625 }, { "epoch": 19.415467625899282, "grad_norm": 0.256957471370697, "learning_rate": 7.597438577434506e-05, "loss": 0.0852, "step": 21590 }, { "action_loss": 0.009394805878400803, "epoch": 19.415467625899282, "step": 21590 }, { "epoch": 19.415467625899282, "step": 21590, "torque_loss": 0.1501922458410263 }, { "epoch": 19.424460431654676, "grad_norm": 0.35356417298316956, "learning_rate": 7.595083427289831e-05, "loss": 0.0826, "step": 21600 }, { "action_loss": 0.003901482792571187, "epoch": 19.424460431654676, "step": 21600 }, { "epoch": 19.424460431654676, "step": 21600, "torque_loss": 0.09321033954620361 }, { "epoch": 19.43345323741007, "grad_norm": 0.30982211232185364, "learning_rate": 7.59272748882733e-05, "loss": 0.0806, "step": 21610 }, { "action_loss": 0.028615489602088928, "epoch": 19.43345323741007, "step": 21610 }, { "epoch": 19.43345323741007, "step": 21610, "torque_loss": 0.16308265924453735 }, { "epoch": 19.442446043165468, "grad_norm": 0.2214595079421997, "learning_rate": 7.590370762762675e-05, "loss": 0.0911, "step": 21620 }, { "action_loss": 0.003954201005399227, "epoch": 19.442446043165468, "step": 21620 }, { "epoch": 19.442446043165468, "step": 21620, "torque_loss": 0.09171799570322037 }, { "epoch": 19.451438848920862, "grad_norm": 0.44244736433029175, "learning_rate": 7.588013249811777e-05, "loss": 0.1016, "step": 21630 }, { "action_loss": 0.008980878628790379, "epoch": 19.451438848920862, "step": 21630 }, { "epoch": 19.451438848920862, "step": 21630, "torque_loss": 0.1228618249297142 }, { "epoch": 19.46043165467626, "grad_norm": 0.28401151299476624, "learning_rate": 7.585654950690786e-05, "loss": 0.0859, "step": 21640 }, { "action_loss": 0.003628644160926342, "epoch": 19.46043165467626, "step": 21640 }, { "epoch": 19.46043165467626, "step": 21640, "torque_loss": 0.09087004512548447 }, { "epoch": 19.469424460431654, "grad_norm": 0.31381577253341675, "learning_rate": 7.583295866116091e-05, "loss": 0.088, "step": 21650 }, { "action_loss": 0.010984803549945354, "epoch": 19.469424460431654, "step": 21650 }, { "epoch": 19.469424460431654, "step": 21650, "torque_loss": 0.09978973865509033 }, { "epoch": 19.47841726618705, "grad_norm": 0.38761404156684875, "learning_rate": 7.580935996804321e-05, "loss": 0.1022, "step": 21660 }, { "action_loss": 0.00890992023050785, "epoch": 19.47841726618705, "step": 21660 }, { "epoch": 19.47841726618705, "step": 21660, "torque_loss": 0.18191313743591309 }, { "epoch": 19.487410071942445, "grad_norm": 0.3490758240222931, "learning_rate": 7.57857534347234e-05, "loss": 0.1021, "step": 21670 }, { "action_loss": 0.005915272515267134, "epoch": 19.487410071942445, "step": 21670 }, { "epoch": 19.487410071942445, "step": 21670, "torque_loss": 0.12687945365905762 }, { "epoch": 19.496402877697843, "grad_norm": 0.38527584075927734, "learning_rate": 7.576213906837254e-05, "loss": 0.0886, "step": 21680 }, { "action_loss": 0.020805606618523598, "epoch": 19.496402877697843, "step": 21680 }, { "epoch": 19.496402877697843, "step": 21680, "torque_loss": 0.18530325591564178 }, { "epoch": 19.505395683453237, "grad_norm": 0.42948827147483826, "learning_rate": 7.573851687616403e-05, "loss": 0.0913, "step": 21690 }, { "action_loss": 0.014690724201500416, "epoch": 19.505395683453237, "step": 21690 }, { "epoch": 19.505395683453237, "step": 21690, "torque_loss": 0.1761145144701004 }, { "epoch": 19.514388489208635, "grad_norm": 0.2928464114665985, "learning_rate": 7.571488686527368e-05, "loss": 0.0744, "step": 21700 }, { "action_loss": 0.0075211054645478725, "epoch": 19.514388489208635, "step": 21700 }, { "epoch": 19.514388489208635, "step": 21700, "torque_loss": 0.09468350559473038 }, { "epoch": 19.52338129496403, "grad_norm": 0.2828305661678314, "learning_rate": 7.569124904287968e-05, "loss": 0.0736, "step": 21710 }, { "action_loss": 0.005829526111483574, "epoch": 19.52338129496403, "step": 21710 }, { "epoch": 19.52338129496403, "step": 21710, "torque_loss": 0.1463138312101364 }, { "epoch": 19.532374100719423, "grad_norm": 0.30627602338790894, "learning_rate": 7.566760341616254e-05, "loss": 0.0898, "step": 21720 }, { "action_loss": 0.005068741273134947, "epoch": 19.532374100719423, "step": 21720 }, { "epoch": 19.532374100719423, "step": 21720, "torque_loss": 0.1333134025335312 }, { "epoch": 19.54136690647482, "grad_norm": 0.33699968457221985, "learning_rate": 7.564394999230519e-05, "loss": 0.0898, "step": 21730 }, { "action_loss": 0.006496348883956671, "epoch": 19.54136690647482, "step": 21730 }, { "epoch": 19.54136690647482, "step": 21730, "torque_loss": 0.12265130877494812 }, { "epoch": 19.550359712230215, "grad_norm": 0.28796663880348206, "learning_rate": 7.562028877849294e-05, "loss": 0.075, "step": 21740 }, { "action_loss": 0.007124621886759996, "epoch": 19.550359712230215, "step": 21740 }, { "epoch": 19.550359712230215, "step": 21740, "torque_loss": 0.1449485570192337 }, { "epoch": 19.559352517985612, "grad_norm": 0.37419092655181885, "learning_rate": 7.559661978191341e-05, "loss": 0.0876, "step": 21750 }, { "action_loss": 0.007635466754436493, "epoch": 19.559352517985612, "step": 21750 }, { "epoch": 19.559352517985612, "step": 21750, "torque_loss": 0.1296439915895462 }, { "epoch": 19.568345323741006, "grad_norm": 0.33286958932876587, "learning_rate": 7.557294300975664e-05, "loss": 0.0799, "step": 21760 }, { "action_loss": 0.0042005302384495735, "epoch": 19.568345323741006, "step": 21760 }, { "epoch": 19.568345323741006, "step": 21760, "torque_loss": 0.09189591556787491 }, { "epoch": 19.577338129496404, "grad_norm": 0.2944099009037018, "learning_rate": 7.554925846921499e-05, "loss": 0.083, "step": 21770 }, { "action_loss": 0.0025880250614136457, "epoch": 19.577338129496404, "step": 21770 }, { "epoch": 19.577338129496404, "step": 21770, "torque_loss": 0.11245554685592651 }, { "epoch": 19.586330935251798, "grad_norm": 0.2991395890712738, "learning_rate": 7.552556616748321e-05, "loss": 0.0844, "step": 21780 }, { "action_loss": 0.008366054855287075, "epoch": 19.586330935251798, "step": 21780 }, { "epoch": 19.586330935251798, "step": 21780, "torque_loss": 0.1549873799085617 }, { "epoch": 19.595323741007196, "grad_norm": 0.29032400250434875, "learning_rate": 7.550186611175838e-05, "loss": 0.1, "step": 21790 }, { "action_loss": 0.003923008218407631, "epoch": 19.595323741007196, "step": 21790 }, { "epoch": 19.595323741007196, "step": 21790, "torque_loss": 0.06654486060142517 }, { "epoch": 19.60431654676259, "grad_norm": 0.2637857496738434, "learning_rate": 7.547815830923998e-05, "loss": 0.0724, "step": 21800 }, { "action_loss": 0.00977683812379837, "epoch": 19.60431654676259, "step": 21800 }, { "epoch": 19.60431654676259, "step": 21800, "torque_loss": 0.11053041368722916 }, { "epoch": 19.613309352517987, "grad_norm": 0.4152510464191437, "learning_rate": 7.54544427671298e-05, "loss": 0.0891, "step": 21810 }, { "action_loss": 0.015616429038345814, "epoch": 19.613309352517987, "step": 21810 }, { "epoch": 19.613309352517987, "step": 21810, "torque_loss": 0.21645812690258026 }, { "epoch": 19.62230215827338, "grad_norm": 0.3452844023704529, "learning_rate": 7.543071949263198e-05, "loss": 0.0826, "step": 21820 }, { "action_loss": 0.0037641432136297226, "epoch": 19.62230215827338, "step": 21820 }, { "epoch": 19.62230215827338, "step": 21820, "torque_loss": 0.09938704967498779 }, { "epoch": 19.631294964028775, "grad_norm": 0.3813572824001312, "learning_rate": 7.540698849295305e-05, "loss": 0.0723, "step": 21830 }, { "action_loss": 0.004320889245718718, "epoch": 19.631294964028775, "step": 21830 }, { "epoch": 19.631294964028775, "step": 21830, "torque_loss": 0.10434418171644211 }, { "epoch": 19.640287769784173, "grad_norm": 0.3402734100818634, "learning_rate": 7.538324977530183e-05, "loss": 0.0849, "step": 21840 }, { "action_loss": 0.022310348227620125, "epoch": 19.640287769784173, "step": 21840 }, { "epoch": 19.640287769784173, "step": 21840, "torque_loss": 0.23834121227264404 }, { "epoch": 19.649280575539567, "grad_norm": 0.5308486819267273, "learning_rate": 7.535950334688955e-05, "loss": 0.0967, "step": 21850 }, { "action_loss": 0.0023402387741953135, "epoch": 19.649280575539567, "step": 21850 }, { "epoch": 19.649280575539567, "step": 21850, "torque_loss": 0.05266302824020386 }, { "epoch": 19.658273381294965, "grad_norm": 0.3055640459060669, "learning_rate": 7.533574921492972e-05, "loss": 0.0855, "step": 21860 }, { "action_loss": 0.0031146674882620573, "epoch": 19.658273381294965, "step": 21860 }, { "epoch": 19.658273381294965, "step": 21860, "torque_loss": 0.08006752282381058 }, { "epoch": 19.66726618705036, "grad_norm": 0.31384772062301636, "learning_rate": 7.531198738663824e-05, "loss": 0.0751, "step": 21870 }, { "action_loss": 0.0069799404591321945, "epoch": 19.66726618705036, "step": 21870 }, { "epoch": 19.66726618705036, "step": 21870, "torque_loss": 0.16388140618801117 }, { "epoch": 19.676258992805757, "grad_norm": 0.29287850856781006, "learning_rate": 7.528821786923333e-05, "loss": 0.0883, "step": 21880 }, { "action_loss": 0.017610063776373863, "epoch": 19.676258992805757, "step": 21880 }, { "epoch": 19.676258992805757, "step": 21880, "torque_loss": 0.12996409833431244 }, { "epoch": 19.68525179856115, "grad_norm": 0.38596293330192566, "learning_rate": 7.52644406699355e-05, "loss": 0.0991, "step": 21890 }, { "action_loss": 0.004014801699668169, "epoch": 19.68525179856115, "step": 21890 }, { "epoch": 19.68525179856115, "step": 21890, "torque_loss": 0.0912395641207695 }, { "epoch": 19.694244604316548, "grad_norm": 0.3434992730617523, "learning_rate": 7.524065579596766e-05, "loss": 0.076, "step": 21900 }, { "action_loss": 0.008123699575662613, "epoch": 19.694244604316548, "step": 21900 }, { "epoch": 19.694244604316548, "step": 21900, "torque_loss": 0.1245754063129425 }, { "epoch": 19.703237410071942, "grad_norm": 0.31244704127311707, "learning_rate": 7.521686325455506e-05, "loss": 0.0768, "step": 21910 }, { "action_loss": 0.00833441037684679, "epoch": 19.703237410071942, "step": 21910 }, { "epoch": 19.703237410071942, "step": 21910, "torque_loss": 0.13810229301452637 }, { "epoch": 19.71223021582734, "grad_norm": 0.33842095732688904, "learning_rate": 7.51930630529252e-05, "loss": 0.0874, "step": 21920 }, { "action_loss": 0.010486365295946598, "epoch": 19.71223021582734, "step": 21920 }, { "epoch": 19.71223021582734, "step": 21920, "torque_loss": 0.1836272031068802 }, { "epoch": 19.721223021582734, "grad_norm": 0.32928356528282166, "learning_rate": 7.516925519830797e-05, "loss": 0.0849, "step": 21930 }, { "action_loss": 0.006314496044069529, "epoch": 19.721223021582734, "step": 21930 }, { "epoch": 19.721223021582734, "step": 21930, "torque_loss": 0.13488595187664032 }, { "epoch": 19.730215827338128, "grad_norm": 0.33063533902168274, "learning_rate": 7.514543969793557e-05, "loss": 0.0814, "step": 21940 }, { "action_loss": 0.004623195622116327, "epoch": 19.730215827338128, "step": 21940 }, { "epoch": 19.730215827338128, "step": 21940, "torque_loss": 0.1045270636677742 }, { "epoch": 19.739208633093526, "grad_norm": 0.36188411712646484, "learning_rate": 7.512161655904251e-05, "loss": 0.092, "step": 21950 }, { "action_loss": 0.0021845169831067324, "epoch": 19.739208633093526, "step": 21950 }, { "epoch": 19.739208633093526, "step": 21950, "torque_loss": 0.1278194636106491 }, { "epoch": 19.74820143884892, "grad_norm": 0.481460303068161, "learning_rate": 7.509778578886563e-05, "loss": 0.0856, "step": 21960 }, { "action_loss": 0.0034142378717660904, "epoch": 19.74820143884892, "step": 21960 }, { "epoch": 19.74820143884892, "step": 21960, "torque_loss": 0.10372534394264221 }, { "epoch": 19.757194244604317, "grad_norm": 0.38387253880500793, "learning_rate": 7.507394739464412e-05, "loss": 0.0861, "step": 21970 }, { "action_loss": 0.004481826443225145, "epoch": 19.757194244604317, "step": 21970 }, { "epoch": 19.757194244604317, "step": 21970, "torque_loss": 0.10887358337640762 }, { "epoch": 19.76618705035971, "grad_norm": 0.347614049911499, "learning_rate": 7.50501013836194e-05, "loss": 0.0916, "step": 21980 }, { "action_loss": 0.002282097004354, "epoch": 19.76618705035971, "step": 21980 }, { "epoch": 19.76618705035971, "step": 21980, "torque_loss": 0.08520299196243286 }, { "epoch": 19.77517985611511, "grad_norm": 0.409019410610199, "learning_rate": 7.50262477630353e-05, "loss": 0.0951, "step": 21990 }, { "action_loss": 0.012563981115818024, "epoch": 19.77517985611511, "step": 21990 }, { "epoch": 19.77517985611511, "step": 21990, "torque_loss": 0.14380516111850739 }, { "epoch": 19.784172661870503, "grad_norm": 0.302969753742218, "learning_rate": 7.500238654013794e-05, "loss": 0.1025, "step": 22000 }, { "action_loss": 0.004144519101828337, "epoch": 19.784172661870503, "step": 22000 }, { "epoch": 19.784172661870503, "step": 22000, "torque_loss": 0.11104372888803482 }, { "epoch": 19.7931654676259, "grad_norm": 0.4228251278400421, "learning_rate": 7.497851772217566e-05, "loss": 0.0804, "step": 22010 }, { "action_loss": 0.010258165188133717, "epoch": 19.7931654676259, "step": 22010 }, { "epoch": 19.7931654676259, "step": 22010, "torque_loss": 0.16508112847805023 }, { "epoch": 19.802158273381295, "grad_norm": 0.4489392340183258, "learning_rate": 7.495464131639924e-05, "loss": 0.0823, "step": 22020 }, { "action_loss": 0.008778329007327557, "epoch": 19.802158273381295, "step": 22020 }, { "epoch": 19.802158273381295, "step": 22020, "torque_loss": 0.14808699488639832 }, { "epoch": 19.81115107913669, "grad_norm": 0.40630966424942017, "learning_rate": 7.493075733006166e-05, "loss": 0.092, "step": 22030 }, { "action_loss": 0.003981060814112425, "epoch": 19.81115107913669, "step": 22030 }, { "epoch": 19.81115107913669, "step": 22030, "torque_loss": 0.13009263575077057 }, { "epoch": 19.820143884892087, "grad_norm": 0.34582000970840454, "learning_rate": 7.490686577041828e-05, "loss": 0.0876, "step": 22040 }, { "action_loss": 0.008012217469513416, "epoch": 19.820143884892087, "step": 22040 }, { "epoch": 19.820143884892087, "step": 22040, "torque_loss": 0.14141608774662018 }, { "epoch": 19.82913669064748, "grad_norm": 0.2696736454963684, "learning_rate": 7.488296664472668e-05, "loss": 0.1075, "step": 22050 }, { "action_loss": 0.007852835580706596, "epoch": 19.82913669064748, "step": 22050 }, { "epoch": 19.82913669064748, "step": 22050, "torque_loss": 0.14065180718898773 }, { "epoch": 19.83812949640288, "grad_norm": 0.30212026834487915, "learning_rate": 7.485905996024682e-05, "loss": 0.0843, "step": 22060 }, { "action_loss": 0.00599685637280345, "epoch": 19.83812949640288, "step": 22060 }, { "epoch": 19.83812949640288, "step": 22060, "torque_loss": 0.12794561684131622 }, { "epoch": 19.847122302158272, "grad_norm": 0.39082813262939453, "learning_rate": 7.483514572424093e-05, "loss": 0.0714, "step": 22070 }, { "action_loss": 0.014133196324110031, "epoch": 19.847122302158272, "step": 22070 }, { "epoch": 19.847122302158272, "step": 22070, "torque_loss": 0.14720825850963593 }, { "epoch": 19.85611510791367, "grad_norm": 0.422933429479599, "learning_rate": 7.481122394397349e-05, "loss": 0.0791, "step": 22080 }, { "action_loss": 0.009065103717148304, "epoch": 19.85611510791367, "step": 22080 }, { "epoch": 19.85611510791367, "step": 22080, "torque_loss": 0.11982810497283936 }, { "epoch": 19.865107913669064, "grad_norm": 0.26374945044517517, "learning_rate": 7.478729462671131e-05, "loss": 0.078, "step": 22090 }, { "action_loss": 0.01043765340000391, "epoch": 19.865107913669064, "step": 22090 }, { "epoch": 19.865107913669064, "step": 22090, "torque_loss": 0.1659952998161316 }, { "epoch": 19.87410071942446, "grad_norm": 0.350630521774292, "learning_rate": 7.47633577797235e-05, "loss": 0.094, "step": 22100 }, { "action_loss": 0.0032503108959645033, "epoch": 19.87410071942446, "step": 22100 }, { "epoch": 19.87410071942446, "step": 22100, "torque_loss": 0.11066734045743942 }, { "epoch": 19.883093525179856, "grad_norm": 0.3563601076602936, "learning_rate": 7.473941341028144e-05, "loss": 0.079, "step": 22110 }, { "action_loss": 0.0035181597340852022, "epoch": 19.883093525179856, "step": 22110 }, { "epoch": 19.883093525179856, "step": 22110, "torque_loss": 0.13381025195121765 }, { "epoch": 19.892086330935253, "grad_norm": 0.26093101501464844, "learning_rate": 7.471546152565879e-05, "loss": 0.0736, "step": 22120 }, { "action_loss": 0.014675614424049854, "epoch": 19.892086330935253, "step": 22120 }, { "epoch": 19.892086330935253, "step": 22120, "torque_loss": 0.18078124523162842 }, { "epoch": 19.901079136690647, "grad_norm": 0.25621965527534485, "learning_rate": 7.46915021331315e-05, "loss": 0.0931, "step": 22130 }, { "action_loss": 0.015672734007239342, "epoch": 19.901079136690647, "step": 22130 }, { "epoch": 19.901079136690647, "step": 22130, "torque_loss": 0.22164206206798553 }, { "epoch": 19.91007194244604, "grad_norm": 0.2639658451080322, "learning_rate": 7.466753523997778e-05, "loss": 0.088, "step": 22140 }, { "action_loss": 0.00465167174115777, "epoch": 19.91007194244604, "step": 22140 }, { "epoch": 19.91007194244604, "step": 22140, "torque_loss": 0.11339988559484482 }, { "epoch": 19.91906474820144, "grad_norm": 0.302202433347702, "learning_rate": 7.464356085347819e-05, "loss": 0.0863, "step": 22150 }, { "action_loss": 0.0041390168480575085, "epoch": 19.91906474820144, "step": 22150 }, { "epoch": 19.91906474820144, "step": 22150, "torque_loss": 0.12474439293146133 }, { "epoch": 19.928057553956833, "grad_norm": 0.2995898127555847, "learning_rate": 7.461957898091548e-05, "loss": 0.0874, "step": 22160 }, { "action_loss": 0.0031308671459555626, "epoch": 19.928057553956833, "step": 22160 }, { "epoch": 19.928057553956833, "step": 22160, "torque_loss": 0.1274547427892685 }, { "epoch": 19.93705035971223, "grad_norm": 0.3505816161632538, "learning_rate": 7.459558962957473e-05, "loss": 0.0901, "step": 22170 }, { "action_loss": 0.006777556147426367, "epoch": 19.93705035971223, "step": 22170 }, { "epoch": 19.93705035971223, "step": 22170, "torque_loss": 0.11423760652542114 }, { "epoch": 19.946043165467625, "grad_norm": 0.31680041551589966, "learning_rate": 7.457159280674326e-05, "loss": 0.0825, "step": 22180 }, { "action_loss": 0.006243307609111071, "epoch": 19.946043165467625, "step": 22180 }, { "epoch": 19.946043165467625, "step": 22180, "torque_loss": 0.11413824558258057 }, { "epoch": 19.955035971223023, "grad_norm": 0.378690630197525, "learning_rate": 7.454758851971066e-05, "loss": 0.0843, "step": 22190 }, { "action_loss": 0.012552137486636639, "epoch": 19.955035971223023, "step": 22190 }, { "epoch": 19.955035971223023, "step": 22190, "torque_loss": 0.13645513355731964 }, { "epoch": 19.964028776978417, "grad_norm": 0.40737780928611755, "learning_rate": 7.45235767757688e-05, "loss": 0.0839, "step": 22200 }, { "action_loss": 0.0327996090054512, "epoch": 19.964028776978417, "step": 22200 }, { "epoch": 19.964028776978417, "step": 22200, "torque_loss": 0.20824451744556427 }, { "epoch": 19.973021582733814, "grad_norm": 0.31262385845184326, "learning_rate": 7.449955758221183e-05, "loss": 0.1068, "step": 22210 }, { "action_loss": 0.02154531143605709, "epoch": 19.973021582733814, "step": 22210 }, { "epoch": 19.973021582733814, "step": 22210, "torque_loss": 0.19999712705612183 }, { "epoch": 19.98201438848921, "grad_norm": 0.38333332538604736, "learning_rate": 7.447553094633615e-05, "loss": 0.0848, "step": 22220 }, { "action_loss": 0.008116373792290688, "epoch": 19.98201438848921, "step": 22220 }, { "epoch": 19.98201438848921, "step": 22220, "torque_loss": 0.14411476254463196 }, { "epoch": 19.991007194244606, "grad_norm": 0.27763161063194275, "learning_rate": 7.445149687544039e-05, "loss": 0.0898, "step": 22230 }, { "action_loss": 0.01050946582108736, "epoch": 19.991007194244606, "step": 22230 }, { "epoch": 19.991007194244606, "step": 22230, "torque_loss": 0.18544010818004608 }, { "epoch": 20.0, "grad_norm": 0.34603917598724365, "learning_rate": 7.44274553768255e-05, "loss": 0.086, "step": 22240 }, { "action_loss": 0.006275814026594162, "epoch": 20.0, "step": 22240 }, { "epoch": 20.0, "step": 22240, "torque_loss": 0.07639168947935104 }, { "epoch": 20.008992805755394, "grad_norm": 0.3423983156681061, "learning_rate": 7.440340645779464e-05, "loss": 0.0832, "step": 22250 }, { "action_loss": 0.007160473614931107, "epoch": 20.008992805755394, "step": 22250 }, { "epoch": 20.008992805755394, "step": 22250, "torque_loss": 0.14182543754577637 }, { "epoch": 20.01798561151079, "grad_norm": 0.23882800340652466, "learning_rate": 7.437935012565322e-05, "loss": 0.0832, "step": 22260 }, { "action_loss": 0.0026219456922262907, "epoch": 20.01798561151079, "step": 22260 }, { "epoch": 20.01798561151079, "step": 22260, "torque_loss": 0.07412139326334 }, { "epoch": 20.026978417266186, "grad_norm": 0.3635520935058594, "learning_rate": 7.435528638770893e-05, "loss": 0.0825, "step": 22270 }, { "action_loss": 0.004756861831992865, "epoch": 20.026978417266186, "step": 22270 }, { "epoch": 20.026978417266186, "step": 22270, "torque_loss": 0.11611980944871902 }, { "epoch": 20.035971223021583, "grad_norm": 0.314907968044281, "learning_rate": 7.433121525127171e-05, "loss": 0.0939, "step": 22280 }, { "action_loss": 0.017126524820923805, "epoch": 20.035971223021583, "step": 22280 }, { "epoch": 20.035971223021583, "step": 22280, "torque_loss": 0.12849341332912445 }, { "epoch": 20.044964028776977, "grad_norm": 0.3372287452220917, "learning_rate": 7.430713672365371e-05, "loss": 0.1066, "step": 22290 }, { "action_loss": 0.006195328664034605, "epoch": 20.044964028776977, "step": 22290 }, { "epoch": 20.044964028776977, "step": 22290, "torque_loss": 0.11245080083608627 }, { "epoch": 20.053956834532375, "grad_norm": 0.3227732479572296, "learning_rate": 7.428305081216938e-05, "loss": 0.0844, "step": 22300 }, { "action_loss": 0.010410699062049389, "epoch": 20.053956834532375, "step": 22300 }, { "epoch": 20.053956834532375, "step": 22300, "torque_loss": 0.11286827176809311 }, { "epoch": 20.06294964028777, "grad_norm": 0.354444682598114, "learning_rate": 7.425895752413536e-05, "loss": 0.0881, "step": 22310 }, { "action_loss": 0.03141391649842262, "epoch": 20.06294964028777, "step": 22310 }, { "epoch": 20.06294964028777, "step": 22310, "torque_loss": 0.1378210186958313 }, { "epoch": 20.071942446043167, "grad_norm": 0.2852379381656647, "learning_rate": 7.423485686687057e-05, "loss": 0.0928, "step": 22320 }, { "action_loss": 0.008969821967184544, "epoch": 20.071942446043167, "step": 22320 }, { "epoch": 20.071942446043167, "step": 22320, "torque_loss": 0.10271477699279785 }, { "epoch": 20.08093525179856, "grad_norm": 0.32608503103256226, "learning_rate": 7.421074884769616e-05, "loss": 0.0873, "step": 22330 }, { "action_loss": 0.026451589539647102, "epoch": 20.08093525179856, "step": 22330 }, { "epoch": 20.08093525179856, "step": 22330, "torque_loss": 0.2660682201385498 }, { "epoch": 20.08992805755396, "grad_norm": 0.34268662333488464, "learning_rate": 7.418663347393548e-05, "loss": 0.0979, "step": 22340 }, { "action_loss": 0.014158867299556732, "epoch": 20.08992805755396, "step": 22340 }, { "epoch": 20.08992805755396, "step": 22340, "torque_loss": 0.1493956595659256 }, { "epoch": 20.098920863309353, "grad_norm": 0.3840663731098175, "learning_rate": 7.416251075291418e-05, "loss": 0.0956, "step": 22350 }, { "action_loss": 0.005600979086011648, "epoch": 20.098920863309353, "step": 22350 }, { "epoch": 20.098920863309353, "step": 22350, "torque_loss": 0.14134417474269867 }, { "epoch": 20.107913669064747, "grad_norm": 0.2954639196395874, "learning_rate": 7.413838069196007e-05, "loss": 0.0717, "step": 22360 }, { "action_loss": 0.0031851797830313444, "epoch": 20.107913669064747, "step": 22360 }, { "epoch": 20.107913669064747, "step": 22360, "torque_loss": 0.10020375996828079 }, { "epoch": 20.116906474820144, "grad_norm": 0.361130028963089, "learning_rate": 7.411424329840324e-05, "loss": 0.0806, "step": 22370 }, { "action_loss": 0.004596354439854622, "epoch": 20.116906474820144, "step": 22370 }, { "epoch": 20.116906474820144, "step": 22370, "torque_loss": 0.1065899133682251 }, { "epoch": 20.12589928057554, "grad_norm": 0.35598382353782654, "learning_rate": 7.409009857957601e-05, "loss": 0.1039, "step": 22380 }, { "action_loss": 0.020058676600456238, "epoch": 20.12589928057554, "step": 22380 }, { "epoch": 20.12589928057554, "step": 22380, "torque_loss": 0.1615287810564041 }, { "epoch": 20.134892086330936, "grad_norm": 0.38283005356788635, "learning_rate": 7.40659465428129e-05, "loss": 0.0917, "step": 22390 }, { "action_loss": 0.013450301252305508, "epoch": 20.134892086330936, "step": 22390 }, { "epoch": 20.134892086330936, "step": 22390, "torque_loss": 0.15349571406841278 }, { "epoch": 20.14388489208633, "grad_norm": 0.3067522943019867, "learning_rate": 7.404178719545063e-05, "loss": 0.0904, "step": 22400 }, { "action_loss": 0.005800542887300253, "epoch": 20.14388489208633, "step": 22400 }, { "epoch": 20.14388489208633, "step": 22400, "torque_loss": 0.08712708950042725 }, { "epoch": 20.152877697841728, "grad_norm": 0.38712581992149353, "learning_rate": 7.401762054482822e-05, "loss": 0.0958, "step": 22410 }, { "action_loss": 0.008750204928219318, "epoch": 20.152877697841728, "step": 22410 }, { "epoch": 20.152877697841728, "step": 22410, "torque_loss": 0.13757994771003723 }, { "epoch": 20.16187050359712, "grad_norm": 0.26772668957710266, "learning_rate": 7.39934465982868e-05, "loss": 0.0902, "step": 22420 }, { "action_loss": 0.005622191820293665, "epoch": 20.16187050359712, "step": 22420 }, { "epoch": 20.16187050359712, "step": 22420, "torque_loss": 0.11089614778757095 }, { "epoch": 20.17086330935252, "grad_norm": 0.24855530261993408, "learning_rate": 7.396926536316984e-05, "loss": 0.0754, "step": 22430 }, { "action_loss": 0.008614522404968739, "epoch": 20.17086330935252, "step": 22430 }, { "epoch": 20.17086330935252, "step": 22430, "torque_loss": 0.16782665252685547 }, { "epoch": 20.179856115107913, "grad_norm": 0.3458348512649536, "learning_rate": 7.394507684682293e-05, "loss": 0.098, "step": 22440 }, { "action_loss": 0.008122864179313183, "epoch": 20.179856115107913, "step": 22440 }, { "epoch": 20.179856115107913, "step": 22440, "torque_loss": 0.13053855299949646 }, { "epoch": 20.18884892086331, "grad_norm": 0.3636939525604248, "learning_rate": 7.392088105659393e-05, "loss": 0.0878, "step": 22450 }, { "action_loss": 0.009631927125155926, "epoch": 20.18884892086331, "step": 22450 }, { "epoch": 20.18884892086331, "step": 22450, "torque_loss": 0.12656401097774506 }, { "epoch": 20.197841726618705, "grad_norm": 0.3479393720626831, "learning_rate": 7.389667799983284e-05, "loss": 0.0996, "step": 22460 }, { "action_loss": 0.009294108487665653, "epoch": 20.197841726618705, "step": 22460 }, { "epoch": 20.197841726618705, "step": 22460, "torque_loss": 0.13254772126674652 }, { "epoch": 20.2068345323741, "grad_norm": 0.3276597857475281, "learning_rate": 7.387246768389193e-05, "loss": 0.0813, "step": 22470 }, { "action_loss": 0.013098928146064281, "epoch": 20.2068345323741, "step": 22470 }, { "epoch": 20.2068345323741, "step": 22470, "torque_loss": 0.16776812076568604 }, { "epoch": 20.215827338129497, "grad_norm": 0.30387479066848755, "learning_rate": 7.384825011612563e-05, "loss": 0.0749, "step": 22480 }, { "action_loss": 0.008331761695444584, "epoch": 20.215827338129497, "step": 22480 }, { "epoch": 20.215827338129497, "step": 22480, "torque_loss": 0.1427815556526184 }, { "epoch": 20.22482014388489, "grad_norm": 0.3171856701374054, "learning_rate": 7.382402530389066e-05, "loss": 0.0973, "step": 22490 }, { "action_loss": 0.00492348475381732, "epoch": 20.22482014388489, "step": 22490 }, { "epoch": 20.22482014388489, "step": 22490, "torque_loss": 0.17704002559185028 }, { "epoch": 20.23381294964029, "grad_norm": 0.35690566897392273, "learning_rate": 7.379979325454582e-05, "loss": 0.0812, "step": 22500 }, { "action_loss": 0.0038312573451548815, "epoch": 20.23381294964029, "step": 22500 }, { "epoch": 20.23381294964029, "step": 22500, "torque_loss": 0.09091705083847046 }, { "epoch": 20.242805755395683, "grad_norm": 0.37454691529273987, "learning_rate": 7.37755539754522e-05, "loss": 0.0828, "step": 22510 }, { "action_loss": 0.005573386792093515, "epoch": 20.242805755395683, "step": 22510 }, { "epoch": 20.242805755395683, "step": 22510, "torque_loss": 0.11158844828605652 }, { "epoch": 20.25179856115108, "grad_norm": 0.2793903052806854, "learning_rate": 7.375130747397302e-05, "loss": 0.0743, "step": 22520 }, { "action_loss": 0.00306110642850399, "epoch": 20.25179856115108, "step": 22520 }, { "epoch": 20.25179856115108, "step": 22520, "torque_loss": 0.08118052035570145 }, { "epoch": 20.260791366906474, "grad_norm": 0.37546366453170776, "learning_rate": 7.372705375747377e-05, "loss": 0.0862, "step": 22530 }, { "action_loss": 0.00977406743913889, "epoch": 20.260791366906474, "step": 22530 }, { "epoch": 20.260791366906474, "step": 22530, "torque_loss": 0.12146228551864624 }, { "epoch": 20.269784172661872, "grad_norm": 0.34163448214530945, "learning_rate": 7.370279283332205e-05, "loss": 0.0792, "step": 22540 }, { "action_loss": 0.014495021663606167, "epoch": 20.269784172661872, "step": 22540 }, { "epoch": 20.269784172661872, "step": 22540, "torque_loss": 0.19498278200626373 }, { "epoch": 20.278776978417266, "grad_norm": 0.3493405878543854, "learning_rate": 7.36785247088877e-05, "loss": 0.0786, "step": 22550 }, { "action_loss": 0.010539524257183075, "epoch": 20.278776978417266, "step": 22550 }, { "epoch": 20.278776978417266, "step": 22550, "torque_loss": 0.12619884312152863 }, { "epoch": 20.28776978417266, "grad_norm": 0.30034032464027405, "learning_rate": 7.365424939154275e-05, "loss": 0.0865, "step": 22560 }, { "action_loss": 0.007849741727113724, "epoch": 20.28776978417266, "step": 22560 }, { "epoch": 20.28776978417266, "step": 22560, "torque_loss": 0.14484794437885284 }, { "epoch": 20.296762589928058, "grad_norm": 0.36431685090065, "learning_rate": 7.362996688866138e-05, "loss": 0.0844, "step": 22570 }, { "action_loss": 0.004008021671324968, "epoch": 20.296762589928058, "step": 22570 }, { "epoch": 20.296762589928058, "step": 22570, "torque_loss": 0.1097874715924263 }, { "epoch": 20.305755395683452, "grad_norm": 0.3257385194301605, "learning_rate": 7.360567720761999e-05, "loss": 0.0916, "step": 22580 }, { "action_loss": 0.007365111727267504, "epoch": 20.305755395683452, "step": 22580 }, { "epoch": 20.305755395683452, "step": 22580, "torque_loss": 0.1894538402557373 }, { "epoch": 20.31474820143885, "grad_norm": 0.4054173529148102, "learning_rate": 7.358138035579711e-05, "loss": 0.0948, "step": 22590 }, { "action_loss": 0.00431605102494359, "epoch": 20.31474820143885, "step": 22590 }, { "epoch": 20.31474820143885, "step": 22590, "torque_loss": 0.07391814142465591 }, { "epoch": 20.323741007194243, "grad_norm": 0.32205185294151306, "learning_rate": 7.355707634057354e-05, "loss": 0.0824, "step": 22600 }, { "action_loss": 0.007126168813556433, "epoch": 20.323741007194243, "step": 22600 }, { "epoch": 20.323741007194243, "step": 22600, "torque_loss": 0.11130619794130325 }, { "epoch": 20.33273381294964, "grad_norm": 0.2732789218425751, "learning_rate": 7.353276516933215e-05, "loss": 0.0729, "step": 22610 }, { "action_loss": 0.012804854661226273, "epoch": 20.33273381294964, "step": 22610 }, { "epoch": 20.33273381294964, "step": 22610, "torque_loss": 0.14762331545352936 }, { "epoch": 20.341726618705035, "grad_norm": 0.3192666471004486, "learning_rate": 7.350844684945806e-05, "loss": 0.0857, "step": 22620 }, { "action_loss": 0.0027117207646369934, "epoch": 20.341726618705035, "step": 22620 }, { "epoch": 20.341726618705035, "step": 22620, "torque_loss": 0.11214977502822876 }, { "epoch": 20.350719424460433, "grad_norm": 0.3435561954975128, "learning_rate": 7.348412138833851e-05, "loss": 0.0818, "step": 22630 }, { "action_loss": 0.003644441021606326, "epoch": 20.350719424460433, "step": 22630 }, { "epoch": 20.350719424460433, "step": 22630, "torque_loss": 0.12506574392318726 }, { "epoch": 20.359712230215827, "grad_norm": 0.36220040917396545, "learning_rate": 7.345978879336295e-05, "loss": 0.0804, "step": 22640 }, { "action_loss": 0.012813362292945385, "epoch": 20.359712230215827, "step": 22640 }, { "epoch": 20.359712230215827, "step": 22640, "torque_loss": 0.12203556299209595 }, { "epoch": 20.368705035971225, "grad_norm": 0.2653379440307617, "learning_rate": 7.343544907192296e-05, "loss": 0.0899, "step": 22650 }, { "action_loss": 0.01075250655412674, "epoch": 20.368705035971225, "step": 22650 }, { "epoch": 20.368705035971225, "step": 22650, "torque_loss": 0.16634207963943481 }, { "epoch": 20.37769784172662, "grad_norm": 0.2407214343547821, "learning_rate": 7.341110223141235e-05, "loss": 0.086, "step": 22660 }, { "action_loss": 0.007869417779147625, "epoch": 20.37769784172662, "step": 22660 }, { "epoch": 20.37769784172662, "step": 22660, "torque_loss": 0.12037762254476547 }, { "epoch": 20.386690647482013, "grad_norm": 0.3476046025753021, "learning_rate": 7.3386748279227e-05, "loss": 0.0895, "step": 22670 }, { "action_loss": 0.010275094769895077, "epoch": 20.386690647482013, "step": 22670 }, { "epoch": 20.386690647482013, "step": 22670, "torque_loss": 0.16569705307483673 }, { "epoch": 20.39568345323741, "grad_norm": 0.38404640555381775, "learning_rate": 7.336238722276501e-05, "loss": 0.104, "step": 22680 }, { "action_loss": 0.005885979160666466, "epoch": 20.39568345323741, "step": 22680 }, { "epoch": 20.39568345323741, "step": 22680, "torque_loss": 0.10328993201255798 }, { "epoch": 20.404676258992804, "grad_norm": 0.37020713090896606, "learning_rate": 7.333801906942663e-05, "loss": 0.0796, "step": 22690 }, { "action_loss": 0.003995861858129501, "epoch": 20.404676258992804, "step": 22690 }, { "epoch": 20.404676258992804, "step": 22690, "torque_loss": 0.08637925982475281 }, { "epoch": 20.413669064748202, "grad_norm": 0.21249420940876007, "learning_rate": 7.331364382661428e-05, "loss": 0.0783, "step": 22700 }, { "action_loss": 0.008539066649973392, "epoch": 20.413669064748202, "step": 22700 }, { "epoch": 20.413669064748202, "step": 22700, "torque_loss": 0.11502885818481445 }, { "epoch": 20.422661870503596, "grad_norm": 0.3135538399219513, "learning_rate": 7.328926150173248e-05, "loss": 0.0797, "step": 22710 }, { "action_loss": 0.004049323033541441, "epoch": 20.422661870503596, "step": 22710 }, { "epoch": 20.422661870503596, "step": 22710, "torque_loss": 0.09511637687683105 }, { "epoch": 20.431654676258994, "grad_norm": 0.2992185056209564, "learning_rate": 7.326487210218795e-05, "loss": 0.0788, "step": 22720 }, { "action_loss": 0.004757814574986696, "epoch": 20.431654676258994, "step": 22720 }, { "epoch": 20.431654676258994, "step": 22720, "torque_loss": 0.08717335015535355 }, { "epoch": 20.440647482014388, "grad_norm": 0.3385219871997833, "learning_rate": 7.324047563538955e-05, "loss": 0.0968, "step": 22730 }, { "action_loss": 0.0038335297722369432, "epoch": 20.440647482014388, "step": 22730 }, { "epoch": 20.440647482014388, "step": 22730, "torque_loss": 0.07643857598304749 }, { "epoch": 20.449640287769785, "grad_norm": 0.3190114200115204, "learning_rate": 7.321607210874828e-05, "loss": 0.0928, "step": 22740 }, { "action_loss": 0.014845737256109715, "epoch": 20.449640287769785, "step": 22740 }, { "epoch": 20.449640287769785, "step": 22740, "torque_loss": 0.14748214185237885 }, { "epoch": 20.45863309352518, "grad_norm": 0.32326191663742065, "learning_rate": 7.31916615296773e-05, "loss": 0.0838, "step": 22750 }, { "action_loss": 0.006324350833892822, "epoch": 20.45863309352518, "step": 22750 }, { "epoch": 20.45863309352518, "step": 22750, "torque_loss": 0.16940529644489288 }, { "epoch": 20.467625899280577, "grad_norm": 0.2935223877429962, "learning_rate": 7.316724390559188e-05, "loss": 0.0817, "step": 22760 }, { "action_loss": 0.0040749069303274155, "epoch": 20.467625899280577, "step": 22760 }, { "epoch": 20.467625899280577, "step": 22760, "torque_loss": 0.07716872543096542 }, { "epoch": 20.47661870503597, "grad_norm": 0.37984612584114075, "learning_rate": 7.314281924390946e-05, "loss": 0.0687, "step": 22770 }, { "action_loss": 0.024967113509774208, "epoch": 20.47661870503597, "step": 22770 }, { "epoch": 20.47661870503597, "step": 22770, "torque_loss": 0.3264580965042114 }, { "epoch": 20.485611510791365, "grad_norm": 0.3082062304019928, "learning_rate": 7.311838755204959e-05, "loss": 0.0913, "step": 22780 }, { "action_loss": 0.004388132132589817, "epoch": 20.485611510791365, "step": 22780 }, { "epoch": 20.485611510791365, "step": 22780, "torque_loss": 0.07909640669822693 }, { "epoch": 20.494604316546763, "grad_norm": 0.3218173086643219, "learning_rate": 7.3093948837434e-05, "loss": 0.0803, "step": 22790 }, { "action_loss": 0.0034080008044838905, "epoch": 20.494604316546763, "step": 22790 }, { "epoch": 20.494604316546763, "step": 22790, "torque_loss": 0.0872127041220665 }, { "epoch": 20.503597122302157, "grad_norm": 0.2676374614238739, "learning_rate": 7.306950310748651e-05, "loss": 0.0687, "step": 22800 }, { "action_loss": 0.005045115947723389, "epoch": 20.503597122302157, "step": 22800 }, { "epoch": 20.503597122302157, "step": 22800, "torque_loss": 0.12060286849737167 }, { "epoch": 20.512589928057555, "grad_norm": 0.25609245896339417, "learning_rate": 7.304505036963311e-05, "loss": 0.0951, "step": 22810 }, { "action_loss": 0.0075094327330589294, "epoch": 20.512589928057555, "step": 22810 }, { "epoch": 20.512589928057555, "step": 22810, "torque_loss": 0.12753410637378693 }, { "epoch": 20.52158273381295, "grad_norm": 0.34137865900993347, "learning_rate": 7.302059063130186e-05, "loss": 0.0821, "step": 22820 }, { "action_loss": 0.006541822571307421, "epoch": 20.52158273381295, "step": 22820 }, { "epoch": 20.52158273381295, "step": 22820, "torque_loss": 0.11528562754392624 }, { "epoch": 20.530575539568346, "grad_norm": 0.3337499797344208, "learning_rate": 7.2996123899923e-05, "loss": 0.0826, "step": 22830 }, { "action_loss": 0.008430560119450092, "epoch": 20.530575539568346, "step": 22830 }, { "epoch": 20.530575539568346, "step": 22830, "torque_loss": 0.13445419073104858 }, { "epoch": 20.53956834532374, "grad_norm": 0.29786306619644165, "learning_rate": 7.297165018292886e-05, "loss": 0.0891, "step": 22840 }, { "action_loss": 0.0050064329989254475, "epoch": 20.53956834532374, "step": 22840 }, { "epoch": 20.53956834532374, "step": 22840, "torque_loss": 0.10354789346456528 }, { "epoch": 20.548561151079138, "grad_norm": 0.3125115633010864, "learning_rate": 7.294716948775396e-05, "loss": 0.08, "step": 22850 }, { "action_loss": 0.0082260025665164, "epoch": 20.548561151079138, "step": 22850 }, { "epoch": 20.548561151079138, "step": 22850, "torque_loss": 0.13575828075408936 }, { "epoch": 20.557553956834532, "grad_norm": 0.3754071295261383, "learning_rate": 7.292268182183484e-05, "loss": 0.0882, "step": 22860 }, { "action_loss": 0.007948189042508602, "epoch": 20.557553956834532, "step": 22860 }, { "epoch": 20.557553956834532, "step": 22860, "torque_loss": 0.14787276089191437 }, { "epoch": 20.56654676258993, "grad_norm": 0.29669877886772156, "learning_rate": 7.28981871926102e-05, "loss": 0.077, "step": 22870 }, { "action_loss": 0.01112382858991623, "epoch": 20.56654676258993, "step": 22870 }, { "epoch": 20.56654676258993, "step": 22870, "torque_loss": 0.16764724254608154 }, { "epoch": 20.575539568345324, "grad_norm": 0.31653285026550293, "learning_rate": 7.28736856075209e-05, "loss": 0.0893, "step": 22880 }, { "action_loss": 0.007309264037758112, "epoch": 20.575539568345324, "step": 22880 }, { "epoch": 20.575539568345324, "step": 22880, "torque_loss": 0.13532529771327972 }, { "epoch": 20.584532374100718, "grad_norm": 0.3354112505912781, "learning_rate": 7.284917707400985e-05, "loss": 0.0793, "step": 22890 }, { "action_loss": 0.016063161194324493, "epoch": 20.584532374100718, "step": 22890 }, { "epoch": 20.584532374100718, "step": 22890, "torque_loss": 0.15981264412403107 }, { "epoch": 20.593525179856115, "grad_norm": 0.3161676824092865, "learning_rate": 7.282466159952212e-05, "loss": 0.0952, "step": 22900 }, { "action_loss": 0.007155477534979582, "epoch": 20.593525179856115, "step": 22900 }, { "epoch": 20.593525179856115, "step": 22900, "torque_loss": 0.1293586939573288 }, { "epoch": 20.60251798561151, "grad_norm": 0.28572311997413635, "learning_rate": 7.280013919150483e-05, "loss": 0.0872, "step": 22910 }, { "action_loss": 0.03601275011897087, "epoch": 20.60251798561151, "step": 22910 }, { "epoch": 20.60251798561151, "step": 22910, "torque_loss": 0.19264201819896698 }, { "epoch": 20.611510791366907, "grad_norm": 0.32861098647117615, "learning_rate": 7.277560985740728e-05, "loss": 0.1137, "step": 22920 }, { "action_loss": 0.009441850706934929, "epoch": 20.611510791366907, "step": 22920 }, { "epoch": 20.611510791366907, "step": 22920, "torque_loss": 0.09758269041776657 }, { "epoch": 20.6205035971223, "grad_norm": 0.24509593844413757, "learning_rate": 7.275107360468079e-05, "loss": 0.0863, "step": 22930 }, { "action_loss": 0.009741767309606075, "epoch": 20.6205035971223, "step": 22930 }, { "epoch": 20.6205035971223, "step": 22930, "torque_loss": 0.19108353555202484 }, { "epoch": 20.6294964028777, "grad_norm": 0.3542356491088867, "learning_rate": 7.272653044077885e-05, "loss": 0.0834, "step": 22940 }, { "action_loss": 0.045878421515226364, "epoch": 20.6294964028777, "step": 22940 }, { "epoch": 20.6294964028777, "step": 22940, "torque_loss": 0.16109436750411987 }, { "epoch": 20.638489208633093, "grad_norm": 0.27799269556999207, "learning_rate": 7.270198037315703e-05, "loss": 0.0857, "step": 22950 }, { "action_loss": 0.009349712170660496, "epoch": 20.638489208633093, "step": 22950 }, { "epoch": 20.638489208633093, "step": 22950, "torque_loss": 0.1928708553314209 }, { "epoch": 20.64748201438849, "grad_norm": 0.26198792457580566, "learning_rate": 7.267742340927297e-05, "loss": 0.0728, "step": 22960 }, { "action_loss": 0.004836739972233772, "epoch": 20.64748201438849, "step": 22960 }, { "epoch": 20.64748201438849, "step": 22960, "torque_loss": 0.1232639029622078 }, { "epoch": 20.656474820143885, "grad_norm": 0.31779053807258606, "learning_rate": 7.265285955658645e-05, "loss": 0.0732, "step": 22970 }, { "action_loss": 0.0129030616953969, "epoch": 20.656474820143885, "step": 22970 }, { "epoch": 20.656474820143885, "step": 22970, "torque_loss": 0.14647237956523895 }, { "epoch": 20.665467625899282, "grad_norm": 0.31795772910118103, "learning_rate": 7.26282888225593e-05, "loss": 0.0804, "step": 22980 }, { "action_loss": 0.0066238269209861755, "epoch": 20.665467625899282, "step": 22980 }, { "epoch": 20.665467625899282, "step": 22980, "torque_loss": 0.1180991530418396 }, { "epoch": 20.674460431654676, "grad_norm": 0.3421212136745453, "learning_rate": 7.260371121465548e-05, "loss": 0.0833, "step": 22990 }, { "action_loss": 0.004588224459439516, "epoch": 20.674460431654676, "step": 22990 }, { "epoch": 20.674460431654676, "step": 22990, "torque_loss": 0.13264314830303192 }, { "epoch": 20.68345323741007, "grad_norm": 0.29294532537460327, "learning_rate": 7.2579126740341e-05, "loss": 0.0882, "step": 23000 }, { "action_loss": 0.011768692173063755, "epoch": 20.68345323741007, "step": 23000 }, { "epoch": 20.68345323741007, "step": 23000, "torque_loss": 0.17955134809017181 }, { "epoch": 20.692446043165468, "grad_norm": 0.37408721446990967, "learning_rate": 7.2554535407084e-05, "loss": 0.0746, "step": 23010 }, { "action_loss": 0.0043189432471990585, "epoch": 20.692446043165468, "step": 23010 }, { "epoch": 20.692446043165468, "step": 23010, "torque_loss": 0.10573933273553848 }, { "epoch": 20.701438848920862, "grad_norm": 0.32552289962768555, "learning_rate": 7.252993722235464e-05, "loss": 0.0859, "step": 23020 }, { "action_loss": 0.005526702851057053, "epoch": 20.701438848920862, "step": 23020 }, { "epoch": 20.701438848920862, "step": 23020, "torque_loss": 0.10861573368310928 }, { "epoch": 20.71043165467626, "grad_norm": 0.35884183645248413, "learning_rate": 7.250533219362523e-05, "loss": 0.0796, "step": 23030 }, { "action_loss": 0.010767818428575993, "epoch": 20.71043165467626, "step": 23030 }, { "epoch": 20.71043165467626, "step": 23030, "torque_loss": 0.1603512018918991 }, { "epoch": 20.719424460431654, "grad_norm": 0.32845228910446167, "learning_rate": 7.248072032837012e-05, "loss": 0.0791, "step": 23040 }, { "action_loss": 0.005023655015975237, "epoch": 20.719424460431654, "step": 23040 }, { "epoch": 20.719424460431654, "step": 23040, "torque_loss": 0.1448749601840973 }, { "epoch": 20.72841726618705, "grad_norm": 0.3427826166152954, "learning_rate": 7.245610163406575e-05, "loss": 0.0836, "step": 23050 }, { "action_loss": 0.012422509491443634, "epoch": 20.72841726618705, "step": 23050 }, { "epoch": 20.72841726618705, "step": 23050, "torque_loss": 0.15675009787082672 }, { "epoch": 20.737410071942445, "grad_norm": 0.41358229517936707, "learning_rate": 7.243147611819061e-05, "loss": 0.0923, "step": 23060 }, { "action_loss": 0.0029850059654563665, "epoch": 20.737410071942445, "step": 23060 }, { "epoch": 20.737410071942445, "step": 23060, "torque_loss": 0.09252569824457169 }, { "epoch": 20.746402877697843, "grad_norm": 0.341920405626297, "learning_rate": 7.240684378822531e-05, "loss": 0.0747, "step": 23070 }, { "action_loss": 0.0064218356274068356, "epoch": 20.746402877697843, "step": 23070 }, { "epoch": 20.746402877697843, "step": 23070, "torque_loss": 0.07390408962965012 }, { "epoch": 20.755395683453237, "grad_norm": 0.3137661814689636, "learning_rate": 7.238220465165248e-05, "loss": 0.0815, "step": 23080 }, { "action_loss": 0.006322280969470739, "epoch": 20.755395683453237, "step": 23080 }, { "epoch": 20.755395683453237, "step": 23080, "torque_loss": 0.11114750057458878 }, { "epoch": 20.764388489208635, "grad_norm": 0.3831518590450287, "learning_rate": 7.235755871595684e-05, "loss": 0.0923, "step": 23090 }, { "action_loss": 0.005713455379009247, "epoch": 20.764388489208635, "step": 23090 }, { "epoch": 20.764388489208635, "step": 23090, "torque_loss": 0.09624788910150528 }, { "epoch": 20.77338129496403, "grad_norm": 0.323785662651062, "learning_rate": 7.233290598862517e-05, "loss": 0.0817, "step": 23100 }, { "action_loss": 0.0043406132608652115, "epoch": 20.77338129496403, "step": 23100 }, { "epoch": 20.77338129496403, "step": 23100, "torque_loss": 0.12064016610383987 }, { "epoch": 20.782374100719423, "grad_norm": 0.2812909483909607, "learning_rate": 7.230824647714635e-05, "loss": 0.0839, "step": 23110 }, { "action_loss": 0.008865279145538807, "epoch": 20.782374100719423, "step": 23110 }, { "epoch": 20.782374100719423, "step": 23110, "torque_loss": 0.15678036212921143 }, { "epoch": 20.79136690647482, "grad_norm": 0.28785407543182373, "learning_rate": 7.228358018901124e-05, "loss": 0.084, "step": 23120 }, { "action_loss": 0.010885911993682384, "epoch": 20.79136690647482, "step": 23120 }, { "epoch": 20.79136690647482, "step": 23120, "torque_loss": 0.1309250444173813 }, { "epoch": 20.800359712230215, "grad_norm": 0.3638690412044525, "learning_rate": 7.225890713171286e-05, "loss": 0.0929, "step": 23130 }, { "action_loss": 0.010158671997487545, "epoch": 20.800359712230215, "step": 23130 }, { "epoch": 20.800359712230215, "step": 23130, "torque_loss": 0.11644172668457031 }, { "epoch": 20.809352517985612, "grad_norm": 0.31218767166137695, "learning_rate": 7.223422731274618e-05, "loss": 0.0744, "step": 23140 }, { "action_loss": 0.003373999148607254, "epoch": 20.809352517985612, "step": 23140 }, { "epoch": 20.809352517985612, "step": 23140, "torque_loss": 0.08617918938398361 }, { "epoch": 20.818345323741006, "grad_norm": 0.2640685737133026, "learning_rate": 7.220954073960832e-05, "loss": 0.0747, "step": 23150 }, { "action_loss": 0.0144840432330966, "epoch": 20.818345323741006, "step": 23150 }, { "epoch": 20.818345323741006, "step": 23150, "torque_loss": 0.13699208199977875 }, { "epoch": 20.827338129496404, "grad_norm": 0.3124534785747528, "learning_rate": 7.218484741979838e-05, "loss": 0.0805, "step": 23160 }, { "action_loss": 0.005043745972216129, "epoch": 20.827338129496404, "step": 23160 }, { "epoch": 20.827338129496404, "step": 23160, "torque_loss": 0.11001556366682053 }, { "epoch": 20.836330935251798, "grad_norm": 0.36621880531311035, "learning_rate": 7.216014736081756e-05, "loss": 0.0905, "step": 23170 }, { "action_loss": 0.0044032433070242405, "epoch": 20.836330935251798, "step": 23170 }, { "epoch": 20.836330935251798, "step": 23170, "torque_loss": 0.11665735393762589 }, { "epoch": 20.845323741007196, "grad_norm": 0.3240525722503662, "learning_rate": 7.213544057016906e-05, "loss": 0.0812, "step": 23180 }, { "action_loss": 0.005055306013673544, "epoch": 20.845323741007196, "step": 23180 }, { "epoch": 20.845323741007196, "step": 23180, "torque_loss": 0.08322639018297195 }, { "epoch": 20.85431654676259, "grad_norm": 0.2927996814250946, "learning_rate": 7.211072705535819e-05, "loss": 0.0828, "step": 23190 }, { "action_loss": 0.004169024992734194, "epoch": 20.85431654676259, "step": 23190 }, { "epoch": 20.85431654676259, "step": 23190, "torque_loss": 0.10404136031866074 }, { "epoch": 20.863309352517987, "grad_norm": 0.32091638445854187, "learning_rate": 7.208600682389224e-05, "loss": 0.0825, "step": 23200 }, { "action_loss": 0.00245666760019958, "epoch": 20.863309352517987, "step": 23200 }, { "epoch": 20.863309352517987, "step": 23200, "torque_loss": 0.11753455549478531 }, { "epoch": 20.87230215827338, "grad_norm": 0.323529452085495, "learning_rate": 7.206127988328055e-05, "loss": 0.0853, "step": 23210 }, { "action_loss": 0.0049173180013895035, "epoch": 20.87230215827338, "step": 23210 }, { "epoch": 20.87230215827338, "step": 23210, "torque_loss": 0.10900819301605225 }, { "epoch": 20.881294964028775, "grad_norm": 0.3205546438694, "learning_rate": 7.203654624103453e-05, "loss": 0.0732, "step": 23220 }, { "action_loss": 0.018296077847480774, "epoch": 20.881294964028775, "step": 23220 }, { "epoch": 20.881294964028775, "step": 23220, "torque_loss": 0.14741377532482147 }, { "epoch": 20.890287769784173, "grad_norm": 0.3911847174167633, "learning_rate": 7.201180590466761e-05, "loss": 0.0849, "step": 23230 }, { "action_loss": 0.003648019628599286, "epoch": 20.890287769784173, "step": 23230 }, { "epoch": 20.890287769784173, "step": 23230, "torque_loss": 0.11148446053266525 }, { "epoch": 20.899280575539567, "grad_norm": 0.4136955142021179, "learning_rate": 7.198705888169523e-05, "loss": 0.0923, "step": 23240 }, { "action_loss": 0.0032016767654567957, "epoch": 20.899280575539567, "step": 23240 }, { "epoch": 20.899280575539567, "step": 23240, "torque_loss": 0.06719548255205154 }, { "epoch": 20.908273381294965, "grad_norm": 0.36459049582481384, "learning_rate": 7.196230517963491e-05, "loss": 0.0746, "step": 23250 }, { "action_loss": 0.03153720125555992, "epoch": 20.908273381294965, "step": 23250 }, { "epoch": 20.908273381294965, "step": 23250, "torque_loss": 0.22213530540466309 }, { "epoch": 20.91726618705036, "grad_norm": 0.32083749771118164, "learning_rate": 7.193754480600615e-05, "loss": 0.0797, "step": 23260 }, { "action_loss": 0.003196409670636058, "epoch": 20.91726618705036, "step": 23260 }, { "epoch": 20.91726618705036, "step": 23260, "torque_loss": 0.08566489070653915 }, { "epoch": 20.926258992805757, "grad_norm": 0.36489346623420715, "learning_rate": 7.19127777683305e-05, "loss": 0.0963, "step": 23270 }, { "action_loss": 0.002649240894243121, "epoch": 20.926258992805757, "step": 23270 }, { "epoch": 20.926258992805757, "step": 23270, "torque_loss": 0.08605992794036865 }, { "epoch": 20.93525179856115, "grad_norm": 0.30635443329811096, "learning_rate": 7.188800407413156e-05, "loss": 0.0639, "step": 23280 }, { "action_loss": 0.0031205713748931885, "epoch": 20.93525179856115, "step": 23280 }, { "epoch": 20.93525179856115, "step": 23280, "torque_loss": 0.12004977464675903 }, { "epoch": 20.944244604316548, "grad_norm": 0.31568092107772827, "learning_rate": 7.186322373093489e-05, "loss": 0.0896, "step": 23290 }, { "action_loss": 0.003322415752336383, "epoch": 20.944244604316548, "step": 23290 }, { "epoch": 20.944244604316548, "step": 23290, "torque_loss": 0.09644252061843872 }, { "epoch": 20.953237410071942, "grad_norm": 0.3334214985370636, "learning_rate": 7.18384367462681e-05, "loss": 0.0682, "step": 23300 }, { "action_loss": 0.010503738187253475, "epoch": 20.953237410071942, "step": 23300 }, { "epoch": 20.953237410071942, "step": 23300, "torque_loss": 0.15948928892612457 }, { "epoch": 20.96223021582734, "grad_norm": 0.30679330229759216, "learning_rate": 7.181364312766085e-05, "loss": 0.087, "step": 23310 }, { "action_loss": 0.006546178366988897, "epoch": 20.96223021582734, "step": 23310 }, { "epoch": 20.96223021582734, "step": 23310, "torque_loss": 0.11676763743162155 }, { "epoch": 20.971223021582734, "grad_norm": 0.2840712368488312, "learning_rate": 7.178884288264477e-05, "loss": 0.0708, "step": 23320 }, { "action_loss": 0.010369325056672096, "epoch": 20.971223021582734, "step": 23320 }, { "epoch": 20.971223021582734, "step": 23320, "torque_loss": 0.1391957402229309 }, { "epoch": 20.980215827338128, "grad_norm": 0.3374142646789551, "learning_rate": 7.176403601875353e-05, "loss": 0.0852, "step": 23330 }, { "action_loss": 0.003634039545431733, "epoch": 20.980215827338128, "step": 23330 }, { "epoch": 20.980215827338128, "step": 23330, "torque_loss": 0.1044938936829567 }, { "epoch": 20.989208633093526, "grad_norm": 0.38275831937789917, "learning_rate": 7.173922254352279e-05, "loss": 0.0853, "step": 23340 }, { "action_loss": 0.005101141054183245, "epoch": 20.989208633093526, "step": 23340 }, { "epoch": 20.989208633093526, "step": 23340, "torque_loss": 0.10294601321220398 }, { "epoch": 20.99820143884892, "grad_norm": 0.2633880078792572, "learning_rate": 7.171440246449024e-05, "loss": 0.0728, "step": 23350 }, { "action_loss": 0.006193322595208883, "epoch": 20.99820143884892, "step": 23350 }, { "epoch": 20.99820143884892, "step": 23350, "torque_loss": 0.09470707178115845 }, { "epoch": 21.007194244604317, "grad_norm": 0.3034098744392395, "learning_rate": 7.168957578919555e-05, "loss": 0.0799, "step": 23360 }, { "action_loss": 0.008776393719017506, "epoch": 21.007194244604317, "step": 23360 }, { "epoch": 21.007194244604317, "step": 23360, "torque_loss": 0.12075092643499374 }, { "epoch": 21.01618705035971, "grad_norm": 0.3086661398410797, "learning_rate": 7.16647425251804e-05, "loss": 0.08, "step": 23370 }, { "action_loss": 0.009048265404999256, "epoch": 21.01618705035971, "step": 23370 }, { "epoch": 21.01618705035971, "step": 23370, "torque_loss": 0.16425220668315887 }, { "epoch": 21.02517985611511, "grad_norm": 0.22583749890327454, "learning_rate": 7.163990267998852e-05, "loss": 0.0856, "step": 23380 }, { "action_loss": 0.0066929408349096775, "epoch": 21.02517985611511, "step": 23380 }, { "epoch": 21.02517985611511, "step": 23380, "torque_loss": 0.12665854394435883 }, { "epoch": 21.034172661870503, "grad_norm": 0.31859564781188965, "learning_rate": 7.161505626116556e-05, "loss": 0.0889, "step": 23390 }, { "action_loss": 0.0025281452108174562, "epoch": 21.034172661870503, "step": 23390 }, { "epoch": 21.034172661870503, "step": 23390, "torque_loss": 0.09832976013422012 }, { "epoch": 21.0431654676259, "grad_norm": 0.35166680812835693, "learning_rate": 7.159020327625923e-05, "loss": 0.0808, "step": 23400 }, { "action_loss": 0.015732645988464355, "epoch": 21.0431654676259, "step": 23400 }, { "epoch": 21.0431654676259, "step": 23400, "torque_loss": 0.17724810540676117 }, { "epoch": 21.052158273381295, "grad_norm": 0.3982737064361572, "learning_rate": 7.15653437328192e-05, "loss": 0.1029, "step": 23410 }, { "action_loss": 0.004185659810900688, "epoch": 21.052158273381295, "step": 23410 }, { "epoch": 21.052158273381295, "step": 23410, "torque_loss": 0.09774959087371826 }, { "epoch": 21.06115107913669, "grad_norm": 0.3557136356830597, "learning_rate": 7.154047763839713e-05, "loss": 0.0774, "step": 23420 }, { "action_loss": 0.003258742392063141, "epoch": 21.06115107913669, "step": 23420 }, { "epoch": 21.06115107913669, "step": 23420, "torque_loss": 0.11304539442062378 }, { "epoch": 21.070143884892087, "grad_norm": 0.3208012282848358, "learning_rate": 7.15156050005467e-05, "loss": 0.0714, "step": 23430 }, { "action_loss": 0.016375087201595306, "epoch": 21.070143884892087, "step": 23430 }, { "epoch": 21.070143884892087, "step": 23430, "torque_loss": 0.13207024335861206 }, { "epoch": 21.07913669064748, "grad_norm": 0.29706233739852905, "learning_rate": 7.149072582682357e-05, "loss": 0.0818, "step": 23440 }, { "action_loss": 0.012216425500810146, "epoch": 21.07913669064748, "step": 23440 }, { "epoch": 21.07913669064748, "step": 23440, "torque_loss": 0.10424371808767319 }, { "epoch": 21.08812949640288, "grad_norm": 0.28802233934402466, "learning_rate": 7.146584012478535e-05, "loss": 0.0762, "step": 23450 }, { "action_loss": 0.01344779971987009, "epoch": 21.08812949640288, "step": 23450 }, { "epoch": 21.08812949640288, "step": 23450, "torque_loss": 0.144209623336792 }, { "epoch": 21.097122302158272, "grad_norm": 0.2631221115589142, "learning_rate": 7.144094790199169e-05, "loss": 0.0843, "step": 23460 }, { "action_loss": 0.010749053210020065, "epoch": 21.097122302158272, "step": 23460 }, { "epoch": 21.097122302158272, "step": 23460, "torque_loss": 0.1209777370095253 }, { "epoch": 21.10611510791367, "grad_norm": 0.2866004407405853, "learning_rate": 7.141604916600415e-05, "loss": 0.0844, "step": 23470 }, { "action_loss": 0.005839595105499029, "epoch": 21.10611510791367, "step": 23470 }, { "epoch": 21.10611510791367, "step": 23470, "torque_loss": 0.13323931396007538 }, { "epoch": 21.115107913669064, "grad_norm": 0.40908390283584595, "learning_rate": 7.139114392438635e-05, "loss": 0.0728, "step": 23480 }, { "action_loss": 0.02664935030043125, "epoch": 21.115107913669064, "step": 23480 }, { "epoch": 21.115107913669064, "step": 23480, "torque_loss": 0.18057693541049957 }, { "epoch": 21.12410071942446, "grad_norm": 0.23954208195209503, "learning_rate": 7.136623218470382e-05, "loss": 0.0859, "step": 23490 }, { "action_loss": 0.003982384689152241, "epoch": 21.12410071942446, "step": 23490 }, { "epoch": 21.12410071942446, "step": 23490, "torque_loss": 0.11557167023420334 }, { "epoch": 21.133093525179856, "grad_norm": 0.2870762050151825, "learning_rate": 7.13413139545241e-05, "loss": 0.0839, "step": 23500 }, { "action_loss": 0.007423267234116793, "epoch": 21.133093525179856, "step": 23500 }, { "epoch": 21.133093525179856, "step": 23500, "torque_loss": 0.11539336293935776 }, { "epoch": 21.142086330935253, "grad_norm": 0.2956419587135315, "learning_rate": 7.131638924141668e-05, "loss": 0.0852, "step": 23510 }, { "action_loss": 0.0029339008033275604, "epoch": 21.142086330935253, "step": 23510 }, { "epoch": 21.142086330935253, "step": 23510, "torque_loss": 0.06667663156986237 }, { "epoch": 21.151079136690647, "grad_norm": 0.3766897916793823, "learning_rate": 7.129145805295304e-05, "loss": 0.079, "step": 23520 }, { "action_loss": 0.002890628529712558, "epoch": 21.151079136690647, "step": 23520 }, { "epoch": 21.151079136690647, "step": 23520, "torque_loss": 0.09149321913719177 }, { "epoch": 21.16007194244604, "grad_norm": 0.25797632336616516, "learning_rate": 7.126652039670661e-05, "loss": 0.0651, "step": 23530 }, { "action_loss": 0.004383078310638666, "epoch": 21.16007194244604, "step": 23530 }, { "epoch": 21.16007194244604, "step": 23530, "torque_loss": 0.1202118769288063 }, { "epoch": 21.16906474820144, "grad_norm": 0.26865851879119873, "learning_rate": 7.124157628025278e-05, "loss": 0.0774, "step": 23540 }, { "action_loss": 0.00549798458814621, "epoch": 21.16906474820144, "step": 23540 }, { "epoch": 21.16906474820144, "step": 23540, "torque_loss": 0.12997348606586456 }, { "epoch": 21.178057553956833, "grad_norm": 0.28804031014442444, "learning_rate": 7.121662571116894e-05, "loss": 0.0836, "step": 23550 }, { "action_loss": 0.003430395619943738, "epoch": 21.178057553956833, "step": 23550 }, { "epoch": 21.178057553956833, "step": 23550, "torque_loss": 0.10673586279153824 }, { "epoch": 21.18705035971223, "grad_norm": 0.32831865549087524, "learning_rate": 7.119166869703441e-05, "loss": 0.0692, "step": 23560 }, { "action_loss": 0.0033619434107095003, "epoch": 21.18705035971223, "step": 23560 }, { "epoch": 21.18705035971223, "step": 23560, "torque_loss": 0.0769561305642128 }, { "epoch": 21.196043165467625, "grad_norm": 0.2997820973396301, "learning_rate": 7.116670524543044e-05, "loss": 0.0828, "step": 23570 }, { "action_loss": 0.004877699073404074, "epoch": 21.196043165467625, "step": 23570 }, { "epoch": 21.196043165467625, "step": 23570, "torque_loss": 0.11799731105566025 }, { "epoch": 21.205035971223023, "grad_norm": 0.33637502789497375, "learning_rate": 7.114173536394032e-05, "loss": 0.0843, "step": 23580 }, { "action_loss": 0.0058481525629758835, "epoch": 21.205035971223023, "step": 23580 }, { "epoch": 21.205035971223023, "step": 23580, "torque_loss": 0.10532170534133911 }, { "epoch": 21.214028776978417, "grad_norm": 0.25024110078811646, "learning_rate": 7.111675906014917e-05, "loss": 0.0912, "step": 23590 }, { "action_loss": 0.009967646561563015, "epoch": 21.214028776978417, "step": 23590 }, { "epoch": 21.214028776978417, "step": 23590, "torque_loss": 0.1360529512166977 }, { "epoch": 21.223021582733814, "grad_norm": 0.250603049993515, "learning_rate": 7.109177634164421e-05, "loss": 0.0717, "step": 23600 }, { "action_loss": 0.002989603206515312, "epoch": 21.223021582733814, "step": 23600 }, { "epoch": 21.223021582733814, "step": 23600, "torque_loss": 0.05628135800361633 }, { "epoch": 21.23201438848921, "grad_norm": 0.2148611694574356, "learning_rate": 7.106678721601449e-05, "loss": 0.0677, "step": 23610 }, { "action_loss": 0.00805624295026064, "epoch": 21.23201438848921, "step": 23610 }, { "epoch": 21.23201438848921, "step": 23610, "torque_loss": 0.11854544281959534 }, { "epoch": 21.241007194244606, "grad_norm": 0.3187536597251892, "learning_rate": 7.104179169085103e-05, "loss": 0.0874, "step": 23620 }, { "action_loss": 0.006888324860483408, "epoch": 21.241007194244606, "step": 23620 }, { "epoch": 21.241007194244606, "step": 23620, "torque_loss": 0.11784350872039795 }, { "epoch": 21.25, "grad_norm": 0.4854729473590851, "learning_rate": 7.101678977374683e-05, "loss": 0.089, "step": 23630 }, { "action_loss": 0.00247090682387352, "epoch": 21.25, "step": 23630 }, { "epoch": 21.25, "step": 23630, "torque_loss": 0.05115717649459839 }, { "epoch": 21.258992805755394, "grad_norm": 0.34034496545791626, "learning_rate": 7.099178147229685e-05, "loss": 0.0832, "step": 23640 }, { "action_loss": 0.013023068197071552, "epoch": 21.258992805755394, "step": 23640 }, { "epoch": 21.258992805755394, "step": 23640, "torque_loss": 0.1463272124528885 }, { "epoch": 21.26798561151079, "grad_norm": 0.3561372458934784, "learning_rate": 7.096676679409789e-05, "loss": 0.0862, "step": 23650 }, { "action_loss": 0.01190824806690216, "epoch": 21.26798561151079, "step": 23650 }, { "epoch": 21.26798561151079, "step": 23650, "torque_loss": 0.15166649222373962 }, { "epoch": 21.276978417266186, "grad_norm": 0.3640524446964264, "learning_rate": 7.094174574674877e-05, "loss": 0.0848, "step": 23660 }, { "action_loss": 0.007892767898738384, "epoch": 21.276978417266186, "step": 23660 }, { "epoch": 21.276978417266186, "step": 23660, "torque_loss": 0.18180815875530243 }, { "epoch": 21.285971223021583, "grad_norm": 0.3745129406452179, "learning_rate": 7.091671833785025e-05, "loss": 0.0721, "step": 23670 }, { "action_loss": 0.004608754068613052, "epoch": 21.285971223021583, "step": 23670 }, { "epoch": 21.285971223021583, "step": 23670, "torque_loss": 0.11808896064758301 }, { "epoch": 21.294964028776977, "grad_norm": 0.31107500195503235, "learning_rate": 7.089168457500493e-05, "loss": 0.0854, "step": 23680 }, { "action_loss": 0.006243586540222168, "epoch": 21.294964028776977, "step": 23680 }, { "epoch": 21.294964028776977, "step": 23680, "torque_loss": 0.11378210037946701 }, { "epoch": 21.303956834532375, "grad_norm": 0.2897372543811798, "learning_rate": 7.086664446581747e-05, "loss": 0.084, "step": 23690 }, { "action_loss": 0.007014961447566748, "epoch": 21.303956834532375, "step": 23690 }, { "epoch": 21.303956834532375, "step": 23690, "torque_loss": 0.09558665752410889 }, { "epoch": 21.31294964028777, "grad_norm": 0.3263077735900879, "learning_rate": 7.084159801789438e-05, "loss": 0.0746, "step": 23700 }, { "action_loss": 0.003593936562538147, "epoch": 21.31294964028777, "step": 23700 }, { "epoch": 21.31294964028777, "step": 23700, "torque_loss": 0.10483673959970474 }, { "epoch": 21.321942446043167, "grad_norm": 0.3543184697628021, "learning_rate": 7.081654523884411e-05, "loss": 0.0968, "step": 23710 }, { "action_loss": 0.003988039214164019, "epoch": 21.321942446043167, "step": 23710 }, { "epoch": 21.321942446043167, "step": 23710, "torque_loss": 0.09734266996383667 }, { "epoch": 21.33093525179856, "grad_norm": 0.319487601518631, "learning_rate": 7.0791486136277e-05, "loss": 0.0946, "step": 23720 }, { "action_loss": 0.004224991891533136, "epoch": 21.33093525179856, "step": 23720 }, { "epoch": 21.33093525179856, "step": 23720, "torque_loss": 0.12009170651435852 }, { "epoch": 21.33992805755396, "grad_norm": 0.43164992332458496, "learning_rate": 7.07664207178054e-05, "loss": 0.0872, "step": 23730 }, { "action_loss": 0.004346225876361132, "epoch": 21.33992805755396, "step": 23730 }, { "epoch": 21.33992805755396, "step": 23730, "torque_loss": 0.08893879503011703 }, { "epoch": 21.348920863309353, "grad_norm": 0.3954757750034332, "learning_rate": 7.074134899104345e-05, "loss": 0.0934, "step": 23740 }, { "action_loss": 0.00322823622263968, "epoch": 21.348920863309353, "step": 23740 }, { "epoch": 21.348920863309353, "step": 23740, "torque_loss": 0.09768732637166977 }, { "epoch": 21.357913669064747, "grad_norm": 0.2569331228733063, "learning_rate": 7.071627096360735e-05, "loss": 0.0726, "step": 23750 }, { "action_loss": 0.005231190472841263, "epoch": 21.357913669064747, "step": 23750 }, { "epoch": 21.357913669064747, "step": 23750, "torque_loss": 0.12167823314666748 }, { "epoch": 21.366906474820144, "grad_norm": 0.33883076906204224, "learning_rate": 7.069118664311511e-05, "loss": 0.0798, "step": 23760 }, { "action_loss": 0.003689226694405079, "epoch": 21.366906474820144, "step": 23760 }, { "epoch": 21.366906474820144, "step": 23760, "torque_loss": 0.10381271690130234 }, { "epoch": 21.37589928057554, "grad_norm": 0.3373733162879944, "learning_rate": 7.06660960371867e-05, "loss": 0.0723, "step": 23770 }, { "action_loss": 0.014001076109707355, "epoch": 21.37589928057554, "step": 23770 }, { "epoch": 21.37589928057554, "step": 23770, "torque_loss": 0.12917304039001465 }, { "epoch": 21.384892086330936, "grad_norm": 0.34779322147369385, "learning_rate": 7.064099915344396e-05, "loss": 0.0921, "step": 23780 }, { "action_loss": 0.004310066346079111, "epoch": 21.384892086330936, "step": 23780 }, { "epoch": 21.384892086330936, "step": 23780, "torque_loss": 0.11352431774139404 }, { "epoch": 21.39388489208633, "grad_norm": 0.505897581577301, "learning_rate": 7.061589599951066e-05, "loss": 0.0888, "step": 23790 }, { "action_loss": 0.003356571076437831, "epoch": 21.39388489208633, "step": 23790 }, { "epoch": 21.39388489208633, "step": 23790, "torque_loss": 0.08410174399614334 }, { "epoch": 21.402877697841728, "grad_norm": 0.301106333732605, "learning_rate": 7.05907865830125e-05, "loss": 0.0863, "step": 23800 }, { "action_loss": 0.006267629563808441, "epoch": 21.402877697841728, "step": 23800 }, { "epoch": 21.402877697841728, "step": 23800, "torque_loss": 0.08994577080011368 }, { "epoch": 21.41187050359712, "grad_norm": 0.2886948585510254, "learning_rate": 7.056567091157703e-05, "loss": 0.0778, "step": 23810 }, { "action_loss": 0.010548601858317852, "epoch": 21.41187050359712, "step": 23810 }, { "epoch": 21.41187050359712, "step": 23810, "torque_loss": 0.13285617530345917 }, { "epoch": 21.42086330935252, "grad_norm": 0.24624568223953247, "learning_rate": 7.054054899283375e-05, "loss": 0.0831, "step": 23820 }, { "action_loss": 0.004908485803753138, "epoch": 21.42086330935252, "step": 23820 }, { "epoch": 21.42086330935252, "step": 23820, "torque_loss": 0.10439842939376831 }, { "epoch": 21.429856115107913, "grad_norm": 0.21488720178604126, "learning_rate": 7.051542083441403e-05, "loss": 0.0751, "step": 23830 }, { "action_loss": 0.005019283387809992, "epoch": 21.429856115107913, "step": 23830 }, { "epoch": 21.429856115107913, "step": 23830, "torque_loss": 0.09826960414648056 }, { "epoch": 21.43884892086331, "grad_norm": 0.28325194120407104, "learning_rate": 7.049028644395113e-05, "loss": 0.0815, "step": 23840 }, { "action_loss": 0.007219022139906883, "epoch": 21.43884892086331, "step": 23840 }, { "epoch": 21.43884892086331, "step": 23840, "torque_loss": 0.08605393767356873 }, { "epoch": 21.447841726618705, "grad_norm": 0.28388679027557373, "learning_rate": 7.046514582908024e-05, "loss": 0.0726, "step": 23850 }, { "action_loss": 0.006882587913423777, "epoch": 21.447841726618705, "step": 23850 }, { "epoch": 21.447841726618705, "step": 23850, "torque_loss": 0.09469994157552719 }, { "epoch": 21.4568345323741, "grad_norm": 0.3386783003807068, "learning_rate": 7.043999899743838e-05, "loss": 0.0793, "step": 23860 }, { "action_loss": 0.008535089902579784, "epoch": 21.4568345323741, "step": 23860 }, { "epoch": 21.4568345323741, "step": 23860, "torque_loss": 0.13860882818698883 }, { "epoch": 21.465827338129497, "grad_norm": 0.24573679268360138, "learning_rate": 7.041484595666451e-05, "loss": 0.0894, "step": 23870 }, { "action_loss": 0.008162986487150192, "epoch": 21.465827338129497, "step": 23870 }, { "epoch": 21.465827338129497, "step": 23870, "torque_loss": 0.1180153489112854 }, { "epoch": 21.47482014388489, "grad_norm": 0.29900315403938293, "learning_rate": 7.038968671439948e-05, "loss": 0.0822, "step": 23880 }, { "action_loss": 0.009715147316455841, "epoch": 21.47482014388489, "step": 23880 }, { "epoch": 21.47482014388489, "step": 23880, "torque_loss": 0.17723114788532257 }, { "epoch": 21.48381294964029, "grad_norm": 0.3493753671646118, "learning_rate": 7.036452127828596e-05, "loss": 0.094, "step": 23890 }, { "action_loss": 0.007672868203371763, "epoch": 21.48381294964029, "step": 23890 }, { "epoch": 21.48381294964029, "step": 23890, "torque_loss": 0.11824550479650497 }, { "epoch": 21.492805755395683, "grad_norm": 0.3526593744754791, "learning_rate": 7.033934965596859e-05, "loss": 0.073, "step": 23900 }, { "action_loss": 0.015284747816622257, "epoch": 21.492805755395683, "step": 23900 }, { "epoch": 21.492805755395683, "step": 23900, "torque_loss": 0.14503729343414307 }, { "epoch": 21.50179856115108, "grad_norm": 0.3623329997062683, "learning_rate": 7.031417185509381e-05, "loss": 0.0736, "step": 23910 }, { "action_loss": 0.006574598606675863, "epoch": 21.50179856115108, "step": 23910 }, { "epoch": 21.50179856115108, "step": 23910, "torque_loss": 0.12113312631845474 }, { "epoch": 21.510791366906474, "grad_norm": 0.22941982746124268, "learning_rate": 7.028898788331e-05, "loss": 0.0743, "step": 23920 }, { "action_loss": 0.0708036944270134, "epoch": 21.510791366906474, "step": 23920 }, { "epoch": 21.510791366906474, "step": 23920, "torque_loss": 0.1829289197921753 }, { "epoch": 21.519784172661872, "grad_norm": 0.341134637594223, "learning_rate": 7.026379774826736e-05, "loss": 0.097, "step": 23930 }, { "action_loss": 0.009825601242482662, "epoch": 21.519784172661872, "step": 23930 }, { "epoch": 21.519784172661872, "step": 23930, "torque_loss": 0.11124954372644424 }, { "epoch": 21.528776978417266, "grad_norm": 0.3989449143409729, "learning_rate": 7.0238601457618e-05, "loss": 0.0723, "step": 23940 }, { "action_loss": 0.015148240141570568, "epoch": 21.528776978417266, "step": 23940 }, { "epoch": 21.528776978417266, "step": 23940, "torque_loss": 0.16419686377048492 }, { "epoch": 21.53776978417266, "grad_norm": 0.29547956585884094, "learning_rate": 7.02133990190159e-05, "loss": 0.081, "step": 23950 }, { "action_loss": 0.005951233208179474, "epoch": 21.53776978417266, "step": 23950 }, { "epoch": 21.53776978417266, "step": 23950, "torque_loss": 0.11439886689186096 }, { "epoch": 21.546762589928058, "grad_norm": 0.36886805295944214, "learning_rate": 7.018819044011687e-05, "loss": 0.0897, "step": 23960 }, { "action_loss": 0.006749249994754791, "epoch": 21.546762589928058, "step": 23960 }, { "epoch": 21.546762589928058, "step": 23960, "torque_loss": 0.09628715366125107 }, { "epoch": 21.555755395683452, "grad_norm": 0.31177207827568054, "learning_rate": 7.016297572857863e-05, "loss": 0.0713, "step": 23970 }, { "action_loss": 0.002398131648078561, "epoch": 21.555755395683452, "step": 23970 }, { "epoch": 21.555755395683452, "step": 23970, "torque_loss": 0.12906785309314728 }, { "epoch": 21.56474820143885, "grad_norm": 0.2976647615432739, "learning_rate": 7.013775489206072e-05, "loss": 0.0749, "step": 23980 }, { "action_loss": 0.011301460675895214, "epoch": 21.56474820143885, "step": 23980 }, { "epoch": 21.56474820143885, "step": 23980, "torque_loss": 0.15707316994667053 }, { "epoch": 21.573741007194243, "grad_norm": 0.3263804614543915, "learning_rate": 7.01125279382246e-05, "loss": 0.0717, "step": 23990 }, { "action_loss": 0.012318238615989685, "epoch": 21.573741007194243, "step": 23990 }, { "epoch": 21.573741007194243, "step": 23990, "torque_loss": 0.17523054778575897 }, { "epoch": 21.58273381294964, "grad_norm": 0.3243844509124756, "learning_rate": 7.008729487473351e-05, "loss": 0.0823, "step": 24000 }, { "action_loss": 0.005917495582252741, "epoch": 21.58273381294964, "step": 24000 }, { "epoch": 21.58273381294964, "step": 24000, "torque_loss": 0.11798644065856934 }, { "epoch": 21.591726618705035, "grad_norm": 0.3441637456417084, "learning_rate": 7.006205570925263e-05, "loss": 0.089, "step": 24010 }, { "action_loss": 0.003216732293367386, "epoch": 21.591726618705035, "step": 24010 }, { "epoch": 21.591726618705035, "step": 24010, "torque_loss": 0.09215097874403 }, { "epoch": 21.600719424460433, "grad_norm": 0.3338419497013092, "learning_rate": 7.003681044944892e-05, "loss": 0.0803, "step": 24020 }, { "action_loss": 0.005135314539074898, "epoch": 21.600719424460433, "step": 24020 }, { "epoch": 21.600719424460433, "step": 24020, "torque_loss": 0.10179873555898666 }, { "epoch": 21.609712230215827, "grad_norm": 0.33765709400177, "learning_rate": 7.001155910299126e-05, "loss": 0.0815, "step": 24030 }, { "action_loss": 0.026032671332359314, "epoch": 21.609712230215827, "step": 24030 }, { "epoch": 21.609712230215827, "step": 24030, "torque_loss": 0.17629344761371613 }, { "epoch": 21.618705035971225, "grad_norm": 0.2565867304801941, "learning_rate": 6.99863016775503e-05, "loss": 0.0903, "step": 24040 }, { "action_loss": 0.00801235344260931, "epoch": 21.618705035971225, "step": 24040 }, { "epoch": 21.618705035971225, "step": 24040, "torque_loss": 0.13257646560668945 }, { "epoch": 21.62769784172662, "grad_norm": 0.3343800902366638, "learning_rate": 6.996103818079859e-05, "loss": 0.0829, "step": 24050 }, { "action_loss": 0.010481626726686954, "epoch": 21.62769784172662, "step": 24050 }, { "epoch": 21.62769784172662, "step": 24050, "torque_loss": 0.18034040927886963 }, { "epoch": 21.636690647482013, "grad_norm": 0.2999333143234253, "learning_rate": 6.993576862041054e-05, "loss": 0.0883, "step": 24060 }, { "action_loss": 0.007912714965641499, "epoch": 21.636690647482013, "step": 24060 }, { "epoch": 21.636690647482013, "step": 24060, "torque_loss": 0.12426582723855972 }, { "epoch": 21.64568345323741, "grad_norm": 0.3063163161277771, "learning_rate": 6.991049300406235e-05, "loss": 0.0735, "step": 24070 }, { "action_loss": 0.006735204253345728, "epoch": 21.64568345323741, "step": 24070 }, { "epoch": 21.64568345323741, "step": 24070, "torque_loss": 0.11358991265296936 }, { "epoch": 21.654676258992804, "grad_norm": 0.38485005497932434, "learning_rate": 6.988521133943209e-05, "loss": 0.0853, "step": 24080 }, { "action_loss": 0.01768595725297928, "epoch": 21.654676258992804, "step": 24080 }, { "epoch": 21.654676258992804, "step": 24080, "torque_loss": 0.17342519760131836 }, { "epoch": 21.663669064748202, "grad_norm": 0.36458033323287964, "learning_rate": 6.985992363419966e-05, "loss": 0.0918, "step": 24090 }, { "action_loss": 0.0041279662400484085, "epoch": 21.663669064748202, "step": 24090 }, { "epoch": 21.663669064748202, "step": 24090, "torque_loss": 0.10565634816884995 }, { "epoch": 21.672661870503596, "grad_norm": 0.37864580750465393, "learning_rate": 6.983462989604682e-05, "loss": 0.0885, "step": 24100 }, { "action_loss": 0.004425973631441593, "epoch": 21.672661870503596, "step": 24100 }, { "epoch": 21.672661870503596, "step": 24100, "torque_loss": 0.09847535938024521 }, { "epoch": 21.681654676258994, "grad_norm": 0.41206371784210205, "learning_rate": 6.980933013265709e-05, "loss": 0.0701, "step": 24110 }, { "action_loss": 0.00902226846665144, "epoch": 21.681654676258994, "step": 24110 }, { "epoch": 21.681654676258994, "step": 24110, "torque_loss": 0.1189795508980751 }, { "epoch": 21.690647482014388, "grad_norm": 0.44129112362861633, "learning_rate": 6.978402435171592e-05, "loss": 0.1069, "step": 24120 }, { "action_loss": 0.010820012539625168, "epoch": 21.690647482014388, "step": 24120 }, { "epoch": 21.690647482014388, "step": 24120, "torque_loss": 0.1096072793006897 }, { "epoch": 21.699640287769785, "grad_norm": 0.3171503245830536, "learning_rate": 6.975871256091052e-05, "loss": 0.0879, "step": 24130 }, { "action_loss": 0.002650093985721469, "epoch": 21.699640287769785, "step": 24130 }, { "epoch": 21.699640287769785, "step": 24130, "torque_loss": 0.08130853623151779 }, { "epoch": 21.70863309352518, "grad_norm": 0.35213974118232727, "learning_rate": 6.973339476792995e-05, "loss": 0.0749, "step": 24140 }, { "action_loss": 0.0053815762512385845, "epoch": 21.70863309352518, "step": 24140 }, { "epoch": 21.70863309352518, "step": 24140, "torque_loss": 0.11909613758325577 }, { "epoch": 21.717625899280577, "grad_norm": 0.3951173722743988, "learning_rate": 6.970807098046505e-05, "loss": 0.0745, "step": 24150 }, { "action_loss": 0.0034171382430940866, "epoch": 21.717625899280577, "step": 24150 }, { "epoch": 21.717625899280577, "step": 24150, "torque_loss": 0.08194129168987274 }, { "epoch": 21.72661870503597, "grad_norm": 0.43031075596809387, "learning_rate": 6.968274120620858e-05, "loss": 0.0828, "step": 24160 }, { "action_loss": 0.0034439528826624155, "epoch": 21.72661870503597, "step": 24160 }, { "epoch": 21.72661870503597, "step": 24160, "torque_loss": 0.09019043296575546 }, { "epoch": 21.735611510791365, "grad_norm": 0.3006274104118347, "learning_rate": 6.965740545285499e-05, "loss": 0.069, "step": 24170 }, { "action_loss": 0.006819461937993765, "epoch": 21.735611510791365, "step": 24170 }, { "epoch": 21.735611510791365, "step": 24170, "torque_loss": 0.092717744410038 }, { "epoch": 21.744604316546763, "grad_norm": 0.24978961050510406, "learning_rate": 6.963206372810068e-05, "loss": 0.0708, "step": 24180 }, { "action_loss": 0.0077166506089270115, "epoch": 21.744604316546763, "step": 24180 }, { "epoch": 21.744604316546763, "step": 24180, "torque_loss": 0.1642957329750061 }, { "epoch": 21.753597122302157, "grad_norm": 0.304638147354126, "learning_rate": 6.960671603964375e-05, "loss": 0.092, "step": 24190 }, { "action_loss": 0.033999502658843994, "epoch": 21.753597122302157, "step": 24190 }, { "epoch": 21.753597122302157, "step": 24190, "torque_loss": 0.17689307034015656 }, { "epoch": 21.762589928057555, "grad_norm": 0.3816242516040802, "learning_rate": 6.958136239518418e-05, "loss": 0.0807, "step": 24200 }, { "action_loss": 0.0035695694386959076, "epoch": 21.762589928057555, "step": 24200 }, { "epoch": 21.762589928057555, "step": 24200, "torque_loss": 0.06584589928388596 }, { "epoch": 21.77158273381295, "grad_norm": 0.339493066072464, "learning_rate": 6.955600280242371e-05, "loss": 0.0787, "step": 24210 }, { "action_loss": 0.005659842398017645, "epoch": 21.77158273381295, "step": 24210 }, { "epoch": 21.77158273381295, "step": 24210, "torque_loss": 0.10611746460199356 }, { "epoch": 21.780575539568346, "grad_norm": 0.29400375485420227, "learning_rate": 6.953063726906596e-05, "loss": 0.0744, "step": 24220 }, { "action_loss": 0.009176968596875668, "epoch": 21.780575539568346, "step": 24220 }, { "epoch": 21.780575539568346, "step": 24220, "torque_loss": 0.12012213468551636 }, { "epoch": 21.78956834532374, "grad_norm": 0.3532813489437103, "learning_rate": 6.950526580281626e-05, "loss": 0.0824, "step": 24230 }, { "action_loss": 0.00391989154741168, "epoch": 21.78956834532374, "step": 24230 }, { "epoch": 21.78956834532374, "step": 24230, "torque_loss": 0.11084706336259842 }, { "epoch": 21.798561151079138, "grad_norm": 0.33912193775177, "learning_rate": 6.947988841138184e-05, "loss": 0.0922, "step": 24240 }, { "action_loss": 0.0027906273026019335, "epoch": 21.798561151079138, "step": 24240 }, { "epoch": 21.798561151079138, "step": 24240, "torque_loss": 0.10488732904195786 }, { "epoch": 21.807553956834532, "grad_norm": 0.35843220353126526, "learning_rate": 6.945450510247165e-05, "loss": 0.0781, "step": 24250 }, { "action_loss": 0.007906532846391201, "epoch": 21.807553956834532, "step": 24250 }, { "epoch": 21.807553956834532, "step": 24250, "torque_loss": 0.12748263776302338 }, { "epoch": 21.81654676258993, "grad_norm": 0.3754189908504486, "learning_rate": 6.942911588379647e-05, "loss": 0.0797, "step": 24260 }, { "action_loss": 0.009025423787534237, "epoch": 21.81654676258993, "step": 24260 }, { "epoch": 21.81654676258993, "step": 24260, "torque_loss": 0.13933952152729034 }, { "epoch": 21.825539568345324, "grad_norm": 0.5114034414291382, "learning_rate": 6.940372076306888e-05, "loss": 0.0808, "step": 24270 }, { "action_loss": 0.011188398115336895, "epoch": 21.825539568345324, "step": 24270 }, { "epoch": 21.825539568345324, "step": 24270, "torque_loss": 0.1634095013141632 }, { "epoch": 21.834532374100718, "grad_norm": 0.29401540756225586, "learning_rate": 6.937831974800326e-05, "loss": 0.0798, "step": 24280 }, { "action_loss": 0.005663054063916206, "epoch": 21.834532374100718, "step": 24280 }, { "epoch": 21.834532374100718, "step": 24280, "torque_loss": 0.10558953881263733 }, { "epoch": 21.843525179856115, "grad_norm": 0.29438430070877075, "learning_rate": 6.935291284631574e-05, "loss": 0.0895, "step": 24290 }, { "action_loss": 0.024422645568847656, "epoch": 21.843525179856115, "step": 24290 }, { "epoch": 21.843525179856115, "step": 24290, "torque_loss": 0.16652065515518188 }, { "epoch": 21.85251798561151, "grad_norm": 0.31451985239982605, "learning_rate": 6.932750006572428e-05, "loss": 0.074, "step": 24300 }, { "action_loss": 0.007303353864699602, "epoch": 21.85251798561151, "step": 24300 }, { "epoch": 21.85251798561151, "step": 24300, "torque_loss": 0.09441620111465454 }, { "epoch": 21.861510791366907, "grad_norm": 0.2656010091304779, "learning_rate": 6.930208141394863e-05, "loss": 0.1052, "step": 24310 }, { "action_loss": 0.0064261252991855145, "epoch": 21.861510791366907, "step": 24310 }, { "epoch": 21.861510791366907, "step": 24310, "torque_loss": 0.1652422994375229 }, { "epoch": 21.8705035971223, "grad_norm": 0.29633504152297974, "learning_rate": 6.927665689871026e-05, "loss": 0.0748, "step": 24320 }, { "action_loss": 0.012149080634117126, "epoch": 21.8705035971223, "step": 24320 }, { "epoch": 21.8705035971223, "step": 24320, "torque_loss": 0.16241617500782013 }, { "epoch": 21.8794964028777, "grad_norm": 0.36991414427757263, "learning_rate": 6.925122652773253e-05, "loss": 0.08, "step": 24330 }, { "action_loss": 0.0036016779486089945, "epoch": 21.8794964028777, "step": 24330 }, { "epoch": 21.8794964028777, "step": 24330, "torque_loss": 0.10895057767629623 }, { "epoch": 21.888489208633093, "grad_norm": 0.3290375769138336, "learning_rate": 6.922579030874046e-05, "loss": 0.0811, "step": 24340 }, { "action_loss": 0.014348994009196758, "epoch": 21.888489208633093, "step": 24340 }, { "epoch": 21.888489208633093, "step": 24340, "torque_loss": 0.17044083774089813 }, { "epoch": 21.89748201438849, "grad_norm": 0.29252058267593384, "learning_rate": 6.920034824946093e-05, "loss": 0.0892, "step": 24350 }, { "action_loss": 0.006073296070098877, "epoch": 21.89748201438849, "step": 24350 }, { "epoch": 21.89748201438849, "step": 24350, "torque_loss": 0.09466293454170227 }, { "epoch": 21.906474820143885, "grad_norm": 0.2596883773803711, "learning_rate": 6.917490035762255e-05, "loss": 0.0795, "step": 24360 }, { "action_loss": 0.04117971286177635, "epoch": 21.906474820143885, "step": 24360 }, { "epoch": 21.906474820143885, "step": 24360, "torque_loss": 0.20683710277080536 }, { "epoch": 21.915467625899282, "grad_norm": 0.33828797936439514, "learning_rate": 6.914944664095573e-05, "loss": 0.1028, "step": 24370 }, { "action_loss": 0.004047393333166838, "epoch": 21.915467625899282, "step": 24370 }, { "epoch": 21.915467625899282, "step": 24370, "torque_loss": 0.12441539019346237 }, { "epoch": 21.924460431654676, "grad_norm": 0.3695458769798279, "learning_rate": 6.912398710719264e-05, "loss": 0.0793, "step": 24380 }, { "action_loss": 0.003531466471031308, "epoch": 21.924460431654676, "step": 24380 }, { "epoch": 21.924460431654676, "step": 24380, "torque_loss": 0.11641012877225876 }, { "epoch": 21.93345323741007, "grad_norm": 0.23989799618721008, "learning_rate": 6.90985217640672e-05, "loss": 0.0648, "step": 24390 }, { "action_loss": 0.005852987989783287, "epoch": 21.93345323741007, "step": 24390 }, { "epoch": 21.93345323741007, "step": 24390, "torque_loss": 0.09266213327646255 }, { "epoch": 21.942446043165468, "grad_norm": 0.28215330839157104, "learning_rate": 6.90730506193151e-05, "loss": 0.079, "step": 24400 }, { "action_loss": 0.005199176725000143, "epoch": 21.942446043165468, "step": 24400 }, { "epoch": 21.942446043165468, "step": 24400, "torque_loss": 0.10702375322580338 }, { "epoch": 21.951438848920862, "grad_norm": 0.2901553809642792, "learning_rate": 6.904757368067384e-05, "loss": 0.0795, "step": 24410 }, { "action_loss": 0.010889534838497639, "epoch": 21.951438848920862, "step": 24410 }, { "epoch": 21.951438848920862, "step": 24410, "torque_loss": 0.09012635797262192 }, { "epoch": 21.96043165467626, "grad_norm": 0.2887428104877472, "learning_rate": 6.90220909558826e-05, "loss": 0.0926, "step": 24420 }, { "action_loss": 0.006995562929660082, "epoch": 21.96043165467626, "step": 24420 }, { "epoch": 21.96043165467626, "step": 24420, "torque_loss": 0.11214947700500488 }, { "epoch": 21.969424460431654, "grad_norm": 0.26208174228668213, "learning_rate": 6.899660245268237e-05, "loss": 0.0761, "step": 24430 }, { "action_loss": 0.003369879210367799, "epoch": 21.969424460431654, "step": 24430 }, { "epoch": 21.969424460431654, "step": 24430, "torque_loss": 0.07243002206087112 }, { "epoch": 21.97841726618705, "grad_norm": 0.2583374083042145, "learning_rate": 6.897110817881592e-05, "loss": 0.084, "step": 24440 }, { "action_loss": 0.008821395225822926, "epoch": 21.97841726618705, "step": 24440 }, { "epoch": 21.97841726618705, "step": 24440, "torque_loss": 0.1329847127199173 }, { "epoch": 21.987410071942445, "grad_norm": 0.3523102402687073, "learning_rate": 6.894560814202769e-05, "loss": 0.0881, "step": 24450 }, { "action_loss": 0.00244846916757524, "epoch": 21.987410071942445, "step": 24450 }, { "epoch": 21.987410071942445, "step": 24450, "torque_loss": 0.06871955841779709 }, { "epoch": 21.996402877697843, "grad_norm": 0.3392413854598999, "learning_rate": 6.892010235006394e-05, "loss": 0.0952, "step": 24460 }, { "action_loss": 0.004715557210147381, "epoch": 21.996402877697843, "step": 24460 }, { "epoch": 21.996402877697843, "step": 24460, "torque_loss": 0.12700536847114563 }, { "epoch": 22.005395683453237, "grad_norm": 0.25139808654785156, "learning_rate": 6.889459081067264e-05, "loss": 0.072, "step": 24470 }, { "action_loss": 0.005910065025091171, "epoch": 22.005395683453237, "step": 24470 }, { "epoch": 22.005395683453237, "step": 24470, "torque_loss": 0.1137310341000557 }, { "epoch": 22.014388489208635, "grad_norm": 0.31812357902526855, "learning_rate": 6.886907353160356e-05, "loss": 0.0849, "step": 24480 }, { "action_loss": 0.01693611778318882, "epoch": 22.014388489208635, "step": 24480 }, { "epoch": 22.014388489208635, "step": 24480, "torque_loss": 0.19933223724365234 }, { "epoch": 22.02338129496403, "grad_norm": 0.4044528901576996, "learning_rate": 6.884355052060814e-05, "loss": 0.0801, "step": 24490 }, { "action_loss": 0.007822438143193722, "epoch": 22.02338129496403, "step": 24490 }, { "epoch": 22.02338129496403, "step": 24490, "torque_loss": 0.12086714059114456 }, { "epoch": 22.032374100719423, "grad_norm": 0.4201582372188568, "learning_rate": 6.88180217854396e-05, "loss": 0.0784, "step": 24500 }, { "action_loss": 0.00344061735086143, "epoch": 22.032374100719423, "step": 24500 }, { "epoch": 22.032374100719423, "step": 24500, "torque_loss": 0.0784129872918129 }, { "epoch": 22.04136690647482, "grad_norm": 0.24325047433376312, "learning_rate": 6.87924873338529e-05, "loss": 0.0889, "step": 24510 }, { "action_loss": 0.01270278263837099, "epoch": 22.04136690647482, "step": 24510 }, { "epoch": 22.04136690647482, "step": 24510, "torque_loss": 0.1226864755153656 }, { "epoch": 22.050359712230215, "grad_norm": 0.3548344075679779, "learning_rate": 6.876694717360475e-05, "loss": 0.084, "step": 24520 }, { "action_loss": 0.013290826231241226, "epoch": 22.050359712230215, "step": 24520 }, { "epoch": 22.050359712230215, "step": 24520, "torque_loss": 0.11304964870214462 }, { "epoch": 22.059352517985612, "grad_norm": 0.2357633262872696, "learning_rate": 6.874140131245355e-05, "loss": 0.0802, "step": 24530 }, { "action_loss": 0.012196493335068226, "epoch": 22.059352517985612, "step": 24530 }, { "epoch": 22.059352517985612, "step": 24530, "torque_loss": 0.15020905435085297 }, { "epoch": 22.068345323741006, "grad_norm": 0.3747880160808563, "learning_rate": 6.871584975815948e-05, "loss": 0.0872, "step": 24540 }, { "action_loss": 0.0027260640636086464, "epoch": 22.068345323741006, "step": 24540 }, { "epoch": 22.068345323741006, "step": 24540, "torque_loss": 0.12578682601451874 }, { "epoch": 22.077338129496404, "grad_norm": 0.28899288177490234, "learning_rate": 6.86902925184844e-05, "loss": 0.0736, "step": 24550 }, { "action_loss": 0.002836144296452403, "epoch": 22.077338129496404, "step": 24550 }, { "epoch": 22.077338129496404, "step": 24550, "torque_loss": 0.08523503690958023 }, { "epoch": 22.086330935251798, "grad_norm": 0.3288339674472809, "learning_rate": 6.866472960119195e-05, "loss": 0.0888, "step": 24560 }, { "action_loss": 0.006458842661231756, "epoch": 22.086330935251798, "step": 24560 }, { "epoch": 22.086330935251798, "step": 24560, "torque_loss": 0.09862402826547623 }, { "epoch": 22.095323741007196, "grad_norm": 0.2423548400402069, "learning_rate": 6.863916101404748e-05, "loss": 0.077, "step": 24570 }, { "action_loss": 0.00353326671756804, "epoch": 22.095323741007196, "step": 24570 }, { "epoch": 22.095323741007196, "step": 24570, "torque_loss": 0.09288924932479858 }, { "epoch": 22.10431654676259, "grad_norm": 0.35865041613578796, "learning_rate": 6.8613586764818e-05, "loss": 0.0721, "step": 24580 }, { "action_loss": 0.003206765977665782, "epoch": 22.10431654676259, "step": 24580 }, { "epoch": 22.10431654676259, "step": 24580, "torque_loss": 0.1108783707022667 }, { "epoch": 22.113309352517987, "grad_norm": 0.24177321791648865, "learning_rate": 6.858800686127233e-05, "loss": 0.075, "step": 24590 }, { "action_loss": 0.010137815959751606, "epoch": 22.113309352517987, "step": 24590 }, { "epoch": 22.113309352517987, "step": 24590, "torque_loss": 0.1774774044752121 }, { "epoch": 22.12230215827338, "grad_norm": 0.36737412214279175, "learning_rate": 6.856242131118097e-05, "loss": 0.079, "step": 24600 }, { "action_loss": 0.009435710497200489, "epoch": 22.12230215827338, "step": 24600 }, { "epoch": 22.12230215827338, "step": 24600, "torque_loss": 0.12492106109857559 }, { "epoch": 22.131294964028775, "grad_norm": 0.37350183725357056, "learning_rate": 6.853683012231614e-05, "loss": 0.0792, "step": 24610 }, { "action_loss": 0.002391454065218568, "epoch": 22.131294964028775, "step": 24610 }, { "epoch": 22.131294964028775, "step": 24610, "torque_loss": 0.10151815414428711 }, { "epoch": 22.140287769784173, "grad_norm": 0.42100176215171814, "learning_rate": 6.851123330245173e-05, "loss": 0.0766, "step": 24620 }, { "action_loss": 0.009561662562191486, "epoch": 22.140287769784173, "step": 24620 }, { "epoch": 22.140287769784173, "step": 24620, "torque_loss": 0.14868788421154022 }, { "epoch": 22.149280575539567, "grad_norm": 0.30347689986228943, "learning_rate": 6.848563085936343e-05, "loss": 0.0786, "step": 24630 }, { "action_loss": 0.0029126068111509085, "epoch": 22.149280575539567, "step": 24630 }, { "epoch": 22.149280575539567, "step": 24630, "torque_loss": 0.056038033217191696 }, { "epoch": 22.158273381294965, "grad_norm": 0.2767152190208435, "learning_rate": 6.846002280082853e-05, "loss": 0.0778, "step": 24640 }, { "action_loss": 0.00724721560254693, "epoch": 22.158273381294965, "step": 24640 }, { "epoch": 22.158273381294965, "step": 24640, "torque_loss": 0.11127257347106934 }, { "epoch": 22.16726618705036, "grad_norm": 0.257345974445343, "learning_rate": 6.843440913462614e-05, "loss": 0.0685, "step": 24650 }, { "action_loss": 0.002941598417237401, "epoch": 22.16726618705036, "step": 24650 }, { "epoch": 22.16726618705036, "step": 24650, "torque_loss": 0.08080194145441055 }, { "epoch": 22.176258992805757, "grad_norm": 0.32458046078681946, "learning_rate": 6.840878986853698e-05, "loss": 0.0774, "step": 24660 }, { "action_loss": 0.004126891493797302, "epoch": 22.176258992805757, "step": 24660 }, { "epoch": 22.176258992805757, "step": 24660, "torque_loss": 0.12845836579799652 }, { "epoch": 22.18525179856115, "grad_norm": 0.31304633617401123, "learning_rate": 6.838316501034352e-05, "loss": 0.0801, "step": 24670 }, { "action_loss": 0.00630401074886322, "epoch": 22.18525179856115, "step": 24670 }, { "epoch": 22.18525179856115, "step": 24670, "torque_loss": 0.11301837116479874 }, { "epoch": 22.194244604316548, "grad_norm": 0.21563708782196045, "learning_rate": 6.83575345678299e-05, "loss": 0.0693, "step": 24680 }, { "action_loss": 0.012431693263351917, "epoch": 22.194244604316548, "step": 24680 }, { "epoch": 22.194244604316548, "step": 24680, "torque_loss": 0.11531414836645126 }, { "epoch": 22.203237410071942, "grad_norm": 0.3052273392677307, "learning_rate": 6.833189854878196e-05, "loss": 0.0821, "step": 24690 }, { "action_loss": 0.004057707730680704, "epoch": 22.203237410071942, "step": 24690 }, { "epoch": 22.203237410071942, "step": 24690, "torque_loss": 0.09630704671144485 }, { "epoch": 22.21223021582734, "grad_norm": 0.36466071009635925, "learning_rate": 6.83062569609873e-05, "loss": 0.0743, "step": 24700 }, { "action_loss": 0.009520280174911022, "epoch": 22.21223021582734, "step": 24700 }, { "epoch": 22.21223021582734, "step": 24700, "torque_loss": 0.12313926964998245 }, { "epoch": 22.221223021582734, "grad_norm": 0.2932778596878052, "learning_rate": 6.828060981223512e-05, "loss": 0.0725, "step": 24710 }, { "action_loss": 0.004081482533365488, "epoch": 22.221223021582734, "step": 24710 }, { "epoch": 22.221223021582734, "step": 24710, "torque_loss": 0.06526116281747818 }, { "epoch": 22.230215827338128, "grad_norm": 0.26908814907073975, "learning_rate": 6.825495711031634e-05, "loss": 0.0758, "step": 24720 }, { "action_loss": 0.006789425853639841, "epoch": 22.230215827338128, "step": 24720 }, { "epoch": 22.230215827338128, "step": 24720, "torque_loss": 0.09858707338571548 }, { "epoch": 22.239208633093526, "grad_norm": 0.286139577627182, "learning_rate": 6.822929886302359e-05, "loss": 0.0667, "step": 24730 }, { "action_loss": 0.002714812057092786, "epoch": 22.239208633093526, "step": 24730 }, { "epoch": 22.239208633093526, "step": 24730, "torque_loss": 0.08540085703134537 }, { "epoch": 22.24820143884892, "grad_norm": 0.3721581697463989, "learning_rate": 6.820363507815116e-05, "loss": 0.0671, "step": 24740 }, { "action_loss": 0.005569318775087595, "epoch": 22.24820143884892, "step": 24740 }, { "epoch": 22.24820143884892, "step": 24740, "torque_loss": 0.10223084688186646 }, { "epoch": 22.257194244604317, "grad_norm": 0.3569082021713257, "learning_rate": 6.817796576349501e-05, "loss": 0.0825, "step": 24750 }, { "action_loss": 0.006347042974084616, "epoch": 22.257194244604317, "step": 24750 }, { "epoch": 22.257194244604317, "step": 24750, "torque_loss": 0.11300138384103775 }, { "epoch": 22.26618705035971, "grad_norm": 0.36813345551490784, "learning_rate": 6.815229092685285e-05, "loss": 0.0947, "step": 24760 }, { "action_loss": 0.009614763781428337, "epoch": 22.26618705035971, "step": 24760 }, { "epoch": 22.26618705035971, "step": 24760, "torque_loss": 0.1179107055068016 }, { "epoch": 22.27517985611511, "grad_norm": 0.3258046507835388, "learning_rate": 6.812661057602399e-05, "loss": 0.0729, "step": 24770 }, { "action_loss": 0.009285487234592438, "epoch": 22.27517985611511, "step": 24770 }, { "epoch": 22.27517985611511, "step": 24770, "torque_loss": 0.11426812410354614 }, { "epoch": 22.284172661870503, "grad_norm": 0.26070141792297363, "learning_rate": 6.810092471880943e-05, "loss": 0.0776, "step": 24780 }, { "action_loss": 0.01110096275806427, "epoch": 22.284172661870503, "step": 24780 }, { "epoch": 22.284172661870503, "step": 24780, "torque_loss": 0.10445747524499893 }, { "epoch": 22.2931654676259, "grad_norm": 0.36670348048210144, "learning_rate": 6.807523336301187e-05, "loss": 0.0827, "step": 24790 }, { "action_loss": 0.01044538151472807, "epoch": 22.2931654676259, "step": 24790 }, { "epoch": 22.2931654676259, "step": 24790, "torque_loss": 0.15689347684383392 }, { "epoch": 22.302158273381295, "grad_norm": 0.3348888158798218, "learning_rate": 6.804953651643566e-05, "loss": 0.0941, "step": 24800 }, { "action_loss": 0.013334299437701702, "epoch": 22.302158273381295, "step": 24800 }, { "epoch": 22.302158273381295, "step": 24800, "torque_loss": 0.17377883195877075 }, { "epoch": 22.31115107913669, "grad_norm": 0.2592044472694397, "learning_rate": 6.802383418688685e-05, "loss": 0.0872, "step": 24810 }, { "action_loss": 0.00952133908867836, "epoch": 22.31115107913669, "step": 24810 }, { "epoch": 22.31115107913669, "step": 24810, "torque_loss": 0.11315611749887466 }, { "epoch": 22.320143884892087, "grad_norm": 0.3159818649291992, "learning_rate": 6.799812638217309e-05, "loss": 0.087, "step": 24820 }, { "action_loss": 0.005542483646422625, "epoch": 22.320143884892087, "step": 24820 }, { "epoch": 22.320143884892087, "step": 24820, "torque_loss": 0.16306498646736145 }, { "epoch": 22.32913669064748, "grad_norm": 0.37411120533943176, "learning_rate": 6.797241311010373e-05, "loss": 0.0878, "step": 24830 }, { "action_loss": 0.0169984083622694, "epoch": 22.32913669064748, "step": 24830 }, { "epoch": 22.32913669064748, "step": 24830, "torque_loss": 0.15032429993152618 }, { "epoch": 22.33812949640288, "grad_norm": 0.3160530924797058, "learning_rate": 6.794669437848982e-05, "loss": 0.0821, "step": 24840 }, { "action_loss": 0.012192375026643276, "epoch": 22.33812949640288, "step": 24840 }, { "epoch": 22.33812949640288, "step": 24840, "torque_loss": 0.14648406207561493 }, { "epoch": 22.347122302158272, "grad_norm": 0.34213531017303467, "learning_rate": 6.792097019514402e-05, "loss": 0.0822, "step": 24850 }, { "action_loss": 0.006640389561653137, "epoch": 22.347122302158272, "step": 24850 }, { "epoch": 22.347122302158272, "step": 24850, "torque_loss": 0.11371854692697525 }, { "epoch": 22.35611510791367, "grad_norm": 0.3159818947315216, "learning_rate": 6.789524056788064e-05, "loss": 0.0796, "step": 24860 }, { "action_loss": 0.010599065572023392, "epoch": 22.35611510791367, "step": 24860 }, { "epoch": 22.35611510791367, "step": 24860, "torque_loss": 0.11967047303915024 }, { "epoch": 22.365107913669064, "grad_norm": 0.21515347063541412, "learning_rate": 6.786950550451567e-05, "loss": 0.071, "step": 24870 }, { "action_loss": 0.004531612619757652, "epoch": 22.365107913669064, "step": 24870 }, { "epoch": 22.365107913669064, "step": 24870, "torque_loss": 0.11757519841194153 }, { "epoch": 22.37410071942446, "grad_norm": 0.33676186203956604, "learning_rate": 6.784376501286676e-05, "loss": 0.083, "step": 24880 }, { "action_loss": 0.0036653310526162386, "epoch": 22.37410071942446, "step": 24880 }, { "epoch": 22.37410071942446, "step": 24880, "torque_loss": 0.11043087393045425 }, { "epoch": 22.383093525179856, "grad_norm": 0.24107056856155396, "learning_rate": 6.781801910075316e-05, "loss": 0.0701, "step": 24890 }, { "action_loss": 0.009513032622635365, "epoch": 22.383093525179856, "step": 24890 }, { "epoch": 22.383093525179856, "step": 24890, "torque_loss": 0.14891675114631653 }, { "epoch": 22.392086330935253, "grad_norm": 0.261544793844223, "learning_rate": 6.779226777599581e-05, "loss": 0.0778, "step": 24900 }, { "action_loss": 0.003030239837244153, "epoch": 22.392086330935253, "step": 24900 }, { "epoch": 22.392086330935253, "step": 24900, "torque_loss": 0.08199027925729752 }, { "epoch": 22.401079136690647, "grad_norm": 0.27093878388404846, "learning_rate": 6.776651104641729e-05, "loss": 0.0836, "step": 24910 }, { "action_loss": 0.0030419034883379936, "epoch": 22.401079136690647, "step": 24910 }, { "epoch": 22.401079136690647, "step": 24910, "torque_loss": 0.12218377739191055 }, { "epoch": 22.41007194244604, "grad_norm": 0.37000298500061035, "learning_rate": 6.774074891984183e-05, "loss": 0.083, "step": 24920 }, { "action_loss": 0.0037719709798693657, "epoch": 22.41007194244604, "step": 24920 }, { "epoch": 22.41007194244604, "step": 24920, "torque_loss": 0.06876856833696365 }, { "epoch": 22.41906474820144, "grad_norm": 0.2598024010658264, "learning_rate": 6.771498140409526e-05, "loss": 0.0798, "step": 24930 }, { "action_loss": 0.003092416562139988, "epoch": 22.41906474820144, "step": 24930 }, { "epoch": 22.41906474820144, "step": 24930, "torque_loss": 0.0762961208820343 }, { "epoch": 22.428057553956833, "grad_norm": 0.30556201934814453, "learning_rate": 6.768920850700506e-05, "loss": 0.0705, "step": 24940 }, { "action_loss": 0.03520018607378006, "epoch": 22.428057553956833, "step": 24940 }, { "epoch": 22.428057553956833, "step": 24940, "torque_loss": 0.1737503558397293 }, { "epoch": 22.43705035971223, "grad_norm": 0.33987632393836975, "learning_rate": 6.766343023640039e-05, "loss": 0.0915, "step": 24950 }, { "action_loss": 0.004366819281131029, "epoch": 22.43705035971223, "step": 24950 }, { "epoch": 22.43705035971223, "step": 24950, "torque_loss": 0.10024221986532211 }, { "epoch": 22.446043165467625, "grad_norm": 0.46019506454467773, "learning_rate": 6.763764660011198e-05, "loss": 0.0806, "step": 24960 }, { "action_loss": 0.003101175418123603, "epoch": 22.446043165467625, "step": 24960 }, { "epoch": 22.446043165467625, "step": 24960, "torque_loss": 0.13652794063091278 }, { "epoch": 22.455035971223023, "grad_norm": 0.27629518508911133, "learning_rate": 6.761185760597223e-05, "loss": 0.0726, "step": 24970 }, { "action_loss": 0.006508382502943277, "epoch": 22.455035971223023, "step": 24970 }, { "epoch": 22.455035971223023, "step": 24970, "torque_loss": 0.13320773839950562 }, { "epoch": 22.464028776978417, "grad_norm": 0.26535969972610474, "learning_rate": 6.758606326181515e-05, "loss": 0.0831, "step": 24980 }, { "action_loss": 0.003060925053432584, "epoch": 22.464028776978417, "step": 24980 }, { "epoch": 22.464028776978417, "step": 24980, "torque_loss": 0.12193995714187622 }, { "epoch": 22.473021582733814, "grad_norm": 0.3992759883403778, "learning_rate": 6.75602635754764e-05, "loss": 0.0745, "step": 24990 }, { "action_loss": 0.005197544582188129, "epoch": 22.473021582733814, "step": 24990 }, { "epoch": 22.473021582733814, "step": 24990, "torque_loss": 0.09235844761133194 }, { "epoch": 22.48201438848921, "grad_norm": 0.28137993812561035, "learning_rate": 6.75344585547932e-05, "loss": 0.0837, "step": 25000 }, { "action_loss": 0.004651012364774942, "epoch": 22.48201438848921, "step": 25000 }, { "epoch": 22.48201438848921, "step": 25000, "torque_loss": 0.1588708609342575 }, { "epoch": 22.491007194244606, "grad_norm": 0.3669337034225464, "learning_rate": 6.750864820760449e-05, "loss": 0.0754, "step": 25010 }, { "action_loss": 0.003113906132057309, "epoch": 22.491007194244606, "step": 25010 }, { "epoch": 22.491007194244606, "step": 25010, "torque_loss": 0.1098475232720375 }, { "epoch": 22.5, "grad_norm": 0.3866216838359833, "learning_rate": 6.748283254175072e-05, "loss": 0.0867, "step": 25020 }, { "action_loss": 0.011826052330434322, "epoch": 22.5, "step": 25020 }, { "epoch": 22.5, "step": 25020, "torque_loss": 0.1729486733675003 }, { "epoch": 22.508992805755394, "grad_norm": 0.37527358531951904, "learning_rate": 6.745701156507404e-05, "loss": 0.0834, "step": 25030 }, { "action_loss": 0.002212947467342019, "epoch": 22.508992805755394, "step": 25030 }, { "epoch": 22.508992805755394, "step": 25030, "torque_loss": 0.09166247397661209 }, { "epoch": 22.51798561151079, "grad_norm": 0.29136642813682556, "learning_rate": 6.743118528541818e-05, "loss": 0.0686, "step": 25040 }, { "action_loss": 0.0033121760934591293, "epoch": 22.51798561151079, "step": 25040 }, { "epoch": 22.51798561151079, "step": 25040, "torque_loss": 0.1077820286154747 }, { "epoch": 22.526978417266186, "grad_norm": 0.3503836989402771, "learning_rate": 6.740535371062846e-05, "loss": 0.0787, "step": 25050 }, { "action_loss": 0.007201194763183594, "epoch": 22.526978417266186, "step": 25050 }, { "epoch": 22.526978417266186, "step": 25050, "torque_loss": 0.11787388473749161 }, { "epoch": 22.535971223021583, "grad_norm": 0.30883699655532837, "learning_rate": 6.737951684855185e-05, "loss": 0.073, "step": 25060 }, { "action_loss": 0.007754489779472351, "epoch": 22.535971223021583, "step": 25060 }, { "epoch": 22.535971223021583, "step": 25060, "torque_loss": 0.09018342941999435 }, { "epoch": 22.544964028776977, "grad_norm": 0.4646201729774475, "learning_rate": 6.735367470703691e-05, "loss": 0.0814, "step": 25070 }, { "action_loss": 0.00656931335106492, "epoch": 22.544964028776977, "step": 25070 }, { "epoch": 22.544964028776977, "step": 25070, "torque_loss": 0.12133874744176865 }, { "epoch": 22.553956834532375, "grad_norm": 0.3989839553833008, "learning_rate": 6.732782729393379e-05, "loss": 0.0739, "step": 25080 }, { "action_loss": 0.005677813198417425, "epoch": 22.553956834532375, "step": 25080 }, { "epoch": 22.553956834532375, "step": 25080, "torque_loss": 0.0913066640496254 }, { "epoch": 22.56294964028777, "grad_norm": 0.39083462953567505, "learning_rate": 6.730197461709425e-05, "loss": 0.0743, "step": 25090 }, { "action_loss": 0.004431446548551321, "epoch": 22.56294964028777, "step": 25090 }, { "epoch": 22.56294964028777, "step": 25090, "torque_loss": 0.1006883755326271 }, { "epoch": 22.571942446043167, "grad_norm": 0.3723694086074829, "learning_rate": 6.727611668437164e-05, "loss": 0.0841, "step": 25100 }, { "action_loss": 0.006658031139522791, "epoch": 22.571942446043167, "step": 25100 }, { "epoch": 22.571942446043167, "step": 25100, "torque_loss": 0.1718875914812088 }, { "epoch": 22.58093525179856, "grad_norm": 0.39551296830177307, "learning_rate": 6.725025350362094e-05, "loss": 0.0738, "step": 25110 }, { "action_loss": 0.005147768650203943, "epoch": 22.58093525179856, "step": 25110 }, { "epoch": 22.58093525179856, "step": 25110, "torque_loss": 0.10644448548555374 }, { "epoch": 22.58992805755396, "grad_norm": 0.2682115435600281, "learning_rate": 6.72243850826987e-05, "loss": 0.0713, "step": 25120 }, { "action_loss": 0.006125787738710642, "epoch": 22.58992805755396, "step": 25120 }, { "epoch": 22.58992805755396, "step": 25120, "torque_loss": 0.12768495082855225 }, { "epoch": 22.598920863309353, "grad_norm": 0.3235085904598236, "learning_rate": 6.719851142946305e-05, "loss": 0.0743, "step": 25130 }, { "action_loss": 0.005825674161314964, "epoch": 22.598920863309353, "step": 25130 }, { "epoch": 22.598920863309353, "step": 25130, "torque_loss": 0.14552414417266846 }, { "epoch": 22.607913669064747, "grad_norm": 0.3965200185775757, "learning_rate": 6.717263255177372e-05, "loss": 0.0951, "step": 25140 }, { "action_loss": 0.006059898063540459, "epoch": 22.607913669064747, "step": 25140 }, { "epoch": 22.607913669064747, "step": 25140, "torque_loss": 0.12444709986448288 }, { "epoch": 22.616906474820144, "grad_norm": 0.24167412519454956, "learning_rate": 6.714674845749205e-05, "loss": 0.0776, "step": 25150 }, { "action_loss": 0.0096846679225564, "epoch": 22.616906474820144, "step": 25150 }, { "epoch": 22.616906474820144, "step": 25150, "torque_loss": 0.1249786838889122 }, { "epoch": 22.62589928057554, "grad_norm": 0.36384281516075134, "learning_rate": 6.712085915448092e-05, "loss": 0.0829, "step": 25160 }, { "action_loss": 0.0041747684590518475, "epoch": 22.62589928057554, "step": 25160 }, { "epoch": 22.62589928057554, "step": 25160, "torque_loss": 0.10156013816595078 }, { "epoch": 22.634892086330936, "grad_norm": 0.39392709732055664, "learning_rate": 6.709496465060486e-05, "loss": 0.0776, "step": 25170 }, { "action_loss": 0.002727583982050419, "epoch": 22.634892086330936, "step": 25170 }, { "epoch": 22.634892086330936, "step": 25170, "torque_loss": 0.09101156145334244 }, { "epoch": 22.64388489208633, "grad_norm": 0.32335054874420166, "learning_rate": 6.706906495372987e-05, "loss": 0.0714, "step": 25180 }, { "action_loss": 0.004210109356790781, "epoch": 22.64388489208633, "step": 25180 }, { "epoch": 22.64388489208633, "step": 25180, "torque_loss": 0.10866576433181763 }, { "epoch": 22.652877697841728, "grad_norm": 0.3649648427963257, "learning_rate": 6.704316007172365e-05, "loss": 0.0724, "step": 25190 }, { "action_loss": 0.006703610066324472, "epoch": 22.652877697841728, "step": 25190 }, { "epoch": 22.652877697841728, "step": 25190, "torque_loss": 0.12142805010080338 }, { "epoch": 22.66187050359712, "grad_norm": 0.24004550278186798, "learning_rate": 6.701725001245539e-05, "loss": 0.0884, "step": 25200 }, { "action_loss": 0.018801603466272354, "epoch": 22.66187050359712, "step": 25200 }, { "epoch": 22.66187050359712, "step": 25200, "torque_loss": 0.18475128710269928 }, { "epoch": 22.67086330935252, "grad_norm": 0.28512459993362427, "learning_rate": 6.699133478379588e-05, "loss": 0.0704, "step": 25210 }, { "action_loss": 0.007215478923171759, "epoch": 22.67086330935252, "step": 25210 }, { "epoch": 22.67086330935252, "step": 25210, "torque_loss": 0.1398775577545166 }, { "epoch": 22.679856115107913, "grad_norm": 0.22022849321365356, "learning_rate": 6.69654143936175e-05, "loss": 0.0726, "step": 25220 }, { "action_loss": 0.003861231030896306, "epoch": 22.679856115107913, "step": 25220 }, { "epoch": 22.679856115107913, "step": 25220, "torque_loss": 0.11658388376235962 }, { "epoch": 22.68884892086331, "grad_norm": 0.3727239966392517, "learning_rate": 6.693948884979419e-05, "loss": 0.0824, "step": 25230 }, { "action_loss": 0.00459104822948575, "epoch": 22.68884892086331, "step": 25230 }, { "epoch": 22.68884892086331, "step": 25230, "torque_loss": 0.1272730678319931 }, { "epoch": 22.697841726618705, "grad_norm": 0.27419331669807434, "learning_rate": 6.691355816020142e-05, "loss": 0.0697, "step": 25240 }, { "action_loss": 0.007406793534755707, "epoch": 22.697841726618705, "step": 25240 }, { "epoch": 22.697841726618705, "step": 25240, "torque_loss": 0.1228947639465332 }, { "epoch": 22.7068345323741, "grad_norm": 0.2258613556623459, "learning_rate": 6.688762233271624e-05, "loss": 0.0668, "step": 25250 }, { "action_loss": 0.005786702036857605, "epoch": 22.7068345323741, "step": 25250 }, { "epoch": 22.7068345323741, "step": 25250, "torque_loss": 0.1516328752040863 }, { "epoch": 22.715827338129497, "grad_norm": 0.2949698269367218, "learning_rate": 6.68616813752173e-05, "loss": 0.083, "step": 25260 }, { "action_loss": 0.0035449638962745667, "epoch": 22.715827338129497, "step": 25260 }, { "epoch": 22.715827338129497, "step": 25260, "torque_loss": 0.10557854175567627 }, { "epoch": 22.72482014388489, "grad_norm": 0.29289352893829346, "learning_rate": 6.683573529558477e-05, "loss": 0.0788, "step": 25270 }, { "action_loss": 0.00255620782263577, "epoch": 22.72482014388489, "step": 25270 }, { "epoch": 22.72482014388489, "step": 25270, "torque_loss": 0.09230490773916245 }, { "epoch": 22.73381294964029, "grad_norm": 0.36794131994247437, "learning_rate": 6.680978410170037e-05, "loss": 0.0778, "step": 25280 }, { "action_loss": 0.0038006079848855734, "epoch": 22.73381294964029, "step": 25280 }, { "epoch": 22.73381294964029, "step": 25280, "torque_loss": 0.08570923656225204 }, { "epoch": 22.742805755395683, "grad_norm": 0.3389282822608948, "learning_rate": 6.678382780144741e-05, "loss": 0.0781, "step": 25290 }, { "action_loss": 0.009076337330043316, "epoch": 22.742805755395683, "step": 25290 }, { "epoch": 22.742805755395683, "step": 25290, "torque_loss": 0.13053417205810547 }, { "epoch": 22.75179856115108, "grad_norm": 0.3302294909954071, "learning_rate": 6.675786640271071e-05, "loss": 0.0785, "step": 25300 }, { "action_loss": 0.0025586530100554228, "epoch": 22.75179856115108, "step": 25300 }, { "epoch": 22.75179856115108, "step": 25300, "torque_loss": 0.07758751511573792 }, { "epoch": 22.760791366906474, "grad_norm": 0.2525315284729004, "learning_rate": 6.673189991337665e-05, "loss": 0.0749, "step": 25310 }, { "action_loss": 0.005746800452470779, "epoch": 22.760791366906474, "step": 25310 }, { "epoch": 22.760791366906474, "step": 25310, "torque_loss": 0.08612751215696335 }, { "epoch": 22.769784172661872, "grad_norm": 0.32658132910728455, "learning_rate": 6.670592834133317e-05, "loss": 0.0818, "step": 25320 }, { "action_loss": 0.003964836709201336, "epoch": 22.769784172661872, "step": 25320 }, { "epoch": 22.769784172661872, "step": 25320, "torque_loss": 0.09533989429473877 }, { "epoch": 22.778776978417266, "grad_norm": 0.2765657305717468, "learning_rate": 6.667995169446979e-05, "loss": 0.072, "step": 25330 }, { "action_loss": 0.007187915500253439, "epoch": 22.778776978417266, "step": 25330 }, { "epoch": 22.778776978417266, "step": 25330, "torque_loss": 0.1888606995344162 }, { "epoch": 22.78776978417266, "grad_norm": 0.28519299626350403, "learning_rate": 6.665396998067747e-05, "loss": 0.0961, "step": 25340 }, { "action_loss": 0.00511070666834712, "epoch": 22.78776978417266, "step": 25340 }, { "epoch": 22.78776978417266, "step": 25340, "torque_loss": 0.10512097924947739 }, { "epoch": 22.796762589928058, "grad_norm": 0.3264622986316681, "learning_rate": 6.66279832078488e-05, "loss": 0.0695, "step": 25350 }, { "action_loss": 0.0059942398220300674, "epoch": 22.796762589928058, "step": 25350 }, { "epoch": 22.796762589928058, "step": 25350, "torque_loss": 0.09006436914205551 }, { "epoch": 22.805755395683452, "grad_norm": 0.3282230496406555, "learning_rate": 6.660199138387786e-05, "loss": 0.0744, "step": 25360 }, { "action_loss": 0.008336558006703854, "epoch": 22.805755395683452, "step": 25360 }, { "epoch": 22.805755395683452, "step": 25360, "torque_loss": 0.1419508457183838 }, { "epoch": 22.81474820143885, "grad_norm": 0.3351815640926361, "learning_rate": 6.65759945166603e-05, "loss": 0.0916, "step": 25370 }, { "action_loss": 0.004247708711773157, "epoch": 22.81474820143885, "step": 25370 }, { "epoch": 22.81474820143885, "step": 25370, "torque_loss": 0.11324814707040787 }, { "epoch": 22.823741007194243, "grad_norm": 0.3185517191886902, "learning_rate": 6.654999261409326e-05, "loss": 0.0908, "step": 25380 }, { "action_loss": 0.02489050291478634, "epoch": 22.823741007194243, "step": 25380 }, { "epoch": 22.823741007194243, "step": 25380, "torque_loss": 0.15650908648967743 }, { "epoch": 22.83273381294964, "grad_norm": 0.3522074818611145, "learning_rate": 6.652398568407544e-05, "loss": 0.0792, "step": 25390 }, { "action_loss": 0.021681996062397957, "epoch": 22.83273381294964, "step": 25390 }, { "epoch": 22.83273381294964, "step": 25390, "torque_loss": 0.13151498138904572 }, { "epoch": 22.841726618705035, "grad_norm": 0.2961673438549042, "learning_rate": 6.649797373450707e-05, "loss": 0.0778, "step": 25400 }, { "action_loss": 0.00917994137853384, "epoch": 22.841726618705035, "step": 25400 }, { "epoch": 22.841726618705035, "step": 25400, "torque_loss": 0.12742814421653748 }, { "epoch": 22.850719424460433, "grad_norm": 0.23107776045799255, "learning_rate": 6.647195677328988e-05, "loss": 0.0696, "step": 25410 }, { "action_loss": 0.0016799165168777108, "epoch": 22.850719424460433, "step": 25410 }, { "epoch": 22.850719424460433, "step": 25410, "torque_loss": 0.08635256439447403 }, { "epoch": 22.859712230215827, "grad_norm": 0.25850382447242737, "learning_rate": 6.644593480832712e-05, "loss": 0.0659, "step": 25420 }, { "action_loss": 0.0035718244034796953, "epoch": 22.859712230215827, "step": 25420 }, { "epoch": 22.859712230215827, "step": 25420, "torque_loss": 0.08795066922903061 }, { "epoch": 22.868705035971225, "grad_norm": 0.3812962770462036, "learning_rate": 6.641990784752363e-05, "loss": 0.0795, "step": 25430 }, { "action_loss": 0.006583665031939745, "epoch": 22.868705035971225, "step": 25430 }, { "epoch": 22.868705035971225, "step": 25430, "torque_loss": 0.10745995491743088 }, { "epoch": 22.87769784172662, "grad_norm": 0.3747582733631134, "learning_rate": 6.639387589878566e-05, "loss": 0.0815, "step": 25440 }, { "action_loss": 0.003229687921702862, "epoch": 22.87769784172662, "step": 25440 }, { "epoch": 22.87769784172662, "step": 25440, "torque_loss": 0.07129736989736557 }, { "epoch": 22.886690647482013, "grad_norm": 0.26087164878845215, "learning_rate": 6.636783897002103e-05, "loss": 0.0732, "step": 25450 }, { "action_loss": 0.00839914008975029, "epoch": 22.886690647482013, "step": 25450 }, { "epoch": 22.886690647482013, "step": 25450, "torque_loss": 0.13349707424640656 }, { "epoch": 22.89568345323741, "grad_norm": 0.2868444621562958, "learning_rate": 6.63417970691391e-05, "loss": 0.0776, "step": 25460 }, { "action_loss": 0.003415057435631752, "epoch": 22.89568345323741, "step": 25460 }, { "epoch": 22.89568345323741, "step": 25460, "torque_loss": 0.09360817819833755 }, { "epoch": 22.904676258992804, "grad_norm": 0.2828933894634247, "learning_rate": 6.63157502040507e-05, "loss": 0.0707, "step": 25470 }, { "action_loss": 0.017705602571368217, "epoch": 22.904676258992804, "step": 25470 }, { "epoch": 22.904676258992804, "step": 25470, "torque_loss": 0.18273760378360748 }, { "epoch": 22.913669064748202, "grad_norm": 0.27717331051826477, "learning_rate": 6.628969838266819e-05, "loss": 0.0844, "step": 25480 }, { "action_loss": 0.010024861432611942, "epoch": 22.913669064748202, "step": 25480 }, { "epoch": 22.913669064748202, "step": 25480, "torque_loss": 0.12930089235305786 }, { "epoch": 22.922661870503596, "grad_norm": 0.27119356393814087, "learning_rate": 6.626364161290541e-05, "loss": 0.0765, "step": 25490 }, { "action_loss": 0.005225079599767923, "epoch": 22.922661870503596, "step": 25490 }, { "epoch": 22.922661870503596, "step": 25490, "torque_loss": 0.0946316346526146 }, { "epoch": 22.931654676258994, "grad_norm": 0.24486801028251648, "learning_rate": 6.623757990267774e-05, "loss": 0.0719, "step": 25500 }, { "action_loss": 0.010447102598845959, "epoch": 22.931654676258994, "step": 25500 }, { "epoch": 22.931654676258994, "step": 25500, "torque_loss": 0.17602799832820892 }, { "epoch": 22.940647482014388, "grad_norm": 0.3043392598628998, "learning_rate": 6.621151325990201e-05, "loss": 0.0792, "step": 25510 }, { "action_loss": 0.01110987737774849, "epoch": 22.940647482014388, "step": 25510 }, { "epoch": 22.940647482014388, "step": 25510, "torque_loss": 0.14998896420001984 }, { "epoch": 22.949640287769785, "grad_norm": 0.23880089819431305, "learning_rate": 6.618544169249657e-05, "loss": 0.0733, "step": 25520 }, { "action_loss": 0.0015324648702517152, "epoch": 22.949640287769785, "step": 25520 }, { "epoch": 22.949640287769785, "step": 25520, "torque_loss": 0.09317903965711594 }, { "epoch": 22.95863309352518, "grad_norm": 0.39478304982185364, "learning_rate": 6.615936520838133e-05, "loss": 0.0794, "step": 25530 }, { "action_loss": 0.0022677029483020306, "epoch": 22.95863309352518, "step": 25530 }, { "epoch": 22.95863309352518, "step": 25530, "torque_loss": 0.09234937280416489 }, { "epoch": 22.967625899280577, "grad_norm": 0.274516224861145, "learning_rate": 6.613328381547759e-05, "loss": 0.0595, "step": 25540 }, { "action_loss": 0.006488510873168707, "epoch": 22.967625899280577, "step": 25540 }, { "epoch": 22.967625899280577, "step": 25540, "torque_loss": 0.1370246261358261 }, { "epoch": 22.97661870503597, "grad_norm": 0.3404320776462555, "learning_rate": 6.610719752170821e-05, "loss": 0.0978, "step": 25550 }, { "action_loss": 0.002154839923605323, "epoch": 22.97661870503597, "step": 25550 }, { "epoch": 22.97661870503597, "step": 25550, "torque_loss": 0.07501987367868423 }, { "epoch": 22.985611510791365, "grad_norm": 0.2856218218803406, "learning_rate": 6.60811063349975e-05, "loss": 0.0749, "step": 25560 }, { "action_loss": 0.0037651609163731337, "epoch": 22.985611510791365, "step": 25560 }, { "epoch": 22.985611510791365, "step": 25560, "torque_loss": 0.09612429141998291 }, { "epoch": 22.994604316546763, "grad_norm": 0.3799239695072174, "learning_rate": 6.605501026327127e-05, "loss": 0.0781, "step": 25570 }, { "action_loss": 0.006318926811218262, "epoch": 22.994604316546763, "step": 25570 }, { "epoch": 22.994604316546763, "step": 25570, "torque_loss": 0.14783430099487305 }, { "epoch": 23.003597122302157, "grad_norm": 0.2513357102870941, "learning_rate": 6.602890931445685e-05, "loss": 0.0864, "step": 25580 }, { "action_loss": 0.0055106766521930695, "epoch": 23.003597122302157, "step": 25580 }, { "epoch": 23.003597122302157, "step": 25580, "torque_loss": 0.13557694852352142 }, { "epoch": 23.012589928057555, "grad_norm": 0.3162645101547241, "learning_rate": 6.6002803496483e-05, "loss": 0.0859, "step": 25590 }, { "action_loss": 0.0025124752428382635, "epoch": 23.012589928057555, "step": 25590 }, { "epoch": 23.012589928057555, "step": 25590, "torque_loss": 0.09467307478189468 }, { "epoch": 23.02158273381295, "grad_norm": 0.3045474588871002, "learning_rate": 6.597669281727997e-05, "loss": 0.0759, "step": 25600 }, { "action_loss": 0.01633811928331852, "epoch": 23.02158273381295, "step": 25600 }, { "epoch": 23.02158273381295, "step": 25600, "torque_loss": 0.14574375748634338 }, { "epoch": 23.030575539568346, "grad_norm": 0.34103643894195557, "learning_rate": 6.595057728477949e-05, "loss": 0.0854, "step": 25610 }, { "action_loss": 0.012728444300591946, "epoch": 23.030575539568346, "step": 25610 }, { "epoch": 23.030575539568346, "step": 25610, "torque_loss": 0.14087168872356415 }, { "epoch": 23.03956834532374, "grad_norm": 0.32552823424339294, "learning_rate": 6.59244569069148e-05, "loss": 0.0845, "step": 25620 }, { "action_loss": 0.014441781677305698, "epoch": 23.03956834532374, "step": 25620 }, { "epoch": 23.03956834532374, "step": 25620, "torque_loss": 0.1472974270582199 }, { "epoch": 23.048561151079138, "grad_norm": 0.3073940575122833, "learning_rate": 6.589833169162054e-05, "loss": 0.0799, "step": 25630 }, { "action_loss": 0.006289674434810877, "epoch": 23.048561151079138, "step": 25630 }, { "epoch": 23.048561151079138, "step": 25630, "torque_loss": 0.11872434616088867 }, { "epoch": 23.057553956834532, "grad_norm": 0.33066216111183167, "learning_rate": 6.587220164683291e-05, "loss": 0.085, "step": 25640 }, { "action_loss": 0.006621358450502157, "epoch": 23.057553956834532, "step": 25640 }, { "epoch": 23.057553956834532, "step": 25640, "torque_loss": 0.09470199793577194 }, { "epoch": 23.06654676258993, "grad_norm": 0.3407909870147705, "learning_rate": 6.58460667804895e-05, "loss": 0.0767, "step": 25650 }, { "action_loss": 0.00888665858656168, "epoch": 23.06654676258993, "step": 25650 }, { "epoch": 23.06654676258993, "step": 25650, "torque_loss": 0.14343690872192383 }, { "epoch": 23.075539568345324, "grad_norm": 0.3234472870826721, "learning_rate": 6.581992710052938e-05, "loss": 0.0819, "step": 25660 }, { "action_loss": 0.004893087316304445, "epoch": 23.075539568345324, "step": 25660 }, { "epoch": 23.075539568345324, "step": 25660, "torque_loss": 0.10772816091775894 }, { "epoch": 23.084532374100718, "grad_norm": 0.3156912922859192, "learning_rate": 6.579378261489311e-05, "loss": 0.0841, "step": 25670 }, { "action_loss": 0.01085022184997797, "epoch": 23.084532374100718, "step": 25670 }, { "epoch": 23.084532374100718, "step": 25670, "torque_loss": 0.15216708183288574 }, { "epoch": 23.093525179856115, "grad_norm": 0.2371026575565338, "learning_rate": 6.576763333152268e-05, "loss": 0.077, "step": 25680 }, { "action_loss": 0.007613780442625284, "epoch": 23.093525179856115, "step": 25680 }, { "epoch": 23.093525179856115, "step": 25680, "torque_loss": 0.10948649793863297 }, { "epoch": 23.10251798561151, "grad_norm": 0.40794625878334045, "learning_rate": 6.574147925836159e-05, "loss": 0.07, "step": 25690 }, { "action_loss": 0.020278004929423332, "epoch": 23.10251798561151, "step": 25690 }, { "epoch": 23.10251798561151, "step": 25690, "torque_loss": 0.16125820577144623 }, { "epoch": 23.111510791366907, "grad_norm": 0.2745753228664398, "learning_rate": 6.571532040335472e-05, "loss": 0.0817, "step": 25700 }, { "action_loss": 0.0026920996606349945, "epoch": 23.111510791366907, "step": 25700 }, { "epoch": 23.111510791366907, "step": 25700, "torque_loss": 0.05403205752372742 }, { "epoch": 23.1205035971223, "grad_norm": 0.36957913637161255, "learning_rate": 6.568915677444845e-05, "loss": 0.0708, "step": 25710 }, { "action_loss": 0.018206799402832985, "epoch": 23.1205035971223, "step": 25710 }, { "epoch": 23.1205035971223, "step": 25710, "torque_loss": 0.14634086191654205 }, { "epoch": 23.1294964028777, "grad_norm": 0.3390801250934601, "learning_rate": 6.56629883795906e-05, "loss": 0.0811, "step": 25720 }, { "action_loss": 0.005412260536104441, "epoch": 23.1294964028777, "step": 25720 }, { "epoch": 23.1294964028777, "step": 25720, "torque_loss": 0.08738303184509277 }, { "epoch": 23.138489208633093, "grad_norm": 0.2996397912502289, "learning_rate": 6.563681522673043e-05, "loss": 0.0714, "step": 25730 }, { "action_loss": 0.013431350700557232, "epoch": 23.138489208633093, "step": 25730 }, { "epoch": 23.138489208633093, "step": 25730, "torque_loss": 0.125998392701149 }, { "epoch": 23.14748201438849, "grad_norm": 0.36988475918769836, "learning_rate": 6.561063732381867e-05, "loss": 0.087, "step": 25740 }, { "action_loss": 0.00650082528591156, "epoch": 23.14748201438849, "step": 25740 }, { "epoch": 23.14748201438849, "step": 25740, "torque_loss": 0.09434100240468979 }, { "epoch": 23.156474820143885, "grad_norm": 0.26146939396858215, "learning_rate": 6.558445467880745e-05, "loss": 0.0758, "step": 25750 }, { "action_loss": 0.004518407862633467, "epoch": 23.156474820143885, "step": 25750 }, { "epoch": 23.156474820143885, "step": 25750, "torque_loss": 0.11159635335206985 }, { "epoch": 23.165467625899282, "grad_norm": 0.31418341398239136, "learning_rate": 6.55582672996504e-05, "loss": 0.0746, "step": 25760 }, { "action_loss": 0.00590363098308444, "epoch": 23.165467625899282, "step": 25760 }, { "epoch": 23.165467625899282, "step": 25760, "torque_loss": 0.1103934571146965 }, { "epoch": 23.174460431654676, "grad_norm": 0.25762876868247986, "learning_rate": 6.553207519430253e-05, "loss": 0.0706, "step": 25770 }, { "action_loss": 0.01076322328299284, "epoch": 23.174460431654676, "step": 25770 }, { "epoch": 23.174460431654676, "step": 25770, "torque_loss": 0.13653631508350372 }, { "epoch": 23.18345323741007, "grad_norm": 0.26889342069625854, "learning_rate": 6.550587837072032e-05, "loss": 0.0854, "step": 25780 }, { "action_loss": 0.009172274731099606, "epoch": 23.18345323741007, "step": 25780 }, { "epoch": 23.18345323741007, "step": 25780, "torque_loss": 0.1204732283949852 }, { "epoch": 23.192446043165468, "grad_norm": 0.2572880983352661, "learning_rate": 6.547967683686166e-05, "loss": 0.0768, "step": 25790 }, { "action_loss": 0.004443229641765356, "epoch": 23.192446043165468, "step": 25790 }, { "epoch": 23.192446043165468, "step": 25790, "torque_loss": 0.11928850412368774 }, { "epoch": 23.201438848920862, "grad_norm": 0.2668056786060333, "learning_rate": 6.545347060068591e-05, "loss": 0.0642, "step": 25800 }, { "action_loss": 0.005624496843665838, "epoch": 23.201438848920862, "step": 25800 }, { "epoch": 23.201438848920862, "step": 25800, "torque_loss": 0.11615541577339172 }, { "epoch": 23.21043165467626, "grad_norm": 0.3360828459262848, "learning_rate": 6.542725967015382e-05, "loss": 0.0776, "step": 25810 }, { "action_loss": 0.002671479480341077, "epoch": 23.21043165467626, "step": 25810 }, { "epoch": 23.21043165467626, "step": 25810, "torque_loss": 0.11603397130966187 }, { "epoch": 23.219424460431654, "grad_norm": 0.28181880712509155, "learning_rate": 6.540104405322757e-05, "loss": 0.071, "step": 25820 }, { "action_loss": 0.0032528925221413374, "epoch": 23.219424460431654, "step": 25820 }, { "epoch": 23.219424460431654, "step": 25820, "torque_loss": 0.1062241792678833 }, { "epoch": 23.22841726618705, "grad_norm": 0.2750423848628998, "learning_rate": 6.537482375787077e-05, "loss": 0.0836, "step": 25830 }, { "action_loss": 0.0032384414225816727, "epoch": 23.22841726618705, "step": 25830 }, { "epoch": 23.22841726618705, "step": 25830, "torque_loss": 0.07686951756477356 }, { "epoch": 23.237410071942445, "grad_norm": 0.2933165431022644, "learning_rate": 6.534859879204845e-05, "loss": 0.0834, "step": 25840 }, { "action_loss": 0.01746531017124653, "epoch": 23.237410071942445, "step": 25840 }, { "epoch": 23.237410071942445, "step": 25840, "torque_loss": 0.18643324077129364 }, { "epoch": 23.246402877697843, "grad_norm": 0.288331001996994, "learning_rate": 6.532236916372709e-05, "loss": 0.0765, "step": 25850 }, { "action_loss": 0.006580287124961615, "epoch": 23.246402877697843, "step": 25850 }, { "epoch": 23.246402877697843, "step": 25850, "torque_loss": 0.1123180165886879 }, { "epoch": 23.255395683453237, "grad_norm": 0.3262643814086914, "learning_rate": 6.529613488087454e-05, "loss": 0.0733, "step": 25860 }, { "action_loss": 0.0024872859939932823, "epoch": 23.255395683453237, "step": 25860 }, { "epoch": 23.255395683453237, "step": 25860, "torque_loss": 0.08501815795898438 }, { "epoch": 23.264388489208635, "grad_norm": 0.360765665769577, "learning_rate": 6.526989595146009e-05, "loss": 0.0642, "step": 25870 }, { "action_loss": 0.005509381648153067, "epoch": 23.264388489208635, "step": 25870 }, { "epoch": 23.264388489208635, "step": 25870, "torque_loss": 0.1311342865228653 }, { "epoch": 23.27338129496403, "grad_norm": 0.21113842725753784, "learning_rate": 6.524365238345441e-05, "loss": 0.0706, "step": 25880 }, { "action_loss": 0.019949540495872498, "epoch": 23.27338129496403, "step": 25880 }, { "epoch": 23.27338129496403, "step": 25880, "torque_loss": 0.1776018738746643 }, { "epoch": 23.282374100719423, "grad_norm": 0.34191012382507324, "learning_rate": 6.521740418482964e-05, "loss": 0.0839, "step": 25890 }, { "action_loss": 0.005881598684936762, "epoch": 23.282374100719423, "step": 25890 }, { "epoch": 23.282374100719423, "step": 25890, "torque_loss": 0.11039672046899796 }, { "epoch": 23.29136690647482, "grad_norm": 0.3172544538974762, "learning_rate": 6.519115136355925e-05, "loss": 0.0706, "step": 25900 }, { "action_loss": 0.007345018442720175, "epoch": 23.29136690647482, "step": 25900 }, { "epoch": 23.29136690647482, "step": 25900, "torque_loss": 0.11922004073858261 }, { "epoch": 23.300359712230215, "grad_norm": 0.2859196960926056, "learning_rate": 6.51648939276182e-05, "loss": 0.075, "step": 25910 }, { "action_loss": 0.011283417232334614, "epoch": 23.300359712230215, "step": 25910 }, { "epoch": 23.300359712230215, "step": 25910, "torque_loss": 0.14363984763622284 }, { "epoch": 23.309352517985612, "grad_norm": 0.24289710819721222, "learning_rate": 6.513863188498277e-05, "loss": 0.0742, "step": 25920 }, { "action_loss": 0.005193326156586409, "epoch": 23.309352517985612, "step": 25920 }, { "epoch": 23.309352517985612, "step": 25920, "torque_loss": 0.09184989333152771 }, { "epoch": 23.318345323741006, "grad_norm": 0.42669877409935, "learning_rate": 6.511236524363068e-05, "loss": 0.073, "step": 25930 }, { "action_loss": 0.006508185062557459, "epoch": 23.318345323741006, "step": 25930 }, { "epoch": 23.318345323741006, "step": 25930, "torque_loss": 0.08169040083885193 }, { "epoch": 23.327338129496404, "grad_norm": 0.33207952976226807, "learning_rate": 6.508609401154104e-05, "loss": 0.0908, "step": 25940 }, { "action_loss": 0.0025256320368498564, "epoch": 23.327338129496404, "step": 25940 }, { "epoch": 23.327338129496404, "step": 25940, "torque_loss": 0.06259195506572723 }, { "epoch": 23.336330935251798, "grad_norm": 0.4085474908351898, "learning_rate": 6.505981819669439e-05, "loss": 0.0751, "step": 25950 }, { "action_loss": 0.016198279336094856, "epoch": 23.336330935251798, "step": 25950 }, { "epoch": 23.336330935251798, "step": 25950, "torque_loss": 0.177787184715271 }, { "epoch": 23.345323741007196, "grad_norm": 0.3974858522415161, "learning_rate": 6.503353780707258e-05, "loss": 0.0797, "step": 25960 }, { "action_loss": 0.0029499121010303497, "epoch": 23.345323741007196, "step": 25960 }, { "epoch": 23.345323741007196, "step": 25960, "torque_loss": 0.09310971945524216 }, { "epoch": 23.35431654676259, "grad_norm": 0.28513583540916443, "learning_rate": 6.500725285065895e-05, "loss": 0.078, "step": 25970 }, { "action_loss": 0.011030781082808971, "epoch": 23.35431654676259, "step": 25970 }, { "epoch": 23.35431654676259, "step": 25970, "torque_loss": 0.11791971325874329 }, { "epoch": 23.363309352517987, "grad_norm": 0.33286285400390625, "learning_rate": 6.498096333543813e-05, "loss": 0.0798, "step": 25980 }, { "action_loss": 0.002585478127002716, "epoch": 23.363309352517987, "step": 25980 }, { "epoch": 23.363309352517987, "step": 25980, "torque_loss": 0.07220259308815002 }, { "epoch": 23.37230215827338, "grad_norm": 0.2677749991416931, "learning_rate": 6.49546692693962e-05, "loss": 0.0682, "step": 25990 }, { "action_loss": 0.01278234738856554, "epoch": 23.37230215827338, "step": 25990 }, { "epoch": 23.37230215827338, "step": 25990, "torque_loss": 0.16536253690719604 }, { "epoch": 23.381294964028775, "grad_norm": 0.32138827443122864, "learning_rate": 6.492837066052059e-05, "loss": 0.0852, "step": 26000 }, { "action_loss": 0.0033726512920111418, "epoch": 23.381294964028775, "step": 26000 }, { "epoch": 23.381294964028775, "step": 26000, "torque_loss": 0.10167598724365234 }, { "epoch": 23.390287769784173, "grad_norm": 0.2992890477180481, "learning_rate": 6.490206751680014e-05, "loss": 0.0852, "step": 26010 }, { "action_loss": 0.002727731131017208, "epoch": 23.390287769784173, "step": 26010 }, { "epoch": 23.390287769784173, "step": 26010, "torque_loss": 0.09478811174631119 }, { "epoch": 23.399280575539567, "grad_norm": 0.32411089539527893, "learning_rate": 6.487575984622505e-05, "loss": 0.0822, "step": 26020 }, { "action_loss": 0.013988153077661991, "epoch": 23.399280575539567, "step": 26020 }, { "epoch": 23.399280575539567, "step": 26020, "torque_loss": 0.13851913809776306 }, { "epoch": 23.408273381294965, "grad_norm": 0.3180409073829651, "learning_rate": 6.484944765678689e-05, "loss": 0.0754, "step": 26030 }, { "action_loss": 0.00625310093164444, "epoch": 23.408273381294965, "step": 26030 }, { "epoch": 23.408273381294965, "step": 26030, "torque_loss": 0.13137029111385345 }, { "epoch": 23.41726618705036, "grad_norm": 0.3816865086555481, "learning_rate": 6.482313095647861e-05, "loss": 0.0806, "step": 26040 }, { "action_loss": 0.006164890248328447, "epoch": 23.41726618705036, "step": 26040 }, { "epoch": 23.41726618705036, "step": 26040, "torque_loss": 0.1228431984782219 }, { "epoch": 23.426258992805757, "grad_norm": 0.23911698162555695, "learning_rate": 6.479680975329451e-05, "loss": 0.0836, "step": 26050 }, { "action_loss": 0.00555381691083312, "epoch": 23.426258992805757, "step": 26050 }, { "epoch": 23.426258992805757, "step": 26050, "torque_loss": 0.11121370643377304 }, { "epoch": 23.43525179856115, "grad_norm": 0.331678569316864, "learning_rate": 6.477048405523031e-05, "loss": 0.0737, "step": 26060 }, { "action_loss": 0.00203570444136858, "epoch": 23.43525179856115, "step": 26060 }, { "epoch": 23.43525179856115, "step": 26060, "torque_loss": 0.09275131672620773 }, { "epoch": 23.444244604316548, "grad_norm": 0.3047536611557007, "learning_rate": 6.474415387028304e-05, "loss": 0.0815, "step": 26070 }, { "action_loss": 0.004480014089494944, "epoch": 23.444244604316548, "step": 26070 }, { "epoch": 23.444244604316548, "step": 26070, "torque_loss": 0.08450586348772049 }, { "epoch": 23.453237410071942, "grad_norm": 0.28779336810112, "learning_rate": 6.471781920645114e-05, "loss": 0.065, "step": 26080 }, { "action_loss": 0.017206929624080658, "epoch": 23.453237410071942, "step": 26080 }, { "epoch": 23.453237410071942, "step": 26080, "torque_loss": 0.1227828860282898 }, { "epoch": 23.46223021582734, "grad_norm": 0.2812912166118622, "learning_rate": 6.469148007173434e-05, "loss": 0.0774, "step": 26090 }, { "action_loss": 0.0038211513310670853, "epoch": 23.46223021582734, "step": 26090 }, { "epoch": 23.46223021582734, "step": 26090, "torque_loss": 0.09682762622833252 }, { "epoch": 23.471223021582734, "grad_norm": 0.35549890995025635, "learning_rate": 6.466513647413381e-05, "loss": 0.0785, "step": 26100 }, { "action_loss": 0.004873498808592558, "epoch": 23.471223021582734, "step": 26100 }, { "epoch": 23.471223021582734, "step": 26100, "torque_loss": 0.13505613803863525 }, { "epoch": 23.480215827338128, "grad_norm": 0.30452170968055725, "learning_rate": 6.463878842165203e-05, "loss": 0.0812, "step": 26110 }, { "action_loss": 0.0029572846833616495, "epoch": 23.480215827338128, "step": 26110 }, { "epoch": 23.480215827338128, "step": 26110, "torque_loss": 0.061018768697977066 }, { "epoch": 23.489208633093526, "grad_norm": 0.23558387160301208, "learning_rate": 6.461243592229286e-05, "loss": 0.0776, "step": 26120 }, { "action_loss": 0.0034110506530851126, "epoch": 23.489208633093526, "step": 26120 }, { "epoch": 23.489208633093526, "step": 26120, "torque_loss": 0.07616755366325378 }, { "epoch": 23.49820143884892, "grad_norm": 0.294656366109848, "learning_rate": 6.458607898406146e-05, "loss": 0.0607, "step": 26130 }, { "action_loss": 0.0038569404277950525, "epoch": 23.49820143884892, "step": 26130 }, { "epoch": 23.49820143884892, "step": 26130, "torque_loss": 0.108209528028965 }, { "epoch": 23.507194244604317, "grad_norm": 0.31138792634010315, "learning_rate": 6.455971761496439e-05, "loss": 0.0658, "step": 26140 }, { "action_loss": 0.0033639140892773867, "epoch": 23.507194244604317, "step": 26140 }, { "epoch": 23.507194244604317, "step": 26140, "torque_loss": 0.09217926114797592 }, { "epoch": 23.51618705035971, "grad_norm": 0.34683138132095337, "learning_rate": 6.453335182300953e-05, "loss": 0.0764, "step": 26150 }, { "action_loss": 0.008017351850867271, "epoch": 23.51618705035971, "step": 26150 }, { "epoch": 23.51618705035971, "step": 26150, "torque_loss": 0.18768823146820068 }, { "epoch": 23.52517985611511, "grad_norm": 0.3488617241382599, "learning_rate": 6.450698161620612e-05, "loss": 0.0792, "step": 26160 }, { "action_loss": 0.0029803605284541845, "epoch": 23.52517985611511, "step": 26160 }, { "epoch": 23.52517985611511, "step": 26160, "torque_loss": 0.09990862756967545 }, { "epoch": 23.534172661870503, "grad_norm": 0.35427209734916687, "learning_rate": 6.448060700256473e-05, "loss": 0.0843, "step": 26170 }, { "action_loss": 0.005505170673131943, "epoch": 23.534172661870503, "step": 26170 }, { "epoch": 23.534172661870503, "step": 26170, "torque_loss": 0.10760536044836044 }, { "epoch": 23.5431654676259, "grad_norm": 0.32288962602615356, "learning_rate": 6.445422799009726e-05, "loss": 0.0707, "step": 26180 }, { "action_loss": 0.0031494100112468004, "epoch": 23.5431654676259, "step": 26180 }, { "epoch": 23.5431654676259, "step": 26180, "torque_loss": 0.08575606346130371 }, { "epoch": 23.552158273381295, "grad_norm": 0.4193212389945984, "learning_rate": 6.442784458681699e-05, "loss": 0.0804, "step": 26190 }, { "action_loss": 0.005325924605131149, "epoch": 23.552158273381295, "step": 26190 }, { "epoch": 23.552158273381295, "step": 26190, "torque_loss": 0.07614951580762863 }, { "epoch": 23.56115107913669, "grad_norm": 0.3688426613807678, "learning_rate": 6.440145680073847e-05, "loss": 0.0773, "step": 26200 }, { "action_loss": 0.003452368313446641, "epoch": 23.56115107913669, "step": 26200 }, { "epoch": 23.56115107913669, "step": 26200, "torque_loss": 0.09880126267671585 }, { "epoch": 23.570143884892087, "grad_norm": 0.3020898103713989, "learning_rate": 6.437506463987762e-05, "loss": 0.0797, "step": 26210 }, { "action_loss": 0.006717648822814226, "epoch": 23.570143884892087, "step": 26210 }, { "epoch": 23.570143884892087, "step": 26210, "torque_loss": 0.14008159935474396 }, { "epoch": 23.57913669064748, "grad_norm": 0.2535039484500885, "learning_rate": 6.434866811225168e-05, "loss": 0.0728, "step": 26220 }, { "action_loss": 0.004118741489946842, "epoch": 23.57913669064748, "step": 26220 }, { "epoch": 23.57913669064748, "step": 26220, "torque_loss": 0.09318354725837708 }, { "epoch": 23.58812949640288, "grad_norm": 0.21970859169960022, "learning_rate": 6.432226722587923e-05, "loss": 0.078, "step": 26230 }, { "action_loss": 0.0065964520908892155, "epoch": 23.58812949640288, "step": 26230 }, { "epoch": 23.58812949640288, "step": 26230, "torque_loss": 0.10049957036972046 }, { "epoch": 23.597122302158272, "grad_norm": 0.3532967269420624, "learning_rate": 6.429586198878015e-05, "loss": 0.0848, "step": 26240 }, { "action_loss": 0.054923225194215775, "epoch": 23.597122302158272, "step": 26240 }, { "epoch": 23.597122302158272, "step": 26240, "torque_loss": 0.17287379503250122 }, { "epoch": 23.60611510791367, "grad_norm": 0.3245481848716736, "learning_rate": 6.426945240897566e-05, "loss": 0.0817, "step": 26250 }, { "action_loss": 0.010060003958642483, "epoch": 23.60611510791367, "step": 26250 }, { "epoch": 23.60611510791367, "step": 26250, "torque_loss": 0.14399774372577667 }, { "epoch": 23.615107913669064, "grad_norm": 0.2921847105026245, "learning_rate": 6.424303849448829e-05, "loss": 0.076, "step": 26260 }, { "action_loss": 0.003001173259690404, "epoch": 23.615107913669064, "step": 26260 }, { "epoch": 23.615107913669064, "step": 26260, "torque_loss": 0.060705337673425674 }, { "epoch": 23.62410071942446, "grad_norm": 0.34574201703071594, "learning_rate": 6.42166202533419e-05, "loss": 0.0786, "step": 26270 }, { "action_loss": 0.0034936442971229553, "epoch": 23.62410071942446, "step": 26270 }, { "epoch": 23.62410071942446, "step": 26270, "torque_loss": 0.08929973840713501 }, { "epoch": 23.633093525179856, "grad_norm": 0.3533228635787964, "learning_rate": 6.419019769356164e-05, "loss": 0.073, "step": 26280 }, { "action_loss": 0.006797388195991516, "epoch": 23.633093525179856, "step": 26280 }, { "epoch": 23.633093525179856, "step": 26280, "torque_loss": 0.13423947989940643 }, { "epoch": 23.642086330935253, "grad_norm": 0.3231765925884247, "learning_rate": 6.416377082317398e-05, "loss": 0.0794, "step": 26290 }, { "action_loss": 0.005605537444353104, "epoch": 23.642086330935253, "step": 26290 }, { "epoch": 23.642086330935253, "step": 26290, "torque_loss": 0.06520681083202362 }, { "epoch": 23.651079136690647, "grad_norm": 0.41746366024017334, "learning_rate": 6.413733965020674e-05, "loss": 0.068, "step": 26300 }, { "action_loss": 0.010066519491374493, "epoch": 23.651079136690647, "step": 26300 }, { "epoch": 23.651079136690647, "step": 26300, "torque_loss": 0.13092215359210968 }, { "epoch": 23.66007194244604, "grad_norm": 0.2800491750240326, "learning_rate": 6.411090418268896e-05, "loss": 0.0656, "step": 26310 }, { "action_loss": 0.0031744204461574554, "epoch": 23.66007194244604, "step": 26310 }, { "epoch": 23.66007194244604, "step": 26310, "torque_loss": 0.10469627380371094 }, { "epoch": 23.66906474820144, "grad_norm": 0.2941299080848694, "learning_rate": 6.408446442865109e-05, "loss": 0.065, "step": 26320 }, { "action_loss": 0.003996326122432947, "epoch": 23.66906474820144, "step": 26320 }, { "epoch": 23.66906474820144, "step": 26320, "torque_loss": 0.08816802501678467 }, { "epoch": 23.678057553956833, "grad_norm": 0.34931591153144836, "learning_rate": 6.405802039612479e-05, "loss": 0.0855, "step": 26330 }, { "action_loss": 0.0030856330413371325, "epoch": 23.678057553956833, "step": 26330 }, { "epoch": 23.678057553956833, "step": 26330, "torque_loss": 0.0801922157406807 }, { "epoch": 23.68705035971223, "grad_norm": 0.35720890760421753, "learning_rate": 6.403157209314308e-05, "loss": 0.0798, "step": 26340 }, { "action_loss": 0.008022729307413101, "epoch": 23.68705035971223, "step": 26340 }, { "epoch": 23.68705035971223, "step": 26340, "torque_loss": 0.09228497743606567 }, { "epoch": 23.696043165467625, "grad_norm": 0.28900569677352905, "learning_rate": 6.400511952774024e-05, "loss": 0.0772, "step": 26350 }, { "action_loss": 0.00563581520691514, "epoch": 23.696043165467625, "step": 26350 }, { "epoch": 23.696043165467625, "step": 26350, "torque_loss": 0.14423131942749023 }, { "epoch": 23.705035971223023, "grad_norm": 0.30793312191963196, "learning_rate": 6.397866270795187e-05, "loss": 0.0694, "step": 26360 }, { "action_loss": 0.004527151118963957, "epoch": 23.705035971223023, "step": 26360 }, { "epoch": 23.705035971223023, "step": 26360, "torque_loss": 0.09693596512079239 }, { "epoch": 23.714028776978417, "grad_norm": 0.3196352422237396, "learning_rate": 6.395220164181489e-05, "loss": 0.0658, "step": 26370 }, { "action_loss": 0.012385598383843899, "epoch": 23.714028776978417, "step": 26370 }, { "epoch": 23.714028776978417, "step": 26370, "torque_loss": 0.14918208122253418 }, { "epoch": 23.723021582733814, "grad_norm": 0.3424401879310608, "learning_rate": 6.39257363373674e-05, "loss": 0.0672, "step": 26380 }, { "action_loss": 0.006168623920530081, "epoch": 23.723021582733814, "step": 26380 }, { "epoch": 23.723021582733814, "step": 26380, "torque_loss": 0.13347391784191132 }, { "epoch": 23.73201438848921, "grad_norm": 0.2952961027622223, "learning_rate": 6.389926680264892e-05, "loss": 0.0871, "step": 26390 }, { "action_loss": 0.004926592111587524, "epoch": 23.73201438848921, "step": 26390 }, { "epoch": 23.73201438848921, "step": 26390, "torque_loss": 0.10826978832483292 }, { "epoch": 23.741007194244606, "grad_norm": 0.2814869284629822, "learning_rate": 6.387279304570017e-05, "loss": 0.0967, "step": 26400 }, { "action_loss": 0.0031028457451611757, "epoch": 23.741007194244606, "step": 26400 }, { "epoch": 23.741007194244606, "step": 26400, "torque_loss": 0.07856174558401108 }, { "epoch": 23.75, "grad_norm": 0.2894528806209564, "learning_rate": 6.384631507456319e-05, "loss": 0.0737, "step": 26410 }, { "action_loss": 0.009294874034821987, "epoch": 23.75, "step": 26410 }, { "epoch": 23.75, "step": 26410, "torque_loss": 0.08819124847650528 }, { "epoch": 23.758992805755394, "grad_norm": 0.30797067284584045, "learning_rate": 6.381983289728126e-05, "loss": 0.0816, "step": 26420 }, { "action_loss": 0.0033457695972174406, "epoch": 23.758992805755394, "step": 26420 }, { "epoch": 23.758992805755394, "step": 26420, "torque_loss": 0.08164703100919724 }, { "epoch": 23.76798561151079, "grad_norm": 0.3327929675579071, "learning_rate": 6.3793346521899e-05, "loss": 0.0802, "step": 26430 }, { "action_loss": 0.007590940687805414, "epoch": 23.76798561151079, "step": 26430 }, { "epoch": 23.76798561151079, "step": 26430, "torque_loss": 0.14830951392650604 }, { "epoch": 23.776978417266186, "grad_norm": 0.31655457615852356, "learning_rate": 6.376685595646226e-05, "loss": 0.0861, "step": 26440 }, { "action_loss": 0.017079748213291168, "epoch": 23.776978417266186, "step": 26440 }, { "epoch": 23.776978417266186, "step": 26440, "torque_loss": 0.17544473707675934 }, { "epoch": 23.785971223021583, "grad_norm": 0.4283851087093353, "learning_rate": 6.374036120901816e-05, "loss": 0.0891, "step": 26450 }, { "action_loss": 0.008548489771783352, "epoch": 23.785971223021583, "step": 26450 }, { "epoch": 23.785971223021583, "step": 26450, "torque_loss": 0.1807182878255844 }, { "epoch": 23.794964028776977, "grad_norm": 0.2974622845649719, "learning_rate": 6.371386228761514e-05, "loss": 0.0828, "step": 26460 }, { "action_loss": 0.008346076123416424, "epoch": 23.794964028776977, "step": 26460 }, { "epoch": 23.794964028776977, "step": 26460, "torque_loss": 0.1272481381893158 }, { "epoch": 23.803956834532375, "grad_norm": 0.35663068294525146, "learning_rate": 6.368735920030283e-05, "loss": 0.0925, "step": 26470 }, { "action_loss": 0.004529378842562437, "epoch": 23.803956834532375, "step": 26470 }, { "epoch": 23.803956834532375, "step": 26470, "torque_loss": 0.11213213205337524 }, { "epoch": 23.81294964028777, "grad_norm": 0.2344370037317276, "learning_rate": 6.366085195513218e-05, "loss": 0.07, "step": 26480 }, { "action_loss": 0.006120430771261454, "epoch": 23.81294964028777, "step": 26480 }, { "epoch": 23.81294964028777, "step": 26480, "torque_loss": 0.12596313655376434 }, { "epoch": 23.821942446043167, "grad_norm": 0.3413466513156891, "learning_rate": 6.363434056015543e-05, "loss": 0.0887, "step": 26490 }, { "action_loss": 0.010132760740816593, "epoch": 23.821942446043167, "step": 26490 }, { "epoch": 23.821942446043167, "step": 26490, "torque_loss": 0.14079126715660095 }, { "epoch": 23.83093525179856, "grad_norm": 0.3588504195213318, "learning_rate": 6.360782502342599e-05, "loss": 0.0708, "step": 26500 }, { "action_loss": 0.0030297369230538607, "epoch": 23.83093525179856, "step": 26500 }, { "epoch": 23.83093525179856, "step": 26500, "torque_loss": 0.08778225630521774 }, { "epoch": 23.83992805755396, "grad_norm": 0.3158741593360901, "learning_rate": 6.358130535299862e-05, "loss": 0.0831, "step": 26510 }, { "action_loss": 0.004513731691986322, "epoch": 23.83992805755396, "step": 26510 }, { "epoch": 23.83992805755396, "step": 26510, "torque_loss": 0.1080358549952507 }, { "epoch": 23.848920863309353, "grad_norm": 0.3516215682029724, "learning_rate": 6.355478155692926e-05, "loss": 0.082, "step": 26520 }, { "action_loss": 0.005573015660047531, "epoch": 23.848920863309353, "step": 26520 }, { "epoch": 23.848920863309353, "step": 26520, "torque_loss": 0.12712900340557098 }, { "epoch": 23.857913669064747, "grad_norm": 0.3270998001098633, "learning_rate": 6.352825364327517e-05, "loss": 0.0739, "step": 26530 }, { "action_loss": 0.015038315206766129, "epoch": 23.857913669064747, "step": 26530 }, { "epoch": 23.857913669064747, "step": 26530, "torque_loss": 0.20778970420360565 }, { "epoch": 23.866906474820144, "grad_norm": 0.34093523025512695, "learning_rate": 6.350172162009482e-05, "loss": 0.1004, "step": 26540 }, { "action_loss": 0.004669498186558485, "epoch": 23.866906474820144, "step": 26540 }, { "epoch": 23.866906474820144, "step": 26540, "torque_loss": 0.0858517661690712 }, { "epoch": 23.87589928057554, "grad_norm": 0.4062618911266327, "learning_rate": 6.347518549544793e-05, "loss": 0.0852, "step": 26550 }, { "action_loss": 0.0017095059156417847, "epoch": 23.87589928057554, "step": 26550 }, { "epoch": 23.87589928057554, "step": 26550, "torque_loss": 0.08761171251535416 }, { "epoch": 23.884892086330936, "grad_norm": 0.3250444233417511, "learning_rate": 6.344864527739547e-05, "loss": 0.0729, "step": 26560 }, { "action_loss": 0.007962641306221485, "epoch": 23.884892086330936, "step": 26560 }, { "epoch": 23.884892086330936, "step": 26560, "torque_loss": 0.12190752476453781 }, { "epoch": 23.89388489208633, "grad_norm": 0.334751158952713, "learning_rate": 6.342210097399966e-05, "loss": 0.0748, "step": 26570 }, { "action_loss": 0.00911192037165165, "epoch": 23.89388489208633, "step": 26570 }, { "epoch": 23.89388489208633, "step": 26570, "torque_loss": 0.0823044553399086 }, { "epoch": 23.902877697841728, "grad_norm": 0.32898208498954773, "learning_rate": 6.339555259332398e-05, "loss": 0.0894, "step": 26580 }, { "action_loss": 0.008544321171939373, "epoch": 23.902877697841728, "step": 26580 }, { "epoch": 23.902877697841728, "step": 26580, "torque_loss": 0.09945875406265259 }, { "epoch": 23.91187050359712, "grad_norm": 0.398209810256958, "learning_rate": 6.33690001434331e-05, "loss": 0.0949, "step": 26590 }, { "action_loss": 0.016883529722690582, "epoch": 23.91187050359712, "step": 26590 }, { "epoch": 23.91187050359712, "step": 26590, "torque_loss": 0.18436898291110992 }, { "epoch": 23.92086330935252, "grad_norm": 0.7017538547515869, "learning_rate": 6.334244363239296e-05, "loss": 0.0827, "step": 26600 }, { "action_loss": 0.0031208836007863283, "epoch": 23.92086330935252, "step": 26600 }, { "epoch": 23.92086330935252, "step": 26600, "torque_loss": 0.0520293302834034 }, { "epoch": 23.929856115107913, "grad_norm": 0.32012930512428284, "learning_rate": 6.331588306827073e-05, "loss": 0.0661, "step": 26610 }, { "action_loss": 0.004612784367054701, "epoch": 23.929856115107913, "step": 26610 }, { "epoch": 23.929856115107913, "step": 26610, "torque_loss": 0.10828232765197754 }, { "epoch": 23.93884892086331, "grad_norm": 0.4344647526741028, "learning_rate": 6.328931845913483e-05, "loss": 0.0651, "step": 26620 }, { "action_loss": 0.0034906594082713127, "epoch": 23.93884892086331, "step": 26620 }, { "epoch": 23.93884892086331, "step": 26620, "torque_loss": 0.11245104670524597 }, { "epoch": 23.947841726618705, "grad_norm": 0.27154940366744995, "learning_rate": 6.326274981305484e-05, "loss": 0.0736, "step": 26630 }, { "action_loss": 0.009198499843478203, "epoch": 23.947841726618705, "step": 26630 }, { "epoch": 23.947841726618705, "step": 26630, "torque_loss": 0.1290782392024994 }, { "epoch": 23.9568345323741, "grad_norm": 0.27830952405929565, "learning_rate": 6.323617713810166e-05, "loss": 0.0768, "step": 26640 }, { "action_loss": 0.0035818994510918856, "epoch": 23.9568345323741, "step": 26640 }, { "epoch": 23.9568345323741, "step": 26640, "torque_loss": 0.08327574282884598 }, { "epoch": 23.965827338129497, "grad_norm": 0.25851890444755554, "learning_rate": 6.320960044234734e-05, "loss": 0.0736, "step": 26650 }, { "action_loss": 0.007633570581674576, "epoch": 23.965827338129497, "step": 26650 }, { "epoch": 23.965827338129497, "step": 26650, "torque_loss": 0.13784503936767578 }, { "epoch": 23.97482014388489, "grad_norm": 0.32527029514312744, "learning_rate": 6.318301973386518e-05, "loss": 0.0975, "step": 26660 }, { "action_loss": 0.006440112832933664, "epoch": 23.97482014388489, "step": 26660 }, { "epoch": 23.97482014388489, "step": 26660, "torque_loss": 0.1405312567949295 }, { "epoch": 23.98381294964029, "grad_norm": 0.41196104884147644, "learning_rate": 6.315643502072971e-05, "loss": 0.0804, "step": 26670 }, { "action_loss": 0.002963489852845669, "epoch": 23.98381294964029, "step": 26670 }, { "epoch": 23.98381294964029, "step": 26670, "torque_loss": 0.06742515414953232 }, { "epoch": 23.992805755395683, "grad_norm": 0.2998652756214142, "learning_rate": 6.312984631101667e-05, "loss": 0.0683, "step": 26680 }, { "action_loss": 0.005384727846831083, "epoch": 23.992805755395683, "step": 26680 }, { "epoch": 23.992805755395683, "step": 26680, "torque_loss": 0.1437460333108902 }, { "epoch": 24.00179856115108, "grad_norm": 0.3147631585597992, "learning_rate": 6.310325361280297e-05, "loss": 0.07, "step": 26690 }, { "action_loss": 0.0026670873630791903, "epoch": 24.00179856115108, "step": 26690 }, { "epoch": 24.00179856115108, "step": 26690, "torque_loss": 0.10579761117696762 }, { "epoch": 24.010791366906474, "grad_norm": 0.24267791211605072, "learning_rate": 6.30766569341668e-05, "loss": 0.0657, "step": 26700 }, { "action_loss": 0.0026218879502266645, "epoch": 24.010791366906474, "step": 26700 }, { "epoch": 24.010791366906474, "step": 26700, "torque_loss": 0.06265757232904434 }, { "epoch": 24.019784172661872, "grad_norm": 0.29593518376350403, "learning_rate": 6.305005628318753e-05, "loss": 0.0769, "step": 26710 }, { "action_loss": 0.004068875219672918, "epoch": 24.019784172661872, "step": 26710 }, { "epoch": 24.019784172661872, "step": 26710, "torque_loss": 0.11499828100204468 }, { "epoch": 24.028776978417266, "grad_norm": 0.29105401039123535, "learning_rate": 6.302345166794572e-05, "loss": 0.0665, "step": 26720 }, { "action_loss": 0.008380607701838017, "epoch": 24.028776978417266, "step": 26720 }, { "epoch": 24.028776978417266, "step": 26720, "torque_loss": 0.1252802461385727 }, { "epoch": 24.037769784172664, "grad_norm": 0.2534368336200714, "learning_rate": 6.299684309652316e-05, "loss": 0.0791, "step": 26730 }, { "action_loss": 0.002474637934938073, "epoch": 24.037769784172664, "step": 26730 }, { "epoch": 24.037769784172664, "step": 26730, "torque_loss": 0.07156895846128464 }, { "epoch": 24.046762589928058, "grad_norm": 0.34146761894226074, "learning_rate": 6.297023057700283e-05, "loss": 0.0627, "step": 26740 }, { "action_loss": 0.020987212657928467, "epoch": 24.046762589928058, "step": 26740 }, { "epoch": 24.046762589928058, "step": 26740, "torque_loss": 0.16934700310230255 }, { "epoch": 24.055755395683452, "grad_norm": 0.3015943765640259, "learning_rate": 6.294361411746891e-05, "loss": 0.0751, "step": 26750 }, { "action_loss": 0.005610266700387001, "epoch": 24.055755395683452, "step": 26750 }, { "epoch": 24.055755395683452, "step": 26750, "torque_loss": 0.11357516050338745 }, { "epoch": 24.06474820143885, "grad_norm": 0.267545610666275, "learning_rate": 6.291699372600677e-05, "loss": 0.0758, "step": 26760 }, { "action_loss": 0.004665960557758808, "epoch": 24.06474820143885, "step": 26760 }, { "epoch": 24.06474820143885, "step": 26760, "torque_loss": 0.06791406869888306 }, { "epoch": 24.073741007194243, "grad_norm": 0.2944679856300354, "learning_rate": 6.2890369410703e-05, "loss": 0.0725, "step": 26770 }, { "action_loss": 0.0034206320997327566, "epoch": 24.073741007194243, "step": 26770 }, { "epoch": 24.073741007194243, "step": 26770, "torque_loss": 0.085554338991642 }, { "epoch": 24.08273381294964, "grad_norm": 0.28179752826690674, "learning_rate": 6.286374117964534e-05, "loss": 0.0749, "step": 26780 }, { "action_loss": 0.003168449504300952, "epoch": 24.08273381294964, "step": 26780 }, { "epoch": 24.08273381294964, "step": 26780, "torque_loss": 0.07344076782464981 }, { "epoch": 24.091726618705035, "grad_norm": 0.2827184796333313, "learning_rate": 6.283710904092277e-05, "loss": 0.0647, "step": 26790 }, { "action_loss": 0.00860639289021492, "epoch": 24.091726618705035, "step": 26790 }, { "epoch": 24.091726618705035, "step": 26790, "torque_loss": 0.13875575363636017 }, { "epoch": 24.100719424460433, "grad_norm": 0.3090909421443939, "learning_rate": 6.281047300262542e-05, "loss": 0.072, "step": 26800 }, { "action_loss": 0.004283765330910683, "epoch": 24.100719424460433, "step": 26800 }, { "epoch": 24.100719424460433, "step": 26800, "torque_loss": 0.10598239302635193 }, { "epoch": 24.109712230215827, "grad_norm": 0.33721300959587097, "learning_rate": 6.278383307284461e-05, "loss": 0.0806, "step": 26810 }, { "action_loss": 0.005407949443906546, "epoch": 24.109712230215827, "step": 26810 }, { "epoch": 24.109712230215827, "step": 26810, "torque_loss": 0.10348234325647354 }, { "epoch": 24.118705035971225, "grad_norm": 0.25429680943489075, "learning_rate": 6.275718925967284e-05, "loss": 0.0751, "step": 26820 }, { "action_loss": 0.011455663479864597, "epoch": 24.118705035971225, "step": 26820 }, { "epoch": 24.118705035971225, "step": 26820, "torque_loss": 0.15407909452915192 }, { "epoch": 24.12769784172662, "grad_norm": 0.27183812856674194, "learning_rate": 6.273054157120382e-05, "loss": 0.0764, "step": 26830 }, { "action_loss": 0.004144706297665834, "epoch": 24.12769784172662, "step": 26830 }, { "epoch": 24.12769784172662, "step": 26830, "torque_loss": 0.10068926960229874 }, { "epoch": 24.136690647482013, "grad_norm": 0.3038117587566376, "learning_rate": 6.270389001553238e-05, "loss": 0.0801, "step": 26840 }, { "action_loss": 0.002991418121382594, "epoch": 24.136690647482013, "step": 26840 }, { "epoch": 24.136690647482013, "step": 26840, "torque_loss": 0.1053893193602562 }, { "epoch": 24.14568345323741, "grad_norm": 0.3057265281677246, "learning_rate": 6.26772346007546e-05, "loss": 0.0828, "step": 26850 }, { "action_loss": 0.008490507490932941, "epoch": 24.14568345323741, "step": 26850 }, { "epoch": 24.14568345323741, "step": 26850, "torque_loss": 0.172764852643013 }, { "epoch": 24.154676258992804, "grad_norm": 0.28538084030151367, "learning_rate": 6.265057533496767e-05, "loss": 0.08, "step": 26860 }, { "action_loss": 0.007043296005576849, "epoch": 24.154676258992804, "step": 26860 }, { "epoch": 24.154676258992804, "step": 26860, "torque_loss": 0.09563863277435303 }, { "epoch": 24.163669064748202, "grad_norm": 0.32937222719192505, "learning_rate": 6.262391222626997e-05, "loss": 0.0822, "step": 26870 }, { "action_loss": 0.004028202034533024, "epoch": 24.163669064748202, "step": 26870 }, { "epoch": 24.163669064748202, "step": 26870, "torque_loss": 0.08658572286367416 }, { "epoch": 24.172661870503596, "grad_norm": 0.2800627648830414, "learning_rate": 6.259724528276106e-05, "loss": 0.0811, "step": 26880 }, { "action_loss": 0.009005970321595669, "epoch": 24.172661870503596, "step": 26880 }, { "epoch": 24.172661870503596, "step": 26880, "torque_loss": 0.09605014324188232 }, { "epoch": 24.181654676258994, "grad_norm": 0.44975587725639343, "learning_rate": 6.257057451254162e-05, "loss": 0.0799, "step": 26890 }, { "action_loss": 0.006717374082654715, "epoch": 24.181654676258994, "step": 26890 }, { "epoch": 24.181654676258994, "step": 26890, "torque_loss": 0.12283096462488174 }, { "epoch": 24.190647482014388, "grad_norm": 0.2683819532394409, "learning_rate": 6.254389992371357e-05, "loss": 0.081, "step": 26900 }, { "action_loss": 0.02118665911257267, "epoch": 24.190647482014388, "step": 26900 }, { "epoch": 24.190647482014388, "step": 26900, "torque_loss": 0.16471968591213226 }, { "epoch": 24.199640287769785, "grad_norm": 0.23618605732917786, "learning_rate": 6.25172215243799e-05, "loss": 0.0814, "step": 26910 }, { "action_loss": 0.009287308901548386, "epoch": 24.199640287769785, "step": 26910 }, { "epoch": 24.199640287769785, "step": 26910, "torque_loss": 0.10479442030191422 }, { "epoch": 24.20863309352518, "grad_norm": 0.22745607793331146, "learning_rate": 6.249053932264486e-05, "loss": 0.0718, "step": 26920 }, { "action_loss": 0.026683298870921135, "epoch": 24.20863309352518, "step": 26920 }, { "epoch": 24.20863309352518, "step": 26920, "torque_loss": 0.15204381942749023 }, { "epoch": 24.217625899280577, "grad_norm": 0.25210142135620117, "learning_rate": 6.246385332661376e-05, "loss": 0.0749, "step": 26930 }, { "action_loss": 0.025570325553417206, "epoch": 24.217625899280577, "step": 26930 }, { "epoch": 24.217625899280577, "step": 26930, "torque_loss": 0.12623560428619385 }, { "epoch": 24.22661870503597, "grad_norm": 0.3630076050758362, "learning_rate": 6.24371635443931e-05, "loss": 0.0809, "step": 26940 }, { "action_loss": 0.002235495252534747, "epoch": 24.22661870503597, "step": 26940 }, { "epoch": 24.22661870503597, "step": 26940, "torque_loss": 0.09852305799722672 }, { "epoch": 24.235611510791365, "grad_norm": 0.4208858609199524, "learning_rate": 6.241046998409054e-05, "loss": 0.0575, "step": 26950 }, { "action_loss": 0.0035706174094229937, "epoch": 24.235611510791365, "step": 26950 }, { "epoch": 24.235611510791365, "step": 26950, "torque_loss": 0.10246739536523819 }, { "epoch": 24.244604316546763, "grad_norm": 0.26983702182769775, "learning_rate": 6.238377265381489e-05, "loss": 0.0728, "step": 26960 }, { "action_loss": 0.004557773005217314, "epoch": 24.244604316546763, "step": 26960 }, { "epoch": 24.244604316546763, "step": 26960, "torque_loss": 0.13759446144104004 }, { "epoch": 24.253597122302157, "grad_norm": 0.3816186785697937, "learning_rate": 6.235707156167607e-05, "loss": 0.093, "step": 26970 }, { "action_loss": 0.011775177903473377, "epoch": 24.253597122302157, "step": 26970 }, { "epoch": 24.253597122302157, "step": 26970, "torque_loss": 0.13388574123382568 }, { "epoch": 24.262589928057555, "grad_norm": 0.2970850169658661, "learning_rate": 6.233036671578519e-05, "loss": 0.086, "step": 26980 }, { "action_loss": 0.02900972217321396, "epoch": 24.262589928057555, "step": 26980 }, { "epoch": 24.262589928057555, "step": 26980, "torque_loss": 0.16854362189769745 }, { "epoch": 24.27158273381295, "grad_norm": 0.32983797788619995, "learning_rate": 6.230365812425445e-05, "loss": 0.0765, "step": 26990 }, { "action_loss": 0.02481541782617569, "epoch": 24.27158273381295, "step": 26990 }, { "epoch": 24.27158273381295, "step": 26990, "torque_loss": 0.1606644243001938 }, { "epoch": 24.280575539568346, "grad_norm": 0.2985406219959259, "learning_rate": 6.227694579519724e-05, "loss": 0.0776, "step": 27000 }, { "action_loss": 0.014201290905475616, "epoch": 24.280575539568346, "step": 27000 }, { "epoch": 24.280575539568346, "step": 27000, "torque_loss": 0.1847439557313919 }, { "epoch": 24.28956834532374, "grad_norm": 0.2857685983181, "learning_rate": 6.225022973672805e-05, "loss": 0.0764, "step": 27010 }, { "action_loss": 0.0397278368473053, "epoch": 24.28956834532374, "step": 27010 }, { "epoch": 24.28956834532374, "step": 27010, "torque_loss": 0.22521202266216278 }, { "epoch": 24.298561151079138, "grad_norm": 0.3432311713695526, "learning_rate": 6.222350995696253e-05, "loss": 0.0931, "step": 27020 }, { "action_loss": 0.010296802967786789, "epoch": 24.298561151079138, "step": 27020 }, { "epoch": 24.298561151079138, "step": 27020, "torque_loss": 0.13419127464294434 }, { "epoch": 24.307553956834532, "grad_norm": 0.3364028334617615, "learning_rate": 6.21967864640174e-05, "loss": 0.0669, "step": 27030 }, { "action_loss": 0.003346335142850876, "epoch": 24.307553956834532, "step": 27030 }, { "epoch": 24.307553956834532, "step": 27030, "torque_loss": 0.09209132194519043 }, { "epoch": 24.31654676258993, "grad_norm": 0.3471417725086212, "learning_rate": 6.217005926601059e-05, "loss": 0.067, "step": 27040 }, { "action_loss": 0.018086476251482964, "epoch": 24.31654676258993, "step": 27040 }, { "epoch": 24.31654676258993, "step": 27040, "torque_loss": 0.11558441072702408 }, { "epoch": 24.325539568345324, "grad_norm": 0.3371846675872803, "learning_rate": 6.214332837106111e-05, "loss": 0.0696, "step": 27050 }, { "action_loss": 0.0035611160565167665, "epoch": 24.325539568345324, "step": 27050 }, { "epoch": 24.325539568345324, "step": 27050, "torque_loss": 0.10979608446359634 }, { "epoch": 24.334532374100718, "grad_norm": 0.30694979429244995, "learning_rate": 6.21165937872891e-05, "loss": 0.0717, "step": 27060 }, { "action_loss": 0.02407168783247471, "epoch": 24.334532374100718, "step": 27060 }, { "epoch": 24.334532374100718, "step": 27060, "torque_loss": 0.1930418759584427 }, { "epoch": 24.343525179856115, "grad_norm": 0.3565654456615448, "learning_rate": 6.208985552281582e-05, "loss": 0.0813, "step": 27070 }, { "action_loss": 0.008137623779475689, "epoch": 24.343525179856115, "step": 27070 }, { "epoch": 24.343525179856115, "step": 27070, "torque_loss": 0.12260828167200089 }, { "epoch": 24.35251798561151, "grad_norm": 0.3842250406742096, "learning_rate": 6.206311358576364e-05, "loss": 0.0874, "step": 27080 }, { "action_loss": 0.005282718222588301, "epoch": 24.35251798561151, "step": 27080 }, { "epoch": 24.35251798561151, "step": 27080, "torque_loss": 0.13196630775928497 }, { "epoch": 24.361510791366907, "grad_norm": 0.36640816926956177, "learning_rate": 6.203636798425608e-05, "loss": 0.0754, "step": 27090 }, { "action_loss": 0.010446584783494473, "epoch": 24.361510791366907, "step": 27090 }, { "epoch": 24.361510791366907, "step": 27090, "torque_loss": 0.15592508018016815 }, { "epoch": 24.3705035971223, "grad_norm": 0.3011930584907532, "learning_rate": 6.20096187264177e-05, "loss": 0.0771, "step": 27100 }, { "action_loss": 0.01034233346581459, "epoch": 24.3705035971223, "step": 27100 }, { "epoch": 24.3705035971223, "step": 27100, "torque_loss": 0.1555773764848709 }, { "epoch": 24.3794964028777, "grad_norm": 0.25040990114212036, "learning_rate": 6.198286582037425e-05, "loss": 0.0773, "step": 27110 }, { "action_loss": 0.010205419734120369, "epoch": 24.3794964028777, "step": 27110 }, { "epoch": 24.3794964028777, "step": 27110, "torque_loss": 0.14172504842281342 }, { "epoch": 24.388489208633093, "grad_norm": 0.341589093208313, "learning_rate": 6.195610927425256e-05, "loss": 0.0825, "step": 27120 }, { "action_loss": 0.00395343778654933, "epoch": 24.388489208633093, "step": 27120 }, { "epoch": 24.388489208633093, "step": 27120, "torque_loss": 0.07714266330003738 }, { "epoch": 24.39748201438849, "grad_norm": 0.3001756966114044, "learning_rate": 6.192934909618056e-05, "loss": 0.0626, "step": 27130 }, { "action_loss": 0.004879168700426817, "epoch": 24.39748201438849, "step": 27130 }, { "epoch": 24.39748201438849, "step": 27130, "torque_loss": 0.07726173847913742 }, { "epoch": 24.406474820143885, "grad_norm": 0.2729642391204834, "learning_rate": 6.190258529428728e-05, "loss": 0.07, "step": 27140 }, { "action_loss": 0.0846315398812294, "epoch": 24.406474820143885, "step": 27140 }, { "epoch": 24.406474820143885, "step": 27140, "torque_loss": 0.16961844265460968 }, { "epoch": 24.415467625899282, "grad_norm": 0.26983240246772766, "learning_rate": 6.187581787670285e-05, "loss": 0.0849, "step": 27150 }, { "action_loss": 0.005592163186520338, "epoch": 24.415467625899282, "step": 27150 }, { "epoch": 24.415467625899282, "step": 27150, "torque_loss": 0.09213137626647949 }, { "epoch": 24.424460431654676, "grad_norm": 0.3416697084903717, "learning_rate": 6.184904685155852e-05, "loss": 0.0744, "step": 27160 }, { "action_loss": 0.012467111460864544, "epoch": 24.424460431654676, "step": 27160 }, { "epoch": 24.424460431654676, "step": 27160, "torque_loss": 0.15320426225662231 }, { "epoch": 24.43345323741007, "grad_norm": 0.3331722915172577, "learning_rate": 6.18222722269866e-05, "loss": 0.0841, "step": 27170 }, { "action_loss": 0.005306856706738472, "epoch": 24.43345323741007, "step": 27170 }, { "epoch": 24.43345323741007, "step": 27170, "torque_loss": 0.14370407164096832 }, { "epoch": 24.442446043165468, "grad_norm": 0.26792284846305847, "learning_rate": 6.179549401112053e-05, "loss": 0.0696, "step": 27180 }, { "action_loss": 0.0054575433023273945, "epoch": 24.442446043165468, "step": 27180 }, { "epoch": 24.442446043165468, "step": 27180, "torque_loss": 0.09840735048055649 }, { "epoch": 24.451438848920862, "grad_norm": 0.3067781329154968, "learning_rate": 6.176871221209482e-05, "loss": 0.0785, "step": 27190 }, { "action_loss": 0.010338949970901012, "epoch": 24.451438848920862, "step": 27190 }, { "epoch": 24.451438848920862, "step": 27190, "torque_loss": 0.12666535377502441 }, { "epoch": 24.46043165467626, "grad_norm": 0.2814054489135742, "learning_rate": 6.174192683804508e-05, "loss": 0.0717, "step": 27200 }, { "action_loss": 0.007696919143199921, "epoch": 24.46043165467626, "step": 27200 }, { "epoch": 24.46043165467626, "step": 27200, "torque_loss": 0.14552009105682373 }, { "epoch": 24.469424460431654, "grad_norm": 0.22309377789497375, "learning_rate": 6.1715137897108e-05, "loss": 0.073, "step": 27210 }, { "action_loss": 0.0026566777378320694, "epoch": 24.469424460431654, "step": 27210 }, { "epoch": 24.469424460431654, "step": 27210, "torque_loss": 0.07229021936655045 }, { "epoch": 24.47841726618705, "grad_norm": 0.2713739275932312, "learning_rate": 6.168834539742134e-05, "loss": 0.0764, "step": 27220 }, { "action_loss": 0.00963549967855215, "epoch": 24.47841726618705, "step": 27220 }, { "epoch": 24.47841726618705, "step": 27220, "torque_loss": 0.1683695763349533 }, { "epoch": 24.487410071942445, "grad_norm": 0.3941788077354431, "learning_rate": 6.166154934712397e-05, "loss": 0.0889, "step": 27230 }, { "action_loss": 0.0030643779318779707, "epoch": 24.487410071942445, "step": 27230 }, { "epoch": 24.487410071942445, "step": 27230, "torque_loss": 0.08634406328201294 }, { "epoch": 24.496402877697843, "grad_norm": 0.34493616223335266, "learning_rate": 6.163474975435581e-05, "loss": 0.0707, "step": 27240 }, { "action_loss": 0.014574282802641392, "epoch": 24.496402877697843, "step": 27240 }, { "epoch": 24.496402877697843, "step": 27240, "torque_loss": 0.1553828865289688 }, { "epoch": 24.505395683453237, "grad_norm": 0.3209536075592041, "learning_rate": 6.160794662725787e-05, "loss": 0.0896, "step": 27250 }, { "action_loss": 0.0034149838611483574, "epoch": 24.505395683453237, "step": 27250 }, { "epoch": 24.505395683453237, "step": 27250, "torque_loss": 0.08966875821352005 }, { "epoch": 24.514388489208635, "grad_norm": 0.35764339566230774, "learning_rate": 6.158113997397222e-05, "loss": 0.0785, "step": 27260 }, { "action_loss": 0.008582723326981068, "epoch": 24.514388489208635, "step": 27260 }, { "epoch": 24.514388489208635, "step": 27260, "torque_loss": 0.08078143745660782 }, { "epoch": 24.52338129496403, "grad_norm": 0.29764848947525024, "learning_rate": 6.155432980264205e-05, "loss": 0.072, "step": 27270 }, { "action_loss": 0.003780227852985263, "epoch": 24.52338129496403, "step": 27270 }, { "epoch": 24.52338129496403, "step": 27270, "torque_loss": 0.07834644615650177 }, { "epoch": 24.532374100719423, "grad_norm": 0.2721945643424988, "learning_rate": 6.152751612141156e-05, "loss": 0.0769, "step": 27280 }, { "action_loss": 0.0037612691521644592, "epoch": 24.532374100719423, "step": 27280 }, { "epoch": 24.532374100719423, "step": 27280, "torque_loss": 0.1077488362789154 }, { "epoch": 24.54136690647482, "grad_norm": 0.3652364909648895, "learning_rate": 6.150069893842602e-05, "loss": 0.0773, "step": 27290 }, { "action_loss": 0.0038635460659861565, "epoch": 24.54136690647482, "step": 27290 }, { "epoch": 24.54136690647482, "step": 27290, "torque_loss": 0.1196548119187355 }, { "epoch": 24.550359712230215, "grad_norm": 0.29482898116111755, "learning_rate": 6.147387826183182e-05, "loss": 0.0665, "step": 27300 }, { "action_loss": 0.02578875422477722, "epoch": 24.550359712230215, "step": 27300 }, { "epoch": 24.550359712230215, "step": 27300, "torque_loss": 0.25399523973464966 }, { "epoch": 24.559352517985612, "grad_norm": 0.29741477966308594, "learning_rate": 6.144705409977635e-05, "loss": 0.0912, "step": 27310 }, { "action_loss": 0.005284557584673166, "epoch": 24.559352517985612, "step": 27310 }, { "epoch": 24.559352517985612, "step": 27310, "torque_loss": 0.10202157497406006 }, { "epoch": 24.568345323741006, "grad_norm": 0.2680540680885315, "learning_rate": 6.142022646040808e-05, "loss": 0.0713, "step": 27320 }, { "action_loss": 0.00982692837715149, "epoch": 24.568345323741006, "step": 27320 }, { "epoch": 24.568345323741006, "step": 27320, "torque_loss": 0.15856407582759857 }, { "epoch": 24.577338129496404, "grad_norm": 0.3150714337825775, "learning_rate": 6.139339535187653e-05, "loss": 0.0782, "step": 27330 }, { "action_loss": 0.004915532190352678, "epoch": 24.577338129496404, "step": 27330 }, { "epoch": 24.577338129496404, "step": 27330, "torque_loss": 0.09836580604314804 }, { "epoch": 24.586330935251798, "grad_norm": 0.2794588506221771, "learning_rate": 6.136656078233232e-05, "loss": 0.0638, "step": 27340 }, { "action_loss": 0.0019727095495909452, "epoch": 24.586330935251798, "step": 27340 }, { "epoch": 24.586330935251798, "step": 27340, "torque_loss": 0.07617367804050446 }, { "epoch": 24.595323741007196, "grad_norm": 0.2552453875541687, "learning_rate": 6.133972275992707e-05, "loss": 0.0834, "step": 27350 }, { "action_loss": 0.0024697675835341215, "epoch": 24.595323741007196, "step": 27350 }, { "epoch": 24.595323741007196, "step": 27350, "torque_loss": 0.06324214488267899 }, { "epoch": 24.60431654676259, "grad_norm": 0.3143237829208374, "learning_rate": 6.131288129281342e-05, "loss": 0.0817, "step": 27360 }, { "action_loss": 0.010652471333742142, "epoch": 24.60431654676259, "step": 27360 }, { "epoch": 24.60431654676259, "step": 27360, "torque_loss": 0.15017586946487427 }, { "epoch": 24.613309352517987, "grad_norm": 0.3488878607749939, "learning_rate": 6.128603638914516e-05, "loss": 0.0807, "step": 27370 }, { "action_loss": 0.005508484784513712, "epoch": 24.613309352517987, "step": 27370 }, { "epoch": 24.613309352517987, "step": 27370, "torque_loss": 0.09663749486207962 }, { "epoch": 24.62230215827338, "grad_norm": 0.40200379490852356, "learning_rate": 6.125918805707704e-05, "loss": 0.0699, "step": 27380 }, { "action_loss": 0.010169568471610546, "epoch": 24.62230215827338, "step": 27380 }, { "epoch": 24.62230215827338, "step": 27380, "torque_loss": 0.16652800142765045 }, { "epoch": 24.631294964028775, "grad_norm": 0.3498310446739197, "learning_rate": 6.123233630476485e-05, "loss": 0.0829, "step": 27390 }, { "action_loss": 0.002023273380473256, "epoch": 24.631294964028775, "step": 27390 }, { "epoch": 24.631294964028775, "step": 27390, "torque_loss": 0.049382541328668594 }, { "epoch": 24.640287769784173, "grad_norm": 0.29972490668296814, "learning_rate": 6.120548114036547e-05, "loss": 0.0768, "step": 27400 }, { "action_loss": 0.003884265199303627, "epoch": 24.640287769784173, "step": 27400 }, { "epoch": 24.640287769784173, "step": 27400, "torque_loss": 0.13530127704143524 }, { "epoch": 24.649280575539567, "grad_norm": 0.34429869055747986, "learning_rate": 6.117862257203679e-05, "loss": 0.0768, "step": 27410 }, { "action_loss": 0.005920755211263895, "epoch": 24.649280575539567, "step": 27410 }, { "epoch": 24.649280575539567, "step": 27410, "torque_loss": 0.09201877564191818 }, { "epoch": 24.658273381294965, "grad_norm": 0.2817162573337555, "learning_rate": 6.115176060793771e-05, "loss": 0.0803, "step": 27420 }, { "action_loss": 0.0028562787920236588, "epoch": 24.658273381294965, "step": 27420 }, { "epoch": 24.658273381294965, "step": 27420, "torque_loss": 0.09074529260396957 }, { "epoch": 24.66726618705036, "grad_norm": 0.3620588481426239, "learning_rate": 6.112489525622822e-05, "loss": 0.0568, "step": 27430 }, { "action_loss": 0.007559534162282944, "epoch": 24.66726618705036, "step": 27430 }, { "epoch": 24.66726618705036, "step": 27430, "torque_loss": 0.10706611722707748 }, { "epoch": 24.676258992805757, "grad_norm": 0.33488166332244873, "learning_rate": 6.109802652506928e-05, "loss": 0.0757, "step": 27440 }, { "action_loss": 0.006069765891879797, "epoch": 24.676258992805757, "step": 27440 }, { "epoch": 24.676258992805757, "step": 27440, "torque_loss": 0.1433684378862381 }, { "epoch": 24.68525179856115, "grad_norm": 0.3340926766395569, "learning_rate": 6.107115442262291e-05, "loss": 0.0757, "step": 27450 }, { "action_loss": 0.004028854425996542, "epoch": 24.68525179856115, "step": 27450 }, { "epoch": 24.68525179856115, "step": 27450, "torque_loss": 0.12570995092391968 }, { "epoch": 24.694244604316548, "grad_norm": 0.3397285044193268, "learning_rate": 6.104427895705214e-05, "loss": 0.0796, "step": 27460 }, { "action_loss": 0.002971990266814828, "epoch": 24.694244604316548, "step": 27460 }, { "epoch": 24.694244604316548, "step": 27460, "torque_loss": 0.07889974117279053 }, { "epoch": 24.703237410071942, "grad_norm": 0.2325773537158966, "learning_rate": 6.101740013652103e-05, "loss": 0.0593, "step": 27470 }, { "action_loss": 0.0180402472615242, "epoch": 24.703237410071942, "step": 27470 }, { "epoch": 24.703237410071942, "step": 27470, "torque_loss": 0.19682329893112183 }, { "epoch": 24.71223021582734, "grad_norm": 0.37319594621658325, "learning_rate": 6.099051796919465e-05, "loss": 0.088, "step": 27480 }, { "action_loss": 0.05149612948298454, "epoch": 24.71223021582734, "step": 27480 }, { "epoch": 24.71223021582734, "step": 27480, "torque_loss": 0.16677792370319366 }, { "epoch": 24.721223021582734, "grad_norm": 0.26622888445854187, "learning_rate": 6.096363246323911e-05, "loss": 0.0763, "step": 27490 }, { "action_loss": 0.0034338608384132385, "epoch": 24.721223021582734, "step": 27490 }, { "epoch": 24.721223021582734, "step": 27490, "torque_loss": 0.11657798290252686 }, { "epoch": 24.730215827338128, "grad_norm": 0.29799264669418335, "learning_rate": 6.0936743626821504e-05, "loss": 0.0873, "step": 27500 }, { "action_loss": 0.0027399135287851095, "epoch": 24.730215827338128, "step": 27500 }, { "epoch": 24.730215827338128, "step": 27500, "torque_loss": 0.08242913335561752 }, { "epoch": 24.739208633093526, "grad_norm": 0.27072983980178833, "learning_rate": 6.090985146810996e-05, "loss": 0.0661, "step": 27510 }, { "action_loss": 0.003548014210537076, "epoch": 24.739208633093526, "step": 27510 }, { "epoch": 24.739208633093526, "step": 27510, "torque_loss": 0.11690545082092285 }, { "epoch": 24.74820143884892, "grad_norm": 0.2816256880760193, "learning_rate": 6.088295599527357e-05, "loss": 0.073, "step": 27520 }, { "action_loss": 0.006076745223253965, "epoch": 24.74820143884892, "step": 27520 }, { "epoch": 24.74820143884892, "step": 27520, "torque_loss": 0.13149555027484894 }, { "epoch": 24.757194244604317, "grad_norm": 0.3694593608379364, "learning_rate": 6.085605721648252e-05, "loss": 0.0745, "step": 27530 }, { "action_loss": 0.0185424592345953, "epoch": 24.757194244604317, "step": 27530 }, { "epoch": 24.757194244604317, "step": 27530, "torque_loss": 0.19289541244506836 }, { "epoch": 24.76618705035971, "grad_norm": 0.35430774092674255, "learning_rate": 6.082915513990792e-05, "loss": 0.0775, "step": 27540 }, { "action_loss": 0.008934193290770054, "epoch": 24.76618705035971, "step": 27540 }, { "epoch": 24.76618705035971, "step": 27540, "torque_loss": 0.12435919791460037 }, { "epoch": 24.77517985611511, "grad_norm": 0.21500816941261292, "learning_rate": 6.080224977372192e-05, "loss": 0.07, "step": 27550 }, { "action_loss": 0.016605401411652565, "epoch": 24.77517985611511, "step": 27550 }, { "epoch": 24.77517985611511, "step": 27550, "torque_loss": 0.14558662474155426 }, { "epoch": 24.784172661870503, "grad_norm": 0.3278011679649353, "learning_rate": 6.0775341126097666e-05, "loss": 0.0687, "step": 27560 }, { "action_loss": 0.006759332027286291, "epoch": 24.784172661870503, "step": 27560 }, { "epoch": 24.784172661870503, "step": 27560, "torque_loss": 0.10605736821889877 }, { "epoch": 24.7931654676259, "grad_norm": 0.3260844349861145, "learning_rate": 6.074842920520926e-05, "loss": 0.0819, "step": 27570 }, { "action_loss": 0.003681295784190297, "epoch": 24.7931654676259, "step": 27570 }, { "epoch": 24.7931654676259, "step": 27570, "torque_loss": 0.08866315335035324 }, { "epoch": 24.802158273381295, "grad_norm": 0.36683622002601624, "learning_rate": 6.072151401923186e-05, "loss": 0.0746, "step": 27580 }, { "action_loss": 0.005114368628710508, "epoch": 24.802158273381295, "step": 27580 }, { "epoch": 24.802158273381295, "step": 27580, "torque_loss": 0.07635187357664108 }, { "epoch": 24.81115107913669, "grad_norm": 0.33904722332954407, "learning_rate": 6.069459557634159e-05, "loss": 0.0831, "step": 27590 }, { "action_loss": 0.0024970388039946556, "epoch": 24.81115107913669, "step": 27590 }, { "epoch": 24.81115107913669, "step": 27590, "torque_loss": 0.1169714704155922 }, { "epoch": 24.820143884892087, "grad_norm": 0.40018409490585327, "learning_rate": 6.066767388471557e-05, "loss": 0.0756, "step": 27600 }, { "action_loss": 0.012202121317386627, "epoch": 24.820143884892087, "step": 27600 }, { "epoch": 24.820143884892087, "step": 27600, "torque_loss": 0.22128267586231232 }, { "epoch": 24.82913669064748, "grad_norm": 0.38264769315719604, "learning_rate": 6.064074895253188e-05, "loss": 0.082, "step": 27610 }, { "action_loss": 0.008019773289561272, "epoch": 24.82913669064748, "step": 27610 }, { "epoch": 24.82913669064748, "step": 27610, "torque_loss": 0.11299855262041092 }, { "epoch": 24.83812949640288, "grad_norm": 0.29631543159484863, "learning_rate": 6.061382078796961e-05, "loss": 0.0827, "step": 27620 }, { "action_loss": 0.007323878351598978, "epoch": 24.83812949640288, "step": 27620 }, { "epoch": 24.83812949640288, "step": 27620, "torque_loss": 0.11691182851791382 }, { "epoch": 24.847122302158272, "grad_norm": 0.3120098114013672, "learning_rate": 6.0586889399208814e-05, "loss": 0.0639, "step": 27630 }, { "action_loss": 0.004663525149226189, "epoch": 24.847122302158272, "step": 27630 }, { "epoch": 24.847122302158272, "step": 27630, "torque_loss": 0.12706820666790009 }, { "epoch": 24.85611510791367, "grad_norm": 0.2645380198955536, "learning_rate": 6.0559954794430565e-05, "loss": 0.0837, "step": 27640 }, { "action_loss": 0.03448093309998512, "epoch": 24.85611510791367, "step": 27640 }, { "epoch": 24.85611510791367, "step": 27640, "torque_loss": 0.18468670547008514 }, { "epoch": 24.865107913669064, "grad_norm": 0.3048035204410553, "learning_rate": 6.053301698181687e-05, "loss": 0.076, "step": 27650 }, { "action_loss": 0.0018859748961403966, "epoch": 24.865107913669064, "step": 27650 }, { "epoch": 24.865107913669064, "step": 27650, "torque_loss": 0.046243179589509964 }, { "epoch": 24.87410071942446, "grad_norm": 0.29584255814552307, "learning_rate": 6.0506075969550725e-05, "loss": 0.0711, "step": 27660 }, { "action_loss": 0.007830731570720673, "epoch": 24.87410071942446, "step": 27660 }, { "epoch": 24.87410071942446, "step": 27660, "torque_loss": 0.13900668919086456 }, { "epoch": 24.883093525179856, "grad_norm": 0.22423139214515686, "learning_rate": 6.047913176581609e-05, "loss": 0.0794, "step": 27670 }, { "action_loss": 0.003960310015827417, "epoch": 24.883093525179856, "step": 27670 }, { "epoch": 24.883093525179856, "step": 27670, "torque_loss": 0.09412642568349838 }, { "epoch": 24.892086330935253, "grad_norm": 0.2744292616844177, "learning_rate": 6.0452184378797904e-05, "loss": 0.072, "step": 27680 }, { "action_loss": 0.0063209086656570435, "epoch": 24.892086330935253, "step": 27680 }, { "epoch": 24.892086330935253, "step": 27680, "torque_loss": 0.10336335748434067 }, { "epoch": 24.901079136690647, "grad_norm": 0.32592567801475525, "learning_rate": 6.042523381668209e-05, "loss": 0.0687, "step": 27690 }, { "action_loss": 0.0033147090580314398, "epoch": 24.901079136690647, "step": 27690 }, { "epoch": 24.901079136690647, "step": 27690, "torque_loss": 0.09517528861761093 }, { "epoch": 24.91007194244604, "grad_norm": 0.33415016531944275, "learning_rate": 6.03982800876555e-05, "loss": 0.0666, "step": 27700 }, { "action_loss": 0.02003292180597782, "epoch": 24.91007194244604, "step": 27700 }, { "epoch": 24.91007194244604, "step": 27700, "torque_loss": 0.1423054188489914 }, { "epoch": 24.91906474820144, "grad_norm": 0.28040045499801636, "learning_rate": 6.0371323199905975e-05, "loss": 0.0824, "step": 27710 }, { "action_loss": 0.003209102898836136, "epoch": 24.91906474820144, "step": 27710 }, { "epoch": 24.91906474820144, "step": 27710, "torque_loss": 0.10223990678787231 }, { "epoch": 24.928057553956833, "grad_norm": 0.2580537796020508, "learning_rate": 6.03443631616223e-05, "loss": 0.0686, "step": 27720 }, { "action_loss": 0.003163563786074519, "epoch": 24.928057553956833, "step": 27720 }, { "epoch": 24.928057553956833, "step": 27720, "torque_loss": 0.07119854539632797 }, { "epoch": 24.93705035971223, "grad_norm": 0.30993062257766724, "learning_rate": 6.031739998099421e-05, "loss": 0.064, "step": 27730 }, { "action_loss": 0.011025776155292988, "epoch": 24.93705035971223, "step": 27730 }, { "epoch": 24.93705035971223, "step": 27730, "torque_loss": 0.14750516414642334 }, { "epoch": 24.946043165467625, "grad_norm": 0.33051225543022156, "learning_rate": 6.029043366621243e-05, "loss": 0.067, "step": 27740 }, { "action_loss": 0.003991697449237108, "epoch": 24.946043165467625, "step": 27740 }, { "epoch": 24.946043165467625, "step": 27740, "torque_loss": 0.10026019811630249 }, { "epoch": 24.955035971223023, "grad_norm": 0.31621450185775757, "learning_rate": 6.0263464225468615e-05, "loss": 0.0697, "step": 27750 }, { "action_loss": 0.0024104572366923094, "epoch": 24.955035971223023, "step": 27750 }, { "epoch": 24.955035971223023, "step": 27750, "torque_loss": 0.07787478715181351 }, { "epoch": 24.964028776978417, "grad_norm": 0.28891170024871826, "learning_rate": 6.023649166695534e-05, "loss": 0.0656, "step": 27760 }, { "action_loss": 0.008134870789945126, "epoch": 24.964028776978417, "step": 27760 }, { "epoch": 24.964028776978417, "step": 27760, "torque_loss": 0.11655046790838242 }, { "epoch": 24.973021582733814, "grad_norm": 0.23938758671283722, "learning_rate": 6.0209515998866186e-05, "loss": 0.0792, "step": 27770 }, { "action_loss": 0.002548736985772848, "epoch": 24.973021582733814, "step": 27770 }, { "epoch": 24.973021582733814, "step": 27770, "torque_loss": 0.061301395297050476 }, { "epoch": 24.98201438848921, "grad_norm": 0.30928370356559753, "learning_rate": 6.018253722939563e-05, "loss": 0.0755, "step": 27780 }, { "action_loss": 0.004474143963307142, "epoch": 24.98201438848921, "step": 27780 }, { "epoch": 24.98201438848921, "step": 27780, "torque_loss": 0.11202347278594971 }, { "epoch": 24.991007194244606, "grad_norm": 0.3032703399658203, "learning_rate": 6.015555536673914e-05, "loss": 0.0752, "step": 27790 }, { "action_loss": 0.003985452465713024, "epoch": 24.991007194244606, "step": 27790 }, { "epoch": 24.991007194244606, "step": 27790, "torque_loss": 0.10936745256185532 }, { "epoch": 25.0, "grad_norm": 0.3144039511680603, "learning_rate": 6.0128570419093054e-05, "loss": 0.0787, "step": 27800 }, { "action_loss": 0.004393977113068104, "epoch": 25.0, "step": 27800 }, { "epoch": 25.0, "step": 27800, "torque_loss": 0.09677859395742416 }, { "epoch": 25.008992805755394, "grad_norm": 0.30316704511642456, "learning_rate": 6.010158239465471e-05, "loss": 0.0609, "step": 27810 }, { "action_loss": 0.003966872114688158, "epoch": 25.008992805755394, "step": 27810 }, { "epoch": 25.008992805755394, "step": 27810, "torque_loss": 0.11157277971506119 }, { "epoch": 25.01798561151079, "grad_norm": 0.2521178424358368, "learning_rate": 6.007459130162235e-05, "loss": 0.0791, "step": 27820 }, { "action_loss": 0.010328344069421291, "epoch": 25.01798561151079, "step": 27820 }, { "epoch": 25.01798561151079, "step": 27820, "torque_loss": 0.15932652354240417 }, { "epoch": 25.026978417266186, "grad_norm": 0.2877529263496399, "learning_rate": 6.004759714819516e-05, "loss": 0.0723, "step": 27830 }, { "action_loss": 0.0019157013157382607, "epoch": 25.026978417266186, "step": 27830 }, { "epoch": 25.026978417266186, "step": 27830, "torque_loss": 0.0798426941037178 }, { "epoch": 25.035971223021583, "grad_norm": 0.29393717646598816, "learning_rate": 6.002059994257323e-05, "loss": 0.0887, "step": 27840 }, { "action_loss": 0.005333239678293467, "epoch": 25.035971223021583, "step": 27840 }, { "epoch": 25.035971223021583, "step": 27840, "torque_loss": 0.10076712816953659 }, { "epoch": 25.044964028776977, "grad_norm": 0.3401517868041992, "learning_rate": 5.999359969295764e-05, "loss": 0.077, "step": 27850 }, { "action_loss": 0.005655268672853708, "epoch": 25.044964028776977, "step": 27850 }, { "epoch": 25.044964028776977, "step": 27850, "torque_loss": 0.129129096865654 }, { "epoch": 25.053956834532375, "grad_norm": 0.23754332959651947, "learning_rate": 5.9966596407550314e-05, "loss": 0.0639, "step": 27860 }, { "action_loss": 0.018437756225466728, "epoch": 25.053956834532375, "step": 27860 }, { "epoch": 25.053956834532375, "step": 27860, "torque_loss": 0.14868579804897308 }, { "epoch": 25.06294964028777, "grad_norm": 0.3171374201774597, "learning_rate": 5.993959009455416e-05, "loss": 0.0778, "step": 27870 }, { "action_loss": 0.003759446321055293, "epoch": 25.06294964028777, "step": 27870 }, { "epoch": 25.06294964028777, "step": 27870, "torque_loss": 0.07713030278682709 }, { "epoch": 25.071942446043167, "grad_norm": 0.35398727655410767, "learning_rate": 5.991258076217298e-05, "loss": 0.0647, "step": 27880 }, { "action_loss": 0.015921354293823242, "epoch": 25.071942446043167, "step": 27880 }, { "epoch": 25.071942446043167, "step": 27880, "torque_loss": 0.16890831291675568 }, { "epoch": 25.08093525179856, "grad_norm": 0.29671338200569153, "learning_rate": 5.988556841861147e-05, "loss": 0.0768, "step": 27890 }, { "action_loss": 0.007224968168884516, "epoch": 25.08093525179856, "step": 27890 }, { "epoch": 25.08093525179856, "step": 27890, "torque_loss": 0.14993761479854584 }, { "epoch": 25.08992805755396, "grad_norm": 0.3369870185852051, "learning_rate": 5.985855307207531e-05, "loss": 0.0858, "step": 27900 }, { "action_loss": 0.0037733856588602066, "epoch": 25.08992805755396, "step": 27900 }, { "epoch": 25.08992805755396, "step": 27900, "torque_loss": 0.0691223219037056 }, { "epoch": 25.098920863309353, "grad_norm": 0.29035893082618713, "learning_rate": 5.9831534730771e-05, "loss": 0.0649, "step": 27910 }, { "action_loss": 0.005171618424355984, "epoch": 25.098920863309353, "step": 27910 }, { "epoch": 25.098920863309353, "step": 27910, "torque_loss": 0.11898982524871826 }, { "epoch": 25.107913669064747, "grad_norm": 0.21592944860458374, "learning_rate": 5.980451340290605e-05, "loss": 0.0749, "step": 27920 }, { "action_loss": 0.002492240397259593, "epoch": 25.107913669064747, "step": 27920 }, { "epoch": 25.107913669064747, "step": 27920, "torque_loss": 0.1047581359744072 }, { "epoch": 25.116906474820144, "grad_norm": 0.3200257122516632, "learning_rate": 5.97774890966888e-05, "loss": 0.0677, "step": 27930 }, { "action_loss": 0.004771375562995672, "epoch": 25.116906474820144, "step": 27930 }, { "epoch": 25.116906474820144, "step": 27930, "torque_loss": 0.10531709343194962 }, { "epoch": 25.12589928057554, "grad_norm": 0.2657764256000519, "learning_rate": 5.975046182032851e-05, "loss": 0.0764, "step": 27940 }, { "action_loss": 0.011435255408287048, "epoch": 25.12589928057554, "step": 27940 }, { "epoch": 25.12589928057554, "step": 27940, "torque_loss": 0.1262841522693634 }, { "epoch": 25.134892086330936, "grad_norm": 0.3018190562725067, "learning_rate": 5.972343158203537e-05, "loss": 0.0722, "step": 27950 }, { "action_loss": 0.011756010353565216, "epoch": 25.134892086330936, "step": 27950 }, { "epoch": 25.134892086330936, "step": 27950, "torque_loss": 0.1293884813785553 }, { "epoch": 25.14388489208633, "grad_norm": 0.28093576431274414, "learning_rate": 5.969639839002045e-05, "loss": 0.0692, "step": 27960 }, { "action_loss": 0.008735896088182926, "epoch": 25.14388489208633, "step": 27960 }, { "epoch": 25.14388489208633, "step": 27960, "torque_loss": 0.14884281158447266 }, { "epoch": 25.152877697841728, "grad_norm": 0.22188976407051086, "learning_rate": 5.966936225249572e-05, "loss": 0.0729, "step": 27970 }, { "action_loss": 0.012482043355703354, "epoch": 25.152877697841728, "step": 27970 }, { "epoch": 25.152877697841728, "step": 27970, "torque_loss": 0.12156454473733902 }, { "epoch": 25.16187050359712, "grad_norm": 0.3062056303024292, "learning_rate": 5.9642323177674044e-05, "loss": 0.07, "step": 27980 }, { "action_loss": 0.001786458888091147, "epoch": 25.16187050359712, "step": 27980 }, { "epoch": 25.16187050359712, "step": 27980, "torque_loss": 0.06476340442895889 }, { "epoch": 25.17086330935252, "grad_norm": 0.3093818426132202, "learning_rate": 5.9615281173769154e-05, "loss": 0.0669, "step": 27990 }, { "action_loss": 0.014588743448257446, "epoch": 25.17086330935252, "step": 27990 }, { "epoch": 25.17086330935252, "step": 27990, "torque_loss": 0.15759311616420746 }, { "epoch": 25.179856115107913, "grad_norm": 0.255343496799469, "learning_rate": 5.958823624899574e-05, "loss": 0.0644, "step": 28000 }, { "action_loss": 0.004764961544424295, "epoch": 25.179856115107913, "step": 28000 }, { "epoch": 25.179856115107913, "step": 28000, "torque_loss": 0.09370499849319458 }, { "epoch": 25.18884892086331, "grad_norm": 0.43052297830581665, "learning_rate": 5.956118841156933e-05, "loss": 0.071, "step": 28010 }, { "action_loss": 0.002680281875655055, "epoch": 25.18884892086331, "step": 28010 }, { "epoch": 25.18884892086331, "step": 28010, "torque_loss": 0.10424675792455673 }, { "epoch": 25.197841726618705, "grad_norm": 0.1607753336429596, "learning_rate": 5.953413766970631e-05, "loss": 0.0729, "step": 28020 }, { "action_loss": 0.005450624972581863, "epoch": 25.197841726618705, "step": 28020 }, { "epoch": 25.197841726618705, "step": 28020, "torque_loss": 0.09581393748521805 }, { "epoch": 25.2068345323741, "grad_norm": 0.33881622552871704, "learning_rate": 5.9507084031624e-05, "loss": 0.0773, "step": 28030 }, { "action_loss": 0.008111945353448391, "epoch": 25.2068345323741, "step": 28030 }, { "epoch": 25.2068345323741, "step": 28030, "torque_loss": 0.10606000572443008 }, { "epoch": 25.215827338129497, "grad_norm": 0.34942665696144104, "learning_rate": 5.948002750554058e-05, "loss": 0.0791, "step": 28040 }, { "action_loss": 0.008905048482120037, "epoch": 25.215827338129497, "step": 28040 }, { "epoch": 25.215827338129497, "step": 28040, "torque_loss": 0.13550449907779694 }, { "epoch": 25.22482014388489, "grad_norm": 0.27807503938674927, "learning_rate": 5.9452968099675124e-05, "loss": 0.0701, "step": 28050 }, { "action_loss": 0.004557817708700895, "epoch": 25.22482014388489, "step": 28050 }, { "epoch": 25.22482014388489, "step": 28050, "torque_loss": 0.1381525993347168 }, { "epoch": 25.23381294964029, "grad_norm": 0.2877275347709656, "learning_rate": 5.9425905822247527e-05, "loss": 0.0707, "step": 28060 }, { "action_loss": 0.006210202816873789, "epoch": 25.23381294964029, "step": 28060 }, { "epoch": 25.23381294964029, "step": 28060, "torque_loss": 0.10798437148332596 }, { "epoch": 25.242805755395683, "grad_norm": 0.30219683051109314, "learning_rate": 5.939884068147864e-05, "loss": 0.0756, "step": 28070 }, { "action_loss": 0.0023428432177752256, "epoch": 25.242805755395683, "step": 28070 }, { "epoch": 25.242805755395683, "step": 28070, "torque_loss": 0.06755012273788452 }, { "epoch": 25.25179856115108, "grad_norm": 0.3148571252822876, "learning_rate": 5.937177268559011e-05, "loss": 0.0618, "step": 28080 }, { "action_loss": 0.004747506696730852, "epoch": 25.25179856115108, "step": 28080 }, { "epoch": 25.25179856115108, "step": 28080, "torque_loss": 0.08917620033025742 }, { "epoch": 25.260791366906474, "grad_norm": 0.32061153650283813, "learning_rate": 5.934470184280448e-05, "loss": 0.0658, "step": 28090 }, { "action_loss": 0.0033518618438392878, "epoch": 25.260791366906474, "step": 28090 }, { "epoch": 25.260791366906474, "step": 28090, "torque_loss": 0.12082032114267349 }, { "epoch": 25.269784172661872, "grad_norm": 0.35799792408943176, "learning_rate": 5.931762816134516e-05, "loss": 0.0714, "step": 28100 }, { "action_loss": 0.004560097586363554, "epoch": 25.269784172661872, "step": 28100 }, { "epoch": 25.269784172661872, "step": 28100, "torque_loss": 0.12318118661642075 }, { "epoch": 25.278776978417266, "grad_norm": 0.32174983620643616, "learning_rate": 5.9290551649436434e-05, "loss": 0.0707, "step": 28110 }, { "action_loss": 0.004459346178919077, "epoch": 25.278776978417266, "step": 28110 }, { "epoch": 25.278776978417266, "step": 28110, "torque_loss": 0.08168026804924011 }, { "epoch": 25.28776978417266, "grad_norm": 0.3794429302215576, "learning_rate": 5.9263472315303416e-05, "loss": 0.0822, "step": 28120 }, { "action_loss": 0.004194691777229309, "epoch": 25.28776978417266, "step": 28120 }, { "epoch": 25.28776978417266, "step": 28120, "torque_loss": 0.08485228568315506 }, { "epoch": 25.296762589928058, "grad_norm": 0.3996596038341522, "learning_rate": 5.9236390167172096e-05, "loss": 0.0684, "step": 28130 }, { "action_loss": 0.01111880224198103, "epoch": 25.296762589928058, "step": 28130 }, { "epoch": 25.296762589928058, "step": 28130, "torque_loss": 0.16695408523082733 }, { "epoch": 25.305755395683452, "grad_norm": 0.33353397250175476, "learning_rate": 5.920930521326932e-05, "loss": 0.0828, "step": 28140 }, { "action_loss": 0.017824195325374603, "epoch": 25.305755395683452, "step": 28140 }, { "epoch": 25.305755395683452, "step": 28140, "torque_loss": 0.1953369379043579 }, { "epoch": 25.31474820143885, "grad_norm": 0.2749834954738617, "learning_rate": 5.918221746182276e-05, "loss": 0.0739, "step": 28150 }, { "action_loss": 0.005200426559895277, "epoch": 25.31474820143885, "step": 28150 }, { "epoch": 25.31474820143885, "step": 28150, "torque_loss": 0.11185953766107559 }, { "epoch": 25.323741007194243, "grad_norm": 0.2687603533267975, "learning_rate": 5.9155126921061e-05, "loss": 0.0692, "step": 28160 }, { "action_loss": 0.003219078993424773, "epoch": 25.323741007194243, "step": 28160 }, { "epoch": 25.323741007194243, "step": 28160, "torque_loss": 0.07903183996677399 }, { "epoch": 25.33273381294964, "grad_norm": 0.4037219285964966, "learning_rate": 5.91280335992134e-05, "loss": 0.0666, "step": 28170 }, { "action_loss": 0.0030142858158797026, "epoch": 25.33273381294964, "step": 28170 }, { "epoch": 25.33273381294964, "step": 28170, "torque_loss": 0.08054842799901962 }, { "epoch": 25.341726618705035, "grad_norm": 0.30869489908218384, "learning_rate": 5.91009375045102e-05, "loss": 0.0712, "step": 28180 }, { "action_loss": 0.003402137430384755, "epoch": 25.341726618705035, "step": 28180 }, { "epoch": 25.341726618705035, "step": 28180, "torque_loss": 0.08149983733892441 }, { "epoch": 25.350719424460433, "grad_norm": 0.39159154891967773, "learning_rate": 5.9073838645182476e-05, "loss": 0.0616, "step": 28190 }, { "action_loss": 0.004185924772173166, "epoch": 25.350719424460433, "step": 28190 }, { "epoch": 25.350719424460433, "step": 28190, "torque_loss": 0.07643212378025055 }, { "epoch": 25.359712230215827, "grad_norm": 0.3891468942165375, "learning_rate": 5.904673702946217e-05, "loss": 0.0792, "step": 28200 }, { "action_loss": 0.00493858614936471, "epoch": 25.359712230215827, "step": 28200 }, { "epoch": 25.359712230215827, "step": 28200, "torque_loss": 0.07014685124158859 }, { "epoch": 25.368705035971225, "grad_norm": 0.27076205611228943, "learning_rate": 5.9019632665582004e-05, "loss": 0.0641, "step": 28210 }, { "action_loss": 0.001945150550454855, "epoch": 25.368705035971225, "step": 28210 }, { "epoch": 25.368705035971225, "step": 28210, "torque_loss": 0.06968450546264648 }, { "epoch": 25.37769784172662, "grad_norm": 0.34469643235206604, "learning_rate": 5.899252556177559e-05, "loss": 0.065, "step": 28220 }, { "action_loss": 0.003892573295161128, "epoch": 25.37769784172662, "step": 28220 }, { "epoch": 25.37769784172662, "step": 28220, "torque_loss": 0.11752312630414963 }, { "epoch": 25.386690647482013, "grad_norm": 0.32304948568344116, "learning_rate": 5.896541572627735e-05, "loss": 0.0709, "step": 28230 }, { "action_loss": 0.006341375410556793, "epoch": 25.386690647482013, "step": 28230 }, { "epoch": 25.386690647482013, "step": 28230, "torque_loss": 0.10392043739557266 }, { "epoch": 25.39568345323741, "grad_norm": 0.29223382472991943, "learning_rate": 5.893830316732253e-05, "loss": 0.0737, "step": 28240 }, { "action_loss": 0.005190910305827856, "epoch": 25.39568345323741, "step": 28240 }, { "epoch": 25.39568345323741, "step": 28240, "torque_loss": 0.06961461901664734 }, { "epoch": 25.404676258992804, "grad_norm": 0.23387083411216736, "learning_rate": 5.8911187893147214e-05, "loss": 0.0749, "step": 28250 }, { "action_loss": 0.007104924414306879, "epoch": 25.404676258992804, "step": 28250 }, { "epoch": 25.404676258992804, "step": 28250, "torque_loss": 0.10242369025945663 }, { "epoch": 25.413669064748202, "grad_norm": 0.29617223143577576, "learning_rate": 5.888406991198828e-05, "loss": 0.0763, "step": 28260 }, { "action_loss": 0.0062598916701972485, "epoch": 25.413669064748202, "step": 28260 }, { "epoch": 25.413669064748202, "step": 28260, "torque_loss": 0.10831645876169205 }, { "epoch": 25.422661870503596, "grad_norm": 0.33800220489501953, "learning_rate": 5.885694923208349e-05, "loss": 0.081, "step": 28270 }, { "action_loss": 0.005949844140559435, "epoch": 25.422661870503596, "step": 28270 }, { "epoch": 25.422661870503596, "step": 28270, "torque_loss": 0.12060918658971786 }, { "epoch": 25.431654676258994, "grad_norm": 0.3424675166606903, "learning_rate": 5.882982586167138e-05, "loss": 0.0778, "step": 28280 }, { "action_loss": 0.0034365560859441757, "epoch": 25.431654676258994, "step": 28280 }, { "epoch": 25.431654676258994, "step": 28280, "torque_loss": 0.07129731774330139 }, { "epoch": 25.440647482014388, "grad_norm": 0.31417161226272583, "learning_rate": 5.880269980899131e-05, "loss": 0.0683, "step": 28290 }, { "action_loss": 0.002560505410656333, "epoch": 25.440647482014388, "step": 28290 }, { "epoch": 25.440647482014388, "step": 28290, "torque_loss": 0.06261040270328522 }, { "epoch": 25.449640287769785, "grad_norm": 0.3365100622177124, "learning_rate": 5.8775571082283465e-05, "loss": 0.0665, "step": 28300 }, { "action_loss": 0.004692959599196911, "epoch": 25.449640287769785, "step": 28300 }, { "epoch": 25.449640287769785, "step": 28300, "torque_loss": 0.08773508667945862 }, { "epoch": 25.45863309352518, "grad_norm": 0.3075443506240845, "learning_rate": 5.8748439689788824e-05, "loss": 0.0726, "step": 28310 }, { "action_loss": 0.005715473089367151, "epoch": 25.45863309352518, "step": 28310 }, { "epoch": 25.45863309352518, "step": 28310, "torque_loss": 0.12562230229377747 }, { "epoch": 25.467625899280577, "grad_norm": 0.31796666979789734, "learning_rate": 5.87213056397492e-05, "loss": 0.0678, "step": 28320 }, { "action_loss": 0.007996329106390476, "epoch": 25.467625899280577, "step": 28320 }, { "epoch": 25.467625899280577, "step": 28320, "torque_loss": 0.12241315841674805 }, { "epoch": 25.47661870503597, "grad_norm": 0.2827773690223694, "learning_rate": 5.869416894040719e-05, "loss": 0.0802, "step": 28330 }, { "action_loss": 0.00589805468916893, "epoch": 25.47661870503597, "step": 28330 }, { "epoch": 25.47661870503597, "step": 28330, "torque_loss": 0.10473853349685669 }, { "epoch": 25.485611510791365, "grad_norm": 0.24224258959293365, "learning_rate": 5.866702960000621e-05, "loss": 0.0667, "step": 28340 }, { "action_loss": 0.005146846640855074, "epoch": 25.485611510791365, "step": 28340 }, { "epoch": 25.485611510791365, "step": 28340, "torque_loss": 0.08878042548894882 }, { "epoch": 25.494604316546763, "grad_norm": 0.3138313889503479, "learning_rate": 5.863988762679048e-05, "loss": 0.0805, "step": 28350 }, { "action_loss": 0.003718686057254672, "epoch": 25.494604316546763, "step": 28350 }, { "epoch": 25.494604316546763, "step": 28350, "torque_loss": 0.07800006866455078 }, { "epoch": 25.503597122302157, "grad_norm": 0.2589147686958313, "learning_rate": 5.8612743029005e-05, "loss": 0.0601, "step": 28360 }, { "action_loss": 0.0013389714295044541, "epoch": 25.503597122302157, "step": 28360 }, { "epoch": 25.503597122302157, "step": 28360, "torque_loss": 0.07138551026582718 }, { "epoch": 25.512589928057555, "grad_norm": 0.28465813398361206, "learning_rate": 5.858559581489561e-05, "loss": 0.0652, "step": 28370 }, { "action_loss": 0.018342284485697746, "epoch": 25.512589928057555, "step": 28370 }, { "epoch": 25.512589928057555, "step": 28370, "torque_loss": 0.1723880022764206 }, { "epoch": 25.52158273381295, "grad_norm": 0.2724984586238861, "learning_rate": 5.85584459927089e-05, "loss": 0.0722, "step": 28380 }, { "action_loss": 0.0122441491112113, "epoch": 25.52158273381295, "step": 28380 }, { "epoch": 25.52158273381295, "step": 28380, "torque_loss": 0.11844175308942795 }, { "epoch": 25.530575539568346, "grad_norm": 0.22683440148830414, "learning_rate": 5.853129357069227e-05, "loss": 0.0684, "step": 28390 }, { "action_loss": 0.007904427126049995, "epoch": 25.530575539568346, "step": 28390 }, { "epoch": 25.530575539568346, "step": 28390, "torque_loss": 0.16555844247341156 }, { "epoch": 25.53956834532374, "grad_norm": 0.267549604177475, "learning_rate": 5.8504138557093913e-05, "loss": 0.0734, "step": 28400 }, { "action_loss": 0.01699374057352543, "epoch": 25.53956834532374, "step": 28400 }, { "epoch": 25.53956834532374, "step": 28400, "torque_loss": 0.17850244045257568 }, { "epoch": 25.548561151079138, "grad_norm": 0.2435440570116043, "learning_rate": 5.8476980960162784e-05, "loss": 0.0738, "step": 28410 }, { "action_loss": 0.007534225005656481, "epoch": 25.548561151079138, "step": 28410 }, { "epoch": 25.548561151079138, "step": 28410, "torque_loss": 0.12413940578699112 }, { "epoch": 25.557553956834532, "grad_norm": 0.36534038186073303, "learning_rate": 5.844982078814868e-05, "loss": 0.0689, "step": 28420 }, { "action_loss": 0.008895154111087322, "epoch": 25.557553956834532, "step": 28420 }, { "epoch": 25.557553956834532, "step": 28420, "torque_loss": 0.1547756940126419 }, { "epoch": 25.56654676258993, "grad_norm": 0.4081001281738281, "learning_rate": 5.842265804930211e-05, "loss": 0.0856, "step": 28430 }, { "action_loss": 0.0024214175064116716, "epoch": 25.56654676258993, "step": 28430 }, { "epoch": 25.56654676258993, "step": 28430, "torque_loss": 0.08414909243583679 }, { "epoch": 25.575539568345324, "grad_norm": 0.2546674907207489, "learning_rate": 5.839549275187444e-05, "loss": 0.0644, "step": 28440 }, { "action_loss": 0.009705710224807262, "epoch": 25.575539568345324, "step": 28440 }, { "epoch": 25.575539568345324, "step": 28440, "torque_loss": 0.187751904129982 }, { "epoch": 25.584532374100718, "grad_norm": 0.26310405135154724, "learning_rate": 5.836832490411771e-05, "loss": 0.079, "step": 28450 }, { "action_loss": 0.002452960005030036, "epoch": 25.584532374100718, "step": 28450 }, { "epoch": 25.584532374100718, "step": 28450, "torque_loss": 0.06231098249554634 }, { "epoch": 25.593525179856115, "grad_norm": 0.28705522418022156, "learning_rate": 5.834115451428485e-05, "loss": 0.08, "step": 28460 }, { "action_loss": 0.01718234457075596, "epoch": 25.593525179856115, "step": 28460 }, { "epoch": 25.593525179856115, "step": 28460, "torque_loss": 0.1476050168275833 }, { "epoch": 25.60251798561151, "grad_norm": 0.29163214564323425, "learning_rate": 5.831398159062946e-05, "loss": 0.0705, "step": 28470 }, { "action_loss": 0.002578643150627613, "epoch": 25.60251798561151, "step": 28470 }, { "epoch": 25.60251798561151, "step": 28470, "torque_loss": 0.10659950971603394 }, { "epoch": 25.611510791366907, "grad_norm": 0.3460941016674042, "learning_rate": 5.828680614140599e-05, "loss": 0.0702, "step": 28480 }, { "action_loss": 0.010021652095019817, "epoch": 25.611510791366907, "step": 28480 }, { "epoch": 25.611510791366907, "step": 28480, "torque_loss": 0.15217311680316925 }, { "epoch": 25.6205035971223, "grad_norm": 0.3623691499233246, "learning_rate": 5.825962817486962e-05, "loss": 0.0811, "step": 28490 }, { "action_loss": 0.002533593447878957, "epoch": 25.6205035971223, "step": 28490 }, { "epoch": 25.6205035971223, "step": 28490, "torque_loss": 0.0759151503443718 }, { "epoch": 25.6294964028777, "grad_norm": 0.3628937005996704, "learning_rate": 5.823244769927629e-05, "loss": 0.077, "step": 28500 }, { "action_loss": 0.007223500404506922, "epoch": 25.6294964028777, "step": 28500 }, { "epoch": 25.6294964028777, "step": 28500, "torque_loss": 0.10438022017478943 }, { "epoch": 25.638489208633093, "grad_norm": 0.3396506607532501, "learning_rate": 5.8205264722882716e-05, "loss": 0.0852, "step": 28510 }, { "action_loss": 0.0020537760574370623, "epoch": 25.638489208633093, "step": 28510 }, { "epoch": 25.638489208633093, "step": 28510, "torque_loss": 0.06996490061283112 }, { "epoch": 25.64748201438849, "grad_norm": 0.32646605372428894, "learning_rate": 5.817807925394636e-05, "loss": 0.0654, "step": 28520 }, { "action_loss": 0.0060651577077806, "epoch": 25.64748201438849, "step": 28520 }, { "epoch": 25.64748201438849, "step": 28520, "torque_loss": 0.11575466394424438 }, { "epoch": 25.656474820143885, "grad_norm": 0.2979532778263092, "learning_rate": 5.815089130072546e-05, "loss": 0.0729, "step": 28530 }, { "action_loss": 0.007059374358505011, "epoch": 25.656474820143885, "step": 28530 }, { "epoch": 25.656474820143885, "step": 28530, "torque_loss": 0.08148600906133652 }, { "epoch": 25.665467625899282, "grad_norm": 0.33683857321739197, "learning_rate": 5.8123700871479e-05, "loss": 0.0628, "step": 28540 }, { "action_loss": 0.003983888775110245, "epoch": 25.665467625899282, "step": 28540 }, { "epoch": 25.665467625899282, "step": 28540, "torque_loss": 0.09044266492128372 }, { "epoch": 25.674460431654676, "grad_norm": 0.36689329147338867, "learning_rate": 5.809650797446671e-05, "loss": 0.0718, "step": 28550 }, { "action_loss": 0.003125666407868266, "epoch": 25.674460431654676, "step": 28550 }, { "epoch": 25.674460431654676, "step": 28550, "torque_loss": 0.10079395771026611 }, { "epoch": 25.68345323741007, "grad_norm": 0.32306116819381714, "learning_rate": 5.806931261794907e-05, "loss": 0.0719, "step": 28560 }, { "action_loss": 0.00292080151848495, "epoch": 25.68345323741007, "step": 28560 }, { "epoch": 25.68345323741007, "step": 28560, "torque_loss": 0.08560696244239807 }, { "epoch": 25.692446043165468, "grad_norm": 0.2697027325630188, "learning_rate": 5.804211481018731e-05, "loss": 0.0586, "step": 28570 }, { "action_loss": 0.010819231159985065, "epoch": 25.692446043165468, "step": 28570 }, { "epoch": 25.692446043165468, "step": 28570, "torque_loss": 0.14030574262142181 }, { "epoch": 25.701438848920862, "grad_norm": 0.2453904002904892, "learning_rate": 5.801491455944341e-05, "loss": 0.0584, "step": 28580 }, { "action_loss": 0.004980072844773531, "epoch": 25.701438848920862, "step": 28580 }, { "epoch": 25.701438848920862, "step": 28580, "torque_loss": 0.11658384650945663 }, { "epoch": 25.71043165467626, "grad_norm": 0.25129878520965576, "learning_rate": 5.79877118739801e-05, "loss": 0.0659, "step": 28590 }, { "action_loss": 0.03308965638279915, "epoch": 25.71043165467626, "step": 28590 }, { "epoch": 25.71043165467626, "step": 28590, "torque_loss": 0.16974858939647675 }, { "epoch": 25.719424460431654, "grad_norm": 0.30914363265037537, "learning_rate": 5.7960506762060816e-05, "loss": 0.0722, "step": 28600 }, { "action_loss": 0.005564519669860601, "epoch": 25.719424460431654, "step": 28600 }, { "epoch": 25.719424460431654, "step": 28600, "torque_loss": 0.12069400399923325 }, { "epoch": 25.72841726618705, "grad_norm": 0.3269912004470825, "learning_rate": 5.793329923194977e-05, "loss": 0.0641, "step": 28610 }, { "action_loss": 0.004935868084430695, "epoch": 25.72841726618705, "step": 28610 }, { "epoch": 25.72841726618705, "step": 28610, "torque_loss": 0.123292475938797 }, { "epoch": 25.737410071942445, "grad_norm": 0.33071187138557434, "learning_rate": 5.790608929191187e-05, "loss": 0.0636, "step": 28620 }, { "action_loss": 0.00316730048507452, "epoch": 25.737410071942445, "step": 28620 }, { "epoch": 25.737410071942445, "step": 28620, "torque_loss": 0.09203004091978073 }, { "epoch": 25.746402877697843, "grad_norm": 0.24979428946971893, "learning_rate": 5.78788769502128e-05, "loss": 0.085, "step": 28630 }, { "action_loss": 0.005114823114126921, "epoch": 25.746402877697843, "step": 28630 }, { "epoch": 25.746402877697843, "step": 28630, "torque_loss": 0.11627861112356186 }, { "epoch": 25.755395683453237, "grad_norm": 0.27697864174842834, "learning_rate": 5.785166221511894e-05, "loss": 0.0748, "step": 28640 }, { "action_loss": 0.0046099270693957806, "epoch": 25.755395683453237, "step": 28640 }, { "epoch": 25.755395683453237, "step": 28640, "torque_loss": 0.10049962252378464 }, { "epoch": 25.764388489208635, "grad_norm": 0.32740360498428345, "learning_rate": 5.7824445094897415e-05, "loss": 0.0814, "step": 28650 }, { "action_loss": 0.008527475409209728, "epoch": 25.764388489208635, "step": 28650 }, { "epoch": 25.764388489208635, "step": 28650, "torque_loss": 0.14484736323356628 }, { "epoch": 25.77338129496403, "grad_norm": 0.2729944586753845, "learning_rate": 5.7797225597816065e-05, "loss": 0.0795, "step": 28660 }, { "action_loss": 0.002463594311848283, "epoch": 25.77338129496403, "step": 28660 }, { "epoch": 25.77338129496403, "step": 28660, "torque_loss": 0.06870757788419724 }, { "epoch": 25.782374100719423, "grad_norm": 0.3322727382183075, "learning_rate": 5.777000373214345e-05, "loss": 0.0632, "step": 28670 }, { "action_loss": 0.003938771318644285, "epoch": 25.782374100719423, "step": 28670 }, { "epoch": 25.782374100719423, "step": 28670, "torque_loss": 0.079009510576725 }, { "epoch": 25.79136690647482, "grad_norm": 0.3479297161102295, "learning_rate": 5.774277950614885e-05, "loss": 0.0817, "step": 28680 }, { "action_loss": 0.012002146802842617, "epoch": 25.79136690647482, "step": 28680 }, { "epoch": 25.79136690647482, "step": 28680, "torque_loss": 0.1665532886981964 }, { "epoch": 25.800359712230215, "grad_norm": 0.2389768362045288, "learning_rate": 5.771555292810227e-05, "loss": 0.0729, "step": 28690 }, { "action_loss": 0.005290515720844269, "epoch": 25.800359712230215, "step": 28690 }, { "epoch": 25.800359712230215, "step": 28690, "torque_loss": 0.12348413467407227 }, { "epoch": 25.809352517985612, "grad_norm": 0.23914529383182526, "learning_rate": 5.768832400627444e-05, "loss": 0.0718, "step": 28700 }, { "action_loss": 0.009669211693108082, "epoch": 25.809352517985612, "step": 28700 }, { "epoch": 25.809352517985612, "step": 28700, "torque_loss": 0.11973175406455994 }, { "epoch": 25.818345323741006, "grad_norm": 0.29329633712768555, "learning_rate": 5.7661092748936775e-05, "loss": 0.0757, "step": 28710 }, { "action_loss": 0.004995009396225214, "epoch": 25.818345323741006, "step": 28710 }, { "epoch": 25.818345323741006, "step": 28710, "torque_loss": 0.1294529139995575 }, { "epoch": 25.827338129496404, "grad_norm": 0.2926766276359558, "learning_rate": 5.76338591643614e-05, "loss": 0.0778, "step": 28720 }, { "action_loss": 0.007773674558848143, "epoch": 25.827338129496404, "step": 28720 }, { "epoch": 25.827338129496404, "step": 28720, "torque_loss": 0.14696411788463593 }, { "epoch": 25.836330935251798, "grad_norm": 0.29169681668281555, "learning_rate": 5.760662326082118e-05, "loss": 0.0744, "step": 28730 }, { "action_loss": 0.002499561756849289, "epoch": 25.836330935251798, "step": 28730 }, { "epoch": 25.836330935251798, "step": 28730, "torque_loss": 0.12145847827196121 }, { "epoch": 25.845323741007196, "grad_norm": 0.3269982933998108, "learning_rate": 5.757938504658965e-05, "loss": 0.0688, "step": 28740 }, { "action_loss": 0.004597885999828577, "epoch": 25.845323741007196, "step": 28740 }, { "epoch": 25.845323741007196, "step": 28740, "torque_loss": 0.08365271240472794 }, { "epoch": 25.85431654676259, "grad_norm": 0.37308937311172485, "learning_rate": 5.755214452994107e-05, "loss": 0.0754, "step": 28750 }, { "action_loss": 0.009500325657427311, "epoch": 25.85431654676259, "step": 28750 }, { "epoch": 25.85431654676259, "step": 28750, "torque_loss": 0.09627475589513779 }, { "epoch": 25.863309352517987, "grad_norm": 0.24187174439430237, "learning_rate": 5.752490171915039e-05, "loss": 0.0796, "step": 28760 }, { "action_loss": 0.002694901078939438, "epoch": 25.863309352517987, "step": 28760 }, { "epoch": 25.863309352517987, "step": 28760, "torque_loss": 0.10484874248504639 }, { "epoch": 25.87230215827338, "grad_norm": 0.32045280933380127, "learning_rate": 5.749765662249324e-05, "loss": 0.075, "step": 28770 }, { "action_loss": 0.003924757242202759, "epoch": 25.87230215827338, "step": 28770 }, { "epoch": 25.87230215827338, "step": 28770, "torque_loss": 0.08400102704763412 }, { "epoch": 25.881294964028775, "grad_norm": 0.2679542005062103, "learning_rate": 5.747040924824596e-05, "loss": 0.0564, "step": 28780 }, { "action_loss": 0.002888693707063794, "epoch": 25.881294964028775, "step": 28780 }, { "epoch": 25.881294964028775, "step": 28780, "torque_loss": 0.09020302444696426 }, { "epoch": 25.890287769784173, "grad_norm": 0.3187273144721985, "learning_rate": 5.7443159604685613e-05, "loss": 0.0675, "step": 28790 }, { "action_loss": 0.01132163405418396, "epoch": 25.890287769784173, "step": 28790 }, { "epoch": 25.890287769784173, "step": 28790, "torque_loss": 0.14332331717014313 }, { "epoch": 25.899280575539567, "grad_norm": 0.25182756781578064, "learning_rate": 5.74159077000899e-05, "loss": 0.0789, "step": 28800 }, { "action_loss": 0.008694267831742764, "epoch": 25.899280575539567, "step": 28800 }, { "epoch": 25.899280575539567, "step": 28800, "torque_loss": 0.1010504886507988 }, { "epoch": 25.908273381294965, "grad_norm": 0.2895260155200958, "learning_rate": 5.7388653542737235e-05, "loss": 0.0662, "step": 28810 }, { "action_loss": 0.006350414361804724, "epoch": 25.908273381294965, "step": 28810 }, { "epoch": 25.908273381294965, "step": 28810, "torque_loss": 0.1262184977531433 }, { "epoch": 25.91726618705036, "grad_norm": 0.29774922132492065, "learning_rate": 5.736139714090672e-05, "loss": 0.079, "step": 28820 }, { "action_loss": 0.02436712384223938, "epoch": 25.91726618705036, "step": 28820 }, { "epoch": 25.91726618705036, "step": 28820, "torque_loss": 0.16317717730998993 }, { "epoch": 25.926258992805757, "grad_norm": 0.3121792674064636, "learning_rate": 5.73341385028781e-05, "loss": 0.0691, "step": 28830 }, { "action_loss": 0.011189696379005909, "epoch": 25.926258992805757, "step": 28830 }, { "epoch": 25.926258992805757, "step": 28830, "torque_loss": 0.1319676637649536 }, { "epoch": 25.93525179856115, "grad_norm": 0.27120551466941833, "learning_rate": 5.7306877636931855e-05, "loss": 0.0642, "step": 28840 }, { "action_loss": 0.003307979553937912, "epoch": 25.93525179856115, "step": 28840 }, { "epoch": 25.93525179856115, "step": 28840, "torque_loss": 0.10522478818893433 }, { "epoch": 25.944244604316548, "grad_norm": 0.3297356963157654, "learning_rate": 5.7279614551349125e-05, "loss": 0.0656, "step": 28850 }, { "action_loss": 0.0026119789108633995, "epoch": 25.944244604316548, "step": 28850 }, { "epoch": 25.944244604316548, "step": 28850, "torque_loss": 0.08419743180274963 }, { "epoch": 25.953237410071942, "grad_norm": 0.283577024936676, "learning_rate": 5.725234925441169e-05, "loss": 0.0584, "step": 28860 }, { "action_loss": 0.0014007698046043515, "epoch": 25.953237410071942, "step": 28860 }, { "epoch": 25.953237410071942, "step": 28860, "torque_loss": 0.05699239298701286 }, { "epoch": 25.96223021582734, "grad_norm": 0.30736109614372253, "learning_rate": 5.7225081754402044e-05, "loss": 0.0629, "step": 28870 }, { "action_loss": 0.0075543527491390705, "epoch": 25.96223021582734, "step": 28870 }, { "epoch": 25.96223021582734, "step": 28870, "torque_loss": 0.11712905019521713 }, { "epoch": 25.971223021582734, "grad_norm": 0.2597993016242981, "learning_rate": 5.7197812059603326e-05, "loss": 0.0668, "step": 28880 }, { "action_loss": 0.00957445241510868, "epoch": 25.971223021582734, "step": 28880 }, { "epoch": 25.971223021582734, "step": 28880, "torque_loss": 0.15744058787822723 }, { "epoch": 25.980215827338128, "grad_norm": 0.25868913531303406, "learning_rate": 5.717054017829934e-05, "loss": 0.0707, "step": 28890 }, { "action_loss": 0.0037037881556898355, "epoch": 25.980215827338128, "step": 28890 }, { "epoch": 25.980215827338128, "step": 28890, "torque_loss": 0.08456817269325256 }, { "epoch": 25.989208633093526, "grad_norm": 0.31462693214416504, "learning_rate": 5.7143266118774584e-05, "loss": 0.0848, "step": 28900 }, { "action_loss": 0.0032125574070960283, "epoch": 25.989208633093526, "step": 28900 }, { "epoch": 25.989208633093526, "step": 28900, "torque_loss": 0.08708769083023071 }, { "epoch": 25.99820143884892, "grad_norm": 0.3590066730976105, "learning_rate": 5.711598988931418e-05, "loss": 0.0645, "step": 28910 }, { "action_loss": 0.0036639368627220392, "epoch": 25.99820143884892, "step": 28910 }, { "epoch": 25.99820143884892, "step": 28910, "torque_loss": 0.10064313560724258 }, { "epoch": 26.007194244604317, "grad_norm": 0.32243791222572327, "learning_rate": 5.7088711498203954e-05, "loss": 0.0729, "step": 28920 }, { "action_loss": 0.004186647944152355, "epoch": 26.007194244604317, "step": 28920 }, { "epoch": 26.007194244604317, "step": 28920, "torque_loss": 0.0872613862156868 }, { "epoch": 26.01618705035971, "grad_norm": 0.31854960322380066, "learning_rate": 5.706143095373033e-05, "loss": 0.0869, "step": 28930 }, { "action_loss": 0.003708192380145192, "epoch": 26.01618705035971, "step": 28930 }, { "epoch": 26.01618705035971, "step": 28930, "torque_loss": 0.11370084434747696 }, { "epoch": 26.02517985611511, "grad_norm": 0.3136741518974304, "learning_rate": 5.703414826418042e-05, "loss": 0.07, "step": 28940 }, { "action_loss": 0.009647026658058167, "epoch": 26.02517985611511, "step": 28940 }, { "epoch": 26.02517985611511, "step": 28940, "torque_loss": 0.1386290043592453 }, { "epoch": 26.034172661870503, "grad_norm": 0.30615538358688354, "learning_rate": 5.7006863437842007e-05, "loss": 0.0725, "step": 28950 }, { "action_loss": 0.004408461041748524, "epoch": 26.034172661870503, "step": 28950 }, { "epoch": 26.034172661870503, "step": 28950, "torque_loss": 0.10615218430757523 }, { "epoch": 26.0431654676259, "grad_norm": 0.353304386138916, "learning_rate": 5.697957648300348e-05, "loss": 0.0824, "step": 28960 }, { "action_loss": 0.004902693443000317, "epoch": 26.0431654676259, "step": 28960 }, { "epoch": 26.0431654676259, "step": 28960, "torque_loss": 0.08815192431211472 }, { "epoch": 26.052158273381295, "grad_norm": 0.3521588444709778, "learning_rate": 5.695228740795391e-05, "loss": 0.0696, "step": 28970 }, { "action_loss": 0.004338397178798914, "epoch": 26.052158273381295, "step": 28970 }, { "epoch": 26.052158273381295, "step": 28970, "torque_loss": 0.08263800293207169 }, { "epoch": 26.06115107913669, "grad_norm": 0.35420510172843933, "learning_rate": 5.6924996220982985e-05, "loss": 0.0713, "step": 28980 }, { "action_loss": 0.0051075792871415615, "epoch": 26.06115107913669, "step": 28980 }, { "epoch": 26.06115107913669, "step": 28980, "torque_loss": 0.12047407776117325 }, { "epoch": 26.070143884892087, "grad_norm": 0.3113428056240082, "learning_rate": 5.6897702930381045e-05, "loss": 0.0625, "step": 28990 }, { "action_loss": 0.0022465530782938004, "epoch": 26.070143884892087, "step": 28990 }, { "epoch": 26.070143884892087, "step": 28990, "torque_loss": 0.05849720910191536 }, { "epoch": 26.07913669064748, "grad_norm": 0.3239055573940277, "learning_rate": 5.687040754443908e-05, "loss": 0.0563, "step": 29000 }, { "action_loss": 0.01002910640090704, "epoch": 26.07913669064748, "step": 29000 }, { "epoch": 26.07913669064748, "step": 29000, "torque_loss": 0.11846164613962173 }, { "epoch": 26.08812949640288, "grad_norm": 0.2699744403362274, "learning_rate": 5.6843110071448725e-05, "loss": 0.0774, "step": 29010 }, { "action_loss": 0.01051352173089981, "epoch": 26.08812949640288, "step": 29010 }, { "epoch": 26.08812949640288, "step": 29010, "torque_loss": 0.11668407917022705 }, { "epoch": 26.097122302158272, "grad_norm": 0.266439288854599, "learning_rate": 5.6815810519702194e-05, "loss": 0.0692, "step": 29020 }, { "action_loss": 0.00341072678565979, "epoch": 26.097122302158272, "step": 29020 }, { "epoch": 26.097122302158272, "step": 29020, "torque_loss": 0.08749867230653763 }, { "epoch": 26.10611510791367, "grad_norm": 0.3565065860748291, "learning_rate": 5.6788508897492396e-05, "loss": 0.061, "step": 29030 }, { "action_loss": 0.0018869329942390323, "epoch": 26.10611510791367, "step": 29030 }, { "epoch": 26.10611510791367, "step": 29030, "torque_loss": 0.06747471541166306 }, { "epoch": 26.115107913669064, "grad_norm": 0.4508759081363678, "learning_rate": 5.676120521311282e-05, "loss": 0.067, "step": 29040 }, { "action_loss": 0.011819721199572086, "epoch": 26.115107913669064, "step": 29040 }, { "epoch": 26.115107913669064, "step": 29040, "torque_loss": 0.15288417041301727 }, { "epoch": 26.12410071942446, "grad_norm": 0.25119248032569885, "learning_rate": 5.6733899474857634e-05, "loss": 0.0727, "step": 29050 }, { "action_loss": 0.003161183325573802, "epoch": 26.12410071942446, "step": 29050 }, { "epoch": 26.12410071942446, "step": 29050, "torque_loss": 0.11678232997655869 }, { "epoch": 26.133093525179856, "grad_norm": 0.2858201265335083, "learning_rate": 5.670659169102157e-05, "loss": 0.0676, "step": 29060 }, { "action_loss": 0.007873904891312122, "epoch": 26.133093525179856, "step": 29060 }, { "epoch": 26.133093525179856, "step": 29060, "torque_loss": 0.12149959802627563 }, { "epoch": 26.142086330935253, "grad_norm": 0.3532249331474304, "learning_rate": 5.6679281869900044e-05, "loss": 0.0846, "step": 29070 }, { "action_loss": 0.002560314955189824, "epoch": 26.142086330935253, "step": 29070 }, { "epoch": 26.142086330935253, "step": 29070, "torque_loss": 0.08993220329284668 }, { "epoch": 26.151079136690647, "grad_norm": 0.34400784969329834, "learning_rate": 5.6651970019789045e-05, "loss": 0.0704, "step": 29080 }, { "action_loss": 0.007191849406808615, "epoch": 26.151079136690647, "step": 29080 }, { "epoch": 26.151079136690647, "step": 29080, "torque_loss": 0.11647012829780579 }, { "epoch": 26.16007194244604, "grad_norm": 0.3070221245288849, "learning_rate": 5.662465614898519e-05, "loss": 0.0634, "step": 29090 }, { "action_loss": 0.00262719695456326, "epoch": 26.16007194244604, "step": 29090 }, { "epoch": 26.16007194244604, "step": 29090, "torque_loss": 0.0658186674118042 }, { "epoch": 26.16906474820144, "grad_norm": 0.2821534276008606, "learning_rate": 5.6597340265785695e-05, "loss": 0.0772, "step": 29100 }, { "action_loss": 0.0033274367451667786, "epoch": 26.16906474820144, "step": 29100 }, { "epoch": 26.16906474820144, "step": 29100, "torque_loss": 0.10285630077123642 }, { "epoch": 26.178057553956833, "grad_norm": 0.2346794158220291, "learning_rate": 5.657002237848843e-05, "loss": 0.0726, "step": 29110 }, { "action_loss": 0.0025940558407455683, "epoch": 26.178057553956833, "step": 29110 }, { "epoch": 26.178057553956833, "step": 29110, "torque_loss": 0.09299581497907639 }, { "epoch": 26.18705035971223, "grad_norm": 0.3518727719783783, "learning_rate": 5.654270249539183e-05, "loss": 0.0673, "step": 29120 }, { "action_loss": 0.016014164313673973, "epoch": 26.18705035971223, "step": 29120 }, { "epoch": 26.18705035971223, "step": 29120, "torque_loss": 0.1053389236330986 }, { "epoch": 26.196043165467625, "grad_norm": 0.3200758695602417, "learning_rate": 5.651538062479498e-05, "loss": 0.0749, "step": 29130 }, { "action_loss": 0.016613366082310677, "epoch": 26.196043165467625, "step": 29130 }, { "epoch": 26.196043165467625, "step": 29130, "torque_loss": 0.18083719909191132 }, { "epoch": 26.205035971223023, "grad_norm": 0.3911636173725128, "learning_rate": 5.648805677499751e-05, "loss": 0.0735, "step": 29140 }, { "action_loss": 0.0025979620404541492, "epoch": 26.205035971223023, "step": 29140 }, { "epoch": 26.205035971223023, "step": 29140, "torque_loss": 0.08098653703927994 }, { "epoch": 26.214028776978417, "grad_norm": 0.2674202024936676, "learning_rate": 5.646073095429969e-05, "loss": 0.0762, "step": 29150 }, { "action_loss": 0.009412077255547047, "epoch": 26.214028776978417, "step": 29150 }, { "epoch": 26.214028776978417, "step": 29150, "torque_loss": 0.09236404299736023 }, { "epoch": 26.223021582733814, "grad_norm": 0.3302106559276581, "learning_rate": 5.643340317100241e-05, "loss": 0.0653, "step": 29160 }, { "action_loss": 0.005260949954390526, "epoch": 26.223021582733814, "step": 29160 }, { "epoch": 26.223021582733814, "step": 29160, "torque_loss": 0.11367478966712952 }, { "epoch": 26.23201438848921, "grad_norm": 0.3458833694458008, "learning_rate": 5.64060734334071e-05, "loss": 0.0729, "step": 29170 }, { "action_loss": 0.00480559142306447, "epoch": 26.23201438848921, "step": 29170 }, { "epoch": 26.23201438848921, "step": 29170, "torque_loss": 0.07067089527845383 }, { "epoch": 26.241007194244606, "grad_norm": 0.25295740365982056, "learning_rate": 5.637874174981583e-05, "loss": 0.0598, "step": 29180 }, { "action_loss": 0.005010520573705435, "epoch": 26.241007194244606, "step": 29180 }, { "epoch": 26.241007194244606, "step": 29180, "torque_loss": 0.08893633633852005 }, { "epoch": 26.25, "grad_norm": 0.32438352704048157, "learning_rate": 5.635140812853124e-05, "loss": 0.0669, "step": 29190 }, { "action_loss": 0.002199243986979127, "epoch": 26.25, "step": 29190 }, { "epoch": 26.25, "step": 29190, "torque_loss": 0.046191781759262085 }, { "epoch": 26.258992805755394, "grad_norm": 0.21977598965168, "learning_rate": 5.6324072577856544e-05, "loss": 0.064, "step": 29200 }, { "action_loss": 0.013624110259115696, "epoch": 26.258992805755394, "step": 29200 }, { "epoch": 26.258992805755394, "step": 29200, "torque_loss": 0.13349030911922455 }, { "epoch": 26.26798561151079, "grad_norm": 0.3196758031845093, "learning_rate": 5.629673510609559e-05, "loss": 0.0688, "step": 29210 }, { "action_loss": 0.0030414138454943895, "epoch": 26.26798561151079, "step": 29210 }, { "epoch": 26.26798561151079, "step": 29210, "torque_loss": 0.09333682060241699 }, { "epoch": 26.276978417266186, "grad_norm": 0.33183640241622925, "learning_rate": 5.626939572155276e-05, "loss": 0.0798, "step": 29220 }, { "action_loss": 0.00626608869060874, "epoch": 26.276978417266186, "step": 29220 }, { "epoch": 26.276978417266186, "step": 29220, "torque_loss": 0.11225203424692154 }, { "epoch": 26.285971223021583, "grad_norm": 0.3776416778564453, "learning_rate": 5.6242054432533054e-05, "loss": 0.0663, "step": 29230 }, { "action_loss": 0.00905032828450203, "epoch": 26.285971223021583, "step": 29230 }, { "epoch": 26.285971223021583, "step": 29230, "torque_loss": 0.10897906869649887 }, { "epoch": 26.294964028776977, "grad_norm": 0.3012387752532959, "learning_rate": 5.621471124734201e-05, "loss": 0.0699, "step": 29240 }, { "action_loss": 0.005076809320598841, "epoch": 26.294964028776977, "step": 29240 }, { "epoch": 26.294964028776977, "step": 29240, "torque_loss": 0.10972923040390015 }, { "epoch": 26.303956834532375, "grad_norm": 0.3242396414279938, "learning_rate": 5.6187366174285794e-05, "loss": 0.0934, "step": 29250 }, { "action_loss": 0.0043777781538665295, "epoch": 26.303956834532375, "step": 29250 }, { "epoch": 26.303956834532375, "step": 29250, "torque_loss": 0.0950775220990181 }, { "epoch": 26.31294964028777, "grad_norm": 0.3259635865688324, "learning_rate": 5.616001922167109e-05, "loss": 0.0701, "step": 29260 }, { "action_loss": 0.0032193567603826523, "epoch": 26.31294964028777, "step": 29260 }, { "epoch": 26.31294964028777, "step": 29260, "torque_loss": 0.08047987520694733 }, { "epoch": 26.321942446043167, "grad_norm": 0.3538023829460144, "learning_rate": 5.61326703978052e-05, "loss": 0.0802, "step": 29270 }, { "action_loss": 0.0030549701768904924, "epoch": 26.321942446043167, "step": 29270 }, { "epoch": 26.321942446043167, "step": 29270, "torque_loss": 0.07689274102449417 }, { "epoch": 26.33093525179856, "grad_norm": 0.4383124113082886, "learning_rate": 5.6105319710995964e-05, "loss": 0.0718, "step": 29280 }, { "action_loss": 0.0039002392441034317, "epoch": 26.33093525179856, "step": 29280 }, { "epoch": 26.33093525179856, "step": 29280, "torque_loss": 0.09147379547357559 }, { "epoch": 26.33992805755396, "grad_norm": 0.3386524021625519, "learning_rate": 5.60779671695518e-05, "loss": 0.0683, "step": 29290 }, { "action_loss": 0.0034509978722780943, "epoch": 26.33992805755396, "step": 29290 }, { "epoch": 26.33992805755396, "step": 29290, "torque_loss": 0.08101505786180496 }, { "epoch": 26.348920863309353, "grad_norm": 0.2697407901287079, "learning_rate": 5.6050612781781684e-05, "loss": 0.0655, "step": 29300 }, { "action_loss": 0.0020162297878414392, "epoch": 26.348920863309353, "step": 29300 }, { "epoch": 26.348920863309353, "step": 29300, "torque_loss": 0.05960714817047119 }, { "epoch": 26.357913669064747, "grad_norm": 0.28165388107299805, "learning_rate": 5.602325655599516e-05, "loss": 0.0681, "step": 29310 }, { "action_loss": 0.006458458956331015, "epoch": 26.357913669064747, "step": 29310 }, { "epoch": 26.357913669064747, "step": 29310, "torque_loss": 0.10698428004980087 }, { "epoch": 26.366906474820144, "grad_norm": 0.33174771070480347, "learning_rate": 5.599589850050234e-05, "loss": 0.0721, "step": 29320 }, { "action_loss": 0.0040946477092802525, "epoch": 26.366906474820144, "step": 29320 }, { "epoch": 26.366906474820144, "step": 29320, "torque_loss": 0.1264447122812271 }, { "epoch": 26.37589928057554, "grad_norm": 0.28447026014328003, "learning_rate": 5.5968538623613874e-05, "loss": 0.0701, "step": 29330 }, { "action_loss": 0.0029704358894377947, "epoch": 26.37589928057554, "step": 29330 }, { "epoch": 26.37589928057554, "step": 29330, "torque_loss": 0.07826495170593262 }, { "epoch": 26.384892086330936, "grad_norm": 0.28147879242897034, "learning_rate": 5.594117693364095e-05, "loss": 0.0619, "step": 29340 }, { "action_loss": 0.010825426317751408, "epoch": 26.384892086330936, "step": 29340 }, { "epoch": 26.384892086330936, "step": 29340, "torque_loss": 0.10961422324180603 }, { "epoch": 26.39388489208633, "grad_norm": 0.37890923023223877, "learning_rate": 5.591381343889535e-05, "loss": 0.0763, "step": 29350 }, { "action_loss": 0.008951547555625439, "epoch": 26.39388489208633, "step": 29350 }, { "epoch": 26.39388489208633, "step": 29350, "torque_loss": 0.12160992622375488 }, { "epoch": 26.402877697841728, "grad_norm": 0.40983641147613525, "learning_rate": 5.5886448147689355e-05, "loss": 0.0704, "step": 29360 }, { "action_loss": 0.00797679927200079, "epoch": 26.402877697841728, "step": 29360 }, { "epoch": 26.402877697841728, "step": 29360, "torque_loss": 0.16817478835582733 }, { "epoch": 26.41187050359712, "grad_norm": 0.38066989183425903, "learning_rate": 5.585908106833585e-05, "loss": 0.0649, "step": 29370 }, { "action_loss": 0.0035841644275933504, "epoch": 26.41187050359712, "step": 29370 }, { "epoch": 26.41187050359712, "step": 29370, "torque_loss": 0.08270034193992615 }, { "epoch": 26.42086330935252, "grad_norm": 0.2987053692340851, "learning_rate": 5.5831712209148226e-05, "loss": 0.0705, "step": 29380 }, { "action_loss": 0.02035050094127655, "epoch": 26.42086330935252, "step": 29380 }, { "epoch": 26.42086330935252, "step": 29380, "torque_loss": 0.15553636848926544 }, { "epoch": 26.429856115107913, "grad_norm": 0.2685205042362213, "learning_rate": 5.58043415784404e-05, "loss": 0.0727, "step": 29390 }, { "action_loss": 0.00644509494304657, "epoch": 26.429856115107913, "step": 29390 }, { "epoch": 26.429856115107913, "step": 29390, "torque_loss": 0.09107768535614014 }, { "epoch": 26.43884892086331, "grad_norm": 0.3670492470264435, "learning_rate": 5.577696918452686e-05, "loss": 0.0666, "step": 29400 }, { "action_loss": 0.003999009262770414, "epoch": 26.43884892086331, "step": 29400 }, { "epoch": 26.43884892086331, "step": 29400, "torque_loss": 0.10527902096509933 }, { "epoch": 26.447841726618705, "grad_norm": 0.2917299270629883, "learning_rate": 5.5749595035722604e-05, "loss": 0.0972, "step": 29410 }, { "action_loss": 0.0022596383932977915, "epoch": 26.447841726618705, "step": 29410 }, { "epoch": 26.447841726618705, "step": 29410, "torque_loss": 0.05382375791668892 }, { "epoch": 26.4568345323741, "grad_norm": 0.270731657743454, "learning_rate": 5.5722219140343193e-05, "loss": 0.0625, "step": 29420 }, { "action_loss": 0.0028083890210837126, "epoch": 26.4568345323741, "step": 29420 }, { "epoch": 26.4568345323741, "step": 29420, "torque_loss": 0.0712827816605568 }, { "epoch": 26.465827338129497, "grad_norm": 0.3486013412475586, "learning_rate": 5.56948415067047e-05, "loss": 0.0773, "step": 29430 }, { "action_loss": 0.01637173630297184, "epoch": 26.465827338129497, "step": 29430 }, { "epoch": 26.465827338129497, "step": 29430, "torque_loss": 0.17111249268054962 }, { "epoch": 26.47482014388489, "grad_norm": 0.41059795022010803, "learning_rate": 5.5667462143123704e-05, "loss": 0.0855, "step": 29440 }, { "action_loss": 0.0031953260768204927, "epoch": 26.47482014388489, "step": 29440 }, { "epoch": 26.47482014388489, "step": 29440, "torque_loss": 0.0876011773943901 }, { "epoch": 26.48381294964029, "grad_norm": 0.32403433322906494, "learning_rate": 5.564008105791737e-05, "loss": 0.0645, "step": 29450 }, { "action_loss": 0.00552387535572052, "epoch": 26.48381294964029, "step": 29450 }, { "epoch": 26.48381294964029, "step": 29450, "torque_loss": 0.08598482608795166 }, { "epoch": 26.492805755395683, "grad_norm": 0.3966757655143738, "learning_rate": 5.5612698259403316e-05, "loss": 0.08, "step": 29460 }, { "action_loss": 0.004243496805429459, "epoch": 26.492805755395683, "step": 29460 }, { "epoch": 26.492805755395683, "step": 29460, "torque_loss": 0.12639544904232025 }, { "epoch": 26.50179856115108, "grad_norm": 0.35959967970848083, "learning_rate": 5.5585313755899724e-05, "loss": 0.0837, "step": 29470 }, { "action_loss": 0.009230654686689377, "epoch": 26.50179856115108, "step": 29470 }, { "epoch": 26.50179856115108, "step": 29470, "torque_loss": 0.11086469888687134 }, { "epoch": 26.510791366906474, "grad_norm": 0.36845970153808594, "learning_rate": 5.5557927555725285e-05, "loss": 0.0857, "step": 29480 }, { "action_loss": 0.004048296716064215, "epoch": 26.510791366906474, "step": 29480 }, { "epoch": 26.510791366906474, "step": 29480, "torque_loss": 0.07654360681772232 }, { "epoch": 26.519784172661872, "grad_norm": 0.3020698130130768, "learning_rate": 5.55305396671992e-05, "loss": 0.0602, "step": 29490 }, { "action_loss": 0.0031970925629138947, "epoch": 26.519784172661872, "step": 29490 }, { "epoch": 26.519784172661872, "step": 29490, "torque_loss": 0.0656811073422432 }, { "epoch": 26.528776978417266, "grad_norm": 0.26976069808006287, "learning_rate": 5.55031500986412e-05, "loss": 0.0864, "step": 29500 }, { "action_loss": 0.002294227946549654, "epoch": 26.528776978417266, "step": 29500 }, { "epoch": 26.528776978417266, "step": 29500, "torque_loss": 0.06619922816753387 }, { "epoch": 26.53776978417266, "grad_norm": 0.368055135011673, "learning_rate": 5.547575885837149e-05, "loss": 0.0654, "step": 29510 }, { "action_loss": 0.009807547554373741, "epoch": 26.53776978417266, "step": 29510 }, { "epoch": 26.53776978417266, "step": 29510, "torque_loss": 0.12532709538936615 }, { "epoch": 26.546762589928058, "grad_norm": 0.3214879631996155, "learning_rate": 5.5448365954710825e-05, "loss": 0.0763, "step": 29520 }, { "action_loss": 0.013702874071896076, "epoch": 26.546762589928058, "step": 29520 }, { "epoch": 26.546762589928058, "step": 29520, "torque_loss": 0.13535737991333008 }, { "epoch": 26.555755395683452, "grad_norm": 0.23952122032642365, "learning_rate": 5.5420971395980446e-05, "loss": 0.0851, "step": 29530 }, { "action_loss": 0.010953332297503948, "epoch": 26.555755395683452, "step": 29530 }, { "epoch": 26.555755395683452, "step": 29530, "torque_loss": 0.14101029932498932 }, { "epoch": 26.56474820143885, "grad_norm": 0.28876549005508423, "learning_rate": 5.539357519050209e-05, "loss": 0.0837, "step": 29540 }, { "action_loss": 0.003857637522742152, "epoch": 26.56474820143885, "step": 29540 }, { "epoch": 26.56474820143885, "step": 29540, "torque_loss": 0.08667561411857605 }, { "epoch": 26.573741007194243, "grad_norm": 0.3269238770008087, "learning_rate": 5.536617734659799e-05, "loss": 0.0779, "step": 29550 }, { "action_loss": 0.002807905664667487, "epoch": 26.573741007194243, "step": 29550 }, { "epoch": 26.573741007194243, "step": 29550, "torque_loss": 0.055899638682603836 }, { "epoch": 26.58273381294964, "grad_norm": 0.325979083776474, "learning_rate": 5.533877787259091e-05, "loss": 0.0654, "step": 29560 }, { "action_loss": 0.004706577863544226, "epoch": 26.58273381294964, "step": 29560 }, { "epoch": 26.58273381294964, "step": 29560, "torque_loss": 0.11289467662572861 }, { "epoch": 26.591726618705035, "grad_norm": 0.310016393661499, "learning_rate": 5.5311376776804044e-05, "loss": 0.0732, "step": 29570 }, { "action_loss": 0.0023589415941387415, "epoch": 26.591726618705035, "step": 29570 }, { "epoch": 26.591726618705035, "step": 29570, "torque_loss": 0.08478625863790512 }, { "epoch": 26.600719424460433, "grad_norm": 0.305564284324646, "learning_rate": 5.528397406756118e-05, "loss": 0.0745, "step": 29580 }, { "action_loss": 0.005593590438365936, "epoch": 26.600719424460433, "step": 29580 }, { "epoch": 26.600719424460433, "step": 29580, "torque_loss": 0.12222311645746231 }, { "epoch": 26.609712230215827, "grad_norm": 0.3536485731601715, "learning_rate": 5.525656975318652e-05, "loss": 0.0647, "step": 29590 }, { "action_loss": 0.003775297896936536, "epoch": 26.609712230215827, "step": 29590 }, { "epoch": 26.609712230215827, "step": 29590, "torque_loss": 0.10139423608779907 }, { "epoch": 26.618705035971225, "grad_norm": 0.28474128246307373, "learning_rate": 5.522916384200474e-05, "loss": 0.052, "step": 29600 }, { "action_loss": 0.00617388216778636, "epoch": 26.618705035971225, "step": 29600 }, { "epoch": 26.618705035971225, "step": 29600, "torque_loss": 0.12406545877456665 }, { "epoch": 26.62769784172662, "grad_norm": 0.3359247148036957, "learning_rate": 5.520175634234106e-05, "loss": 0.0675, "step": 29610 }, { "action_loss": 0.005908141378313303, "epoch": 26.62769784172662, "step": 29610 }, { "epoch": 26.62769784172662, "step": 29610, "torque_loss": 0.13192932307720184 }, { "epoch": 26.636690647482013, "grad_norm": 0.32118743658065796, "learning_rate": 5.517434726252113e-05, "loss": 0.0683, "step": 29620 }, { "action_loss": 0.005706592928618193, "epoch": 26.636690647482013, "step": 29620 }, { "epoch": 26.636690647482013, "step": 29620, "torque_loss": 0.09975627809762955 }, { "epoch": 26.64568345323741, "grad_norm": 0.3328876495361328, "learning_rate": 5.514693661087113e-05, "loss": 0.0709, "step": 29630 }, { "action_loss": 0.004919637460261583, "epoch": 26.64568345323741, "step": 29630 }, { "epoch": 26.64568345323741, "step": 29630, "torque_loss": 0.09461266547441483 }, { "epoch": 26.654676258992804, "grad_norm": 0.380210816860199, "learning_rate": 5.511952439571769e-05, "loss": 0.0691, "step": 29640 }, { "action_loss": 0.007134139072149992, "epoch": 26.654676258992804, "step": 29640 }, { "epoch": 26.654676258992804, "step": 29640, "torque_loss": 0.17740046977996826 }, { "epoch": 26.663669064748202, "grad_norm": 0.2323005348443985, "learning_rate": 5.509211062538791e-05, "loss": 0.0662, "step": 29650 }, { "action_loss": 0.012102591805160046, "epoch": 26.663669064748202, "step": 29650 }, { "epoch": 26.663669064748202, "step": 29650, "torque_loss": 0.16562069952487946 }, { "epoch": 26.672661870503596, "grad_norm": 0.3691399395465851, "learning_rate": 5.506469530820939e-05, "loss": 0.0871, "step": 29660 }, { "action_loss": 0.004141971468925476, "epoch": 26.672661870503596, "step": 29660 }, { "epoch": 26.672661870503596, "step": 29660, "torque_loss": 0.10828453302383423 }, { "epoch": 26.681654676258994, "grad_norm": 0.349431574344635, "learning_rate": 5.503727845251014e-05, "loss": 0.0675, "step": 29670 }, { "action_loss": 0.00721019646152854, "epoch": 26.681654676258994, "step": 29670 }, { "epoch": 26.681654676258994, "step": 29670, "torque_loss": 0.11846552044153214 }, { "epoch": 26.690647482014388, "grad_norm": 0.2748221457004547, "learning_rate": 5.50098600666187e-05, "loss": 0.0692, "step": 29680 }, { "action_loss": 0.002945834072306752, "epoch": 26.690647482014388, "step": 29680 }, { "epoch": 26.690647482014388, "step": 29680, "torque_loss": 0.07573302835226059 }, { "epoch": 26.699640287769785, "grad_norm": 0.3205670118331909, "learning_rate": 5.498244015886406e-05, "loss": 0.0684, "step": 29690 }, { "action_loss": 0.00777796795591712, "epoch": 26.699640287769785, "step": 29690 }, { "epoch": 26.699640287769785, "step": 29690, "torque_loss": 0.09779080003499985 }, { "epoch": 26.70863309352518, "grad_norm": 0.3411702811717987, "learning_rate": 5.495501873757565e-05, "loss": 0.0629, "step": 29700 }, { "action_loss": 0.007355297449976206, "epoch": 26.70863309352518, "step": 29700 }, { "epoch": 26.70863309352518, "step": 29700, "torque_loss": 0.11272323131561279 }, { "epoch": 26.717625899280577, "grad_norm": 0.29850637912750244, "learning_rate": 5.492759581108336e-05, "loss": 0.0694, "step": 29710 }, { "action_loss": 0.00910764466971159, "epoch": 26.717625899280577, "step": 29710 }, { "epoch": 26.717625899280577, "step": 29710, "torque_loss": 0.1363316923379898 }, { "epoch": 26.72661870503597, "grad_norm": 0.2709207534790039, "learning_rate": 5.490017138771759e-05, "loss": 0.0714, "step": 29720 }, { "action_loss": 0.00929273571819067, "epoch": 26.72661870503597, "step": 29720 }, { "epoch": 26.72661870503597, "step": 29720, "torque_loss": 0.13226014375686646 }, { "epoch": 26.735611510791365, "grad_norm": 0.3387089669704437, "learning_rate": 5.487274547580912e-05, "loss": 0.0734, "step": 29730 }, { "action_loss": 0.009213699959218502, "epoch": 26.735611510791365, "step": 29730 }, { "epoch": 26.735611510791365, "step": 29730, "torque_loss": 0.14102941751480103 }, { "epoch": 26.744604316546763, "grad_norm": 0.3555232584476471, "learning_rate": 5.484531808368923e-05, "loss": 0.0689, "step": 29740 }, { "action_loss": 0.007232954725623131, "epoch": 26.744604316546763, "step": 29740 }, { "epoch": 26.744604316546763, "step": 29740, "torque_loss": 0.12459202855825424 }, { "epoch": 26.753597122302157, "grad_norm": 0.33046841621398926, "learning_rate": 5.4817889219689656e-05, "loss": 0.0745, "step": 29750 }, { "action_loss": 0.0024334369227290154, "epoch": 26.753597122302157, "step": 29750 }, { "epoch": 26.753597122302157, "step": 29750, "torque_loss": 0.07699446380138397 }, { "epoch": 26.762589928057555, "grad_norm": 0.36555224657058716, "learning_rate": 5.4790458892142536e-05, "loss": 0.0668, "step": 29760 }, { "action_loss": 0.0024152931291610003, "epoch": 26.762589928057555, "step": 29760 }, { "epoch": 26.762589928057555, "step": 29760, "torque_loss": 0.07887545973062515 }, { "epoch": 26.77158273381295, "grad_norm": 0.25860628485679626, "learning_rate": 5.476302710938048e-05, "loss": 0.0643, "step": 29770 }, { "action_loss": 0.00801380816847086, "epoch": 26.77158273381295, "step": 29770 }, { "epoch": 26.77158273381295, "step": 29770, "torque_loss": 0.12123292684555054 }, { "epoch": 26.780575539568346, "grad_norm": 0.33769381046295166, "learning_rate": 5.473559387973657e-05, "loss": 0.0675, "step": 29780 }, { "action_loss": 0.009670868515968323, "epoch": 26.780575539568346, "step": 29780 }, { "epoch": 26.780575539568346, "step": 29780, "torque_loss": 0.14738817512989044 }, { "epoch": 26.78956834532374, "grad_norm": 0.34085679054260254, "learning_rate": 5.470815921154425e-05, "loss": 0.0763, "step": 29790 }, { "action_loss": 0.001864205696620047, "epoch": 26.78956834532374, "step": 29790 }, { "epoch": 26.78956834532374, "step": 29790, "torque_loss": 0.07219234108924866 }, { "epoch": 26.798561151079138, "grad_norm": 0.278643399477005, "learning_rate": 5.468072311313749e-05, "loss": 0.0559, "step": 29800 }, { "action_loss": 0.005992906633764505, "epoch": 26.798561151079138, "step": 29800 }, { "epoch": 26.798561151079138, "step": 29800, "torque_loss": 0.09880111366510391 }, { "epoch": 26.807553956834532, "grad_norm": 0.301593154668808, "learning_rate": 5.465328559285063e-05, "loss": 0.0643, "step": 29810 }, { "action_loss": 0.0033454708755016327, "epoch": 26.807553956834532, "step": 29810 }, { "epoch": 26.807553956834532, "step": 29810, "torque_loss": 0.07953587174415588 }, { "epoch": 26.81654676258993, "grad_norm": 0.35974255204200745, "learning_rate": 5.462584665901849e-05, "loss": 0.065, "step": 29820 }, { "action_loss": 0.002323760883882642, "epoch": 26.81654676258993, "step": 29820 }, { "epoch": 26.81654676258993, "step": 29820, "torque_loss": 0.07979602366685867 }, { "epoch": 26.825539568345324, "grad_norm": 0.311039000749588, "learning_rate": 5.4598406319976235e-05, "loss": 0.0863, "step": 29830 }, { "action_loss": 0.03889657184481621, "epoch": 26.825539568345324, "step": 29830 }, { "epoch": 26.825539568345324, "step": 29830, "torque_loss": 0.1711566001176834 }, { "epoch": 26.834532374100718, "grad_norm": 0.37567657232284546, "learning_rate": 5.457096458405958e-05, "loss": 0.0843, "step": 29840 }, { "action_loss": 0.008015435189008713, "epoch": 26.834532374100718, "step": 29840 }, { "epoch": 26.834532374100718, "step": 29840, "torque_loss": 0.1104622483253479 }, { "epoch": 26.843525179856115, "grad_norm": 0.23916001617908478, "learning_rate": 5.454352145960457e-05, "loss": 0.0693, "step": 29850 }, { "action_loss": 0.004380340222269297, "epoch": 26.843525179856115, "step": 29850 }, { "epoch": 26.843525179856115, "step": 29850, "torque_loss": 0.0826047956943512 }, { "epoch": 26.85251798561151, "grad_norm": 0.21383421123027802, "learning_rate": 5.4516076954947715e-05, "loss": 0.062, "step": 29860 }, { "action_loss": 0.0036297652404755354, "epoch": 26.85251798561151, "step": 29860 }, { "epoch": 26.85251798561151, "step": 29860, "torque_loss": 0.055829714983701706 }, { "epoch": 26.861510791366907, "grad_norm": 0.3308497369289398, "learning_rate": 5.448863107842591e-05, "loss": 0.0749, "step": 29870 }, { "action_loss": 0.00789750274270773, "epoch": 26.861510791366907, "step": 29870 }, { "epoch": 26.861510791366907, "step": 29870, "torque_loss": 0.15048784017562866 }, { "epoch": 26.8705035971223, "grad_norm": 0.3040582537651062, "learning_rate": 5.446118383837651e-05, "loss": 0.0747, "step": 29880 }, { "action_loss": 0.031547218561172485, "epoch": 26.8705035971223, "step": 29880 }, { "epoch": 26.8705035971223, "step": 29880, "torque_loss": 0.13813357055187225 }, { "epoch": 26.8794964028777, "grad_norm": 0.32629847526550293, "learning_rate": 5.443373524313722e-05, "loss": 0.0747, "step": 29890 }, { "action_loss": 0.003686295822262764, "epoch": 26.8794964028777, "step": 29890 }, { "epoch": 26.8794964028777, "step": 29890, "torque_loss": 0.11579927802085876 }, { "epoch": 26.888489208633093, "grad_norm": 0.3228836953639984, "learning_rate": 5.440628530104626e-05, "loss": 0.066, "step": 29900 }, { "action_loss": 0.002437710529193282, "epoch": 26.888489208633093, "step": 29900 }, { "epoch": 26.888489208633093, "step": 29900, "torque_loss": 0.0770040899515152 }, { "epoch": 26.89748201438849, "grad_norm": 0.3369635045528412, "learning_rate": 5.4378834020442146e-05, "loss": 0.0644, "step": 29910 }, { "action_loss": 0.006745086517184973, "epoch": 26.89748201438849, "step": 29910 }, { "epoch": 26.89748201438849, "step": 29910, "torque_loss": 0.11315345764160156 }, { "epoch": 26.906474820143885, "grad_norm": 0.36121848225593567, "learning_rate": 5.4351381409663884e-05, "loss": 0.0632, "step": 29920 }, { "action_loss": 0.003197053447365761, "epoch": 26.906474820143885, "step": 29920 }, { "epoch": 26.906474820143885, "step": 29920, "torque_loss": 0.051040057092905045 }, { "epoch": 26.915467625899282, "grad_norm": 0.3037407696247101, "learning_rate": 5.432392747705084e-05, "loss": 0.0666, "step": 29930 }, { "action_loss": 0.005544725805521011, "epoch": 26.915467625899282, "step": 29930 }, { "epoch": 26.915467625899282, "step": 29930, "torque_loss": 0.14692716300487518 }, { "epoch": 26.924460431654676, "grad_norm": 0.2964875102043152, "learning_rate": 5.429647223094278e-05, "loss": 0.061, "step": 29940 }, { "action_loss": 0.004719275515526533, "epoch": 26.924460431654676, "step": 29940 }, { "epoch": 26.924460431654676, "step": 29940, "torque_loss": 0.12550409138202667 }, { "epoch": 26.93345323741007, "grad_norm": 0.2935997247695923, "learning_rate": 5.4269015679679924e-05, "loss": 0.0724, "step": 29950 }, { "action_loss": 0.001867813989520073, "epoch": 26.93345323741007, "step": 29950 }, { "epoch": 26.93345323741007, "step": 29950, "torque_loss": 0.07696405053138733 }, { "epoch": 26.942446043165468, "grad_norm": 0.292125940322876, "learning_rate": 5.424155783160281e-05, "loss": 0.0565, "step": 29960 }, { "action_loss": 0.0070937699638307095, "epoch": 26.942446043165468, "step": 29960 }, { "epoch": 26.942446043165468, "step": 29960, "torque_loss": 0.12357622385025024 }, { "epoch": 26.951438848920862, "grad_norm": 0.2837543189525604, "learning_rate": 5.4214098695052415e-05, "loss": 0.0729, "step": 29970 }, { "action_loss": 0.006799509283155203, "epoch": 26.951438848920862, "step": 29970 }, { "epoch": 26.951438848920862, "step": 29970, "torque_loss": 0.09490242600440979 }, { "epoch": 26.96043165467626, "grad_norm": 0.34150421619415283, "learning_rate": 5.418663827837012e-05, "loss": 0.0757, "step": 29980 }, { "action_loss": 0.0046167499385774136, "epoch": 26.96043165467626, "step": 29980 }, { "epoch": 26.96043165467626, "step": 29980, "torque_loss": 0.07594247162342072 }, { "epoch": 26.969424460431654, "grad_norm": 0.3619091510772705, "learning_rate": 5.415917658989763e-05, "loss": 0.0617, "step": 29990 }, { "action_loss": 0.00241961982101202, "epoch": 26.969424460431654, "step": 29990 }, { "epoch": 26.969424460431654, "step": 29990, "torque_loss": 0.05086081102490425 }, { "epoch": 26.97841726618705, "grad_norm": 0.3254881799221039, "learning_rate": 5.413171363797713e-05, "loss": 0.0721, "step": 30000 }, { "action_loss": 0.006877134088426828, "epoch": 26.97841726618705, "step": 30000 }, { "epoch": 26.97841726618705, "step": 30000, "torque_loss": 0.12238214164972305 }, { "epoch": 26.987410071942445, "grad_norm": 0.2870929539203644, "learning_rate": 5.4104249430951116e-05, "loss": 0.0655, "step": 30010 }, { "action_loss": 0.0022727984469383955, "epoch": 26.987410071942445, "step": 30010 }, { "epoch": 26.987410071942445, "step": 30010, "torque_loss": 0.08314968645572662 }, { "epoch": 26.996402877697843, "grad_norm": 0.3113866448402405, "learning_rate": 5.4076783977162494e-05, "loss": 0.0687, "step": 30020 }, { "action_loss": 0.007338478695601225, "epoch": 26.996402877697843, "step": 30020 }, { "epoch": 26.996402877697843, "step": 30020, "torque_loss": 0.14573651552200317 }, { "epoch": 27.005395683453237, "grad_norm": 0.35446035861968994, "learning_rate": 5.4049317284954525e-05, "loss": 0.0619, "step": 30030 }, { "action_loss": 0.007201528642326593, "epoch": 27.005395683453237, "step": 30030 }, { "epoch": 27.005395683453237, "step": 30030, "torque_loss": 0.14853371679782867 }, { "epoch": 27.014388489208635, "grad_norm": 0.2764380872249603, "learning_rate": 5.4021849362670884e-05, "loss": 0.066, "step": 30040 }, { "action_loss": 0.0034432178363204002, "epoch": 27.014388489208635, "step": 30040 }, { "epoch": 27.014388489208635, "step": 30040, "torque_loss": 0.0914415493607521 }, { "epoch": 27.02338129496403, "grad_norm": 0.3570222854614258, "learning_rate": 5.3994380218655604e-05, "loss": 0.0746, "step": 30050 }, { "action_loss": 0.01146482303738594, "epoch": 27.02338129496403, "step": 30050 }, { "epoch": 27.02338129496403, "step": 30050, "torque_loss": 0.13661058247089386 }, { "epoch": 27.032374100719423, "grad_norm": 0.4737832248210907, "learning_rate": 5.396690986125309e-05, "loss": 0.0721, "step": 30060 }, { "action_loss": 0.0035669587086886168, "epoch": 27.032374100719423, "step": 30060 }, { "epoch": 27.032374100719423, "step": 30060, "torque_loss": 0.10991842299699783 }, { "epoch": 27.04136690647482, "grad_norm": 0.32674020528793335, "learning_rate": 5.3939438298808075e-05, "loss": 0.0721, "step": 30070 }, { "action_loss": 0.012896577827632427, "epoch": 27.04136690647482, "step": 30070 }, { "epoch": 27.04136690647482, "step": 30070, "torque_loss": 0.158639058470726 }, { "epoch": 27.050359712230215, "grad_norm": 0.3118785619735718, "learning_rate": 5.3911965539665744e-05, "loss": 0.0732, "step": 30080 }, { "action_loss": 0.0033253126312047243, "epoch": 27.050359712230215, "step": 30080 }, { "epoch": 27.050359712230215, "step": 30080, "torque_loss": 0.10089931637048721 }, { "epoch": 27.059352517985612, "grad_norm": 0.23210307955741882, "learning_rate": 5.388449159217156e-05, "loss": 0.0686, "step": 30090 }, { "action_loss": 0.006812678184360266, "epoch": 27.059352517985612, "step": 30090 }, { "epoch": 27.059352517985612, "step": 30090, "torque_loss": 0.09056618064641953 }, { "epoch": 27.068345323741006, "grad_norm": 0.3282026946544647, "learning_rate": 5.3857016464671385e-05, "loss": 0.0673, "step": 30100 }, { "action_loss": 0.0045183259062469006, "epoch": 27.068345323741006, "step": 30100 }, { "epoch": 27.068345323741006, "step": 30100, "torque_loss": 0.13428974151611328 }, { "epoch": 27.077338129496404, "grad_norm": 0.25497397780418396, "learning_rate": 5.382954016551146e-05, "loss": 0.0639, "step": 30110 }, { "action_loss": 0.003321348689496517, "epoch": 27.077338129496404, "step": 30110 }, { "epoch": 27.077338129496404, "step": 30110, "torque_loss": 0.06518032401800156 }, { "epoch": 27.086330935251798, "grad_norm": 0.3618171513080597, "learning_rate": 5.380206270303835e-05, "loss": 0.0702, "step": 30120 }, { "action_loss": 0.00682506337761879, "epoch": 27.086330935251798, "step": 30120 }, { "epoch": 27.086330935251798, "step": 30120, "torque_loss": 0.10464584827423096 }, { "epoch": 27.095323741007196, "grad_norm": 0.3795301914215088, "learning_rate": 5.377458408559897e-05, "loss": 0.0752, "step": 30130 }, { "action_loss": 0.002983690472319722, "epoch": 27.095323741007196, "step": 30130 }, { "epoch": 27.095323741007196, "step": 30130, "torque_loss": 0.0809200257062912 }, { "epoch": 27.10431654676259, "grad_norm": 0.24688534438610077, "learning_rate": 5.374710432154061e-05, "loss": 0.0679, "step": 30140 }, { "action_loss": 0.005344029050320387, "epoch": 27.10431654676259, "step": 30140 }, { "epoch": 27.10431654676259, "step": 30140, "torque_loss": 0.1163698211312294 }, { "epoch": 27.113309352517987, "grad_norm": 0.25107598304748535, "learning_rate": 5.3719623419210886e-05, "loss": 0.062, "step": 30150 }, { "action_loss": 0.004215391818434, "epoch": 27.113309352517987, "step": 30150 }, { "epoch": 27.113309352517987, "step": 30150, "torque_loss": 0.11604311317205429 }, { "epoch": 27.12230215827338, "grad_norm": 0.3062775731086731, "learning_rate": 5.3692141386957786e-05, "loss": 0.063, "step": 30160 }, { "action_loss": 0.0037986489478498697, "epoch": 27.12230215827338, "step": 30160 }, { "epoch": 27.12230215827338, "step": 30160, "torque_loss": 0.08492539077997208 }, { "epoch": 27.131294964028775, "grad_norm": 0.3931138217449188, "learning_rate": 5.3664658233129616e-05, "loss": 0.0685, "step": 30170 }, { "action_loss": 0.002688667504116893, "epoch": 27.131294964028775, "step": 30170 }, { "epoch": 27.131294964028775, "step": 30170, "torque_loss": 0.09550741314888 }, { "epoch": 27.140287769784173, "grad_norm": 0.213384747505188, "learning_rate": 5.363717396607504e-05, "loss": 0.0627, "step": 30180 }, { "action_loss": 0.0026805733796209097, "epoch": 27.140287769784173, "step": 30180 }, { "epoch": 27.140287769784173, "step": 30180, "torque_loss": 0.07270669937133789 }, { "epoch": 27.149280575539567, "grad_norm": 0.2942027747631073, "learning_rate": 5.360968859414305e-05, "loss": 0.0667, "step": 30190 }, { "action_loss": 0.003767048940062523, "epoch": 27.149280575539567, "step": 30190 }, { "epoch": 27.149280575539567, "step": 30190, "torque_loss": 0.07545100897550583 }, { "epoch": 27.158273381294965, "grad_norm": 0.35300499200820923, "learning_rate": 5.358220212568295e-05, "loss": 0.062, "step": 30200 }, { "action_loss": 0.01210014894604683, "epoch": 27.158273381294965, "step": 30200 }, { "epoch": 27.158273381294965, "step": 30200, "torque_loss": 0.10971236228942871 }, { "epoch": 27.16726618705036, "grad_norm": 0.2663075029850006, "learning_rate": 5.355471456904444e-05, "loss": 0.0607, "step": 30210 }, { "action_loss": 0.018967745825648308, "epoch": 27.16726618705036, "step": 30210 }, { "epoch": 27.16726618705036, "step": 30210, "torque_loss": 0.1913456916809082 }, { "epoch": 27.176258992805757, "grad_norm": 0.3051484227180481, "learning_rate": 5.3527225932577495e-05, "loss": 0.0888, "step": 30220 }, { "action_loss": 0.040510620921850204, "epoch": 27.176258992805757, "step": 30220 }, { "epoch": 27.176258992805757, "step": 30220, "torque_loss": 0.24368439614772797 }, { "epoch": 27.18525179856115, "grad_norm": 0.27669501304626465, "learning_rate": 5.349973622463246e-05, "loss": 0.0882, "step": 30230 }, { "action_loss": 0.014718112535774708, "epoch": 27.18525179856115, "step": 30230 }, { "epoch": 27.18525179856115, "step": 30230, "torque_loss": 0.17461366951465607 }, { "epoch": 27.194244604316548, "grad_norm": 0.3840002715587616, "learning_rate": 5.3472245453559956e-05, "loss": 0.0642, "step": 30240 }, { "action_loss": 0.010368725284934044, "epoch": 27.194244604316548, "step": 30240 }, { "epoch": 27.194244604316548, "step": 30240, "torque_loss": 0.10901536792516708 }, { "epoch": 27.203237410071942, "grad_norm": 0.2685491740703583, "learning_rate": 5.3444753627710955e-05, "loss": 0.0728, "step": 30250 }, { "action_loss": 0.005355076398700476, "epoch": 27.203237410071942, "step": 30250 }, { "epoch": 27.203237410071942, "step": 30250, "torque_loss": 0.1512160748243332 }, { "epoch": 27.21223021582734, "grad_norm": 0.38832831382751465, "learning_rate": 5.341726075543676e-05, "loss": 0.0667, "step": 30260 }, { "action_loss": 0.00184527353849262, "epoch": 27.21223021582734, "step": 30260 }, { "epoch": 27.21223021582734, "step": 30260, "torque_loss": 0.07616659253835678 }, { "epoch": 27.221223021582734, "grad_norm": 0.2977113127708435, "learning_rate": 5.338976684508898e-05, "loss": 0.0631, "step": 30270 }, { "action_loss": 0.006884824484586716, "epoch": 27.221223021582734, "step": 30270 }, { "epoch": 27.221223021582734, "step": 30270, "torque_loss": 0.09522608667612076 }, { "epoch": 27.230215827338128, "grad_norm": 0.27474895119667053, "learning_rate": 5.336227190501953e-05, "loss": 0.06, "step": 30280 }, { "action_loss": 0.00370377697981894, "epoch": 27.230215827338128, "step": 30280 }, { "epoch": 27.230215827338128, "step": 30280, "torque_loss": 0.06715195626020432 }, { "epoch": 27.239208633093526, "grad_norm": 0.2281142622232437, "learning_rate": 5.3334775943580664e-05, "loss": 0.0641, "step": 30290 }, { "action_loss": 0.004632537718862295, "epoch": 27.239208633093526, "step": 30290 }, { "epoch": 27.239208633093526, "step": 30290, "torque_loss": 0.09415573626756668 }, { "epoch": 27.24820143884892, "grad_norm": 0.1932963877916336, "learning_rate": 5.330727896912491e-05, "loss": 0.0585, "step": 30300 }, { "action_loss": 0.0025000853929668665, "epoch": 27.24820143884892, "step": 30300 }, { "epoch": 27.24820143884892, "step": 30300, "torque_loss": 0.07337672263383865 }, { "epoch": 27.257194244604317, "grad_norm": 0.26565027236938477, "learning_rate": 5.327978099000511e-05, "loss": 0.0605, "step": 30310 }, { "action_loss": 0.002402024809271097, "epoch": 27.257194244604317, "step": 30310 }, { "epoch": 27.257194244604317, "step": 30310, "torque_loss": 0.1078658476471901 }, { "epoch": 27.26618705035971, "grad_norm": 0.34053540229797363, "learning_rate": 5.3252282014574465e-05, "loss": 0.0647, "step": 30320 }, { "action_loss": 0.00424422649666667, "epoch": 27.26618705035971, "step": 30320 }, { "epoch": 27.26618705035971, "step": 30320, "torque_loss": 0.13725024461746216 }, { "epoch": 27.27517985611511, "grad_norm": 0.21277637779712677, "learning_rate": 5.322478205118641e-05, "loss": 0.0582, "step": 30330 }, { "action_loss": 0.01010659895837307, "epoch": 27.27517985611511, "step": 30330 }, { "epoch": 27.27517985611511, "step": 30330, "torque_loss": 0.1522255539894104 }, { "epoch": 27.284172661870503, "grad_norm": 0.3802861273288727, "learning_rate": 5.3197281108194704e-05, "loss": 0.0802, "step": 30340 }, { "action_loss": 0.0036650567781180143, "epoch": 27.284172661870503, "step": 30340 }, { "epoch": 27.284172661870503, "step": 30340, "torque_loss": 0.10933467000722885 }, { "epoch": 27.2931654676259, "grad_norm": 0.2613627016544342, "learning_rate": 5.316977919395342e-05, "loss": 0.0566, "step": 30350 }, { "action_loss": 0.014185170643031597, "epoch": 27.2931654676259, "step": 30350 }, { "epoch": 27.2931654676259, "step": 30350, "torque_loss": 0.153416246175766 }, { "epoch": 27.302158273381295, "grad_norm": 0.2655777931213379, "learning_rate": 5.314227631681691e-05, "loss": 0.0699, "step": 30360 }, { "action_loss": 0.006335163023322821, "epoch": 27.302158273381295, "step": 30360 }, { "epoch": 27.302158273381295, "step": 30360, "torque_loss": 0.12568135559558868 }, { "epoch": 27.31115107913669, "grad_norm": 0.32997435331344604, "learning_rate": 5.311477248513982e-05, "loss": 0.0641, "step": 30370 }, { "action_loss": 0.004129697568714619, "epoch": 27.31115107913669, "step": 30370 }, { "epoch": 27.31115107913669, "step": 30370, "torque_loss": 0.06114852428436279 }, { "epoch": 27.320143884892087, "grad_norm": 0.32394132018089294, "learning_rate": 5.30872677072771e-05, "loss": 0.0652, "step": 30380 }, { "action_loss": 0.0023128774482756853, "epoch": 27.320143884892087, "step": 30380 }, { "epoch": 27.320143884892087, "step": 30380, "torque_loss": 0.0901615247130394 }, { "epoch": 27.32913669064748, "grad_norm": 0.33142951130867004, "learning_rate": 5.3059761991583954e-05, "loss": 0.0769, "step": 30390 }, { "action_loss": 0.009126594290137291, "epoch": 27.32913669064748, "step": 30390 }, { "epoch": 27.32913669064748, "step": 30390, "torque_loss": 0.17170919477939606 }, { "epoch": 27.33812949640288, "grad_norm": 0.3160141408443451, "learning_rate": 5.303225534641592e-05, "loss": 0.0658, "step": 30400 }, { "action_loss": 0.00889818649739027, "epoch": 27.33812949640288, "step": 30400 }, { "epoch": 27.33812949640288, "step": 30400, "torque_loss": 0.08576896786689758 }, { "epoch": 27.347122302158272, "grad_norm": 0.3414342701435089, "learning_rate": 5.300474778012875e-05, "loss": 0.0613, "step": 30410 }, { "action_loss": 0.0029395921155810356, "epoch": 27.347122302158272, "step": 30410 }, { "epoch": 27.347122302158272, "step": 30410, "torque_loss": 0.07463601976633072 }, { "epoch": 27.35611510791367, "grad_norm": 0.3357495963573456, "learning_rate": 5.297723930107855e-05, "loss": 0.0625, "step": 30420 }, { "action_loss": 0.010934743098914623, "epoch": 27.35611510791367, "step": 30420 }, { "epoch": 27.35611510791367, "step": 30420, "torque_loss": 0.16541360318660736 }, { "epoch": 27.365107913669064, "grad_norm": 0.29763564467430115, "learning_rate": 5.294972991762167e-05, "loss": 0.0681, "step": 30430 }, { "action_loss": 0.0037354864180088043, "epoch": 27.365107913669064, "step": 30430 }, { "epoch": 27.365107913669064, "step": 30430, "torque_loss": 0.10047703236341476 }, { "epoch": 27.37410071942446, "grad_norm": 0.2734982371330261, "learning_rate": 5.292221963811472e-05, "loss": 0.0669, "step": 30440 }, { "action_loss": 0.009076758287847042, "epoch": 27.37410071942446, "step": 30440 }, { "epoch": 27.37410071942446, "step": 30440, "torque_loss": 0.14828576147556305 }, { "epoch": 27.383093525179856, "grad_norm": 0.2818618714809418, "learning_rate": 5.28947084709146e-05, "loss": 0.0635, "step": 30450 }, { "action_loss": 0.004665733780711889, "epoch": 27.383093525179856, "step": 30450 }, { "epoch": 27.383093525179856, "step": 30450, "torque_loss": 0.09221024066209793 }, { "epoch": 27.392086330935253, "grad_norm": 0.3108813464641571, "learning_rate": 5.2867196424378465e-05, "loss": 0.0706, "step": 30460 }, { "action_loss": 0.004834202118217945, "epoch": 27.392086330935253, "step": 30460 }, { "epoch": 27.392086330935253, "step": 30460, "torque_loss": 0.10162210464477539 }, { "epoch": 27.401079136690647, "grad_norm": 0.34962284564971924, "learning_rate": 5.2839683506863765e-05, "loss": 0.0776, "step": 30470 }, { "action_loss": 0.002656127093359828, "epoch": 27.401079136690647, "step": 30470 }, { "epoch": 27.401079136690647, "step": 30470, "torque_loss": 0.0834234356880188 }, { "epoch": 27.41007194244604, "grad_norm": 0.29024747014045715, "learning_rate": 5.281216972672821e-05, "loss": 0.0711, "step": 30480 }, { "action_loss": 0.0037382866721600294, "epoch": 27.41007194244604, "step": 30480 }, { "epoch": 27.41007194244604, "step": 30480, "torque_loss": 0.10450264066457748 }, { "epoch": 27.41906474820144, "grad_norm": 0.33897867798805237, "learning_rate": 5.278465509232973e-05, "loss": 0.0794, "step": 30490 }, { "action_loss": 0.001530722132883966, "epoch": 27.41906474820144, "step": 30490 }, { "epoch": 27.41906474820144, "step": 30490, "torque_loss": 0.054933976382017136 }, { "epoch": 27.428057553956833, "grad_norm": 0.24643875658512115, "learning_rate": 5.275713961202655e-05, "loss": 0.062, "step": 30500 }, { "action_loss": 0.00391709990799427, "epoch": 27.428057553956833, "step": 30500 }, { "epoch": 27.428057553956833, "step": 30500, "torque_loss": 0.10649462789297104 }, { "epoch": 27.43705035971223, "grad_norm": 0.29779261350631714, "learning_rate": 5.2729623294177165e-05, "loss": 0.0597, "step": 30510 }, { "action_loss": 0.0020388385746628046, "epoch": 27.43705035971223, "step": 30510 }, { "epoch": 27.43705035971223, "step": 30510, "torque_loss": 0.06856478005647659 }, { "epoch": 27.446043165467625, "grad_norm": 0.2893252670764923, "learning_rate": 5.270210614714028e-05, "loss": 0.0728, "step": 30520 }, { "action_loss": 0.00386410322971642, "epoch": 27.446043165467625, "step": 30520 }, { "epoch": 27.446043165467625, "step": 30520, "torque_loss": 0.11182566732168198 }, { "epoch": 27.455035971223023, "grad_norm": 0.33676448464393616, "learning_rate": 5.267458817927491e-05, "loss": 0.0767, "step": 30530 }, { "action_loss": 0.0018861168064177036, "epoch": 27.455035971223023, "step": 30530 }, { "epoch": 27.455035971223023, "step": 30530, "torque_loss": 0.09914698451757431 }, { "epoch": 27.464028776978417, "grad_norm": 0.28472647070884705, "learning_rate": 5.264706939894026e-05, "loss": 0.0696, "step": 30540 }, { "action_loss": 0.0030672152061015368, "epoch": 27.464028776978417, "step": 30540 }, { "epoch": 27.464028776978417, "step": 30540, "torque_loss": 0.09455829858779907 }, { "epoch": 27.473021582733814, "grad_norm": 0.27000364661216736, "learning_rate": 5.261954981449584e-05, "loss": 0.0687, "step": 30550 }, { "action_loss": 0.010119342245161533, "epoch": 27.473021582733814, "step": 30550 }, { "epoch": 27.473021582733814, "step": 30550, "torque_loss": 0.09475690126419067 }, { "epoch": 27.48201438848921, "grad_norm": 0.28739771246910095, "learning_rate": 5.2592029434301324e-05, "loss": 0.0687, "step": 30560 }, { "action_loss": 0.011433704756200314, "epoch": 27.48201438848921, "step": 30560 }, { "epoch": 27.48201438848921, "step": 30560, "torque_loss": 0.12294892221689224 }, { "epoch": 27.491007194244606, "grad_norm": 0.33424171805381775, "learning_rate": 5.256450826671672e-05, "loss": 0.0751, "step": 30570 }, { "action_loss": 0.004728905390948057, "epoch": 27.491007194244606, "step": 30570 }, { "epoch": 27.491007194244606, "step": 30570, "torque_loss": 0.10159063339233398 }, { "epoch": 27.5, "grad_norm": 0.2515795826911926, "learning_rate": 5.253698632010221e-05, "loss": 0.07, "step": 30580 }, { "action_loss": 0.012915901839733124, "epoch": 27.5, "step": 30580 }, { "epoch": 27.5, "step": 30580, "torque_loss": 0.165410578250885 }, { "epoch": 27.508992805755394, "grad_norm": 0.2704910635948181, "learning_rate": 5.2509463602818246e-05, "loss": 0.0719, "step": 30590 }, { "action_loss": 0.0066384077072143555, "epoch": 27.508992805755394, "step": 30590 }, { "epoch": 27.508992805755394, "step": 30590, "torque_loss": 0.11431439965963364 }, { "epoch": 27.51798561151079, "grad_norm": 0.3375019133090973, "learning_rate": 5.248194012322549e-05, "loss": 0.0655, "step": 30600 }, { "action_loss": 0.014646020717918873, "epoch": 27.51798561151079, "step": 30600 }, { "epoch": 27.51798561151079, "step": 30600, "torque_loss": 0.14975476264953613 }, { "epoch": 27.526978417266186, "grad_norm": 0.35575318336486816, "learning_rate": 5.245441588968486e-05, "loss": 0.0707, "step": 30610 }, { "action_loss": 0.003709023119881749, "epoch": 27.526978417266186, "step": 30610 }, { "epoch": 27.526978417266186, "step": 30610, "torque_loss": 0.07160349935293198 }, { "epoch": 27.535971223021583, "grad_norm": 0.3513669967651367, "learning_rate": 5.242689091055748e-05, "loss": 0.0793, "step": 30620 }, { "action_loss": 0.004994290415197611, "epoch": 27.535971223021583, "step": 30620 }, { "epoch": 27.535971223021583, "step": 30620, "torque_loss": 0.12223106622695923 }, { "epoch": 27.544964028776977, "grad_norm": 0.32259252667427063, "learning_rate": 5.239936519420473e-05, "loss": 0.0833, "step": 30630 }, { "action_loss": 0.011836222372949123, "epoch": 27.544964028776977, "step": 30630 }, { "epoch": 27.544964028776977, "step": 30630, "torque_loss": 0.1303999423980713 }, { "epoch": 27.553956834532375, "grad_norm": 0.3635013997554779, "learning_rate": 5.2371838748988175e-05, "loss": 0.0861, "step": 30640 }, { "action_loss": 0.010272227227687836, "epoch": 27.553956834532375, "step": 30640 }, { "epoch": 27.553956834532375, "step": 30640, "torque_loss": 0.12594372034072876 }, { "epoch": 27.56294964028777, "grad_norm": 0.3408564329147339, "learning_rate": 5.234431158326965e-05, "loss": 0.0797, "step": 30650 }, { "action_loss": 0.010601473040878773, "epoch": 27.56294964028777, "step": 30650 }, { "epoch": 27.56294964028777, "step": 30650, "torque_loss": 0.13295823335647583 }, { "epoch": 27.571942446043167, "grad_norm": 0.2982029914855957, "learning_rate": 5.231678370541115e-05, "loss": 0.0709, "step": 30660 }, { "action_loss": 0.02608460746705532, "epoch": 27.571942446043167, "step": 30660 }, { "epoch": 27.571942446043167, "step": 30660, "torque_loss": 0.19423745572566986 }, { "epoch": 27.58093525179856, "grad_norm": 0.3127378225326538, "learning_rate": 5.228925512377495e-05, "loss": 0.0738, "step": 30670 }, { "action_loss": 0.005283508449792862, "epoch": 27.58093525179856, "step": 30670 }, { "epoch": 27.58093525179856, "step": 30670, "torque_loss": 0.11543136090040207 }, { "epoch": 27.58992805755396, "grad_norm": 0.3616526126861572, "learning_rate": 5.2261725846723465e-05, "loss": 0.0752, "step": 30680 }, { "action_loss": 0.0035398041363805532, "epoch": 27.58992805755396, "step": 30680 }, { "epoch": 27.58992805755396, "step": 30680, "torque_loss": 0.07509780675172806 }, { "epoch": 27.598920863309353, "grad_norm": 0.304303914308548, "learning_rate": 5.22341958826194e-05, "loss": 0.0657, "step": 30690 }, { "action_loss": 0.01630779542028904, "epoch": 27.598920863309353, "step": 30690 }, { "epoch": 27.598920863309353, "step": 30690, "torque_loss": 0.12124564498662949 }, { "epoch": 27.607913669064747, "grad_norm": 0.22093622386455536, "learning_rate": 5.22066652398256e-05, "loss": 0.0663, "step": 30700 }, { "action_loss": 0.00638221763074398, "epoch": 27.607913669064747, "step": 30700 }, { "epoch": 27.607913669064747, "step": 30700, "torque_loss": 0.10512000322341919 }, { "epoch": 27.616906474820144, "grad_norm": 0.32925015687942505, "learning_rate": 5.2179133926705185e-05, "loss": 0.0664, "step": 30710 }, { "action_loss": 0.002771182684227824, "epoch": 27.616906474820144, "step": 30710 }, { "epoch": 27.616906474820144, "step": 30710, "torque_loss": 0.07446091622114182 }, { "epoch": 27.62589928057554, "grad_norm": 0.26211461424827576, "learning_rate": 5.215160195162141e-05, "loss": 0.0637, "step": 30720 }, { "action_loss": 0.006480127573013306, "epoch": 27.62589928057554, "step": 30720 }, { "epoch": 27.62589928057554, "step": 30720, "torque_loss": 0.09608682990074158 }, { "epoch": 27.634892086330936, "grad_norm": 0.32613861560821533, "learning_rate": 5.212406932293776e-05, "loss": 0.0568, "step": 30730 }, { "action_loss": 0.006453521549701691, "epoch": 27.634892086330936, "step": 30730 }, { "epoch": 27.634892086330936, "step": 30730, "torque_loss": 0.11692067235708237 }, { "epoch": 27.64388489208633, "grad_norm": 0.42026227712631226, "learning_rate": 5.209653604901795e-05, "loss": 0.0645, "step": 30740 }, { "action_loss": 0.004264110699295998, "epoch": 27.64388489208633, "step": 30740 }, { "epoch": 27.64388489208633, "step": 30740, "torque_loss": 0.10982044786214828 }, { "epoch": 27.652877697841728, "grad_norm": 0.3222639262676239, "learning_rate": 5.206900213822584e-05, "loss": 0.0685, "step": 30750 }, { "action_loss": 0.003980834502726793, "epoch": 27.652877697841728, "step": 30750 }, { "epoch": 27.652877697841728, "step": 30750, "torque_loss": 0.06127515435218811 }, { "epoch": 27.66187050359712, "grad_norm": 0.2724141478538513, "learning_rate": 5.204146759892551e-05, "loss": 0.0693, "step": 30760 }, { "action_loss": 0.0032156186643987894, "epoch": 27.66187050359712, "step": 30760 }, { "epoch": 27.66187050359712, "step": 30760, "torque_loss": 0.08185338973999023 }, { "epoch": 27.67086330935252, "grad_norm": 0.26350679993629456, "learning_rate": 5.2013932439481216e-05, "loss": 0.0659, "step": 30770 }, { "action_loss": 0.02754613757133484, "epoch": 27.67086330935252, "step": 30770 }, { "epoch": 27.67086330935252, "step": 30770, "torque_loss": 0.2713729441165924 }, { "epoch": 27.679856115107913, "grad_norm": 0.2752734124660492, "learning_rate": 5.198639666825743e-05, "loss": 0.0878, "step": 30780 }, { "action_loss": 0.01115492731332779, "epoch": 27.679856115107913, "step": 30780 }, { "epoch": 27.679856115107913, "step": 30780, "torque_loss": 0.12435568124055862 }, { "epoch": 27.68884892086331, "grad_norm": 0.29567909240722656, "learning_rate": 5.195886029361877e-05, "loss": 0.0697, "step": 30790 }, { "action_loss": 0.004041988402605057, "epoch": 27.68884892086331, "step": 30790 }, { "epoch": 27.68884892086331, "step": 30790, "torque_loss": 0.15499238669872284 }, { "epoch": 27.697841726618705, "grad_norm": 0.3508957326412201, "learning_rate": 5.193132332393009e-05, "loss": 0.0765, "step": 30800 }, { "action_loss": 0.024076765403151512, "epoch": 27.697841726618705, "step": 30800 }, { "epoch": 27.697841726618705, "step": 30800, "torque_loss": 0.18604642152786255 }, { "epoch": 27.7068345323741, "grad_norm": 0.2457025647163391, "learning_rate": 5.1903785767556376e-05, "loss": 0.067, "step": 30810 }, { "action_loss": 0.003447478637099266, "epoch": 27.7068345323741, "step": 30810 }, { "epoch": 27.7068345323741, "step": 30810, "torque_loss": 0.05423742160201073 }, { "epoch": 27.715827338129497, "grad_norm": 0.37155112624168396, "learning_rate": 5.187624763286282e-05, "loss": 0.0567, "step": 30820 }, { "action_loss": 0.009105809032917023, "epoch": 27.715827338129497, "step": 30820 }, { "epoch": 27.715827338129497, "step": 30820, "torque_loss": 0.13048236072063446 }, { "epoch": 27.72482014388489, "grad_norm": 0.3997020125389099, "learning_rate": 5.184870892821475e-05, "loss": 0.0923, "step": 30830 }, { "action_loss": 0.003107269061729312, "epoch": 27.72482014388489, "step": 30830 }, { "epoch": 27.72482014388489, "step": 30830, "torque_loss": 0.07492136210203171 }, { "epoch": 27.73381294964029, "grad_norm": 0.3696138560771942, "learning_rate": 5.182116966197773e-05, "loss": 0.0652, "step": 30840 }, { "action_loss": 0.0041135684587061405, "epoch": 27.73381294964029, "step": 30840 }, { "epoch": 27.73381294964029, "step": 30840, "torque_loss": 0.07332175225019455 }, { "epoch": 27.742805755395683, "grad_norm": 0.3052142858505249, "learning_rate": 5.1793629842517466e-05, "loss": 0.0615, "step": 30850 }, { "action_loss": 0.006175318732857704, "epoch": 27.742805755395683, "step": 30850 }, { "epoch": 27.742805755395683, "step": 30850, "torque_loss": 0.12641316652297974 }, { "epoch": 27.75179856115108, "grad_norm": 0.2892895042896271, "learning_rate": 5.17660894781998e-05, "loss": 0.0651, "step": 30860 }, { "action_loss": 0.0021892788354307413, "epoch": 27.75179856115108, "step": 30860 }, { "epoch": 27.75179856115108, "step": 30860, "torque_loss": 0.05734121799468994 }, { "epoch": 27.760791366906474, "grad_norm": 0.30549585819244385, "learning_rate": 5.173854857739079e-05, "loss": 0.0685, "step": 30870 }, { "action_loss": 0.004137205425649881, "epoch": 27.760791366906474, "step": 30870 }, { "epoch": 27.760791366906474, "step": 30870, "torque_loss": 0.06405114382505417 }, { "epoch": 27.769784172661872, "grad_norm": 0.3175692558288574, "learning_rate": 5.171100714845661e-05, "loss": 0.0572, "step": 30880 }, { "action_loss": 0.011248608119785786, "epoch": 27.769784172661872, "step": 30880 }, { "epoch": 27.769784172661872, "step": 30880, "torque_loss": 0.1047271266579628 }, { "epoch": 27.778776978417266, "grad_norm": 0.22851184010505676, "learning_rate": 5.1683465199763646e-05, "loss": 0.0764, "step": 30890 }, { "action_loss": 0.003476514481008053, "epoch": 27.778776978417266, "step": 30890 }, { "epoch": 27.778776978417266, "step": 30890, "torque_loss": 0.0761587843298912 }, { "epoch": 27.78776978417266, "grad_norm": 0.2907366454601288, "learning_rate": 5.16559227396784e-05, "loss": 0.0592, "step": 30900 }, { "action_loss": 0.004031195770949125, "epoch": 27.78776978417266, "step": 30900 }, { "epoch": 27.78776978417266, "step": 30900, "torque_loss": 0.0958227887749672 }, { "epoch": 27.796762589928058, "grad_norm": 0.3233397305011749, "learning_rate": 5.1628379776567556e-05, "loss": 0.0667, "step": 30910 }, { "action_loss": 0.0024853991344571114, "epoch": 27.796762589928058, "step": 30910 }, { "epoch": 27.796762589928058, "step": 30910, "torque_loss": 0.06353891640901566 }, { "epoch": 27.805755395683452, "grad_norm": 0.28306540846824646, "learning_rate": 5.160083631879792e-05, "loss": 0.0659, "step": 30920 }, { "action_loss": 0.002875726670026779, "epoch": 27.805755395683452, "step": 30920 }, { "epoch": 27.805755395683452, "step": 30920, "torque_loss": 0.07317005842924118 }, { "epoch": 27.81474820143885, "grad_norm": 0.32126179337501526, "learning_rate": 5.1573292374736484e-05, "loss": 0.0674, "step": 30930 }, { "action_loss": 0.002815108746290207, "epoch": 27.81474820143885, "step": 30930 }, { "epoch": 27.81474820143885, "step": 30930, "torque_loss": 0.08528163284063339 }, { "epoch": 27.823741007194243, "grad_norm": 0.3479314148426056, "learning_rate": 5.1545747952750356e-05, "loss": 0.0733, "step": 30940 }, { "action_loss": 0.006008153315633535, "epoch": 27.823741007194243, "step": 30940 }, { "epoch": 27.823741007194243, "step": 30940, "torque_loss": 0.10670802742242813 }, { "epoch": 27.83273381294964, "grad_norm": 0.29325827956199646, "learning_rate": 5.151820306120682e-05, "loss": 0.0591, "step": 30950 }, { "action_loss": 0.00388711248524487, "epoch": 27.83273381294964, "step": 30950 }, { "epoch": 27.83273381294964, "step": 30950, "torque_loss": 0.08710583299398422 }, { "epoch": 27.841726618705035, "grad_norm": 0.28612688183784485, "learning_rate": 5.149065770847328e-05, "loss": 0.057, "step": 30960 }, { "action_loss": 0.01105673611164093, "epoch": 27.841726618705035, "step": 30960 }, { "epoch": 27.841726618705035, "step": 30960, "torque_loss": 0.1216701865196228 }, { "epoch": 27.850719424460433, "grad_norm": 0.3539559841156006, "learning_rate": 5.1463111902917297e-05, "loss": 0.0736, "step": 30970 }, { "action_loss": 0.004041273146867752, "epoch": 27.850719424460433, "step": 30970 }, { "epoch": 27.850719424460433, "step": 30970, "torque_loss": 0.13849100470542908 }, { "epoch": 27.859712230215827, "grad_norm": 0.3088609278202057, "learning_rate": 5.143556565290654e-05, "loss": 0.0764, "step": 30980 }, { "action_loss": 0.004094287753105164, "epoch": 27.859712230215827, "step": 30980 }, { "epoch": 27.859712230215827, "step": 30980, "torque_loss": 0.11589208245277405 }, { "epoch": 27.868705035971225, "grad_norm": 0.28916221857070923, "learning_rate": 5.140801896680882e-05, "loss": 0.0624, "step": 30990 }, { "action_loss": 0.009570821188390255, "epoch": 27.868705035971225, "step": 30990 }, { "epoch": 27.868705035971225, "step": 30990, "torque_loss": 0.12686701118946075 }, { "epoch": 27.87769784172662, "grad_norm": 0.28396421670913696, "learning_rate": 5.1380471852992144e-05, "loss": 0.0665, "step": 31000 }, { "action_loss": 0.0051605417393147945, "epoch": 27.87769784172662, "step": 31000 }, { "epoch": 27.87769784172662, "step": 31000, "torque_loss": 0.07096031308174133 }, { "epoch": 27.886690647482013, "grad_norm": 0.22506457567214966, "learning_rate": 5.135292431982457e-05, "loss": 0.0642, "step": 31010 }, { "action_loss": 0.006206301506608725, "epoch": 27.886690647482013, "step": 31010 }, { "epoch": 27.886690647482013, "step": 31010, "torque_loss": 0.09370500594377518 }, { "epoch": 27.89568345323741, "grad_norm": 0.3386896252632141, "learning_rate": 5.1325376375674294e-05, "loss": 0.0657, "step": 31020 }, { "action_loss": 0.015281151048839092, "epoch": 27.89568345323741, "step": 31020 }, { "epoch": 27.89568345323741, "step": 31020, "torque_loss": 0.14366911351680756 }, { "epoch": 27.904676258992804, "grad_norm": 0.21991340816020966, "learning_rate": 5.129782802890968e-05, "loss": 0.0707, "step": 31030 }, { "action_loss": 0.002234814455732703, "epoch": 27.904676258992804, "step": 31030 }, { "epoch": 27.904676258992804, "step": 31030, "torque_loss": 0.07064346224069595 }, { "epoch": 27.913669064748202, "grad_norm": 0.2642335295677185, "learning_rate": 5.127027928789916e-05, "loss": 0.065, "step": 31040 }, { "action_loss": 0.009223397821187973, "epoch": 27.913669064748202, "step": 31040 }, { "epoch": 27.913669064748202, "step": 31040, "torque_loss": 0.072780542075634 }, { "epoch": 27.922661870503596, "grad_norm": 0.3895231783390045, "learning_rate": 5.124273016101135e-05, "loss": 0.0837, "step": 31050 }, { "action_loss": 0.003762780921533704, "epoch": 27.922661870503596, "step": 31050 }, { "epoch": 27.922661870503596, "step": 31050, "torque_loss": 0.07609273493289948 }, { "epoch": 27.931654676258994, "grad_norm": 0.3100006878376007, "learning_rate": 5.121518065661492e-05, "loss": 0.0675, "step": 31060 }, { "action_loss": 0.0025074637960642576, "epoch": 27.931654676258994, "step": 31060 }, { "epoch": 27.931654676258994, "step": 31060, "torque_loss": 0.07389967888593674 }, { "epoch": 27.940647482014388, "grad_norm": 0.20834563672542572, "learning_rate": 5.11876307830787e-05, "loss": 0.0692, "step": 31070 }, { "action_loss": 0.008809915743768215, "epoch": 27.940647482014388, "step": 31070 }, { "epoch": 27.940647482014388, "step": 31070, "torque_loss": 0.14929422736167908 }, { "epoch": 27.949640287769785, "grad_norm": 0.3045310974121094, "learning_rate": 5.1160080548771596e-05, "loss": 0.0763, "step": 31080 }, { "action_loss": 0.004327115137130022, "epoch": 27.949640287769785, "step": 31080 }, { "epoch": 27.949640287769785, "step": 31080, "torque_loss": 0.11793840676546097 }, { "epoch": 27.95863309352518, "grad_norm": 0.2661994695663452, "learning_rate": 5.1132529962062656e-05, "loss": 0.0637, "step": 31090 }, { "action_loss": 0.0018681412329897285, "epoch": 27.95863309352518, "step": 31090 }, { "epoch": 27.95863309352518, "step": 31090, "torque_loss": 0.07679726928472519 }, { "epoch": 27.967625899280577, "grad_norm": 0.3218686580657959, "learning_rate": 5.110497903132101e-05, "loss": 0.0575, "step": 31100 }, { "action_loss": 0.0014515850925818086, "epoch": 27.967625899280577, "step": 31100 }, { "epoch": 27.967625899280577, "step": 31100, "torque_loss": 0.05581963062286377 }, { "epoch": 27.97661870503597, "grad_norm": 0.4098682999610901, "learning_rate": 5.107742776491592e-05, "loss": 0.0677, "step": 31110 }, { "action_loss": 0.0020234861876815557, "epoch": 27.97661870503597, "step": 31110 }, { "epoch": 27.97661870503597, "step": 31110, "torque_loss": 0.07754997164011002 }, { "epoch": 27.985611510791365, "grad_norm": 0.2982770800590515, "learning_rate": 5.104987617121673e-05, "loss": 0.0591, "step": 31120 }, { "action_loss": 0.003237846540287137, "epoch": 27.985611510791365, "step": 31120 }, { "epoch": 27.985611510791365, "step": 31120, "torque_loss": 0.10333994776010513 }, { "epoch": 27.994604316546763, "grad_norm": 0.2785729169845581, "learning_rate": 5.102232425859287e-05, "loss": 0.0628, "step": 31130 }, { "action_loss": 0.0017137593822553754, "epoch": 27.994604316546763, "step": 31130 }, { "epoch": 27.994604316546763, "step": 31130, "torque_loss": 0.04299547150731087 }, { "epoch": 28.003597122302157, "grad_norm": 0.29213425517082214, "learning_rate": 5.09947720354139e-05, "loss": 0.0578, "step": 31140 }, { "action_loss": 0.008210831321775913, "epoch": 28.003597122302157, "step": 31140 }, { "epoch": 28.003597122302157, "step": 31140, "torque_loss": 0.1339065283536911 }, { "epoch": 28.012589928057555, "grad_norm": 0.2773280441761017, "learning_rate": 5.096721951004942e-05, "loss": 0.0688, "step": 31150 }, { "action_loss": 0.002580928383395076, "epoch": 28.012589928057555, "step": 31150 }, { "epoch": 28.012589928057555, "step": 31150, "torque_loss": 0.08250822871923447 }, { "epoch": 28.02158273381295, "grad_norm": 0.3724066913127899, "learning_rate": 5.0939666690869227e-05, "loss": 0.0637, "step": 31160 }, { "action_loss": 0.011963631957769394, "epoch": 28.02158273381295, "step": 31160 }, { "epoch": 28.02158273381295, "step": 31160, "torque_loss": 0.10926777124404907 }, { "epoch": 28.030575539568346, "grad_norm": 0.3239971101284027, "learning_rate": 5.0912113586243096e-05, "loss": 0.067, "step": 31170 }, { "action_loss": 0.003913348540663719, "epoch": 28.030575539568346, "step": 31170 }, { "epoch": 28.030575539568346, "step": 31170, "torque_loss": 0.07884597033262253 }, { "epoch": 28.03956834532374, "grad_norm": 0.3139030337333679, "learning_rate": 5.0884560204540935e-05, "loss": 0.0714, "step": 31180 }, { "action_loss": 0.017848948016762733, "epoch": 28.03956834532374, "step": 31180 }, { "epoch": 28.03956834532374, "step": 31180, "torque_loss": 0.1180240586400032 }, { "epoch": 28.048561151079138, "grad_norm": 0.3091764748096466, "learning_rate": 5.0857006554132736e-05, "loss": 0.0674, "step": 31190 }, { "action_loss": 0.002564231166616082, "epoch": 28.048561151079138, "step": 31190 }, { "epoch": 28.048561151079138, "step": 31190, "torque_loss": 0.08078494668006897 }, { "epoch": 28.057553956834532, "grad_norm": 0.24759899079799652, "learning_rate": 5.0829452643388575e-05, "loss": 0.0589, "step": 31200 }, { "action_loss": 0.004453945439308882, "epoch": 28.057553956834532, "step": 31200 }, { "epoch": 28.057553956834532, "step": 31200, "torque_loss": 0.09516274929046631 }, { "epoch": 28.06654676258993, "grad_norm": 0.35596343874931335, "learning_rate": 5.08018984806786e-05, "loss": 0.0607, "step": 31210 }, { "action_loss": 0.002823249204084277, "epoch": 28.06654676258993, "step": 31210 }, { "epoch": 28.06654676258993, "step": 31210, "torque_loss": 0.0886005088686943 }, { "epoch": 28.075539568345324, "grad_norm": 0.27783486247062683, "learning_rate": 5.0774344074373036e-05, "loss": 0.0667, "step": 31220 }, { "action_loss": 0.002287832088768482, "epoch": 28.075539568345324, "step": 31220 }, { "epoch": 28.075539568345324, "step": 31220, "torque_loss": 0.06120606139302254 }, { "epoch": 28.084532374100718, "grad_norm": 0.27586984634399414, "learning_rate": 5.07467894328422e-05, "loss": 0.0674, "step": 31230 }, { "action_loss": 0.014589470811188221, "epoch": 28.084532374100718, "step": 31230 }, { "epoch": 28.084532374100718, "step": 31230, "torque_loss": 0.13280338048934937 }, { "epoch": 28.093525179856115, "grad_norm": 0.3106343746185303, "learning_rate": 5.0719234564456454e-05, "loss": 0.0663, "step": 31240 }, { "action_loss": 0.008374814875423908, "epoch": 28.093525179856115, "step": 31240 }, { "epoch": 28.093525179856115, "step": 31240, "torque_loss": 0.12344636768102646 }, { "epoch": 28.10251798561151, "grad_norm": 0.2796287536621094, "learning_rate": 5.0691679477586216e-05, "loss": 0.0522, "step": 31250 }, { "action_loss": 0.0033370396122336388, "epoch": 28.10251798561151, "step": 31250 }, { "epoch": 28.10251798561151, "step": 31250, "torque_loss": 0.07487937062978745 }, { "epoch": 28.111510791366907, "grad_norm": 0.2747247815132141, "learning_rate": 5.0664124180602035e-05, "loss": 0.0612, "step": 31260 }, { "action_loss": 0.0020335169974714518, "epoch": 28.111510791366907, "step": 31260 }, { "epoch": 28.111510791366907, "step": 31260, "torque_loss": 0.039319224655628204 }, { "epoch": 28.1205035971223, "grad_norm": 0.30754855275154114, "learning_rate": 5.063656868187447e-05, "loss": 0.0597, "step": 31270 }, { "action_loss": 0.006118417251855135, "epoch": 28.1205035971223, "step": 31270 }, { "epoch": 28.1205035971223, "step": 31270, "torque_loss": 0.08648437261581421 }, { "epoch": 28.1294964028777, "grad_norm": 0.3052823841571808, "learning_rate": 5.060901298977413e-05, "loss": 0.0671, "step": 31280 }, { "action_loss": 0.019856134429574013, "epoch": 28.1294964028777, "step": 31280 }, { "epoch": 28.1294964028777, "step": 31280, "torque_loss": 0.14969904720783234 }, { "epoch": 28.138489208633093, "grad_norm": 0.31773555278778076, "learning_rate": 5.0581457112671725e-05, "loss": 0.076, "step": 31290 }, { "action_loss": 0.003741626627743244, "epoch": 28.138489208633093, "step": 31290 }, { "epoch": 28.138489208633093, "step": 31290, "torque_loss": 0.10268553346395493 }, { "epoch": 28.14748201438849, "grad_norm": 0.24593378603458405, "learning_rate": 5.0553901058938016e-05, "loss": 0.0631, "step": 31300 }, { "action_loss": 0.0028909940738230944, "epoch": 28.14748201438849, "step": 31300 }, { "epoch": 28.14748201438849, "step": 31300, "torque_loss": 0.06903833150863647 }, { "epoch": 28.156474820143885, "grad_norm": 0.2450956255197525, "learning_rate": 5.052634483694377e-05, "loss": 0.0615, "step": 31310 }, { "action_loss": 0.0035714705009013414, "epoch": 28.156474820143885, "step": 31310 }, { "epoch": 28.156474820143885, "step": 31310, "torque_loss": 0.09872066974639893 }, { "epoch": 28.165467625899282, "grad_norm": 0.3522529900074005, "learning_rate": 5.049878845505988e-05, "loss": 0.0646, "step": 31320 }, { "action_loss": 0.0032750118989497423, "epoch": 28.165467625899282, "step": 31320 }, { "epoch": 28.165467625899282, "step": 31320, "torque_loss": 0.08800455927848816 }, { "epoch": 28.174460431654676, "grad_norm": 0.2877991497516632, "learning_rate": 5.047123192165721e-05, "loss": 0.064, "step": 31330 }, { "action_loss": 0.003532766131684184, "epoch": 28.174460431654676, "step": 31330 }, { "epoch": 28.174460431654676, "step": 31330, "torque_loss": 0.07471992075443268 }, { "epoch": 28.18345323741007, "grad_norm": 0.40468859672546387, "learning_rate": 5.0443675245106735e-05, "loss": 0.069, "step": 31340 }, { "action_loss": 0.0022852702531963587, "epoch": 28.18345323741007, "step": 31340 }, { "epoch": 28.18345323741007, "step": 31340, "torque_loss": 0.04584053158760071 }, { "epoch": 28.192446043165468, "grad_norm": 0.39796656370162964, "learning_rate": 5.0416118433779426e-05, "loss": 0.0591, "step": 31350 }, { "action_loss": 0.016455160453915596, "epoch": 28.192446043165468, "step": 31350 }, { "epoch": 28.192446043165468, "step": 31350, "torque_loss": 0.20636701583862305 }, { "epoch": 28.201438848920862, "grad_norm": 0.25322505831718445, "learning_rate": 5.038856149604633e-05, "loss": 0.0652, "step": 31360 }, { "action_loss": 0.005591609049588442, "epoch": 28.201438848920862, "step": 31360 }, { "epoch": 28.201438848920862, "step": 31360, "torque_loss": 0.11871068924665451 }, { "epoch": 28.21043165467626, "grad_norm": 0.26239046454429626, "learning_rate": 5.03610044402785e-05, "loss": 0.0645, "step": 31370 }, { "action_loss": 0.00299827940762043, "epoch": 28.21043165467626, "step": 31370 }, { "epoch": 28.21043165467626, "step": 31370, "torque_loss": 0.08623597770929337 }, { "epoch": 28.219424460431654, "grad_norm": 0.2914130687713623, "learning_rate": 5.033344727484707e-05, "loss": 0.0712, "step": 31380 }, { "action_loss": 0.009958256967365742, "epoch": 28.219424460431654, "step": 31380 }, { "epoch": 28.219424460431654, "step": 31380, "torque_loss": 0.16386817395687103 }, { "epoch": 28.22841726618705, "grad_norm": 0.28202134370803833, "learning_rate": 5.030589000812315e-05, "loss": 0.0653, "step": 31390 }, { "action_loss": 0.003674466861411929, "epoch": 28.22841726618705, "step": 31390 }, { "epoch": 28.22841726618705, "step": 31390, "torque_loss": 0.09705273061990738 }, { "epoch": 28.237410071942445, "grad_norm": 0.28228092193603516, "learning_rate": 5.027833264847793e-05, "loss": 0.0612, "step": 31400 }, { "action_loss": 0.006210524123162031, "epoch": 28.237410071942445, "step": 31400 }, { "epoch": 28.237410071942445, "step": 31400, "torque_loss": 0.10226158052682877 }, { "epoch": 28.246402877697843, "grad_norm": 0.2800460457801819, "learning_rate": 5.025077520428258e-05, "loss": 0.079, "step": 31410 }, { "action_loss": 0.002496164059266448, "epoch": 28.246402877697843, "step": 31410 }, { "epoch": 28.246402877697843, "step": 31410, "torque_loss": 0.06105148792266846 }, { "epoch": 28.255395683453237, "grad_norm": 0.29033181071281433, "learning_rate": 5.022321768390837e-05, "loss": 0.0572, "step": 31420 }, { "action_loss": 0.01799548976123333, "epoch": 28.255395683453237, "step": 31420 }, { "epoch": 28.255395683453237, "step": 31420, "torque_loss": 0.1376689374446869 }, { "epoch": 28.264388489208635, "grad_norm": 0.39491409063339233, "learning_rate": 5.0195660095726516e-05, "loss": 0.0762, "step": 31430 }, { "action_loss": 0.006399159785360098, "epoch": 28.264388489208635, "step": 31430 }, { "epoch": 28.264388489208635, "step": 31430, "torque_loss": 0.11799303442239761 }, { "epoch": 28.27338129496403, "grad_norm": 0.2708837687969208, "learning_rate": 5.016810244810829e-05, "loss": 0.0692, "step": 31440 }, { "action_loss": 0.010596112348139286, "epoch": 28.27338129496403, "step": 31440 }, { "epoch": 28.27338129496403, "step": 31440, "torque_loss": 0.12894947826862335 }, { "epoch": 28.282374100719423, "grad_norm": 0.43105730414390564, "learning_rate": 5.0140544749424976e-05, "loss": 0.0625, "step": 31450 }, { "action_loss": 0.0037231456954032183, "epoch": 28.282374100719423, "step": 31450 }, { "epoch": 28.282374100719423, "step": 31450, "torque_loss": 0.09122574329376221 }, { "epoch": 28.29136690647482, "grad_norm": 0.4201442003250122, "learning_rate": 5.0112987008047874e-05, "loss": 0.0604, "step": 31460 }, { "action_loss": 0.010267500765621662, "epoch": 28.29136690647482, "step": 31460 }, { "epoch": 28.29136690647482, "step": 31460, "torque_loss": 0.12953180074691772 }, { "epoch": 28.300359712230215, "grad_norm": 0.2834095358848572, "learning_rate": 5.008542923234831e-05, "loss": 0.0707, "step": 31470 }, { "action_loss": 0.006020324770361185, "epoch": 28.300359712230215, "step": 31470 }, { "epoch": 28.300359712230215, "step": 31470, "torque_loss": 0.11876048892736435 }, { "epoch": 28.309352517985612, "grad_norm": 0.3976355195045471, "learning_rate": 5.00578714306976e-05, "loss": 0.0753, "step": 31480 }, { "action_loss": 0.003039401024580002, "epoch": 28.309352517985612, "step": 31480 }, { "epoch": 28.309352517985612, "step": 31480, "torque_loss": 0.08824978023767471 }, { "epoch": 28.318345323741006, "grad_norm": 0.32183000445365906, "learning_rate": 5.0030313611467084e-05, "loss": 0.0676, "step": 31490 }, { "action_loss": 0.008772730827331543, "epoch": 28.318345323741006, "step": 31490 }, { "epoch": 28.318345323741006, "step": 31490, "torque_loss": 0.08542981743812561 }, { "epoch": 28.327338129496404, "grad_norm": 0.38200610876083374, "learning_rate": 5.0002755783028074e-05, "loss": 0.066, "step": 31500 }, { "action_loss": 0.013195153325796127, "epoch": 28.327338129496404, "step": 31500 }, { "epoch": 28.327338129496404, "step": 31500, "torque_loss": 0.15796971321105957 }, { "epoch": 28.336330935251798, "grad_norm": 0.3014782965183258, "learning_rate": 4.997519795375194e-05, "loss": 0.0707, "step": 31510 }, { "action_loss": 0.0025545936077833176, "epoch": 28.336330935251798, "step": 31510 }, { "epoch": 28.336330935251798, "step": 31510, "torque_loss": 0.07741712033748627 }, { "epoch": 28.345323741007196, "grad_norm": 0.29256415367126465, "learning_rate": 4.9947640132010016e-05, "loss": 0.0677, "step": 31520 }, { "action_loss": 0.013194024562835693, "epoch": 28.345323741007196, "step": 31520 }, { "epoch": 28.345323741007196, "step": 31520, "torque_loss": 0.1833217889070511 }, { "epoch": 28.35431654676259, "grad_norm": 0.223119854927063, "learning_rate": 4.9920082326173625e-05, "loss": 0.0706, "step": 31530 }, { "action_loss": 0.02068706415593624, "epoch": 28.35431654676259, "step": 31530 }, { "epoch": 28.35431654676259, "step": 31530, "torque_loss": 0.1547653079032898 }, { "epoch": 28.363309352517987, "grad_norm": 0.2781568169593811, "learning_rate": 4.9892524544614114e-05, "loss": 0.0728, "step": 31540 }, { "action_loss": 0.0037999290507286787, "epoch": 28.363309352517987, "step": 31540 }, { "epoch": 28.363309352517987, "step": 31540, "torque_loss": 0.1130412220954895 }, { "epoch": 28.37230215827338, "grad_norm": 0.3428930938243866, "learning_rate": 4.986496679570283e-05, "loss": 0.072, "step": 31550 }, { "action_loss": 0.003342453157529235, "epoch": 28.37230215827338, "step": 31550 }, { "epoch": 28.37230215827338, "step": 31550, "torque_loss": 0.10304389148950577 }, { "epoch": 28.381294964028775, "grad_norm": 0.3131743371486664, "learning_rate": 4.983740908781105e-05, "loss": 0.0734, "step": 31560 }, { "action_loss": 0.002795093460008502, "epoch": 28.381294964028775, "step": 31560 }, { "epoch": 28.381294964028775, "step": 31560, "torque_loss": 0.10853216797113419 }, { "epoch": 28.390287769784173, "grad_norm": 0.43979066610336304, "learning_rate": 4.9809851429310116e-05, "loss": 0.0606, "step": 31570 }, { "action_loss": 0.005281609948724508, "epoch": 28.390287769784173, "step": 31570 }, { "epoch": 28.390287769784173, "step": 31570, "torque_loss": 0.1532198041677475 }, { "epoch": 28.399280575539567, "grad_norm": 0.27300164103507996, "learning_rate": 4.9782293828571275e-05, "loss": 0.0769, "step": 31580 }, { "action_loss": 0.005500939209014177, "epoch": 28.399280575539567, "step": 31580 }, { "epoch": 28.399280575539567, "step": 31580, "torque_loss": 0.07733067125082016 }, { "epoch": 28.408273381294965, "grad_norm": 0.32746440172195435, "learning_rate": 4.9754736293965846e-05, "loss": 0.0626, "step": 31590 }, { "action_loss": 0.006391247268766165, "epoch": 28.408273381294965, "step": 31590 }, { "epoch": 28.408273381294965, "step": 31590, "torque_loss": 0.14764101803302765 }, { "epoch": 28.41726618705036, "grad_norm": 0.3069736957550049, "learning_rate": 4.972717883386502e-05, "loss": 0.0789, "step": 31600 }, { "action_loss": 0.0020379682537168264, "epoch": 28.41726618705036, "step": 31600 }, { "epoch": 28.41726618705036, "step": 31600, "torque_loss": 0.0684228166937828 }, { "epoch": 28.426258992805757, "grad_norm": 0.35578566789627075, "learning_rate": 4.9699621456640075e-05, "loss": 0.0663, "step": 31610 }, { "action_loss": 0.0023075828794389963, "epoch": 28.426258992805757, "step": 31610 }, { "epoch": 28.426258992805757, "step": 31610, "torque_loss": 0.06710200756788254 }, { "epoch": 28.43525179856115, "grad_norm": 0.388370156288147, "learning_rate": 4.9672064170662214e-05, "loss": 0.059, "step": 31620 }, { "action_loss": 0.001477082259953022, "epoch": 28.43525179856115, "step": 31620 }, { "epoch": 28.43525179856115, "step": 31620, "torque_loss": 0.07136467844247818 }, { "epoch": 28.444244604316548, "grad_norm": 0.32813915610313416, "learning_rate": 4.9644506984302583e-05, "loss": 0.0851, "step": 31630 }, { "action_loss": 0.0038654834497720003, "epoch": 28.444244604316548, "step": 31630 }, { "epoch": 28.444244604316548, "step": 31630, "torque_loss": 0.09290915727615356 }, { "epoch": 28.453237410071942, "grad_norm": 0.3308197259902954, "learning_rate": 4.9616949905932356e-05, "loss": 0.0647, "step": 31640 }, { "action_loss": 0.008174627088010311, "epoch": 28.453237410071942, "step": 31640 }, { "epoch": 28.453237410071942, "step": 31640, "torque_loss": 0.08745330572128296 }, { "epoch": 28.46223021582734, "grad_norm": 0.3101043999195099, "learning_rate": 4.9589392943922615e-05, "loss": 0.0698, "step": 31650 }, { "action_loss": 0.002601748099550605, "epoch": 28.46223021582734, "step": 31650 }, { "epoch": 28.46223021582734, "step": 31650, "torque_loss": 0.04916733503341675 }, { "epoch": 28.471223021582734, "grad_norm": 0.30654144287109375, "learning_rate": 4.956183610664447e-05, "loss": 0.0664, "step": 31660 }, { "action_loss": 0.004050689283758402, "epoch": 28.471223021582734, "step": 31660 }, { "epoch": 28.471223021582734, "step": 31660, "torque_loss": 0.1163453534245491 }, { "epoch": 28.480215827338128, "grad_norm": 0.2516360878944397, "learning_rate": 4.9534279402468945e-05, "loss": 0.0605, "step": 31670 }, { "action_loss": 0.012399469502270222, "epoch": 28.480215827338128, "step": 31670 }, { "epoch": 28.480215827338128, "step": 31670, "torque_loss": 0.16513817012310028 }, { "epoch": 28.489208633093526, "grad_norm": 0.23396292328834534, "learning_rate": 4.9506722839767036e-05, "loss": 0.0634, "step": 31680 }, { "action_loss": 0.005722879897803068, "epoch": 28.489208633093526, "step": 31680 }, { "epoch": 28.489208633093526, "step": 31680, "torque_loss": 0.1325863152742386 }, { "epoch": 28.49820143884892, "grad_norm": 0.3046753704547882, "learning_rate": 4.947916642690972e-05, "loss": 0.0849, "step": 31690 }, { "action_loss": 0.004177651833742857, "epoch": 28.49820143884892, "step": 31690 }, { "epoch": 28.49820143884892, "step": 31690, "torque_loss": 0.11755850166082382 }, { "epoch": 28.507194244604317, "grad_norm": 0.24590669572353363, "learning_rate": 4.9451610172267874e-05, "loss": 0.053, "step": 31700 }, { "action_loss": 0.007207924500107765, "epoch": 28.507194244604317, "step": 31700 }, { "epoch": 28.507194244604317, "step": 31700, "torque_loss": 0.11569235473871231 }, { "epoch": 28.51618705035971, "grad_norm": 0.3087103068828583, "learning_rate": 4.9424054084212376e-05, "loss": 0.0627, "step": 31710 }, { "action_loss": 0.007471942808479071, "epoch": 28.51618705035971, "step": 31710 }, { "epoch": 28.51618705035971, "step": 31710, "torque_loss": 0.09038513898849487 }, { "epoch": 28.52517985611511, "grad_norm": 0.28998252749443054, "learning_rate": 4.939649817111407e-05, "loss": 0.0576, "step": 31720 }, { "action_loss": 0.004785229917615652, "epoch": 28.52517985611511, "step": 31720 }, { "epoch": 28.52517985611511, "step": 31720, "torque_loss": 0.11137060075998306 }, { "epoch": 28.534172661870503, "grad_norm": 0.299793541431427, "learning_rate": 4.936894244134365e-05, "loss": 0.0672, "step": 31730 }, { "action_loss": 0.0065445080399513245, "epoch": 28.534172661870503, "step": 31730 }, { "epoch": 28.534172661870503, "step": 31730, "torque_loss": 0.11990179866552353 }, { "epoch": 28.5431654676259, "grad_norm": 0.23225648701190948, "learning_rate": 4.9341386903271886e-05, "loss": 0.0612, "step": 31740 }, { "action_loss": 0.010647374205291271, "epoch": 28.5431654676259, "step": 31740 }, { "epoch": 28.5431654676259, "step": 31740, "torque_loss": 0.10020807385444641 }, { "epoch": 28.552158273381295, "grad_norm": 0.36792710423469543, "learning_rate": 4.931383156526936e-05, "loss": 0.0902, "step": 31750 }, { "action_loss": 0.002878139726817608, "epoch": 28.552158273381295, "step": 31750 }, { "epoch": 28.552158273381295, "step": 31750, "torque_loss": 0.08674157410860062 }, { "epoch": 28.56115107913669, "grad_norm": 0.32273226976394653, "learning_rate": 4.92862764357067e-05, "loss": 0.0685, "step": 31760 }, { "action_loss": 0.002281391294673085, "epoch": 28.56115107913669, "step": 31760 }, { "epoch": 28.56115107913669, "step": 31760, "torque_loss": 0.07063045352697372 }, { "epoch": 28.570143884892087, "grad_norm": 0.26067203283309937, "learning_rate": 4.925872152295443e-05, "loss": 0.0552, "step": 31770 }, { "action_loss": 0.0029643599409610033, "epoch": 28.570143884892087, "step": 31770 }, { "epoch": 28.570143884892087, "step": 31770, "torque_loss": 0.06719879060983658 }, { "epoch": 28.57913669064748, "grad_norm": 0.27071264386177063, "learning_rate": 4.923116683538296e-05, "loss": 0.0696, "step": 31780 }, { "action_loss": 0.010197659954428673, "epoch": 28.57913669064748, "step": 31780 }, { "epoch": 28.57913669064748, "step": 31780, "torque_loss": 0.13599903881549835 }, { "epoch": 28.58812949640288, "grad_norm": 0.30410709977149963, "learning_rate": 4.920361238136273e-05, "loss": 0.0723, "step": 31790 }, { "action_loss": 0.0024902988225221634, "epoch": 28.58812949640288, "step": 31790 }, { "epoch": 28.58812949640288, "step": 31790, "torque_loss": 0.08796028047800064 }, { "epoch": 28.597122302158272, "grad_norm": 0.31475189328193665, "learning_rate": 4.9176058169264014e-05, "loss": 0.0689, "step": 31800 }, { "action_loss": 0.005138773005455732, "epoch": 28.597122302158272, "step": 31800 }, { "epoch": 28.597122302158272, "step": 31800, "torque_loss": 0.08618154376745224 }, { "epoch": 28.60611510791367, "grad_norm": 0.270048052072525, "learning_rate": 4.9148504207457074e-05, "loss": 0.0786, "step": 31810 }, { "action_loss": 0.01621759869158268, "epoch": 28.60611510791367, "step": 31810 }, { "epoch": 28.60611510791367, "step": 31810, "torque_loss": 0.12324217706918716 }, { "epoch": 28.615107913669064, "grad_norm": 0.3736606240272522, "learning_rate": 4.912095050431208e-05, "loss": 0.0679, "step": 31820 }, { "action_loss": 0.0032173870131373405, "epoch": 28.615107913669064, "step": 31820 }, { "epoch": 28.615107913669064, "step": 31820, "torque_loss": 0.08836352825164795 }, { "epoch": 28.62410071942446, "grad_norm": 0.32311975955963135, "learning_rate": 4.909339706819911e-05, "loss": 0.0686, "step": 31830 }, { "action_loss": 0.005138126201927662, "epoch": 28.62410071942446, "step": 31830 }, { "epoch": 28.62410071942446, "step": 31830, "torque_loss": 0.11287647485733032 }, { "epoch": 28.633093525179856, "grad_norm": 0.2275637686252594, "learning_rate": 4.906584390748819e-05, "loss": 0.0677, "step": 31840 }, { "action_loss": 0.008166101761162281, "epoch": 28.633093525179856, "step": 31840 }, { "epoch": 28.633093525179856, "step": 31840, "torque_loss": 0.14857538044452667 }, { "epoch": 28.642086330935253, "grad_norm": 0.26586928963661194, "learning_rate": 4.9038291030549195e-05, "loss": 0.0979, "step": 31850 }, { "action_loss": 0.007189879659563303, "epoch": 28.642086330935253, "step": 31850 }, { "epoch": 28.642086330935253, "step": 31850, "torque_loss": 0.09424064308404922 }, { "epoch": 28.651079136690647, "grad_norm": 0.2971116304397583, "learning_rate": 4.9010738445751995e-05, "loss": 0.0693, "step": 31860 }, { "action_loss": 0.011680196039378643, "epoch": 28.651079136690647, "step": 31860 }, { "epoch": 28.651079136690647, "step": 31860, "torque_loss": 0.19209878146648407 }, { "epoch": 28.66007194244604, "grad_norm": 0.27236828207969666, "learning_rate": 4.8983186161466364e-05, "loss": 0.0581, "step": 31870 }, { "action_loss": 0.008102674968540668, "epoch": 28.66007194244604, "step": 31870 }, { "epoch": 28.66007194244604, "step": 31870, "torque_loss": 0.16027586162090302 }, { "epoch": 28.66906474820144, "grad_norm": 0.27890726923942566, "learning_rate": 4.89556341860619e-05, "loss": 0.0704, "step": 31880 }, { "action_loss": 0.002977905794978142, "epoch": 28.66906474820144, "step": 31880 }, { "epoch": 28.66906474820144, "step": 31880, "torque_loss": 0.09315094351768494 }, { "epoch": 28.678057553956833, "grad_norm": 0.29223141074180603, "learning_rate": 4.892808252790822e-05, "loss": 0.0691, "step": 31890 }, { "action_loss": 0.002660701982676983, "epoch": 28.678057553956833, "step": 31890 }, { "epoch": 28.678057553956833, "step": 31890, "torque_loss": 0.10223966091871262 }, { "epoch": 28.68705035971223, "grad_norm": 0.36208733916282654, "learning_rate": 4.890053119537475e-05, "loss": 0.0703, "step": 31900 }, { "action_loss": 0.01446177065372467, "epoch": 28.68705035971223, "step": 31900 }, { "epoch": 28.68705035971223, "step": 31900, "torque_loss": 0.11811991780996323 }, { "epoch": 28.696043165467625, "grad_norm": 0.31730160117149353, "learning_rate": 4.887298019683087e-05, "loss": 0.0779, "step": 31910 }, { "action_loss": 0.04640693962574005, "epoch": 28.696043165467625, "step": 31910 }, { "epoch": 28.696043165467625, "step": 31910, "torque_loss": 0.13279904425144196 }, { "epoch": 28.705035971223023, "grad_norm": 0.27389755845069885, "learning_rate": 4.884542954064587e-05, "loss": 0.0649, "step": 31920 }, { "action_loss": 0.007426404859870672, "epoch": 28.705035971223023, "step": 31920 }, { "epoch": 28.705035971223023, "step": 31920, "torque_loss": 0.1183759868144989 }, { "epoch": 28.714028776978417, "grad_norm": 0.24567031860351562, "learning_rate": 4.881787923518887e-05, "loss": 0.0756, "step": 31930 }, { "action_loss": 0.002234594663605094, "epoch": 28.714028776978417, "step": 31930 }, { "epoch": 28.714028776978417, "step": 31930, "torque_loss": 0.06536687165498734 }, { "epoch": 28.723021582733814, "grad_norm": 0.2926209270954132, "learning_rate": 4.879032928882896e-05, "loss": 0.0643, "step": 31940 }, { "action_loss": 0.009654898196458817, "epoch": 28.723021582733814, "step": 31940 }, { "epoch": 28.723021582733814, "step": 31940, "torque_loss": 0.14851291477680206 }, { "epoch": 28.73201438848921, "grad_norm": 0.35507723689079285, "learning_rate": 4.876277970993505e-05, "loss": 0.0594, "step": 31950 }, { "action_loss": 0.008468041196465492, "epoch": 28.73201438848921, "step": 31950 }, { "epoch": 28.73201438848921, "step": 31950, "torque_loss": 0.15286117792129517 }, { "epoch": 28.741007194244606, "grad_norm": 0.2263588160276413, "learning_rate": 4.873523050687602e-05, "loss": 0.0713, "step": 31960 }, { "action_loss": 0.00327562284655869, "epoch": 28.741007194244606, "step": 31960 }, { "epoch": 28.741007194244606, "step": 31960, "torque_loss": 0.06758920103311539 }, { "epoch": 28.75, "grad_norm": 0.23735198378562927, "learning_rate": 4.870768168802056e-05, "loss": 0.0564, "step": 31970 }, { "action_loss": 0.0052146571688354015, "epoch": 28.75, "step": 31970 }, { "epoch": 28.75, "step": 31970, "torque_loss": 0.09797094017267227 }, { "epoch": 28.758992805755394, "grad_norm": 0.24823026359081268, "learning_rate": 4.868013326173728e-05, "loss": 0.066, "step": 31980 }, { "action_loss": 0.0041175843216478825, "epoch": 28.758992805755394, "step": 31980 }, { "epoch": 28.758992805755394, "step": 31980, "torque_loss": 0.10473635792732239 }, { "epoch": 28.76798561151079, "grad_norm": 0.3181127607822418, "learning_rate": 4.865258523639468e-05, "loss": 0.0618, "step": 31990 }, { "action_loss": 0.004376610275357962, "epoch": 28.76798561151079, "step": 31990 }, { "epoch": 28.76798561151079, "step": 31990, "torque_loss": 0.1094161868095398 }, { "epoch": 28.776978417266186, "grad_norm": 0.33991166949272156, "learning_rate": 4.862503762036109e-05, "loss": 0.0634, "step": 32000 }, { "action_loss": 0.005965625401586294, "epoch": 28.776978417266186, "step": 32000 }, { "epoch": 28.776978417266186, "step": 32000, "torque_loss": 0.08831057697534561 }, { "epoch": 28.785971223021583, "grad_norm": 0.21545915305614471, "learning_rate": 4.859749042200478e-05, "loss": 0.0549, "step": 32010 }, { "action_loss": 0.0026536546647548676, "epoch": 28.785971223021583, "step": 32010 }, { "epoch": 28.785971223021583, "step": 32010, "torque_loss": 0.0985051766037941 }, { "epoch": 28.794964028776977, "grad_norm": 0.2909517288208008, "learning_rate": 4.856994364969384e-05, "loss": 0.0612, "step": 32020 }, { "action_loss": 0.003095436841249466, "epoch": 28.794964028776977, "step": 32020 }, { "epoch": 28.794964028776977, "step": 32020, "torque_loss": 0.08502105623483658 }, { "epoch": 28.803956834532375, "grad_norm": 0.2877185344696045, "learning_rate": 4.854239731179625e-05, "loss": 0.0506, "step": 32030 }, { "action_loss": 0.0029172247741371393, "epoch": 28.803956834532375, "step": 32030 }, { "epoch": 28.803956834532375, "step": 32030, "torque_loss": 0.09491226822137833 }, { "epoch": 28.81294964028777, "grad_norm": 0.3311561644077301, "learning_rate": 4.85148514166799e-05, "loss": 0.0639, "step": 32040 }, { "action_loss": 0.003562646685168147, "epoch": 28.81294964028777, "step": 32040 }, { "epoch": 28.81294964028777, "step": 32040, "torque_loss": 0.06617926806211472 }, { "epoch": 28.821942446043167, "grad_norm": 0.2692136764526367, "learning_rate": 4.8487305972712456e-05, "loss": 0.0644, "step": 32050 }, { "action_loss": 0.006554977502673864, "epoch": 28.821942446043167, "step": 32050 }, { "epoch": 28.821942446043167, "step": 32050, "torque_loss": 0.10437079519033432 }, { "epoch": 28.83093525179856, "grad_norm": 0.3041023015975952, "learning_rate": 4.8459760988261526e-05, "loss": 0.0598, "step": 32060 }, { "action_loss": 0.006657504942268133, "epoch": 28.83093525179856, "step": 32060 }, { "epoch": 28.83093525179856, "step": 32060, "torque_loss": 0.11072101444005966 }, { "epoch": 28.83992805755396, "grad_norm": 0.24222880601882935, "learning_rate": 4.843221647169453e-05, "loss": 0.06, "step": 32070 }, { "action_loss": 0.005503356922417879, "epoch": 28.83992805755396, "step": 32070 }, { "epoch": 28.83992805755396, "step": 32070, "torque_loss": 0.10266795754432678 }, { "epoch": 28.848920863309353, "grad_norm": 0.28149956464767456, "learning_rate": 4.840467243137878e-05, "loss": 0.0674, "step": 32080 }, { "action_loss": 0.002750155283138156, "epoch": 28.848920863309353, "step": 32080 }, { "epoch": 28.848920863309353, "step": 32080, "torque_loss": 0.09537819027900696 }, { "epoch": 28.857913669064747, "grad_norm": 0.3357076644897461, "learning_rate": 4.837712887568143e-05, "loss": 0.069, "step": 32090 }, { "action_loss": 0.006453806068748236, "epoch": 28.857913669064747, "step": 32090 }, { "epoch": 28.857913669064747, "step": 32090, "torque_loss": 0.0871599093079567 }, { "epoch": 28.866906474820144, "grad_norm": 0.40189388394355774, "learning_rate": 4.8349585812969464e-05, "loss": 0.0683, "step": 32100 }, { "action_loss": 0.009588838554918766, "epoch": 28.866906474820144, "step": 32100 }, { "epoch": 28.866906474820144, "step": 32100, "torque_loss": 0.10995947569608688 }, { "epoch": 28.87589928057554, "grad_norm": 0.25003179907798767, "learning_rate": 4.8322043251609775e-05, "loss": 0.06, "step": 32110 }, { "action_loss": 0.011737263761460781, "epoch": 28.87589928057554, "step": 32110 }, { "epoch": 28.87589928057554, "step": 32110, "torque_loss": 0.11397577077150345 }, { "epoch": 28.884892086330936, "grad_norm": 0.25745075941085815, "learning_rate": 4.8294501199969015e-05, "loss": 0.061, "step": 32120 }, { "action_loss": 0.002065959619358182, "epoch": 28.884892086330936, "step": 32120 }, { "epoch": 28.884892086330936, "step": 32120, "torque_loss": 0.054890941828489304 }, { "epoch": 28.89388489208633, "grad_norm": 0.3084534704685211, "learning_rate": 4.826695966641376e-05, "loss": 0.0583, "step": 32130 }, { "action_loss": 0.0027743412647396326, "epoch": 28.89388489208633, "step": 32130 }, { "epoch": 28.89388489208633, "step": 32130, "torque_loss": 0.06753795593976974 }, { "epoch": 28.902877697841728, "grad_norm": 0.29973214864730835, "learning_rate": 4.823941865931043e-05, "loss": 0.0676, "step": 32140 }, { "action_loss": 0.008783236145973206, "epoch": 28.902877697841728, "step": 32140 }, { "epoch": 28.902877697841728, "step": 32140, "torque_loss": 0.08306551724672318 }, { "epoch": 28.91187050359712, "grad_norm": 0.37561848759651184, "learning_rate": 4.82118781870252e-05, "loss": 0.0748, "step": 32150 }, { "action_loss": 0.002791234292089939, "epoch": 28.91187050359712, "step": 32150 }, { "epoch": 28.91187050359712, "step": 32150, "torque_loss": 0.0680980384349823 }, { "epoch": 28.92086330935252, "grad_norm": 0.32451149821281433, "learning_rate": 4.8184338257924185e-05, "loss": 0.07, "step": 32160 }, { "action_loss": 0.0067703681997954845, "epoch": 28.92086330935252, "step": 32160 }, { "epoch": 28.92086330935252, "step": 32160, "torque_loss": 0.12433215975761414 }, { "epoch": 28.929856115107913, "grad_norm": 0.27472177147865295, "learning_rate": 4.815679888037324e-05, "loss": 0.0687, "step": 32170 }, { "action_loss": 0.002843450754880905, "epoch": 28.929856115107913, "step": 32170 }, { "epoch": 28.929856115107913, "step": 32170, "torque_loss": 0.1050039604306221 }, { "epoch": 28.93884892086331, "grad_norm": 0.31984391808509827, "learning_rate": 4.8129260062738135e-05, "loss": 0.0592, "step": 32180 }, { "action_loss": 0.001217912882566452, "epoch": 28.93884892086331, "step": 32180 }, { "epoch": 28.93884892086331, "step": 32180, "torque_loss": 0.05362749099731445 }, { "epoch": 28.947841726618705, "grad_norm": 0.33434155583381653, "learning_rate": 4.810172181338445e-05, "loss": 0.0702, "step": 32190 }, { "action_loss": 0.0064892456866800785, "epoch": 28.947841726618705, "step": 32190 }, { "epoch": 28.947841726618705, "step": 32190, "torque_loss": 0.1176835224032402 }, { "epoch": 28.9568345323741, "grad_norm": 0.3499426245689392, "learning_rate": 4.807418414067753e-05, "loss": 0.0539, "step": 32200 }, { "action_loss": 0.0037970785051584244, "epoch": 28.9568345323741, "step": 32200 }, { "epoch": 28.9568345323741, "step": 32200, "torque_loss": 0.12311291694641113 }, { "epoch": 28.965827338129497, "grad_norm": 0.3030693233013153, "learning_rate": 4.804664705298264e-05, "loss": 0.0636, "step": 32210 }, { "action_loss": 0.0028325552120804787, "epoch": 28.965827338129497, "step": 32210 }, { "epoch": 28.965827338129497, "step": 32210, "torque_loss": 0.09543496370315552 }, { "epoch": 28.97482014388489, "grad_norm": 0.33464401960372925, "learning_rate": 4.80191105586648e-05, "loss": 0.0642, "step": 32220 }, { "action_loss": 0.00552019290626049, "epoch": 28.97482014388489, "step": 32220 }, { "epoch": 28.97482014388489, "step": 32220, "torque_loss": 0.08545330911874771 }, { "epoch": 28.98381294964029, "grad_norm": 0.4106428027153015, "learning_rate": 4.799157466608886e-05, "loss": 0.0655, "step": 32230 }, { "action_loss": 0.0033772962633520365, "epoch": 28.98381294964029, "step": 32230 }, { "epoch": 28.98381294964029, "step": 32230, "torque_loss": 0.08269260823726654 }, { "epoch": 28.992805755395683, "grad_norm": 0.2883555293083191, "learning_rate": 4.796403938361951e-05, "loss": 0.0731, "step": 32240 }, { "action_loss": 0.004290152806788683, "epoch": 28.992805755395683, "step": 32240 }, { "epoch": 28.992805755395683, "step": 32240, "torque_loss": 0.09306836128234863 }, { "epoch": 29.00179856115108, "grad_norm": 0.3815860450267792, "learning_rate": 4.793650471962123e-05, "loss": 0.055, "step": 32250 }, { "action_loss": 0.007856142707169056, "epoch": 29.00179856115108, "step": 32250 }, { "epoch": 29.00179856115108, "step": 32250, "torque_loss": 0.10384386032819748 }, { "epoch": 29.010791366906474, "grad_norm": 0.3236914277076721, "learning_rate": 4.790897068245835e-05, "loss": 0.0602, "step": 32260 }, { "action_loss": 0.0021244350355118513, "epoch": 29.010791366906474, "step": 32260 }, { "epoch": 29.010791366906474, "step": 32260, "torque_loss": 0.046471595764160156 }, { "epoch": 29.019784172661872, "grad_norm": 0.2590581774711609, "learning_rate": 4.7881437280494954e-05, "loss": 0.0743, "step": 32270 }, { "action_loss": 0.004677900578826666, "epoch": 29.019784172661872, "step": 32270 }, { "epoch": 29.019784172661872, "step": 32270, "torque_loss": 0.14798375964164734 }, { "epoch": 29.028776978417266, "grad_norm": 0.25079718232154846, "learning_rate": 4.7853904522094965e-05, "loss": 0.0675, "step": 32280 }, { "action_loss": 0.0032693129032850266, "epoch": 29.028776978417266, "step": 32280 }, { "epoch": 29.028776978417266, "step": 32280, "torque_loss": 0.14145155251026154 }, { "epoch": 29.037769784172664, "grad_norm": 0.26824164390563965, "learning_rate": 4.782637241562215e-05, "loss": 0.0683, "step": 32290 }, { "action_loss": 0.0017446560086682439, "epoch": 29.037769784172664, "step": 32290 }, { "epoch": 29.037769784172664, "step": 32290, "torque_loss": 0.0683656632900238 }, { "epoch": 29.046762589928058, "grad_norm": 0.257344126701355, "learning_rate": 4.779884096943997e-05, "loss": 0.0591, "step": 32300 }, { "action_loss": 0.002494869288057089, "epoch": 29.046762589928058, "step": 32300 }, { "epoch": 29.046762589928058, "step": 32300, "torque_loss": 0.08958497643470764 }, { "epoch": 29.055755395683452, "grad_norm": 0.39308539032936096, "learning_rate": 4.777131019191182e-05, "loss": 0.0713, "step": 32310 }, { "action_loss": 0.005975678563117981, "epoch": 29.055755395683452, "step": 32310 }, { "epoch": 29.055755395683452, "step": 32310, "torque_loss": 0.08665218204259872 }, { "epoch": 29.06474820143885, "grad_norm": 0.31917810440063477, "learning_rate": 4.774378009140076e-05, "loss": 0.067, "step": 32320 }, { "action_loss": 0.01084208209067583, "epoch": 29.06474820143885, "step": 32320 }, { "epoch": 29.06474820143885, "step": 32320, "torque_loss": 0.10310781002044678 }, { "epoch": 29.073741007194243, "grad_norm": 0.3460904657840729, "learning_rate": 4.7716250676269735e-05, "loss": 0.0586, "step": 32330 }, { "action_loss": 0.002953474409878254, "epoch": 29.073741007194243, "step": 32330 }, { "epoch": 29.073741007194243, "step": 32330, "torque_loss": 0.07303086668252945 }, { "epoch": 29.08273381294964, "grad_norm": 0.30348968505859375, "learning_rate": 4.7688721954881485e-05, "loss": 0.0623, "step": 32340 }, { "action_loss": 0.0034606866538524628, "epoch": 29.08273381294964, "step": 32340 }, { "epoch": 29.08273381294964, "step": 32340, "torque_loss": 0.07354974001646042 }, { "epoch": 29.091726618705035, "grad_norm": 0.215593159198761, "learning_rate": 4.7661193935598446e-05, "loss": 0.0752, "step": 32350 }, { "action_loss": 0.01107862126082182, "epoch": 29.091726618705035, "step": 32350 }, { "epoch": 29.091726618705035, "step": 32350, "torque_loss": 0.1509287804365158 }, { "epoch": 29.100719424460433, "grad_norm": 0.2609368562698364, "learning_rate": 4.763366662678296e-05, "loss": 0.0653, "step": 32360 }, { "action_loss": 0.006613054778426886, "epoch": 29.100719424460433, "step": 32360 }, { "epoch": 29.100719424460433, "step": 32360, "torque_loss": 0.12026890367269516 }, { "epoch": 29.109712230215827, "grad_norm": 0.3639967143535614, "learning_rate": 4.7606140036797064e-05, "loss": 0.0743, "step": 32370 }, { "action_loss": 0.001840040902607143, "epoch": 29.109712230215827, "step": 32370 }, { "epoch": 29.109712230215827, "step": 32370, "torque_loss": 0.0701933279633522 }, { "epoch": 29.118705035971225, "grad_norm": 0.2992399036884308, "learning_rate": 4.7578614174002614e-05, "loss": 0.0567, "step": 32380 }, { "action_loss": 0.0042237588204443455, "epoch": 29.118705035971225, "step": 32380 }, { "epoch": 29.118705035971225, "step": 32380, "torque_loss": 0.11711480468511581 }, { "epoch": 29.12769784172662, "grad_norm": 0.30695948004722595, "learning_rate": 4.755108904676125e-05, "loss": 0.0659, "step": 32390 }, { "action_loss": 0.004437526222318411, "epoch": 29.12769784172662, "step": 32390 }, { "epoch": 29.12769784172662, "step": 32390, "torque_loss": 0.06321658939123154 }, { "epoch": 29.136690647482013, "grad_norm": 0.3203226923942566, "learning_rate": 4.752356466343436e-05, "loss": 0.0711, "step": 32400 }, { "action_loss": 0.00227217935025692, "epoch": 29.136690647482013, "step": 32400 }, { "epoch": 29.136690647482013, "step": 32400, "torque_loss": 0.0788036435842514 }, { "epoch": 29.14568345323741, "grad_norm": 0.2761329412460327, "learning_rate": 4.7496041032383174e-05, "loss": 0.0541, "step": 32410 }, { "action_loss": 0.0029669934883713722, "epoch": 29.14568345323741, "step": 32410 }, { "epoch": 29.14568345323741, "step": 32410, "torque_loss": 0.08870616555213928 }, { "epoch": 29.154676258992804, "grad_norm": 0.26887932419776917, "learning_rate": 4.746851816196858e-05, "loss": 0.0543, "step": 32420 }, { "action_loss": 0.002132419031113386, "epoch": 29.154676258992804, "step": 32420 }, { "epoch": 29.154676258992804, "step": 32420, "torque_loss": 0.0493963360786438 }, { "epoch": 29.163669064748202, "grad_norm": 0.3117718994617462, "learning_rate": 4.744099606055135e-05, "loss": 0.0569, "step": 32430 }, { "action_loss": 0.01590859144926071, "epoch": 29.163669064748202, "step": 32430 }, { "epoch": 29.163669064748202, "step": 32430, "torque_loss": 0.16945551335811615 }, { "epoch": 29.172661870503596, "grad_norm": 0.29866909980773926, "learning_rate": 4.741347473649193e-05, "loss": 0.0698, "step": 32440 }, { "action_loss": 0.012491394765675068, "epoch": 29.172661870503596, "step": 32440 }, { "epoch": 29.172661870503596, "step": 32440, "torque_loss": 0.10472958534955978 }, { "epoch": 29.181654676258994, "grad_norm": 0.22525285184383392, "learning_rate": 4.738595419815058e-05, "loss": 0.0592, "step": 32450 }, { "action_loss": 0.014588392339646816, "epoch": 29.181654676258994, "step": 32450 }, { "epoch": 29.181654676258994, "step": 32450, "torque_loss": 0.14135994017124176 }, { "epoch": 29.190647482014388, "grad_norm": 0.37837347388267517, "learning_rate": 4.7358434453887365e-05, "loss": 0.0733, "step": 32460 }, { "action_loss": 0.004970386158674955, "epoch": 29.190647482014388, "step": 32460 }, { "epoch": 29.190647482014388, "step": 32460, "torque_loss": 0.06518842279911041 }, { "epoch": 29.199640287769785, "grad_norm": 0.3439258933067322, "learning_rate": 4.7330915512061976e-05, "loss": 0.0591, "step": 32470 }, { "action_loss": 0.003211007220670581, "epoch": 29.199640287769785, "step": 32470 }, { "epoch": 29.199640287769785, "step": 32470, "torque_loss": 0.09525913000106812 }, { "epoch": 29.20863309352518, "grad_norm": 0.2739960849285126, "learning_rate": 4.730339738103402e-05, "loss": 0.0703, "step": 32480 }, { "action_loss": 0.002972828224301338, "epoch": 29.20863309352518, "step": 32480 }, { "epoch": 29.20863309352518, "step": 32480, "torque_loss": 0.10513017326593399 }, { "epoch": 29.217625899280577, "grad_norm": 0.2590366005897522, "learning_rate": 4.727588006916271e-05, "loss": 0.07, "step": 32490 }, { "action_loss": 0.0014858405338600278, "epoch": 29.217625899280577, "step": 32490 }, { "epoch": 29.217625899280577, "step": 32490, "torque_loss": 0.0549384243786335 }, { "epoch": 29.22661870503597, "grad_norm": 0.2583823800086975, "learning_rate": 4.724836358480711e-05, "loss": 0.0545, "step": 32500 }, { "action_loss": 0.008583798073232174, "epoch": 29.22661870503597, "step": 32500 }, { "epoch": 29.22661870503597, "step": 32500, "torque_loss": 0.1901874542236328 }, { "epoch": 29.235611510791365, "grad_norm": 0.32411280274391174, "learning_rate": 4.722084793632601e-05, "loss": 0.0686, "step": 32510 }, { "action_loss": 0.0038462227676063776, "epoch": 29.235611510791365, "step": 32510 }, { "epoch": 29.235611510791365, "step": 32510, "torque_loss": 0.09426187723875046 }, { "epoch": 29.244604316546763, "grad_norm": 0.27937766909599304, "learning_rate": 4.719333313207792e-05, "loss": 0.0724, "step": 32520 }, { "action_loss": 0.005286215338855982, "epoch": 29.244604316546763, "step": 32520 }, { "epoch": 29.244604316546763, "step": 32520, "torque_loss": 0.1082206591963768 }, { "epoch": 29.253597122302157, "grad_norm": 0.3524652421474457, "learning_rate": 4.716581918042114e-05, "loss": 0.0675, "step": 32530 }, { "action_loss": 0.012462818063795567, "epoch": 29.253597122302157, "step": 32530 }, { "epoch": 29.253597122302157, "step": 32530, "torque_loss": 0.1119113340973854 }, { "epoch": 29.262589928057555, "grad_norm": 0.22563818097114563, "learning_rate": 4.7138306089713636e-05, "loss": 0.058, "step": 32540 }, { "action_loss": 0.0017349409172311425, "epoch": 29.262589928057555, "step": 32540 }, { "epoch": 29.262589928057555, "step": 32540, "torque_loss": 0.051497384905815125 }, { "epoch": 29.27158273381295, "grad_norm": 0.3100985288619995, "learning_rate": 4.7110793868313183e-05, "loss": 0.0535, "step": 32550 }, { "action_loss": 0.005958022549748421, "epoch": 29.27158273381295, "step": 32550 }, { "epoch": 29.27158273381295, "step": 32550, "torque_loss": 0.09200306981801987 }, { "epoch": 29.280575539568346, "grad_norm": 0.4780476987361908, "learning_rate": 4.708328252457729e-05, "loss": 0.0662, "step": 32560 }, { "action_loss": 0.009126662276685238, "epoch": 29.280575539568346, "step": 32560 }, { "epoch": 29.280575539568346, "step": 32560, "torque_loss": 0.11656457185745239 }, { "epoch": 29.28956834532374, "grad_norm": 0.34797805547714233, "learning_rate": 4.7055772066863135e-05, "loss": 0.0776, "step": 32570 }, { "action_loss": 0.004516115877777338, "epoch": 29.28956834532374, "step": 32570 }, { "epoch": 29.28956834532374, "step": 32570, "torque_loss": 0.1188930869102478 }, { "epoch": 29.298561151079138, "grad_norm": 0.35642945766448975, "learning_rate": 4.702826250352771e-05, "loss": 0.0546, "step": 32580 }, { "action_loss": 0.005597732961177826, "epoch": 29.298561151079138, "step": 32580 }, { "epoch": 29.298561151079138, "step": 32580, "torque_loss": 0.11341562122106552 }, { "epoch": 29.307553956834532, "grad_norm": 0.24814550578594208, "learning_rate": 4.7000753842927653e-05, "loss": 0.0609, "step": 32590 }, { "action_loss": 0.0036126046907156706, "epoch": 29.307553956834532, "step": 32590 }, { "epoch": 29.307553956834532, "step": 32590, "torque_loss": 0.09876600652933121 }, { "epoch": 29.31654676258993, "grad_norm": 0.35197150707244873, "learning_rate": 4.6973246093419384e-05, "loss": 0.0692, "step": 32600 }, { "action_loss": 0.004075984004884958, "epoch": 29.31654676258993, "step": 32600 }, { "epoch": 29.31654676258993, "step": 32600, "torque_loss": 0.07807231694459915 }, { "epoch": 29.325539568345324, "grad_norm": 0.3448934257030487, "learning_rate": 4.694573926335906e-05, "loss": 0.0682, "step": 32610 }, { "action_loss": 0.006321058142930269, "epoch": 29.325539568345324, "step": 32610 }, { "epoch": 29.325539568345324, "step": 32610, "torque_loss": 0.07393894344568253 }, { "epoch": 29.334532374100718, "grad_norm": 0.3372529447078705, "learning_rate": 4.6918233361102476e-05, "loss": 0.0631, "step": 32620 }, { "action_loss": 0.004078098107129335, "epoch": 29.334532374100718, "step": 32620 }, { "epoch": 29.334532374100718, "step": 32620, "torque_loss": 0.08047040551900864 }, { "epoch": 29.343525179856115, "grad_norm": 0.3658962547779083, "learning_rate": 4.689072839500525e-05, "loss": 0.0651, "step": 32630 }, { "action_loss": 0.007025338709354401, "epoch": 29.343525179856115, "step": 32630 }, { "epoch": 29.343525179856115, "step": 32630, "torque_loss": 0.12462329864501953 }, { "epoch": 29.35251798561151, "grad_norm": 0.35204094648361206, "learning_rate": 4.6863224373422635e-05, "loss": 0.0572, "step": 32640 }, { "action_loss": 0.006716158706694841, "epoch": 29.35251798561151, "step": 32640 }, { "epoch": 29.35251798561151, "step": 32640, "torque_loss": 0.13441981375217438 }, { "epoch": 29.361510791366907, "grad_norm": 0.30119410157203674, "learning_rate": 4.683572130470962e-05, "loss": 0.0583, "step": 32650 }, { "action_loss": 0.006578874308615923, "epoch": 29.361510791366907, "step": 32650 }, { "epoch": 29.361510791366907, "step": 32650, "torque_loss": 0.0903191938996315 }, { "epoch": 29.3705035971223, "grad_norm": 0.3005964756011963, "learning_rate": 4.680821919722094e-05, "loss": 0.0782, "step": 32660 }, { "action_loss": 0.008048946037888527, "epoch": 29.3705035971223, "step": 32660 }, { "epoch": 29.3705035971223, "step": 32660, "torque_loss": 0.1281156837940216 }, { "epoch": 29.3794964028777, "grad_norm": 0.25833722949028015, "learning_rate": 4.6780718059310975e-05, "loss": 0.0699, "step": 32670 }, { "action_loss": 0.0030929502099752426, "epoch": 29.3794964028777, "step": 32670 }, { "epoch": 29.3794964028777, "step": 32670, "torque_loss": 0.08246158063411713 }, { "epoch": 29.388489208633093, "grad_norm": 0.2544977366924286, "learning_rate": 4.675321789933389e-05, "loss": 0.0532, "step": 32680 }, { "action_loss": 0.0025192424654960632, "epoch": 29.388489208633093, "step": 32680 }, { "epoch": 29.388489208633093, "step": 32680, "torque_loss": 0.07637765258550644 }, { "epoch": 29.39748201438849, "grad_norm": 0.2863614559173584, "learning_rate": 4.6725718725643464e-05, "loss": 0.0556, "step": 32690 }, { "action_loss": 0.02332640439271927, "epoch": 29.39748201438849, "step": 32690 }, { "epoch": 29.39748201438849, "step": 32690, "torque_loss": 0.1276503950357437 }, { "epoch": 29.406474820143885, "grad_norm": 0.30099406838417053, "learning_rate": 4.669822054659323e-05, "loss": 0.0664, "step": 32700 }, { "action_loss": 0.00671835383400321, "epoch": 29.406474820143885, "step": 32700 }, { "epoch": 29.406474820143885, "step": 32700, "torque_loss": 0.11420655250549316 }, { "epoch": 29.415467625899282, "grad_norm": 0.345368355512619, "learning_rate": 4.667072337053644e-05, "loss": 0.0672, "step": 32710 }, { "action_loss": 0.010053095407783985, "epoch": 29.415467625899282, "step": 32710 }, { "epoch": 29.415467625899282, "step": 32710, "torque_loss": 0.14715619385242462 }, { "epoch": 29.424460431654676, "grad_norm": 0.3177671432495117, "learning_rate": 4.6643227205825965e-05, "loss": 0.0701, "step": 32720 }, { "action_loss": 0.004430997651070356, "epoch": 29.424460431654676, "step": 32720 }, { "epoch": 29.424460431654676, "step": 32720, "torque_loss": 0.0907873809337616 }, { "epoch": 29.43345323741007, "grad_norm": 0.30744969844818115, "learning_rate": 4.6615732060814454e-05, "loss": 0.0629, "step": 32730 }, { "action_loss": 0.0029772352427244186, "epoch": 29.43345323741007, "step": 32730 }, { "epoch": 29.43345323741007, "step": 32730, "torque_loss": 0.07854013890028 }, { "epoch": 29.442446043165468, "grad_norm": 0.21812015771865845, "learning_rate": 4.658823794385417e-05, "loss": 0.0642, "step": 32740 }, { "action_loss": 0.00842786394059658, "epoch": 29.442446043165468, "step": 32740 }, { "epoch": 29.442446043165468, "step": 32740, "torque_loss": 0.10219163447618484 }, { "epoch": 29.451438848920862, "grad_norm": 0.2806660234928131, "learning_rate": 4.6560744863297115e-05, "loss": 0.069, "step": 32750 }, { "action_loss": 0.005042668431997299, "epoch": 29.451438848920862, "step": 32750 }, { "epoch": 29.451438848920862, "step": 32750, "torque_loss": 0.09465968608856201 }, { "epoch": 29.46043165467626, "grad_norm": 0.2744559049606323, "learning_rate": 4.653325282749498e-05, "loss": 0.0681, "step": 32760 }, { "action_loss": 0.008304028771817684, "epoch": 29.46043165467626, "step": 32760 }, { "epoch": 29.46043165467626, "step": 32760, "torque_loss": 0.09593642503023148 }, { "epoch": 29.469424460431654, "grad_norm": 0.31028813123703003, "learning_rate": 4.6505761844799075e-05, "loss": 0.0609, "step": 32770 }, { "action_loss": 0.0021695184987038374, "epoch": 29.469424460431654, "step": 32770 }, { "epoch": 29.469424460431654, "step": 32770, "torque_loss": 0.07481903582811356 }, { "epoch": 29.47841726618705, "grad_norm": 0.3763382136821747, "learning_rate": 4.647827192356048e-05, "loss": 0.0669, "step": 32780 }, { "action_loss": 0.0069783409126102924, "epoch": 29.47841726618705, "step": 32780 }, { "epoch": 29.47841726618705, "step": 32780, "torque_loss": 0.16836892068386078 }, { "epoch": 29.487410071942445, "grad_norm": 0.3876325190067291, "learning_rate": 4.645078307212989e-05, "loss": 0.0708, "step": 32790 }, { "action_loss": 0.006160761695355177, "epoch": 29.487410071942445, "step": 32790 }, { "epoch": 29.487410071942445, "step": 32790, "torque_loss": 0.10231373459100723 }, { "epoch": 29.496402877697843, "grad_norm": 0.33527639508247375, "learning_rate": 4.642329529885768e-05, "loss": 0.063, "step": 32800 }, { "action_loss": 0.007512717042118311, "epoch": 29.496402877697843, "step": 32800 }, { "epoch": 29.496402877697843, "step": 32800, "torque_loss": 0.09318643808364868 }, { "epoch": 29.505395683453237, "grad_norm": 0.40748634934425354, "learning_rate": 4.639580861209393e-05, "loss": 0.0691, "step": 32810 }, { "action_loss": 0.005171755328774452, "epoch": 29.505395683453237, "step": 32810 }, { "epoch": 29.505395683453237, "step": 32810, "torque_loss": 0.09027624130249023 }, { "epoch": 29.514388489208635, "grad_norm": 0.268060564994812, "learning_rate": 4.636832302018835e-05, "loss": 0.0662, "step": 32820 }, { "action_loss": 0.002894410863518715, "epoch": 29.514388489208635, "step": 32820 }, { "epoch": 29.514388489208635, "step": 32820, "torque_loss": 0.1012401357293129 }, { "epoch": 29.52338129496403, "grad_norm": 0.20113727450370789, "learning_rate": 4.6340838531490365e-05, "loss": 0.0543, "step": 32830 }, { "action_loss": 0.0012263768585398793, "epoch": 29.52338129496403, "step": 32830 }, { "epoch": 29.52338129496403, "step": 32830, "torque_loss": 0.042314108461141586 }, { "epoch": 29.532374100719423, "grad_norm": 0.37902742624282837, "learning_rate": 4.6313355154349e-05, "loss": 0.0595, "step": 32840 }, { "action_loss": 0.004743013996630907, "epoch": 29.532374100719423, "step": 32840 }, { "epoch": 29.532374100719423, "step": 32840, "torque_loss": 0.10774457454681396 }, { "epoch": 29.54136690647482, "grad_norm": 0.2614439129829407, "learning_rate": 4.6285872897113025e-05, "loss": 0.06, "step": 32850 }, { "action_loss": 0.01222009677439928, "epoch": 29.54136690647482, "step": 32850 }, { "epoch": 29.54136690647482, "step": 32850, "torque_loss": 0.13135068118572235 }, { "epoch": 29.550359712230215, "grad_norm": 0.24671947956085205, "learning_rate": 4.625839176813077e-05, "loss": 0.0508, "step": 32860 }, { "action_loss": 0.002591080730780959, "epoch": 29.550359712230215, "step": 32860 }, { "epoch": 29.550359712230215, "step": 32860, "torque_loss": 0.06749091297388077 }, { "epoch": 29.559352517985612, "grad_norm": 0.2462218850851059, "learning_rate": 4.623091177575031e-05, "loss": 0.0636, "step": 32870 }, { "action_loss": 0.003773293225094676, "epoch": 29.559352517985612, "step": 32870 }, { "epoch": 29.559352517985612, "step": 32870, "torque_loss": 0.08556420356035233 }, { "epoch": 29.568345323741006, "grad_norm": 0.33540967106819153, "learning_rate": 4.620343292831936e-05, "loss": 0.0648, "step": 32880 }, { "action_loss": 0.0040614488534629345, "epoch": 29.568345323741006, "step": 32880 }, { "epoch": 29.568345323741006, "step": 32880, "torque_loss": 0.08976704627275467 }, { "epoch": 29.577338129496404, "grad_norm": 0.2839775085449219, "learning_rate": 4.6175955234185206e-05, "loss": 0.0666, "step": 32890 }, { "action_loss": 0.0025400437880307436, "epoch": 29.577338129496404, "step": 32890 }, { "epoch": 29.577338129496404, "step": 32890, "torque_loss": 0.07674766331911087 }, { "epoch": 29.586330935251798, "grad_norm": 0.2575780749320984, "learning_rate": 4.614847870169492e-05, "loss": 0.0493, "step": 32900 }, { "action_loss": 0.005337458103895187, "epoch": 29.586330935251798, "step": 32900 }, { "epoch": 29.586330935251798, "step": 32900, "torque_loss": 0.09529060125350952 }, { "epoch": 29.595323741007196, "grad_norm": 0.22428904473781586, "learning_rate": 4.612100333919509e-05, "loss": 0.0598, "step": 32910 }, { "action_loss": 0.003436171216890216, "epoch": 29.595323741007196, "step": 32910 }, { "epoch": 29.595323741007196, "step": 32910, "torque_loss": 0.07868852466344833 }, { "epoch": 29.60431654676259, "grad_norm": 0.2614149749279022, "learning_rate": 4.609352915503202e-05, "loss": 0.0705, "step": 32920 }, { "action_loss": 0.002588581060990691, "epoch": 29.60431654676259, "step": 32920 }, { "epoch": 29.60431654676259, "step": 32920, "torque_loss": 0.07914916425943375 }, { "epoch": 29.613309352517987, "grad_norm": 0.2525327801704407, "learning_rate": 4.606605615755166e-05, "loss": 0.0563, "step": 32930 }, { "action_loss": 0.025808552280068398, "epoch": 29.613309352517987, "step": 32930 }, { "epoch": 29.613309352517987, "step": 32930, "torque_loss": 0.15423183143138885 }, { "epoch": 29.62230215827338, "grad_norm": 0.34553954005241394, "learning_rate": 4.6038584355099576e-05, "loss": 0.0732, "step": 32940 }, { "action_loss": 0.0032750663813203573, "epoch": 29.62230215827338, "step": 32940 }, { "epoch": 29.62230215827338, "step": 32940, "torque_loss": 0.09380576759576797 }, { "epoch": 29.631294964028775, "grad_norm": 0.37198641896247864, "learning_rate": 4.6011113756020964e-05, "loss": 0.0629, "step": 32950 }, { "action_loss": 0.027198636904358864, "epoch": 29.631294964028775, "step": 32950 }, { "epoch": 29.631294964028775, "step": 32950, "torque_loss": 0.16258680820465088 }, { "epoch": 29.640287769784173, "grad_norm": 0.23185408115386963, "learning_rate": 4.598364436866066e-05, "loss": 0.0632, "step": 32960 }, { "action_loss": 0.011275247670710087, "epoch": 29.640287769784173, "step": 32960 }, { "epoch": 29.640287769784173, "step": 32960, "torque_loss": 0.14774933457374573 }, { "epoch": 29.649280575539567, "grad_norm": 0.39541175961494446, "learning_rate": 4.595617620136316e-05, "loss": 0.069, "step": 32970 }, { "action_loss": 0.007035417947918177, "epoch": 29.649280575539567, "step": 32970 }, { "epoch": 29.649280575539567, "step": 32970, "torque_loss": 0.10126108676195145 }, { "epoch": 29.658273381294965, "grad_norm": 0.26681646704673767, "learning_rate": 4.592870926247257e-05, "loss": 0.0698, "step": 32980 }, { "action_loss": 0.0048049259930849075, "epoch": 29.658273381294965, "step": 32980 }, { "epoch": 29.658273381294965, "step": 32980, "torque_loss": 0.09352263808250427 }, { "epoch": 29.66726618705036, "grad_norm": 0.3967770040035248, "learning_rate": 4.5901243560332594e-05, "loss": 0.0697, "step": 32990 }, { "action_loss": 0.0028532755095511675, "epoch": 29.66726618705036, "step": 32990 }, { "epoch": 29.66726618705036, "step": 32990, "torque_loss": 0.06387089937925339 }, { "epoch": 29.676258992805757, "grad_norm": 0.3589377999305725, "learning_rate": 4.587377910328662e-05, "loss": 0.0611, "step": 33000 }, { "action_loss": 0.002924060681834817, "epoch": 29.676258992805757, "step": 33000 }, { "epoch": 29.676258992805757, "step": 33000, "torque_loss": 0.09152057021856308 }, { "epoch": 29.68525179856115, "grad_norm": 0.2896439731121063, "learning_rate": 4.5846315899677586e-05, "loss": 0.0516, "step": 33010 }, { "action_loss": 0.0023993554059416056, "epoch": 29.68525179856115, "step": 33010 }, { "epoch": 29.68525179856115, "step": 33010, "torque_loss": 0.054693955928087234 }, { "epoch": 29.694244604316548, "grad_norm": 0.3082614243030548, "learning_rate": 4.5818853957848114e-05, "loss": 0.0685, "step": 33020 }, { "action_loss": 0.003571959212422371, "epoch": 29.694244604316548, "step": 33020 }, { "epoch": 29.694244604316548, "step": 33020, "torque_loss": 0.07663606107234955 }, { "epoch": 29.703237410071942, "grad_norm": 0.26544100046157837, "learning_rate": 4.579139328614043e-05, "loss": 0.0595, "step": 33030 }, { "action_loss": 0.009983690455555916, "epoch": 29.703237410071942, "step": 33030 }, { "epoch": 29.703237410071942, "step": 33030, "torque_loss": 0.10839255899190903 }, { "epoch": 29.71223021582734, "grad_norm": 0.1968948245048523, "learning_rate": 4.576393389289633e-05, "loss": 0.0623, "step": 33040 }, { "action_loss": 0.004434781149029732, "epoch": 29.71223021582734, "step": 33040 }, { "epoch": 29.71223021582734, "step": 33040, "torque_loss": 0.109588123857975 }, { "epoch": 29.721223021582734, "grad_norm": 0.32113200426101685, "learning_rate": 4.573647578645728e-05, "loss": 0.0696, "step": 33050 }, { "action_loss": 0.0030156599823385477, "epoch": 29.721223021582734, "step": 33050 }, { "epoch": 29.721223021582734, "step": 33050, "torque_loss": 0.0942046046257019 }, { "epoch": 29.730215827338128, "grad_norm": 0.32988160848617554, "learning_rate": 4.57090189751643e-05, "loss": 0.0506, "step": 33060 }, { "action_loss": 0.006498435046523809, "epoch": 29.730215827338128, "step": 33060 }, { "epoch": 29.730215827338128, "step": 33060, "torque_loss": 0.11016762256622314 }, { "epoch": 29.739208633093526, "grad_norm": 0.2736301124095917, "learning_rate": 4.568156346735806e-05, "loss": 0.0625, "step": 33070 }, { "action_loss": 0.002468676073476672, "epoch": 29.739208633093526, "step": 33070 }, { "epoch": 29.739208633093526, "step": 33070, "torque_loss": 0.09430411458015442 }, { "epoch": 29.74820143884892, "grad_norm": 0.24723486602306366, "learning_rate": 4.565410927137882e-05, "loss": 0.0599, "step": 33080 }, { "action_loss": 0.001843650359660387, "epoch": 29.74820143884892, "step": 33080 }, { "epoch": 29.74820143884892, "step": 33080, "torque_loss": 0.057133570313453674 }, { "epoch": 29.757194244604317, "grad_norm": 0.28367629647254944, "learning_rate": 4.562665639556644e-05, "loss": 0.0741, "step": 33090 }, { "action_loss": 0.002217549132183194, "epoch": 29.757194244604317, "step": 33090 }, { "epoch": 29.757194244604317, "step": 33090, "torque_loss": 0.060232579708099365 }, { "epoch": 29.76618705035971, "grad_norm": 0.328286737203598, "learning_rate": 4.559920484826037e-05, "loss": 0.0619, "step": 33100 }, { "action_loss": 0.0018670037388801575, "epoch": 29.76618705035971, "step": 33100 }, { "epoch": 29.76618705035971, "step": 33100, "torque_loss": 0.051932934671640396 }, { "epoch": 29.77517985611511, "grad_norm": 0.2961302101612091, "learning_rate": 4.5571754637799665e-05, "loss": 0.0624, "step": 33110 }, { "action_loss": 0.011167841963469982, "epoch": 29.77517985611511, "step": 33110 }, { "epoch": 29.77517985611511, "step": 33110, "torque_loss": 0.12948794662952423 }, { "epoch": 29.784172661870503, "grad_norm": 0.2666436433792114, "learning_rate": 4.554430577252298e-05, "loss": 0.0596, "step": 33120 }, { "action_loss": 0.004656476434320211, "epoch": 29.784172661870503, "step": 33120 }, { "epoch": 29.784172661870503, "step": 33120, "torque_loss": 0.11884696036577225 }, { "epoch": 29.7931654676259, "grad_norm": 0.26591432094573975, "learning_rate": 4.551685826076858e-05, "loss": 0.0642, "step": 33130 }, { "action_loss": 0.005165961105376482, "epoch": 29.7931654676259, "step": 33130 }, { "epoch": 29.7931654676259, "step": 33130, "torque_loss": 0.1012604832649231 }, { "epoch": 29.802158273381295, "grad_norm": 0.3399190902709961, "learning_rate": 4.5489412110874246e-05, "loss": 0.065, "step": 33140 }, { "action_loss": 0.003978620748966932, "epoch": 29.802158273381295, "step": 33140 }, { "epoch": 29.802158273381295, "step": 33140, "torque_loss": 0.09427622705698013 }, { "epoch": 29.81115107913669, "grad_norm": 0.299060583114624, "learning_rate": 4.5461967331177444e-05, "loss": 0.0721, "step": 33150 }, { "action_loss": 0.0036956581752747297, "epoch": 29.81115107913669, "step": 33150 }, { "epoch": 29.81115107913669, "step": 33150, "torque_loss": 0.10408055037260056 }, { "epoch": 29.820143884892087, "grad_norm": 0.3335393965244293, "learning_rate": 4.5434523930015115e-05, "loss": 0.0715, "step": 33160 }, { "action_loss": 0.006033396814018488, "epoch": 29.820143884892087, "step": 33160 }, { "epoch": 29.820143884892087, "step": 33160, "torque_loss": 0.09237221628427505 }, { "epoch": 29.82913669064748, "grad_norm": 0.27126428484916687, "learning_rate": 4.540708191572388e-05, "loss": 0.0662, "step": 33170 }, { "action_loss": 0.0017502257833257318, "epoch": 29.82913669064748, "step": 33170 }, { "epoch": 29.82913669064748, "step": 33170, "torque_loss": 0.051535140722990036 }, { "epoch": 29.83812949640288, "grad_norm": 0.2869466543197632, "learning_rate": 4.537964129663991e-05, "loss": 0.0525, "step": 33180 }, { "action_loss": 0.0032205015886574984, "epoch": 29.83812949640288, "step": 33180 }, { "epoch": 29.83812949640288, "step": 33180, "torque_loss": 0.09389200806617737 }, { "epoch": 29.847122302158272, "grad_norm": 0.3116796314716339, "learning_rate": 4.535220208109889e-05, "loss": 0.0679, "step": 33190 }, { "action_loss": 0.0023003036621958017, "epoch": 29.847122302158272, "step": 33190 }, { "epoch": 29.847122302158272, "step": 33190, "torque_loss": 0.1045343279838562 }, { "epoch": 29.85611510791367, "grad_norm": 0.2752304673194885, "learning_rate": 4.5324764277436194e-05, "loss": 0.0603, "step": 33200 }, { "action_loss": 0.003834899514913559, "epoch": 29.85611510791367, "step": 33200 }, { "epoch": 29.85611510791367, "step": 33200, "torque_loss": 0.09790375083684921 }, { "epoch": 29.865107913669064, "grad_norm": 0.29593491554260254, "learning_rate": 4.529732789398664e-05, "loss": 0.0678, "step": 33210 }, { "action_loss": 0.0036113013047724962, "epoch": 29.865107913669064, "step": 33210 }, { "epoch": 29.865107913669064, "step": 33210, "torque_loss": 0.06137187406420708 }, { "epoch": 29.87410071942446, "grad_norm": 0.23316338658332825, "learning_rate": 4.526989293908472e-05, "loss": 0.057, "step": 33220 }, { "action_loss": 0.003383081406354904, "epoch": 29.87410071942446, "step": 33220 }, { "epoch": 29.87410071942446, "step": 33220, "torque_loss": 0.07731688022613525 }, { "epoch": 29.883093525179856, "grad_norm": 0.31229302287101746, "learning_rate": 4.524245942106442e-05, "loss": 0.0597, "step": 33230 }, { "action_loss": 0.004263776820152998, "epoch": 29.883093525179856, "step": 33230 }, { "epoch": 29.883093525179856, "step": 33230, "torque_loss": 0.08255743980407715 }, { "epoch": 29.892086330935253, "grad_norm": 0.38949209451675415, "learning_rate": 4.5215027348259345e-05, "loss": 0.0713, "step": 33240 }, { "action_loss": 0.00815741065889597, "epoch": 29.892086330935253, "step": 33240 }, { "epoch": 29.892086330935253, "step": 33240, "torque_loss": 0.10927433520555496 }, { "epoch": 29.901079136690647, "grad_norm": 0.33173850178718567, "learning_rate": 4.5187596729002616e-05, "loss": 0.0645, "step": 33250 }, { "action_loss": 0.014759033918380737, "epoch": 29.901079136690647, "step": 33250 }, { "epoch": 29.901079136690647, "step": 33250, "torque_loss": 0.12306752800941467 }, { "epoch": 29.91007194244604, "grad_norm": 0.30339884757995605, "learning_rate": 4.516016757162693e-05, "loss": 0.0565, "step": 33260 }, { "action_loss": 0.017680931836366653, "epoch": 29.91007194244604, "step": 33260 }, { "epoch": 29.91007194244604, "step": 33260, "torque_loss": 0.15683674812316895 }, { "epoch": 29.91906474820144, "grad_norm": 0.1833275407552719, "learning_rate": 4.513273988446457e-05, "loss": 0.063, "step": 33270 }, { "action_loss": 0.004479800816625357, "epoch": 29.91906474820144, "step": 33270 }, { "epoch": 29.91906474820144, "step": 33270, "torque_loss": 0.09794187545776367 }, { "epoch": 29.928057553956833, "grad_norm": 0.28753864765167236, "learning_rate": 4.5105313675847296e-05, "loss": 0.0583, "step": 33280 }, { "action_loss": 0.00556274875998497, "epoch": 29.928057553956833, "step": 33280 }, { "epoch": 29.928057553956833, "step": 33280, "torque_loss": 0.11731693893671036 }, { "epoch": 29.93705035971223, "grad_norm": 0.31719985604286194, "learning_rate": 4.5077888954106495e-05, "loss": 0.0604, "step": 33290 }, { "action_loss": 0.0030650494154542685, "epoch": 29.93705035971223, "step": 33290 }, { "epoch": 29.93705035971223, "step": 33290, "torque_loss": 0.09859155863523483 }, { "epoch": 29.946043165467625, "grad_norm": 0.31862154603004456, "learning_rate": 4.505046572757309e-05, "loss": 0.0701, "step": 33300 }, { "action_loss": 0.010714162141084671, "epoch": 29.946043165467625, "step": 33300 }, { "epoch": 29.946043165467625, "step": 33300, "torque_loss": 0.1398504078388214 }, { "epoch": 29.955035971223023, "grad_norm": 0.3269478380680084, "learning_rate": 4.502304400457749e-05, "loss": 0.0704, "step": 33310 }, { "action_loss": 0.0032651524525135756, "epoch": 29.955035971223023, "step": 33310 }, { "epoch": 29.955035971223023, "step": 33310, "torque_loss": 0.04916325584053993 }, { "epoch": 29.964028776978417, "grad_norm": 0.29997092485427856, "learning_rate": 4.499562379344973e-05, "loss": 0.0493, "step": 33320 }, { "action_loss": 0.007059264462441206, "epoch": 29.964028776978417, "step": 33320 }, { "epoch": 29.964028776978417, "step": 33320, "torque_loss": 0.10123644024133682 }, { "epoch": 29.973021582733814, "grad_norm": 0.3151254653930664, "learning_rate": 4.4968205102519306e-05, "loss": 0.0637, "step": 33330 }, { "action_loss": 0.002331123221665621, "epoch": 29.973021582733814, "step": 33330 }, { "epoch": 29.973021582733814, "step": 33330, "torque_loss": 0.08748561143875122 }, { "epoch": 29.98201438848921, "grad_norm": 0.33187711238861084, "learning_rate": 4.494078794011532e-05, "loss": 0.0628, "step": 33340 }, { "action_loss": 0.003533242968842387, "epoch": 29.98201438848921, "step": 33340 }, { "epoch": 29.98201438848921, "step": 33340, "torque_loss": 0.08154270797967911 }, { "epoch": 29.991007194244606, "grad_norm": 0.3043470084667206, "learning_rate": 4.491337231456639e-05, "loss": 0.0619, "step": 33350 }, { "action_loss": 0.004318153019994497, "epoch": 29.991007194244606, "step": 33350 }, { "epoch": 29.991007194244606, "step": 33350, "torque_loss": 0.07608211040496826 }, { "epoch": 30.0, "grad_norm": 0.23658022284507751, "learning_rate": 4.4885958234200634e-05, "loss": 0.0528, "step": 33360 }, { "action_loss": 0.0018172316486015916, "epoch": 30.0, "step": 33360 }, { "epoch": 30.0, "step": 33360, "torque_loss": 0.04379098489880562 }, { "epoch": 30.008992805755394, "grad_norm": 0.27261269092559814, "learning_rate": 4.485854570734575e-05, "loss": 0.0565, "step": 33370 }, { "action_loss": 0.013596131466329098, "epoch": 30.008992805755394, "step": 33370 }, { "epoch": 30.008992805755394, "step": 33370, "torque_loss": 0.16884523630142212 }, { "epoch": 30.01798561151079, "grad_norm": 0.36839696764945984, "learning_rate": 4.483113474232891e-05, "loss": 0.0687, "step": 33380 }, { "action_loss": 0.0057900394313037395, "epoch": 30.01798561151079, "step": 33380 }, { "epoch": 30.01798561151079, "step": 33380, "torque_loss": 0.10768038034439087 }, { "epoch": 30.026978417266186, "grad_norm": 0.32446280121803284, "learning_rate": 4.480372534747688e-05, "loss": 0.0606, "step": 33390 }, { "action_loss": 0.0015819318359717727, "epoch": 30.026978417266186, "step": 33390 }, { "epoch": 30.026978417266186, "step": 33390, "torque_loss": 0.08415645360946655 }, { "epoch": 30.035971223021583, "grad_norm": 0.37774011492729187, "learning_rate": 4.477631753111588e-05, "loss": 0.0797, "step": 33400 }, { "action_loss": 0.006601691246032715, "epoch": 30.035971223021583, "step": 33400 }, { "epoch": 30.035971223021583, "step": 33400, "torque_loss": 0.11364950984716415 }, { "epoch": 30.044964028776977, "grad_norm": 0.2934483289718628, "learning_rate": 4.4748911301571686e-05, "loss": 0.0601, "step": 33410 }, { "action_loss": 0.0020841488149017096, "epoch": 30.044964028776977, "step": 33410 }, { "epoch": 30.044964028776977, "step": 33410, "torque_loss": 0.0668627992272377 }, { "epoch": 30.053956834532375, "grad_norm": 0.284323513507843, "learning_rate": 4.472150666716961e-05, "loss": 0.0509, "step": 33420 }, { "action_loss": 0.00805272813886404, "epoch": 30.053956834532375, "step": 33420 }, { "epoch": 30.053956834532375, "step": 33420, "torque_loss": 0.09358229488134384 }, { "epoch": 30.06294964028777, "grad_norm": 0.30184435844421387, "learning_rate": 4.469410363623442e-05, "loss": 0.0526, "step": 33430 }, { "action_loss": 0.003492940217256546, "epoch": 30.06294964028777, "step": 33430 }, { "epoch": 30.06294964028777, "step": 33430, "torque_loss": 0.05200757458806038 }, { "epoch": 30.071942446043167, "grad_norm": 0.3318862318992615, "learning_rate": 4.466670221709044e-05, "loss": 0.0809, "step": 33440 }, { "action_loss": 0.002345068147405982, "epoch": 30.071942446043167, "step": 33440 }, { "epoch": 30.071942446043167, "step": 33440, "torque_loss": 0.04503064230084419 }, { "epoch": 30.08093525179856, "grad_norm": 0.32456862926483154, "learning_rate": 4.463930241806154e-05, "loss": 0.0629, "step": 33450 }, { "action_loss": 0.0020169978961348534, "epoch": 30.08093525179856, "step": 33450 }, { "epoch": 30.08093525179856, "step": 33450, "torque_loss": 0.06407633423805237 }, { "epoch": 30.08992805755396, "grad_norm": 0.3805660009384155, "learning_rate": 4.4611904247471006e-05, "loss": 0.0489, "step": 33460 }, { "action_loss": 0.0029103311244398355, "epoch": 30.08992805755396, "step": 33460 }, { "epoch": 30.08992805755396, "step": 33460, "torque_loss": 0.08116155117750168 }, { "epoch": 30.098920863309353, "grad_norm": 0.3109448254108429, "learning_rate": 4.458450771364171e-05, "loss": 0.0586, "step": 33470 }, { "action_loss": 0.0015622340142726898, "epoch": 30.098920863309353, "step": 33470 }, { "epoch": 30.098920863309353, "step": 33470, "torque_loss": 0.052199963480234146 }, { "epoch": 30.107913669064747, "grad_norm": 0.23485198616981506, "learning_rate": 4.4557112824895965e-05, "loss": 0.0499, "step": 33480 }, { "action_loss": 0.0038563506677746773, "epoch": 30.107913669064747, "step": 33480 }, { "epoch": 30.107913669064747, "step": 33480, "torque_loss": 0.12933552265167236 }, { "epoch": 30.116906474820144, "grad_norm": 0.2688434422016144, "learning_rate": 4.452971958955563e-05, "loss": 0.0577, "step": 33490 }, { "action_loss": 0.0026949571911245584, "epoch": 30.116906474820144, "step": 33490 }, { "epoch": 30.116906474820144, "step": 33490, "torque_loss": 0.0819326862692833 }, { "epoch": 30.12589928057554, "grad_norm": 0.3140889108181, "learning_rate": 4.450232801594208e-05, "loss": 0.0571, "step": 33500 }, { "action_loss": 0.009966863319277763, "epoch": 30.12589928057554, "step": 33500 }, { "epoch": 30.12589928057554, "step": 33500, "torque_loss": 0.10342226177453995 }, { "epoch": 30.134892086330936, "grad_norm": 0.28709059953689575, "learning_rate": 4.447493811237609e-05, "loss": 0.0588, "step": 33510 }, { "action_loss": 0.013007394969463348, "epoch": 30.134892086330936, "step": 33510 }, { "epoch": 30.134892086330936, "step": 33510, "torque_loss": 0.12435313314199448 }, { "epoch": 30.14388489208633, "grad_norm": 0.4407808184623718, "learning_rate": 4.444754988717804e-05, "loss": 0.07, "step": 33520 }, { "action_loss": 0.003081665141507983, "epoch": 30.14388489208633, "step": 33520 }, { "epoch": 30.14388489208633, "step": 33520, "torque_loss": 0.09472844004631042 }, { "epoch": 30.152877697841728, "grad_norm": 0.28623420000076294, "learning_rate": 4.442016334866771e-05, "loss": 0.0768, "step": 33530 }, { "action_loss": 0.0048793405294418335, "epoch": 30.152877697841728, "step": 33530 }, { "epoch": 30.152877697841728, "step": 33530, "torque_loss": 0.09653687477111816 }, { "epoch": 30.16187050359712, "grad_norm": 0.30693694949150085, "learning_rate": 4.4392778505164445e-05, "loss": 0.056, "step": 33540 }, { "action_loss": 0.0027461934369057417, "epoch": 30.16187050359712, "step": 33540 }, { "epoch": 30.16187050359712, "step": 33540, "torque_loss": 0.07417584210634232 }, { "epoch": 30.17086330935252, "grad_norm": 0.36539140343666077, "learning_rate": 4.436539536498702e-05, "loss": 0.0525, "step": 33550 }, { "action_loss": 0.003212639130651951, "epoch": 30.17086330935252, "step": 33550 }, { "epoch": 30.17086330935252, "step": 33550, "torque_loss": 0.10718659311532974 }, { "epoch": 30.179856115107913, "grad_norm": 0.34686028957366943, "learning_rate": 4.433801393645369e-05, "loss": 0.0654, "step": 33560 }, { "action_loss": 0.005291730165481567, "epoch": 30.179856115107913, "step": 33560 }, { "epoch": 30.179856115107913, "step": 33560, "torque_loss": 0.11110826581716537 }, { "epoch": 30.18884892086331, "grad_norm": 0.3335234522819519, "learning_rate": 4.431063422788226e-05, "loss": 0.067, "step": 33570 }, { "action_loss": 0.004364707972854376, "epoch": 30.18884892086331, "step": 33570 }, { "epoch": 30.18884892086331, "step": 33570, "torque_loss": 0.09769779443740845 }, { "epoch": 30.197841726618705, "grad_norm": 0.3367835283279419, "learning_rate": 4.428325624758991e-05, "loss": 0.0591, "step": 33580 }, { "action_loss": 0.0016004638746380806, "epoch": 30.197841726618705, "step": 33580 }, { "epoch": 30.197841726618705, "step": 33580, "torque_loss": 0.0729731023311615 }, { "epoch": 30.2068345323741, "grad_norm": 0.3490860164165497, "learning_rate": 4.4255880003893366e-05, "loss": 0.0671, "step": 33590 }, { "action_loss": 0.004282028879970312, "epoch": 30.2068345323741, "step": 33590 }, { "epoch": 30.2068345323741, "step": 33590, "torque_loss": 0.08076560497283936 }, { "epoch": 30.215827338129497, "grad_norm": 0.21592722833156586, "learning_rate": 4.422850550510884e-05, "loss": 0.0542, "step": 33600 }, { "action_loss": 0.011525504291057587, "epoch": 30.215827338129497, "step": 33600 }, { "epoch": 30.215827338129497, "step": 33600, "torque_loss": 0.15012158453464508 }, { "epoch": 30.22482014388489, "grad_norm": 0.2992290258407593, "learning_rate": 4.4201132759551934e-05, "loss": 0.0674, "step": 33610 }, { "action_loss": 0.004968170076608658, "epoch": 30.22482014388489, "step": 33610 }, { "epoch": 30.22482014388489, "step": 33610, "torque_loss": 0.13588857650756836 }, { "epoch": 30.23381294964029, "grad_norm": 0.3130069077014923, "learning_rate": 4.4173761775537804e-05, "loss": 0.066, "step": 33620 }, { "action_loss": 0.0030165035277605057, "epoch": 30.23381294964029, "step": 33620 }, { "epoch": 30.23381294964029, "step": 33620, "torque_loss": 0.07204258441925049 }, { "epoch": 30.242805755395683, "grad_norm": 0.3596552610397339, "learning_rate": 4.414639256138099e-05, "loss": 0.0645, "step": 33630 }, { "action_loss": 0.003525507403537631, "epoch": 30.242805755395683, "step": 33630 }, { "epoch": 30.242805755395683, "step": 33630, "torque_loss": 0.06880035996437073 }, { "epoch": 30.25179856115108, "grad_norm": 0.3178566098213196, "learning_rate": 4.411902512539557e-05, "loss": 0.0549, "step": 33640 }, { "action_loss": 0.0028420810122042894, "epoch": 30.25179856115108, "step": 33640 }, { "epoch": 30.25179856115108, "step": 33640, "torque_loss": 0.07700914144515991 }, { "epoch": 30.260791366906474, "grad_norm": 0.359613835811615, "learning_rate": 4.4091659475895044e-05, "loss": 0.059, "step": 33650 }, { "action_loss": 0.0022337157279253006, "epoch": 30.260791366906474, "step": 33650 }, { "epoch": 30.260791366906474, "step": 33650, "torque_loss": 0.07171767204999924 }, { "epoch": 30.269784172661872, "grad_norm": 0.2694861590862274, "learning_rate": 4.406429562119235e-05, "loss": 0.0622, "step": 33660 }, { "action_loss": 0.008067626506090164, "epoch": 30.269784172661872, "step": 33660 }, { "epoch": 30.269784172661872, "step": 33660, "torque_loss": 0.10768333822488785 }, { "epoch": 30.278776978417266, "grad_norm": 0.3222379684448242, "learning_rate": 4.4036933569599945e-05, "loss": 0.0643, "step": 33670 }, { "action_loss": 0.0024564999621361494, "epoch": 30.278776978417266, "step": 33670 }, { "epoch": 30.278776978417266, "step": 33670, "torque_loss": 0.0738140419125557 }, { "epoch": 30.28776978417266, "grad_norm": 0.2765671908855438, "learning_rate": 4.400957332942965e-05, "loss": 0.0621, "step": 33680 }, { "action_loss": 0.0043232254683971405, "epoch": 30.28776978417266, "step": 33680 }, { "epoch": 30.28776978417266, "step": 33680, "torque_loss": 0.11097190529108047 }, { "epoch": 30.296762589928058, "grad_norm": 0.2921615242958069, "learning_rate": 4.3982214908992844e-05, "loss": 0.0598, "step": 33690 }, { "action_loss": 0.007372302934527397, "epoch": 30.296762589928058, "step": 33690 }, { "epoch": 30.296762589928058, "step": 33690, "torque_loss": 0.11423388123512268 }, { "epoch": 30.305755395683452, "grad_norm": 0.32507437467575073, "learning_rate": 4.3954858316600235e-05, "loss": 0.0586, "step": 33700 }, { "action_loss": 0.004102627281099558, "epoch": 30.305755395683452, "step": 33700 }, { "epoch": 30.305755395683452, "step": 33700, "torque_loss": 0.08179648965597153 }, { "epoch": 30.31474820143885, "grad_norm": 0.2888505458831787, "learning_rate": 4.392750356056205e-05, "loss": 0.0599, "step": 33710 }, { "action_loss": 0.00444556400179863, "epoch": 30.31474820143885, "step": 33710 }, { "epoch": 30.31474820143885, "step": 33710, "torque_loss": 0.09201905131340027 }, { "epoch": 30.323741007194243, "grad_norm": 0.28240177035331726, "learning_rate": 4.390015064918798e-05, "loss": 0.0538, "step": 33720 }, { "action_loss": 0.0018736724741756916, "epoch": 30.323741007194243, "step": 33720 }, { "epoch": 30.323741007194243, "step": 33720, "torque_loss": 0.08844850212335587 }, { "epoch": 30.33273381294964, "grad_norm": 0.38887110352516174, "learning_rate": 4.387279959078705e-05, "loss": 0.0594, "step": 33730 }, { "action_loss": 0.005509119480848312, "epoch": 30.33273381294964, "step": 33730 }, { "epoch": 30.33273381294964, "step": 33730, "torque_loss": 0.11013839393854141 }, { "epoch": 30.341726618705035, "grad_norm": 0.31425783038139343, "learning_rate": 4.384545039366786e-05, "loss": 0.0575, "step": 33740 }, { "action_loss": 0.00393943814560771, "epoch": 30.341726618705035, "step": 33740 }, { "epoch": 30.341726618705035, "step": 33740, "torque_loss": 0.13951320946216583 }, { "epoch": 30.350719424460433, "grad_norm": 0.358418732881546, "learning_rate": 4.381810306613831e-05, "loss": 0.0712, "step": 33750 }, { "action_loss": 0.008156700991094112, "epoch": 30.350719424460433, "step": 33750 }, { "epoch": 30.350719424460433, "step": 33750, "torque_loss": 0.13782407343387604 }, { "epoch": 30.359712230215827, "grad_norm": 0.27400752902030945, "learning_rate": 4.3790757616505826e-05, "loss": 0.0622, "step": 33760 }, { "action_loss": 0.0015299739316105843, "epoch": 30.359712230215827, "step": 33760 }, { "epoch": 30.359712230215827, "step": 33760, "torque_loss": 0.050354380160570145 }, { "epoch": 30.368705035971225, "grad_norm": 0.29670900106430054, "learning_rate": 4.376341405307725e-05, "loss": 0.069, "step": 33770 }, { "action_loss": 0.001842884928919375, "epoch": 30.368705035971225, "step": 33770 }, { "epoch": 30.368705035971225, "step": 33770, "torque_loss": 0.08675888925790787 }, { "epoch": 30.37769784172662, "grad_norm": 0.22596094012260437, "learning_rate": 4.37360723841588e-05, "loss": 0.0643, "step": 33780 }, { "action_loss": 0.008351834490895271, "epoch": 30.37769784172662, "step": 33780 }, { "epoch": 30.37769784172662, "step": 33780, "torque_loss": 0.14257419109344482 }, { "epoch": 30.386690647482013, "grad_norm": 0.41557782888412476, "learning_rate": 4.370873261805619e-05, "loss": 0.0833, "step": 33790 }, { "action_loss": 0.020124534144997597, "epoch": 30.386690647482013, "step": 33790 }, { "epoch": 30.386690647482013, "step": 33790, "torque_loss": 0.13149689137935638 }, { "epoch": 30.39568345323741, "grad_norm": 0.386057585477829, "learning_rate": 4.368139476307449e-05, "loss": 0.0791, "step": 33800 }, { "action_loss": 0.003913927357643843, "epoch": 30.39568345323741, "step": 33800 }, { "epoch": 30.39568345323741, "step": 33800, "torque_loss": 0.10043511539697647 }, { "epoch": 30.404676258992804, "grad_norm": 0.3172385096549988, "learning_rate": 4.365405882751822e-05, "loss": 0.0678, "step": 33810 }, { "action_loss": 0.005306954029947519, "epoch": 30.404676258992804, "step": 33810 }, { "epoch": 30.404676258992804, "step": 33810, "torque_loss": 0.10457977652549744 }, { "epoch": 30.413669064748202, "grad_norm": 0.2331819385290146, "learning_rate": 4.3626724819691326e-05, "loss": 0.0573, "step": 33820 }, { "action_loss": 0.0027696052566170692, "epoch": 30.413669064748202, "step": 33820 }, { "epoch": 30.413669064748202, "step": 33820, "torque_loss": 0.04316633939743042 }, { "epoch": 30.422661870503596, "grad_norm": 0.2731022834777832, "learning_rate": 4.359939274789715e-05, "loss": 0.0578, "step": 33830 }, { "action_loss": 0.0033429034519940615, "epoch": 30.422661870503596, "step": 33830 }, { "epoch": 30.422661870503596, "step": 33830, "torque_loss": 0.10323480516672134 }, { "epoch": 30.431654676258994, "grad_norm": 0.23615722358226776, "learning_rate": 4.357206262043848e-05, "loss": 0.0587, "step": 33840 }, { "action_loss": 0.0017068390734493732, "epoch": 30.431654676258994, "step": 33840 }, { "epoch": 30.431654676258994, "step": 33840, "torque_loss": 0.059084516018629074 }, { "epoch": 30.440647482014388, "grad_norm": 0.2645811140537262, "learning_rate": 4.354473444561745e-05, "loss": 0.0527, "step": 33850 }, { "action_loss": 0.004012967459857464, "epoch": 30.440647482014388, "step": 33850 }, { "epoch": 30.440647482014388, "step": 33850, "torque_loss": 0.08370282500982285 }, { "epoch": 30.449640287769785, "grad_norm": 0.3646846115589142, "learning_rate": 4.3517408231735644e-05, "loss": 0.0682, "step": 33860 }, { "action_loss": 0.005861061159521341, "epoch": 30.449640287769785, "step": 33860 }, { "epoch": 30.449640287769785, "step": 33860, "torque_loss": 0.09792932122945786 }, { "epoch": 30.45863309352518, "grad_norm": 0.26467689871788025, "learning_rate": 4.3490083987094086e-05, "loss": 0.0577, "step": 33870 }, { "action_loss": 0.0013985047116875648, "epoch": 30.45863309352518, "step": 33870 }, { "epoch": 30.45863309352518, "step": 33870, "torque_loss": 0.06451969593763351 }, { "epoch": 30.467625899280577, "grad_norm": 0.35436269640922546, "learning_rate": 4.34627617199931e-05, "loss": 0.0701, "step": 33880 }, { "action_loss": 0.008377306163311005, "epoch": 30.467625899280577, "step": 33880 }, { "epoch": 30.467625899280577, "step": 33880, "torque_loss": 0.10249257832765579 }, { "epoch": 30.47661870503597, "grad_norm": 0.3105771541595459, "learning_rate": 4.3435441438732526e-05, "loss": 0.0549, "step": 33890 }, { "action_loss": 0.014603443443775177, "epoch": 30.47661870503597, "step": 33890 }, { "epoch": 30.47661870503597, "step": 33890, "torque_loss": 0.0966343879699707 }, { "epoch": 30.485611510791365, "grad_norm": 0.273345410823822, "learning_rate": 4.340812315161149e-05, "loss": 0.0657, "step": 33900 }, { "action_loss": 0.007911230437457561, "epoch": 30.485611510791365, "step": 33900 }, { "epoch": 30.485611510791365, "step": 33900, "torque_loss": 0.13291506469249725 }, { "epoch": 30.494604316546763, "grad_norm": 0.3630085587501526, "learning_rate": 4.338080686692859e-05, "loss": 0.0727, "step": 33910 }, { "action_loss": 0.007264535874128342, "epoch": 30.494604316546763, "step": 33910 }, { "epoch": 30.494604316546763, "step": 33910, "torque_loss": 0.09845525026321411 }, { "epoch": 30.503597122302157, "grad_norm": 0.3664427101612091, "learning_rate": 4.3353492592981816e-05, "loss": 0.0619, "step": 33920 }, { "action_loss": 0.0032248811330646276, "epoch": 30.503597122302157, "step": 33920 }, { "epoch": 30.503597122302157, "step": 33920, "torque_loss": 0.07280495762825012 }, { "epoch": 30.512589928057555, "grad_norm": 0.23588962852954865, "learning_rate": 4.3326180338068485e-05, "loss": 0.0772, "step": 33930 }, { "action_loss": 0.003159475279971957, "epoch": 30.512589928057555, "step": 33930 }, { "epoch": 30.512589928057555, "step": 33930, "torque_loss": 0.0707351565361023 }, { "epoch": 30.52158273381295, "grad_norm": 0.2627086043357849, "learning_rate": 4.3298870110485356e-05, "loss": 0.0589, "step": 33940 }, { "action_loss": 0.004306199494749308, "epoch": 30.52158273381295, "step": 33940 }, { "epoch": 30.52158273381295, "step": 33940, "torque_loss": 0.07100443542003632 }, { "epoch": 30.530575539568346, "grad_norm": 0.24247083067893982, "learning_rate": 4.3271561918528567e-05, "loss": 0.05, "step": 33950 }, { "action_loss": 0.002289316849783063, "epoch": 30.530575539568346, "step": 33950 }, { "epoch": 30.530575539568346, "step": 33950, "torque_loss": 0.06435449421405792 }, { "epoch": 30.53956834532374, "grad_norm": 0.35628530383110046, "learning_rate": 4.324425577049359e-05, "loss": 0.0647, "step": 33960 }, { "action_loss": 0.005983427166938782, "epoch": 30.53956834532374, "step": 33960 }, { "epoch": 30.53956834532374, "step": 33960, "torque_loss": 0.11272013187408447 }, { "epoch": 30.548561151079138, "grad_norm": 0.22685767710208893, "learning_rate": 4.321695167467535e-05, "loss": 0.0632, "step": 33970 }, { "action_loss": 0.0017576393438503146, "epoch": 30.548561151079138, "step": 33970 }, { "epoch": 30.548561151079138, "step": 33970, "torque_loss": 0.05554013326764107 }, { "epoch": 30.557553956834532, "grad_norm": 0.37782180309295654, "learning_rate": 4.3189649639368093e-05, "loss": 0.0698, "step": 33980 }, { "action_loss": 0.0030788190197199583, "epoch": 30.557553956834532, "step": 33980 }, { "epoch": 30.557553956834532, "step": 33980, "torque_loss": 0.08847188949584961 }, { "epoch": 30.56654676258993, "grad_norm": 0.2934577465057373, "learning_rate": 4.316234967286547e-05, "loss": 0.063, "step": 33990 }, { "action_loss": 0.004252913407981396, "epoch": 30.56654676258993, "step": 33990 }, { "epoch": 30.56654676258993, "step": 33990, "torque_loss": 0.12304908037185669 }, { "epoch": 30.575539568345324, "grad_norm": 0.32503587007522583, "learning_rate": 4.313505178346046e-05, "loss": 0.0839, "step": 34000 }, { "action_loss": 0.0031647689174860716, "epoch": 30.575539568345324, "step": 34000 }, { "epoch": 30.575539568345324, "step": 34000, "torque_loss": 0.11912450939416885 }, { "epoch": 30.584532374100718, "grad_norm": 0.2966841757297516, "learning_rate": 4.3107755979445465e-05, "loss": 0.0651, "step": 34010 }, { "action_loss": 0.005464709829539061, "epoch": 30.584532374100718, "step": 34010 }, { "epoch": 30.584532374100718, "step": 34010, "torque_loss": 0.09480801224708557 }, { "epoch": 30.593525179856115, "grad_norm": 0.29060566425323486, "learning_rate": 4.308046226911224e-05, "loss": 0.0601, "step": 34020 }, { "action_loss": 0.006815432105213404, "epoch": 30.593525179856115, "step": 34020 }, { "epoch": 30.593525179856115, "step": 34020, "torque_loss": 0.09001147747039795 }, { "epoch": 30.60251798561151, "grad_norm": 0.26133593916893005, "learning_rate": 4.305317066075185e-05, "loss": 0.0668, "step": 34030 }, { "action_loss": 0.0018527511274442077, "epoch": 30.60251798561151, "step": 34030 }, { "epoch": 30.60251798561151, "step": 34030, "torque_loss": 0.068209707736969 }, { "epoch": 30.611510791366907, "grad_norm": 0.28904473781585693, "learning_rate": 4.302588116265482e-05, "loss": 0.0625, "step": 34040 }, { "action_loss": 0.008996224962174892, "epoch": 30.611510791366907, "step": 34040 }, { "epoch": 30.611510791366907, "step": 34040, "torque_loss": 0.1258578896522522 }, { "epoch": 30.6205035971223, "grad_norm": 0.24592533707618713, "learning_rate": 4.299859378311094e-05, "loss": 0.0678, "step": 34050 }, { "action_loss": 0.003251810325309634, "epoch": 30.6205035971223, "step": 34050 }, { "epoch": 30.6205035971223, "step": 34050, "torque_loss": 0.07066882401704788 }, { "epoch": 30.6294964028777, "grad_norm": 0.33368030190467834, "learning_rate": 4.2971308530409424e-05, "loss": 0.0566, "step": 34060 }, { "action_loss": 0.012348056770861149, "epoch": 30.6294964028777, "step": 34060 }, { "epoch": 30.6294964028777, "step": 34060, "torque_loss": 0.14489135146141052 }, { "epoch": 30.638489208633093, "grad_norm": 0.2912137806415558, "learning_rate": 4.2944025412838765e-05, "loss": 0.0565, "step": 34070 }, { "action_loss": 0.005139234010130167, "epoch": 30.638489208633093, "step": 34070 }, { "epoch": 30.638489208633093, "step": 34070, "torque_loss": 0.1324169635772705 }, { "epoch": 30.64748201438849, "grad_norm": 0.2562739849090576, "learning_rate": 4.291674443868689e-05, "loss": 0.0611, "step": 34080 }, { "action_loss": 0.0036362886894494295, "epoch": 30.64748201438849, "step": 34080 }, { "epoch": 30.64748201438849, "step": 34080, "torque_loss": 0.08317660540342331 }, { "epoch": 30.656474820143885, "grad_norm": 0.31175798177719116, "learning_rate": 4.288946561624104e-05, "loss": 0.0556, "step": 34090 }, { "action_loss": 0.0029341895133256912, "epoch": 30.656474820143885, "step": 34090 }, { "epoch": 30.656474820143885, "step": 34090, "torque_loss": 0.050099927932024 }, { "epoch": 30.665467625899282, "grad_norm": 0.24017862975597382, "learning_rate": 4.2862188953787794e-05, "loss": 0.0623, "step": 34100 }, { "action_loss": 0.005908617749810219, "epoch": 30.665467625899282, "step": 34100 }, { "epoch": 30.665467625899282, "step": 34100, "torque_loss": 0.10236481577157974 }, { "epoch": 30.674460431654676, "grad_norm": 0.2501104176044464, "learning_rate": 4.283491445961308e-05, "loss": 0.0604, "step": 34110 }, { "action_loss": 0.007449714932590723, "epoch": 30.674460431654676, "step": 34110 }, { "epoch": 30.674460431654676, "step": 34110, "torque_loss": 0.1087690070271492 }, { "epoch": 30.68345323741007, "grad_norm": 0.31296858191490173, "learning_rate": 4.2807642142002155e-05, "loss": 0.0632, "step": 34120 }, { "action_loss": 0.003003000281751156, "epoch": 30.68345323741007, "step": 34120 }, { "epoch": 30.68345323741007, "step": 34120, "torque_loss": 0.07164826989173889 }, { "epoch": 30.692446043165468, "grad_norm": 0.276278018951416, "learning_rate": 4.278037200923966e-05, "loss": 0.0535, "step": 34130 }, { "action_loss": 0.003781005507335067, "epoch": 30.692446043165468, "step": 34130 }, { "epoch": 30.692446043165468, "step": 34130, "torque_loss": 0.10950525850057602 }, { "epoch": 30.701438848920862, "grad_norm": 0.3329630196094513, "learning_rate": 4.275310406960953e-05, "loss": 0.0741, "step": 34140 }, { "action_loss": 0.021886946633458138, "epoch": 30.701438848920862, "step": 34140 }, { "epoch": 30.701438848920862, "step": 34140, "torque_loss": 0.17152535915374756 }, { "epoch": 30.71043165467626, "grad_norm": 0.3143312931060791, "learning_rate": 4.272583833139502e-05, "loss": 0.0724, "step": 34150 }, { "action_loss": 0.0026209671050310135, "epoch": 30.71043165467626, "step": 34150 }, { "epoch": 30.71043165467626, "step": 34150, "torque_loss": 0.07825272530317307 }, { "epoch": 30.719424460431654, "grad_norm": 0.36372819542884827, "learning_rate": 4.2698574802878794e-05, "loss": 0.0651, "step": 34160 }, { "action_loss": 0.004695412237197161, "epoch": 30.719424460431654, "step": 34160 }, { "epoch": 30.719424460431654, "step": 34160, "torque_loss": 0.10051365941762924 }, { "epoch": 30.72841726618705, "grad_norm": 0.26629534363746643, "learning_rate": 4.2671313492342734e-05, "loss": 0.0533, "step": 34170 }, { "action_loss": 0.0028277765959501266, "epoch": 30.72841726618705, "step": 34170 }, { "epoch": 30.72841726618705, "step": 34170, "torque_loss": 0.08063261955976486 }, { "epoch": 30.737410071942445, "grad_norm": 0.25778087973594666, "learning_rate": 4.264405440806813e-05, "loss": 0.0532, "step": 34180 }, { "action_loss": 0.0017078156815841794, "epoch": 30.737410071942445, "step": 34180 }, { "epoch": 30.737410071942445, "step": 34180, "torque_loss": 0.06840406358242035 }, { "epoch": 30.746402877697843, "grad_norm": 0.4703577756881714, "learning_rate": 4.26167975583356e-05, "loss": 0.0688, "step": 34190 }, { "action_loss": 0.003598463488742709, "epoch": 30.746402877697843, "step": 34190 }, { "epoch": 30.746402877697843, "step": 34190, "torque_loss": 0.07706273347139359 }, { "epoch": 30.755395683453237, "grad_norm": 0.21748431026935577, "learning_rate": 4.2589542951425e-05, "loss": 0.0496, "step": 34200 }, { "action_loss": 0.005129413213580847, "epoch": 30.755395683453237, "step": 34200 }, { "epoch": 30.755395683453237, "step": 34200, "torque_loss": 0.10942935943603516 }, { "epoch": 30.764388489208635, "grad_norm": 0.33661070466041565, "learning_rate": 4.2562290595615615e-05, "loss": 0.0648, "step": 34210 }, { "action_loss": 0.0036123974714428186, "epoch": 30.764388489208635, "step": 34210 }, { "epoch": 30.764388489208635, "step": 34210, "torque_loss": 0.08706429600715637 }, { "epoch": 30.77338129496403, "grad_norm": 0.26141393184661865, "learning_rate": 4.2535040499185946e-05, "loss": 0.0652, "step": 34220 }, { "action_loss": 0.0044183749705553055, "epoch": 30.77338129496403, "step": 34220 }, { "epoch": 30.77338129496403, "step": 34220, "torque_loss": 0.08927514404058456 }, { "epoch": 30.782374100719423, "grad_norm": 0.3758208453655243, "learning_rate": 4.250779267041387e-05, "loss": 0.0698, "step": 34230 }, { "action_loss": 0.002547771669924259, "epoch": 30.782374100719423, "step": 34230 }, { "epoch": 30.782374100719423, "step": 34230, "torque_loss": 0.056640371680259705 }, { "epoch": 30.79136690647482, "grad_norm": 0.302486389875412, "learning_rate": 4.248054711757657e-05, "loss": 0.0623, "step": 34240 }, { "action_loss": 0.0026988815516233444, "epoch": 30.79136690647482, "step": 34240 }, { "epoch": 30.79136690647482, "step": 34240, "torque_loss": 0.07390442490577698 }, { "epoch": 30.800359712230215, "grad_norm": 0.2969511151313782, "learning_rate": 4.245330384895052e-05, "loss": 0.0634, "step": 34250 }, { "action_loss": 0.01829470507800579, "epoch": 30.800359712230215, "step": 34250 }, { "epoch": 30.800359712230215, "step": 34250, "torque_loss": 0.20819056034088135 }, { "epoch": 30.809352517985612, "grad_norm": 0.24951964616775513, "learning_rate": 4.242606287281151e-05, "loss": 0.0821, "step": 34260 }, { "action_loss": 0.010164803825318813, "epoch": 30.809352517985612, "step": 34260 }, { "epoch": 30.809352517985612, "step": 34260, "torque_loss": 0.11698354035615921 }, { "epoch": 30.818345323741006, "grad_norm": 0.2352665662765503, "learning_rate": 4.2398824197434595e-05, "loss": 0.0806, "step": 34270 }, { "action_loss": 0.007835076190531254, "epoch": 30.818345323741006, "step": 34270 }, { "epoch": 30.818345323741006, "step": 34270, "torque_loss": 0.07337741553783417 }, { "epoch": 30.827338129496404, "grad_norm": 0.25713443756103516, "learning_rate": 4.23715878310942e-05, "loss": 0.0526, "step": 34280 }, { "action_loss": 0.005242606624960899, "epoch": 30.827338129496404, "step": 34280 }, { "epoch": 30.827338129496404, "step": 34280, "torque_loss": 0.09696290642023087 }, { "epoch": 30.836330935251798, "grad_norm": 0.230783611536026, "learning_rate": 4.234435378206402e-05, "loss": 0.0604, "step": 34290 }, { "action_loss": 0.0034088108222931623, "epoch": 30.836330935251798, "step": 34290 }, { "epoch": 30.836330935251798, "step": 34290, "torque_loss": 0.09104996919631958 }, { "epoch": 30.845323741007196, "grad_norm": 0.25840774178504944, "learning_rate": 4.2317122058617006e-05, "loss": 0.065, "step": 34300 }, { "action_loss": 0.0029514033813029528, "epoch": 30.845323741007196, "step": 34300 }, { "epoch": 30.845323741007196, "step": 34300, "torque_loss": 0.07906589657068253 }, { "epoch": 30.85431654676259, "grad_norm": 0.27746400237083435, "learning_rate": 4.2289892669025485e-05, "loss": 0.0597, "step": 34310 }, { "action_loss": 0.0020303495693951845, "epoch": 30.85431654676259, "step": 34310 }, { "epoch": 30.85431654676259, "step": 34310, "torque_loss": 0.07023467868566513 }, { "epoch": 30.863309352517987, "grad_norm": 0.24371285736560822, "learning_rate": 4.226266562156097e-05, "loss": 0.0659, "step": 34320 }, { "action_loss": 0.0017028084257617593, "epoch": 30.863309352517987, "step": 34320 }, { "epoch": 30.863309352517987, "step": 34320, "torque_loss": 0.04866025224328041 }, { "epoch": 30.87230215827338, "grad_norm": 0.2387232780456543, "learning_rate": 4.223544092449435e-05, "loss": 0.0534, "step": 34330 }, { "action_loss": 0.004547747317701578, "epoch": 30.87230215827338, "step": 34330 }, { "epoch": 30.87230215827338, "step": 34330, "torque_loss": 0.07624977082014084 }, { "epoch": 30.881294964028775, "grad_norm": 0.29030999541282654, "learning_rate": 4.2208218586095784e-05, "loss": 0.0587, "step": 34340 }, { "action_loss": 0.002758163260295987, "epoch": 30.881294964028775, "step": 34340 }, { "epoch": 30.881294964028775, "step": 34340, "torque_loss": 0.07939346879720688 }, { "epoch": 30.890287769784173, "grad_norm": 0.32642439007759094, "learning_rate": 4.218099861463466e-05, "loss": 0.0551, "step": 34350 }, { "action_loss": 0.0041245645843446255, "epoch": 30.890287769784173, "step": 34350 }, { "epoch": 30.890287769784173, "step": 34350, "torque_loss": 0.11098330467939377 }, { "epoch": 30.899280575539567, "grad_norm": 0.3856509029865265, "learning_rate": 4.215378101837972e-05, "loss": 0.0709, "step": 34360 }, { "action_loss": 0.0032687669154256582, "epoch": 30.899280575539567, "step": 34360 }, { "epoch": 30.899280575539567, "step": 34360, "torque_loss": 0.07250013202428818 }, { "epoch": 30.908273381294965, "grad_norm": 0.28655877709388733, "learning_rate": 4.2126565805598937e-05, "loss": 0.0588, "step": 34370 }, { "action_loss": 0.002210483653470874, "epoch": 30.908273381294965, "step": 34370 }, { "epoch": 30.908273381294965, "step": 34370, "torque_loss": 0.06094624474644661 }, { "epoch": 30.91726618705036, "grad_norm": 0.3747018277645111, "learning_rate": 4.209935298455957e-05, "loss": 0.0612, "step": 34380 }, { "action_loss": 0.0020623747259378433, "epoch": 30.91726618705036, "step": 34380 }, { "epoch": 30.91726618705036, "step": 34380, "torque_loss": 0.06619484722614288 }, { "epoch": 30.926258992805757, "grad_norm": 0.3757033348083496, "learning_rate": 4.207214256352817e-05, "loss": 0.0551, "step": 34390 }, { "action_loss": 0.0020705375354737043, "epoch": 30.926258992805757, "step": 34390 }, { "epoch": 30.926258992805757, "step": 34390, "torque_loss": 0.07904477417469025 }, { "epoch": 30.93525179856115, "grad_norm": 0.36850112676620483, "learning_rate": 4.2044934550770524e-05, "loss": 0.071, "step": 34400 }, { "action_loss": 0.06348345428705215, "epoch": 30.93525179856115, "step": 34400 }, { "epoch": 30.93525179856115, "step": 34400, "torque_loss": 0.15470902621746063 }, { "epoch": 30.944244604316548, "grad_norm": 0.369124174118042, "learning_rate": 4.201772895455174e-05, "loss": 0.0672, "step": 34410 }, { "action_loss": 0.0032276392448693514, "epoch": 30.944244604316548, "step": 34410 }, { "epoch": 30.944244604316548, "step": 34410, "torque_loss": 0.10138950496912003 }, { "epoch": 30.953237410071942, "grad_norm": 0.2922472059726715, "learning_rate": 4.199052578313613e-05, "loss": 0.0606, "step": 34420 }, { "action_loss": 0.00523274065926671, "epoch": 30.953237410071942, "step": 34420 }, { "epoch": 30.953237410071942, "step": 34420, "torque_loss": 0.06818514317274094 }, { "epoch": 30.96223021582734, "grad_norm": 0.2766851484775543, "learning_rate": 4.1963325044787294e-05, "loss": 0.0634, "step": 34430 }, { "action_loss": 0.0018288145074620843, "epoch": 30.96223021582734, "step": 34430 }, { "epoch": 30.96223021582734, "step": 34430, "torque_loss": 0.05245806649327278 }, { "epoch": 30.971223021582734, "grad_norm": 0.3042222559452057, "learning_rate": 4.193612674776814e-05, "loss": 0.0548, "step": 34440 }, { "action_loss": 0.007609322667121887, "epoch": 30.971223021582734, "step": 34440 }, { "epoch": 30.971223021582734, "step": 34440, "torque_loss": 0.11518775671720505 }, { "epoch": 30.980215827338128, "grad_norm": 0.21861864626407623, "learning_rate": 4.1908930900340745e-05, "loss": 0.0637, "step": 34450 }, { "action_loss": 0.00626777857542038, "epoch": 30.980215827338128, "step": 34450 }, { "epoch": 30.980215827338128, "step": 34450, "torque_loss": 0.11013247817754745 }, { "epoch": 30.989208633093526, "grad_norm": 0.2385271042585373, "learning_rate": 4.1881737510766536e-05, "loss": 0.0579, "step": 34460 }, { "action_loss": 0.0030624570790678263, "epoch": 30.989208633093526, "step": 34460 }, { "epoch": 30.989208633093526, "step": 34460, "torque_loss": 0.07466312497854233 }, { "epoch": 30.99820143884892, "grad_norm": 0.18603073060512543, "learning_rate": 4.185454658730609e-05, "loss": 0.0504, "step": 34470 }, { "action_loss": 0.003496327670291066, "epoch": 30.99820143884892, "step": 34470 }, { "epoch": 30.99820143884892, "step": 34470, "torque_loss": 0.07747790962457657 }, { "epoch": 31.007194244604317, "grad_norm": 0.2429533153772354, "learning_rate": 4.1827358138219355e-05, "loss": 0.0617, "step": 34480 }, { "action_loss": 0.002629300579428673, "epoch": 31.007194244604317, "step": 34480 }, { "epoch": 31.007194244604317, "step": 34480, "torque_loss": 0.052791934460401535 }, { "epoch": 31.01618705035971, "grad_norm": 0.3230243921279907, "learning_rate": 4.1800172171765404e-05, "loss": 0.0755, "step": 34490 }, { "action_loss": 0.004396727308630943, "epoch": 31.01618705035971, "step": 34490 }, { "epoch": 31.01618705035971, "step": 34490, "torque_loss": 0.11099603772163391 }, { "epoch": 31.02517985611511, "grad_norm": 0.24860158562660217, "learning_rate": 4.177298869620264e-05, "loss": 0.0515, "step": 34500 }, { "action_loss": 0.004085333552211523, "epoch": 31.02517985611511, "step": 34500 }, { "epoch": 31.02517985611511, "step": 34500, "torque_loss": 0.09068774431943893 }, { "epoch": 31.034172661870503, "grad_norm": 0.3340616822242737, "learning_rate": 4.1745807719788705e-05, "loss": 0.0647, "step": 34510 }, { "action_loss": 0.0024701105430722237, "epoch": 31.034172661870503, "step": 34510 }, { "epoch": 31.034172661870503, "step": 34510, "torque_loss": 0.08602949231863022 }, { "epoch": 31.0431654676259, "grad_norm": 0.26855236291885376, "learning_rate": 4.1718629250780445e-05, "loss": 0.0637, "step": 34520 }, { "action_loss": 0.005141586530953646, "epoch": 31.0431654676259, "step": 34520 }, { "epoch": 31.0431654676259, "step": 34520, "torque_loss": 0.08711516112089157 }, { "epoch": 31.052158273381295, "grad_norm": 0.2798885703086853, "learning_rate": 4.1691453297433956e-05, "loss": 0.0631, "step": 34530 }, { "action_loss": 0.00469619408249855, "epoch": 31.052158273381295, "step": 34530 }, { "epoch": 31.052158273381295, "step": 34530, "torque_loss": 0.09734531491994858 }, { "epoch": 31.06115107913669, "grad_norm": 0.3612266182899475, "learning_rate": 4.166427986800457e-05, "loss": 0.0665, "step": 34540 }, { "action_loss": 0.0030006945598870516, "epoch": 31.06115107913669, "step": 34540 }, { "epoch": 31.06115107913669, "step": 34540, "torque_loss": 0.0963071957230568 }, { "epoch": 31.070143884892087, "grad_norm": 0.30479925870895386, "learning_rate": 4.163710897074688e-05, "loss": 0.0554, "step": 34550 }, { "action_loss": 0.004573910031467676, "epoch": 31.070143884892087, "step": 34550 }, { "epoch": 31.070143884892087, "step": 34550, "torque_loss": 0.09132403880357742 }, { "epoch": 31.07913669064748, "grad_norm": 0.330363392829895, "learning_rate": 4.1609940613914686e-05, "loss": 0.0709, "step": 34560 }, { "action_loss": 0.00796977523714304, "epoch": 31.07913669064748, "step": 34560 }, { "epoch": 31.07913669064748, "step": 34560, "torque_loss": 0.12472623586654663 }, { "epoch": 31.08812949640288, "grad_norm": 0.28623223304748535, "learning_rate": 4.1582774805760996e-05, "loss": 0.0599, "step": 34570 }, { "action_loss": 0.0026919450610876083, "epoch": 31.08812949640288, "step": 34570 }, { "epoch": 31.08812949640288, "step": 34570, "torque_loss": 0.07069850713014603 }, { "epoch": 31.097122302158272, "grad_norm": 0.32312774658203125, "learning_rate": 4.155561155453809e-05, "loss": 0.0591, "step": 34580 }, { "action_loss": 0.0023878347128629684, "epoch": 31.097122302158272, "step": 34580 }, { "epoch": 31.097122302158272, "step": 34580, "torque_loss": 0.08282224088907242 }, { "epoch": 31.10611510791367, "grad_norm": 0.3072681725025177, "learning_rate": 4.15284508684974e-05, "loss": 0.0678, "step": 34590 }, { "action_loss": 0.002008214360103011, "epoch": 31.10611510791367, "step": 34590 }, { "epoch": 31.10611510791367, "step": 34590, "torque_loss": 0.06655276566743851 }, { "epoch": 31.115107913669064, "grad_norm": 0.30990439653396606, "learning_rate": 4.1501292755889675e-05, "loss": 0.0656, "step": 34600 }, { "action_loss": 0.004439647309482098, "epoch": 31.115107913669064, "step": 34600 }, { "epoch": 31.115107913669064, "step": 34600, "torque_loss": 0.08814636617898941 }, { "epoch": 31.12410071942446, "grad_norm": 0.3664606213569641, "learning_rate": 4.1474137224964833e-05, "loss": 0.0623, "step": 34610 }, { "action_loss": 0.006859472021460533, "epoch": 31.12410071942446, "step": 34610 }, { "epoch": 31.12410071942446, "step": 34610, "torque_loss": 0.09327306598424911 }, { "epoch": 31.133093525179856, "grad_norm": 0.39796018600463867, "learning_rate": 4.144698428397197e-05, "loss": 0.061, "step": 34620 }, { "action_loss": 0.002548327436670661, "epoch": 31.133093525179856, "step": 34620 }, { "epoch": 31.133093525179856, "step": 34620, "torque_loss": 0.10598230361938477 }, { "epoch": 31.142086330935253, "grad_norm": 0.25571683049201965, "learning_rate": 4.1419833941159466e-05, "loss": 0.0506, "step": 34630 }, { "action_loss": 0.002157660201191902, "epoch": 31.142086330935253, "step": 34630 }, { "epoch": 31.142086330935253, "step": 34630, "torque_loss": 0.07634662836790085 }, { "epoch": 31.151079136690647, "grad_norm": 0.30356457829475403, "learning_rate": 4.1392686204774846e-05, "loss": 0.0534, "step": 34640 }, { "action_loss": 0.0021115762647241354, "epoch": 31.151079136690647, "step": 34640 }, { "epoch": 31.151079136690647, "step": 34640, "torque_loss": 0.0700281485915184 }, { "epoch": 31.16007194244604, "grad_norm": 0.30338364839553833, "learning_rate": 4.13655410830649e-05, "loss": 0.0555, "step": 34650 }, { "action_loss": 0.004328013863414526, "epoch": 31.16007194244604, "step": 34650 }, { "epoch": 31.16007194244604, "step": 34650, "torque_loss": 0.08377230167388916 }, { "epoch": 31.16906474820144, "grad_norm": 0.31541091203689575, "learning_rate": 4.1338398584275594e-05, "loss": 0.0804, "step": 34660 }, { "action_loss": 0.011067393235862255, "epoch": 31.16906474820144, "step": 34660 }, { "epoch": 31.16906474820144, "step": 34660, "torque_loss": 0.14297863841056824 }, { "epoch": 31.178057553956833, "grad_norm": 0.3217081129550934, "learning_rate": 4.1311258716652104e-05, "loss": 0.072, "step": 34670 }, { "action_loss": 0.006654748227447271, "epoch": 31.178057553956833, "step": 34670 }, { "epoch": 31.178057553956833, "step": 34670, "torque_loss": 0.12581288814544678 }, { "epoch": 31.18705035971223, "grad_norm": 0.26583364605903625, "learning_rate": 4.128412148843881e-05, "loss": 0.0615, "step": 34680 }, { "action_loss": 0.005006979685276747, "epoch": 31.18705035971223, "step": 34680 }, { "epoch": 31.18705035971223, "step": 34680, "torque_loss": 0.09259893745183945 }, { "epoch": 31.196043165467625, "grad_norm": 0.31064361333847046, "learning_rate": 4.125698690787926e-05, "loss": 0.0628, "step": 34690 }, { "action_loss": 0.0061884052120149136, "epoch": 31.196043165467625, "step": 34690 }, { "epoch": 31.196043165467625, "step": 34690, "torque_loss": 0.10778876394033432 }, { "epoch": 31.205035971223023, "grad_norm": 0.28464624285697937, "learning_rate": 4.1229854983216245e-05, "loss": 0.0643, "step": 34700 }, { "action_loss": 0.00554642966017127, "epoch": 31.205035971223023, "step": 34700 }, { "epoch": 31.205035971223023, "step": 34700, "torque_loss": 0.09203839302062988 }, { "epoch": 31.214028776978417, "grad_norm": 0.4250434339046478, "learning_rate": 4.120272572269175e-05, "loss": 0.0714, "step": 34710 }, { "action_loss": 0.008940215222537518, "epoch": 31.214028776978417, "step": 34710 }, { "epoch": 31.214028776978417, "step": 34710, "torque_loss": 0.11434973031282425 }, { "epoch": 31.223021582733814, "grad_norm": 0.30403009057044983, "learning_rate": 4.117559913454687e-05, "loss": 0.059, "step": 34720 }, { "action_loss": 0.002832844154909253, "epoch": 31.223021582733814, "step": 34720 }, { "epoch": 31.223021582733814, "step": 34720, "torque_loss": 0.08409181982278824 }, { "epoch": 31.23201438848921, "grad_norm": 0.4183998703956604, "learning_rate": 4.114847522702201e-05, "loss": 0.06, "step": 34730 }, { "action_loss": 0.004776230547577143, "epoch": 31.23201438848921, "step": 34730 }, { "epoch": 31.23201438848921, "step": 34730, "torque_loss": 0.08664810657501221 }, { "epoch": 31.241007194244606, "grad_norm": 0.2291468381881714, "learning_rate": 4.112135400835664e-05, "loss": 0.0488, "step": 34740 }, { "action_loss": 0.007703002076596022, "epoch": 31.241007194244606, "step": 34740 }, { "epoch": 31.241007194244606, "step": 34740, "torque_loss": 0.09530019015073776 }, { "epoch": 31.25, "grad_norm": 0.4410642981529236, "learning_rate": 4.109423548678949e-05, "loss": 0.0508, "step": 34750 }, { "action_loss": 0.005001053214073181, "epoch": 31.25, "step": 34750 }, { "epoch": 31.25, "step": 34750, "torque_loss": 0.08360309153795242 }, { "epoch": 31.258992805755394, "grad_norm": 0.33365458250045776, "learning_rate": 4.106711967055848e-05, "loss": 0.0617, "step": 34760 }, { "action_loss": 0.00413215346634388, "epoch": 31.258992805755394, "step": 34760 }, { "epoch": 31.258992805755394, "step": 34760, "torque_loss": 0.053040772676467896 }, { "epoch": 31.26798561151079, "grad_norm": 0.2593609392642975, "learning_rate": 4.1040006567900636e-05, "loss": 0.0566, "step": 34770 }, { "action_loss": 0.0023413379676640034, "epoch": 31.26798561151079, "step": 34770 }, { "epoch": 31.26798561151079, "step": 34770, "torque_loss": 0.06375005096197128 }, { "epoch": 31.276978417266186, "grad_norm": 0.4241836369037628, "learning_rate": 4.101289618705224e-05, "loss": 0.05, "step": 34780 }, { "action_loss": 0.0020827052649110556, "epoch": 31.276978417266186, "step": 34780 }, { "epoch": 31.276978417266186, "step": 34780, "torque_loss": 0.06799068301916122 }, { "epoch": 31.285971223021583, "grad_norm": 0.2973523736000061, "learning_rate": 4.0985788536248675e-05, "loss": 0.0583, "step": 34790 }, { "action_loss": 0.004142870660871267, "epoch": 31.285971223021583, "step": 34790 }, { "epoch": 31.285971223021583, "step": 34790, "torque_loss": 0.08254074305295944 }, { "epoch": 31.294964028776977, "grad_norm": 0.31460317969322205, "learning_rate": 4.095868362372454e-05, "loss": 0.05, "step": 34800 }, { "action_loss": 0.006303913425654173, "epoch": 31.294964028776977, "step": 34800 }, { "epoch": 31.294964028776977, "step": 34800, "torque_loss": 0.13160477578639984 }, { "epoch": 31.303956834532375, "grad_norm": 0.25607526302337646, "learning_rate": 4.0931581457713614e-05, "loss": 0.0602, "step": 34810 }, { "action_loss": 0.014882877469062805, "epoch": 31.303956834532375, "step": 34810 }, { "epoch": 31.303956834532375, "step": 34810, "torque_loss": 0.1389191746711731 }, { "epoch": 31.31294964028777, "grad_norm": 0.3352469205856323, "learning_rate": 4.09044820464488e-05, "loss": 0.0681, "step": 34820 }, { "action_loss": 0.0021424477454274893, "epoch": 31.31294964028777, "step": 34820 }, { "epoch": 31.31294964028777, "step": 34820, "torque_loss": 0.06567160040140152 }, { "epoch": 31.321942446043167, "grad_norm": 0.2688378393650055, "learning_rate": 4.087738539816219e-05, "loss": 0.0625, "step": 34830 }, { "action_loss": 0.008178729563951492, "epoch": 31.321942446043167, "step": 34830 }, { "epoch": 31.321942446043167, "step": 34830, "torque_loss": 0.1348724216222763 }, { "epoch": 31.33093525179856, "grad_norm": 0.3701787292957306, "learning_rate": 4.085029152108501e-05, "loss": 0.0717, "step": 34840 }, { "action_loss": 0.0015229973942041397, "epoch": 31.33093525179856, "step": 34840 }, { "epoch": 31.33093525179856, "step": 34840, "torque_loss": 0.06848826259374619 }, { "epoch": 31.33992805755396, "grad_norm": 0.3372220993041992, "learning_rate": 4.0823200423447714e-05, "loss": 0.0619, "step": 34850 }, { "action_loss": 0.005557490512728691, "epoch": 31.33992805755396, "step": 34850 }, { "epoch": 31.33992805755396, "step": 34850, "torque_loss": 0.06388046592473984 }, { "epoch": 31.348920863309353, "grad_norm": 0.2652972638607025, "learning_rate": 4.079611211347981e-05, "loss": 0.0659, "step": 34860 }, { "action_loss": 0.004847269039601088, "epoch": 31.348920863309353, "step": 34860 }, { "epoch": 31.348920863309353, "step": 34860, "torque_loss": 0.0955960676074028 }, { "epoch": 31.357913669064747, "grad_norm": 0.2755710482597351, "learning_rate": 4.076902659941002e-05, "loss": 0.0534, "step": 34870 }, { "action_loss": 0.005120419897139072, "epoch": 31.357913669064747, "step": 34870 }, { "epoch": 31.357913669064747, "step": 34870, "torque_loss": 0.0986923798918724 }, { "epoch": 31.366906474820144, "grad_norm": 0.3265942335128784, "learning_rate": 4.074194388946624e-05, "loss": 0.0536, "step": 34880 }, { "action_loss": 0.002573632402345538, "epoch": 31.366906474820144, "step": 34880 }, { "epoch": 31.366906474820144, "step": 34880, "torque_loss": 0.0946764424443245 }, { "epoch": 31.37589928057554, "grad_norm": 0.3453352451324463, "learning_rate": 4.071486399187545e-05, "loss": 0.0593, "step": 34890 }, { "action_loss": 0.008479022420942783, "epoch": 31.37589928057554, "step": 34890 }, { "epoch": 31.37589928057554, "step": 34890, "torque_loss": 0.10275748372077942 }, { "epoch": 31.384892086330936, "grad_norm": 0.310770183801651, "learning_rate": 4.0687786914863836e-05, "loss": 0.0561, "step": 34900 }, { "action_loss": 0.0020660872105509043, "epoch": 31.384892086330936, "step": 34900 }, { "epoch": 31.384892086330936, "step": 34900, "torque_loss": 0.06477219611406326 }, { "epoch": 31.39388489208633, "grad_norm": 0.3555241823196411, "learning_rate": 4.0660712666656666e-05, "loss": 0.0555, "step": 34910 }, { "action_loss": 0.003965756390243769, "epoch": 31.39388489208633, "step": 34910 }, { "epoch": 31.39388489208633, "step": 34910, "torque_loss": 0.11926209926605225 }, { "epoch": 31.402877697841728, "grad_norm": 0.3637539744377136, "learning_rate": 4.0633641255478394e-05, "loss": 0.0676, "step": 34920 }, { "action_loss": 0.002889191033318639, "epoch": 31.402877697841728, "step": 34920 }, { "epoch": 31.402877697841728, "step": 34920, "torque_loss": 0.06713756173849106 }, { "epoch": 31.41187050359712, "grad_norm": 0.2731516361236572, "learning_rate": 4.0606572689552624e-05, "loss": 0.0536, "step": 34930 }, { "action_loss": 0.002375385956838727, "epoch": 31.41187050359712, "step": 34930 }, { "epoch": 31.41187050359712, "step": 34930, "torque_loss": 0.0648006945848465 }, { "epoch": 31.42086330935252, "grad_norm": 0.3092925548553467, "learning_rate": 4.0579506977102036e-05, "loss": 0.066, "step": 34940 }, { "action_loss": 0.004712210036814213, "epoch": 31.42086330935252, "step": 34940 }, { "epoch": 31.42086330935252, "step": 34940, "torque_loss": 0.08830883353948593 }, { "epoch": 31.429856115107913, "grad_norm": 0.36548006534576416, "learning_rate": 4.055244412634849e-05, "loss": 0.0627, "step": 34950 }, { "action_loss": 0.0023738888558000326, "epoch": 31.429856115107913, "step": 34950 }, { "epoch": 31.429856115107913, "step": 34950, "torque_loss": 0.05152143910527229 }, { "epoch": 31.43884892086331, "grad_norm": 0.3900492489337921, "learning_rate": 4.052538414551298e-05, "loss": 0.0564, "step": 34960 }, { "action_loss": 0.015367832966148853, "epoch": 31.43884892086331, "step": 34960 }, { "epoch": 31.43884892086331, "step": 34960, "torque_loss": 0.12214603275060654 }, { "epoch": 31.447841726618705, "grad_norm": 0.2863679826259613, "learning_rate": 4.0498327042815596e-05, "loss": 0.0619, "step": 34970 }, { "action_loss": 0.003199925646185875, "epoch": 31.447841726618705, "step": 34970 }, { "epoch": 31.447841726618705, "step": 34970, "torque_loss": 0.057766783982515335 }, { "epoch": 31.4568345323741, "grad_norm": 0.24564500153064728, "learning_rate": 4.047127282647559e-05, "loss": 0.0584, "step": 34980 }, { "action_loss": 0.003720823908224702, "epoch": 31.4568345323741, "step": 34980 }, { "epoch": 31.4568345323741, "step": 34980, "torque_loss": 0.08995795249938965 }, { "epoch": 31.465827338129497, "grad_norm": 0.25220605731010437, "learning_rate": 4.04442215047113e-05, "loss": 0.0494, "step": 34990 }, { "action_loss": 0.003991563338786364, "epoch": 31.465827338129497, "step": 34990 }, { "epoch": 31.465827338129497, "step": 34990, "torque_loss": 0.07395196706056595 }, { "epoch": 31.47482014388489, "grad_norm": 0.24041976034641266, "learning_rate": 4.041717308574023e-05, "loss": 0.0489, "step": 35000 }, { "action_loss": 0.004251400474458933, "epoch": 31.47482014388489, "step": 35000 }, { "epoch": 31.47482014388489, "step": 35000, "torque_loss": 0.09620508551597595 }, { "epoch": 31.48381294964029, "grad_norm": 0.31377115845680237, "learning_rate": 4.039012757777893e-05, "loss": 0.0632, "step": 35010 }, { "action_loss": 0.0037150036077946424, "epoch": 31.48381294964029, "step": 35010 }, { "epoch": 31.48381294964029, "step": 35010, "torque_loss": 0.08039560168981552 }, { "epoch": 31.492805755395683, "grad_norm": 0.2254188358783722, "learning_rate": 4.036308498904314e-05, "loss": 0.048, "step": 35020 }, { "action_loss": 0.0067777507938444614, "epoch": 31.492805755395683, "step": 35020 }, { "epoch": 31.492805755395683, "step": 35020, "torque_loss": 0.11312005668878555 }, { "epoch": 31.50179856115108, "grad_norm": 0.3323999047279358, "learning_rate": 4.033604532774771e-05, "loss": 0.0665, "step": 35030 }, { "action_loss": 0.00445522228255868, "epoch": 31.50179856115108, "step": 35030 }, { "epoch": 31.50179856115108, "step": 35030, "torque_loss": 0.10774891823530197 }, { "epoch": 31.510791366906474, "grad_norm": 0.25014591217041016, "learning_rate": 4.030900860210652e-05, "loss": 0.0534, "step": 35040 }, { "action_loss": 0.02223111502826214, "epoch": 31.510791366906474, "step": 35040 }, { "epoch": 31.510791366906474, "step": 35040, "torque_loss": 0.16148078441619873 }, { "epoch": 31.519784172661872, "grad_norm": 0.23241056501865387, "learning_rate": 4.028197482033266e-05, "loss": 0.0594, "step": 35050 }, { "action_loss": 0.002768582897260785, "epoch": 31.519784172661872, "step": 35050 }, { "epoch": 31.519784172661872, "step": 35050, "torque_loss": 0.07611823081970215 }, { "epoch": 31.528776978417266, "grad_norm": 0.2892669439315796, "learning_rate": 4.0254943990638246e-05, "loss": 0.0706, "step": 35060 }, { "action_loss": 0.0044434103183448315, "epoch": 31.528776978417266, "step": 35060 }, { "epoch": 31.528776978417266, "step": 35060, "torque_loss": 0.06673268228769302 }, { "epoch": 31.53776978417266, "grad_norm": 0.4432819187641144, "learning_rate": 4.022791612123454e-05, "loss": 0.0771, "step": 35070 }, { "action_loss": 0.0028189346194267273, "epoch": 31.53776978417266, "step": 35070 }, { "epoch": 31.53776978417266, "step": 35070, "torque_loss": 0.07742325216531754 }, { "epoch": 31.546762589928058, "grad_norm": 0.2650199830532074, "learning_rate": 4.020089122033192e-05, "loss": 0.0639, "step": 35080 }, { "action_loss": 0.0067059751600027084, "epoch": 31.546762589928058, "step": 35080 }, { "epoch": 31.546762589928058, "step": 35080, "torque_loss": 0.07734134048223495 }, { "epoch": 31.555755395683452, "grad_norm": 0.29138070344924927, "learning_rate": 4.01738692961398e-05, "loss": 0.0733, "step": 35090 }, { "action_loss": 0.001011489424854517, "epoch": 31.555755395683452, "step": 35090 }, { "epoch": 31.555755395683452, "step": 35090, "torque_loss": 0.07573197036981583 }, { "epoch": 31.56474820143885, "grad_norm": 0.24043668806552887, "learning_rate": 4.014685035686675e-05, "loss": 0.0532, "step": 35100 }, { "action_loss": 0.0031076520681381226, "epoch": 31.56474820143885, "step": 35100 }, { "epoch": 31.56474820143885, "step": 35100, "torque_loss": 0.09029518812894821 }, { "epoch": 31.573741007194243, "grad_norm": 0.32707080245018005, "learning_rate": 4.011983441072039e-05, "loss": 0.0543, "step": 35110 }, { "action_loss": 0.0032920429948717356, "epoch": 31.573741007194243, "step": 35110 }, { "epoch": 31.573741007194243, "step": 35110, "torque_loss": 0.11555864661931992 }, { "epoch": 31.58273381294964, "grad_norm": 0.25993281602859497, "learning_rate": 4.0092821465907485e-05, "loss": 0.07, "step": 35120 }, { "action_loss": 0.0032079543452709913, "epoch": 31.58273381294964, "step": 35120 }, { "epoch": 31.58273381294964, "step": 35120, "torque_loss": 0.07757211476564407 }, { "epoch": 31.591726618705035, "grad_norm": 0.3442010283470154, "learning_rate": 4.006581153063383e-05, "loss": 0.0557, "step": 35130 }, { "action_loss": 0.0035493585746735334, "epoch": 31.591726618705035, "step": 35130 }, { "epoch": 31.591726618705035, "step": 35130, "torque_loss": 0.07865891605615616 }, { "epoch": 31.600719424460433, "grad_norm": 0.32204851508140564, "learning_rate": 4.003880461310432e-05, "loss": 0.0629, "step": 35140 }, { "action_loss": 0.0029069979209452868, "epoch": 31.600719424460433, "step": 35140 }, { "epoch": 31.600719424460433, "step": 35140, "torque_loss": 0.07307401299476624 }, { "epoch": 31.609712230215827, "grad_norm": 0.37113523483276367, "learning_rate": 4.001180072152298e-05, "loss": 0.0675, "step": 35150 }, { "action_loss": 0.003552058944478631, "epoch": 31.609712230215827, "step": 35150 }, { "epoch": 31.609712230215827, "step": 35150, "torque_loss": 0.08288849890232086 }, { "epoch": 31.618705035971225, "grad_norm": 0.27618512511253357, "learning_rate": 3.998479986409285e-05, "loss": 0.0695, "step": 35160 }, { "action_loss": 0.0029976731166243553, "epoch": 31.618705035971225, "step": 35160 }, { "epoch": 31.618705035971225, "step": 35160, "torque_loss": 0.08102819323539734 }, { "epoch": 31.62769784172662, "grad_norm": 0.2219248265028, "learning_rate": 3.995780204901607e-05, "loss": 0.0541, "step": 35170 }, { "action_loss": 0.0030400182586163282, "epoch": 31.62769784172662, "step": 35170 }, { "epoch": 31.62769784172662, "step": 35170, "torque_loss": 0.10006871074438095 }, { "epoch": 31.636690647482013, "grad_norm": 0.3808257579803467, "learning_rate": 3.993080728449391e-05, "loss": 0.0533, "step": 35180 }, { "action_loss": 0.0024581709876656532, "epoch": 31.636690647482013, "step": 35180 }, { "epoch": 31.636690647482013, "step": 35180, "torque_loss": 0.0771753191947937 }, { "epoch": 31.64568345323741, "grad_norm": 0.2877640426158905, "learning_rate": 3.990381557872661e-05, "loss": 0.0491, "step": 35190 }, { "action_loss": 0.017041148617863655, "epoch": 31.64568345323741, "step": 35190 }, { "epoch": 31.64568345323741, "step": 35190, "torque_loss": 0.13536785542964935 }, { "epoch": 31.654676258992804, "grad_norm": 0.25818681716918945, "learning_rate": 3.987682693991359e-05, "loss": 0.0542, "step": 35200 }, { "action_loss": 0.00521836569532752, "epoch": 31.654676258992804, "step": 35200 }, { "epoch": 31.654676258992804, "step": 35200, "torque_loss": 0.10553515702486038 }, { "epoch": 31.663669064748202, "grad_norm": 0.2465234398841858, "learning_rate": 3.9849841376253226e-05, "loss": 0.0455, "step": 35210 }, { "action_loss": 0.002254786901175976, "epoch": 31.663669064748202, "step": 35210 }, { "epoch": 31.663669064748202, "step": 35210, "torque_loss": 0.08592533320188522 }, { "epoch": 31.672661870503596, "grad_norm": 0.3280999958515167, "learning_rate": 3.982285889594306e-05, "loss": 0.0527, "step": 35220 }, { "action_loss": 0.003362143412232399, "epoch": 31.672661870503596, "step": 35220 }, { "epoch": 31.672661870503596, "step": 35220, "torque_loss": 0.09892424941062927 }, { "epoch": 31.681654676258994, "grad_norm": 0.38175734877586365, "learning_rate": 3.9795879507179665e-05, "loss": 0.0595, "step": 35230 }, { "action_loss": 0.01019049808382988, "epoch": 31.681654676258994, "step": 35230 }, { "epoch": 31.681654676258994, "step": 35230, "torque_loss": 0.11803876608610153 }, { "epoch": 31.690647482014388, "grad_norm": 0.3268234133720398, "learning_rate": 3.9768903218158634e-05, "loss": 0.0559, "step": 35240 }, { "action_loss": 0.0026251000817865133, "epoch": 31.690647482014388, "step": 35240 }, { "epoch": 31.690647482014388, "step": 35240, "torque_loss": 0.08611493557691574 }, { "epoch": 31.699640287769785, "grad_norm": 0.3404930531978607, "learning_rate": 3.974193003707468e-05, "loss": 0.0637, "step": 35250 }, { "action_loss": 0.00526542728766799, "epoch": 31.699640287769785, "step": 35250 }, { "epoch": 31.699640287769785, "step": 35250, "torque_loss": 0.11810912936925888 }, { "epoch": 31.70863309352518, "grad_norm": 0.2853618264198303, "learning_rate": 3.971495997212152e-05, "loss": 0.0547, "step": 35260 }, { "action_loss": 0.0032226808834820986, "epoch": 31.70863309352518, "step": 35260 }, { "epoch": 31.70863309352518, "step": 35260, "torque_loss": 0.08899185061454773 }, { "epoch": 31.717625899280577, "grad_norm": 0.45530757308006287, "learning_rate": 3.9687993031491985e-05, "loss": 0.0643, "step": 35270 }, { "action_loss": 0.012006930075585842, "epoch": 31.717625899280577, "step": 35270 }, { "epoch": 31.717625899280577, "step": 35270, "torque_loss": 0.16036319732666016 }, { "epoch": 31.72661870503597, "grad_norm": 0.2864227592945099, "learning_rate": 3.966102922337787e-05, "loss": 0.0661, "step": 35280 }, { "action_loss": 0.004116796888411045, "epoch": 31.72661870503597, "step": 35280 }, { "epoch": 31.72661870503597, "step": 35280, "torque_loss": 0.10892520099878311 }, { "epoch": 31.735611510791365, "grad_norm": 0.29653313755989075, "learning_rate": 3.963406855597009e-05, "loss": 0.055, "step": 35290 }, { "action_loss": 0.004538594279438257, "epoch": 31.735611510791365, "step": 35290 }, { "epoch": 31.735611510791365, "step": 35290, "torque_loss": 0.11561540514230728 }, { "epoch": 31.744604316546763, "grad_norm": 0.29406705498695374, "learning_rate": 3.960711103745861e-05, "loss": 0.0586, "step": 35300 }, { "action_loss": 0.008299264125525951, "epoch": 31.744604316546763, "step": 35300 }, { "epoch": 31.744604316546763, "step": 35300, "torque_loss": 0.11039558053016663 }, { "epoch": 31.753597122302157, "grad_norm": 0.2554750442504883, "learning_rate": 3.958015667603237e-05, "loss": 0.064, "step": 35310 }, { "action_loss": 0.004410102963447571, "epoch": 31.753597122302157, "step": 35310 }, { "epoch": 31.753597122302157, "step": 35310, "torque_loss": 0.1362849771976471 }, { "epoch": 31.762589928057555, "grad_norm": 0.321995347738266, "learning_rate": 3.955320547987943e-05, "loss": 0.0585, "step": 35320 }, { "action_loss": 0.0035186114255338907, "epoch": 31.762589928057555, "step": 35320 }, { "epoch": 31.762589928057555, "step": 35320, "torque_loss": 0.0808115303516388 }, { "epoch": 31.77158273381295, "grad_norm": 0.38624265789985657, "learning_rate": 3.952625745718681e-05, "loss": 0.0749, "step": 35330 }, { "action_loss": 0.012149296700954437, "epoch": 31.77158273381295, "step": 35330 }, { "epoch": 31.77158273381295, "step": 35330, "torque_loss": 0.11593427509069443 }, { "epoch": 31.780575539568346, "grad_norm": 0.29139289259910583, "learning_rate": 3.949931261614064e-05, "loss": 0.0656, "step": 35340 }, { "action_loss": 0.003631375962868333, "epoch": 31.780575539568346, "step": 35340 }, { "epoch": 31.780575539568346, "step": 35340, "torque_loss": 0.10676293820142746 }, { "epoch": 31.78956834532374, "grad_norm": 0.35781651735305786, "learning_rate": 3.947237096492605e-05, "loss": 0.0492, "step": 35350 }, { "action_loss": 0.00600782223045826, "epoch": 31.78956834532374, "step": 35350 }, { "epoch": 31.78956834532374, "step": 35350, "torque_loss": 0.1263609677553177 }, { "epoch": 31.798561151079138, "grad_norm": 0.3496779799461365, "learning_rate": 3.944543251172719e-05, "loss": 0.0656, "step": 35360 }, { "action_loss": 0.003890917869284749, "epoch": 31.798561151079138, "step": 35360 }, { "epoch": 31.798561151079138, "step": 35360, "torque_loss": 0.08222194015979767 }, { "epoch": 31.807553956834532, "grad_norm": 0.28822240233421326, "learning_rate": 3.941849726472725e-05, "loss": 0.0535, "step": 35370 }, { "action_loss": 0.010655514895915985, "epoch": 31.807553956834532, "step": 35370 }, { "epoch": 31.807553956834532, "step": 35370, "torque_loss": 0.15044939517974854 }, { "epoch": 31.81654676258993, "grad_norm": 0.2737051546573639, "learning_rate": 3.939156523210846e-05, "loss": 0.056, "step": 35380 }, { "action_loss": 0.013504807837307453, "epoch": 31.81654676258993, "step": 35380 }, { "epoch": 31.81654676258993, "step": 35380, "torque_loss": 0.16191788017749786 }, { "epoch": 31.825539568345324, "grad_norm": 0.3075866401195526, "learning_rate": 3.9364636422052046e-05, "loss": 0.0774, "step": 35390 }, { "action_loss": 0.0029117511585354805, "epoch": 31.825539568345324, "step": 35390 }, { "epoch": 31.825539568345324, "step": 35390, "torque_loss": 0.09000823646783829 }, { "epoch": 31.834532374100718, "grad_norm": 0.2642801105976105, "learning_rate": 3.933771084273828e-05, "loss": 0.073, "step": 35400 }, { "action_loss": 0.006750606000423431, "epoch": 31.834532374100718, "step": 35400 }, { "epoch": 31.834532374100718, "step": 35400, "torque_loss": 0.140900656580925 }, { "epoch": 31.843525179856115, "grad_norm": 0.4126783311367035, "learning_rate": 3.931078850234643e-05, "loss": 0.0644, "step": 35410 }, { "action_loss": 0.004437337163835764, "epoch": 31.843525179856115, "step": 35410 }, { "epoch": 31.843525179856115, "step": 35410, "torque_loss": 0.07861251384019852 }, { "epoch": 31.85251798561151, "grad_norm": 0.37724050879478455, "learning_rate": 3.928386940905483e-05, "loss": 0.0529, "step": 35420 }, { "action_loss": 0.005642462521791458, "epoch": 31.85251798561151, "step": 35420 }, { "epoch": 31.85251798561151, "step": 35420, "torque_loss": 0.07644098997116089 }, { "epoch": 31.861510791366907, "grad_norm": 0.25699591636657715, "learning_rate": 3.925695357104073e-05, "loss": 0.0493, "step": 35430 }, { "action_loss": 0.00899561122059822, "epoch": 31.861510791366907, "step": 35430 }, { "epoch": 31.861510791366907, "step": 35430, "torque_loss": 0.18242354691028595 }, { "epoch": 31.8705035971223, "grad_norm": 0.34117162227630615, "learning_rate": 3.923004099648049e-05, "loss": 0.0671, "step": 35440 }, { "action_loss": 0.0016871989937499166, "epoch": 31.8705035971223, "step": 35440 }, { "epoch": 31.8705035971223, "step": 35440, "torque_loss": 0.0753808319568634 }, { "epoch": 31.8794964028777, "grad_norm": 0.2295457422733307, "learning_rate": 3.920313169354944e-05, "loss": 0.0547, "step": 35450 }, { "action_loss": 0.001938045141287148, "epoch": 31.8794964028777, "step": 35450 }, { "epoch": 31.8794964028777, "step": 35450, "torque_loss": 0.07509761303663254 }, { "epoch": 31.888489208633093, "grad_norm": 0.2861594557762146, "learning_rate": 3.9176225670421897e-05, "loss": 0.0743, "step": 35460 }, { "action_loss": 0.0026558376848697662, "epoch": 31.888489208633093, "step": 35460 }, { "epoch": 31.888489208633093, "step": 35460, "torque_loss": 0.08582381159067154 }, { "epoch": 31.89748201438849, "grad_norm": 0.24590623378753662, "learning_rate": 3.9149322935271224e-05, "loss": 0.0588, "step": 35470 }, { "action_loss": 0.01170958299189806, "epoch": 31.89748201438849, "step": 35470 }, { "epoch": 31.89748201438849, "step": 35470, "torque_loss": 0.09460270404815674 }, { "epoch": 31.906474820143885, "grad_norm": 0.4043864905834198, "learning_rate": 3.9122423496269725e-05, "loss": 0.0633, "step": 35480 }, { "action_loss": 0.0018759999657049775, "epoch": 31.906474820143885, "step": 35480 }, { "epoch": 31.906474820143885, "step": 35480, "torque_loss": 0.048510149121284485 }, { "epoch": 31.915467625899282, "grad_norm": 0.37305134534835815, "learning_rate": 3.909552736158877e-05, "loss": 0.0604, "step": 35490 }, { "action_loss": 0.003921728115528822, "epoch": 31.915467625899282, "step": 35490 }, { "epoch": 31.915467625899282, "step": 35490, "torque_loss": 0.09800800681114197 }, { "epoch": 31.924460431654676, "grad_norm": 0.24162018299102783, "learning_rate": 3.90686345393987e-05, "loss": 0.0518, "step": 35500 }, { "action_loss": 0.001817291951738298, "epoch": 31.924460431654676, "step": 35500 }, { "epoch": 31.924460431654676, "step": 35500, "torque_loss": 0.07873290777206421 }, { "epoch": 31.93345323741007, "grad_norm": 0.22832979261875153, "learning_rate": 3.9041745037868816e-05, "loss": 0.0516, "step": 35510 }, { "action_loss": 0.0021279852371662855, "epoch": 31.93345323741007, "step": 35510 }, { "epoch": 31.93345323741007, "step": 35510, "torque_loss": 0.06796035915613174 }, { "epoch": 31.942446043165468, "grad_norm": 0.3429839015007019, "learning_rate": 3.9014858865167465e-05, "loss": 0.0684, "step": 35520 }, { "action_loss": 0.005061831790953875, "epoch": 31.942446043165468, "step": 35520 }, { "epoch": 31.942446043165468, "step": 35520, "torque_loss": 0.09408923238515854 }, { "epoch": 31.951438848920862, "grad_norm": 0.33306583762168884, "learning_rate": 3.8987976029461935e-05, "loss": 0.0714, "step": 35530 }, { "action_loss": 0.0028288334142416716, "epoch": 31.951438848920862, "step": 35530 }, { "epoch": 31.951438848920862, "step": 35530, "torque_loss": 0.07577425986528397 }, { "epoch": 31.96043165467626, "grad_norm": 0.26958581805229187, "learning_rate": 3.896109653891853e-05, "loss": 0.0531, "step": 35540 }, { "action_loss": 0.004681373946368694, "epoch": 31.96043165467626, "step": 35540 }, { "epoch": 31.96043165467626, "step": 35540, "torque_loss": 0.07840388268232346 }, { "epoch": 31.969424460431654, "grad_norm": 0.2591448724269867, "learning_rate": 3.893422040170254e-05, "loss": 0.0475, "step": 35550 }, { "action_loss": 0.007428538054227829, "epoch": 31.969424460431654, "step": 35550 }, { "epoch": 31.969424460431654, "step": 35550, "torque_loss": 0.09882930666208267 }, { "epoch": 31.97841726618705, "grad_norm": 0.26250436902046204, "learning_rate": 3.8907347625978207e-05, "loss": 0.0707, "step": 35560 }, { "action_loss": 0.012482539750635624, "epoch": 31.97841726618705, "step": 35560 }, { "epoch": 31.97841726618705, "step": 35560, "torque_loss": 0.18106169998645782 }, { "epoch": 31.987410071942445, "grad_norm": 0.32673975825309753, "learning_rate": 3.88804782199088e-05, "loss": 0.0601, "step": 35570 }, { "action_loss": 0.008304803632199764, "epoch": 31.987410071942445, "step": 35570 }, { "epoch": 31.987410071942445, "step": 35570, "torque_loss": 0.12349053472280502 }, { "epoch": 31.996402877697843, "grad_norm": 0.38454556465148926, "learning_rate": 3.8853612191656495e-05, "loss": 0.0521, "step": 35580 }, { "action_loss": 0.0030290994327515364, "epoch": 31.996402877697843, "step": 35580 }, { "epoch": 31.996402877697843, "step": 35580, "torque_loss": 0.10027690976858139 }, { "epoch": 32.00539568345324, "grad_norm": 0.2484116554260254, "learning_rate": 3.88267495493825e-05, "loss": 0.0534, "step": 35590 }, { "action_loss": 0.0026542663108557463, "epoch": 32.00539568345324, "step": 35590 }, { "epoch": 32.00539568345324, "step": 35590, "torque_loss": 0.06427865475416183 }, { "epoch": 32.014388489208635, "grad_norm": 0.3079954981803894, "learning_rate": 3.8799890301247004e-05, "loss": 0.0688, "step": 35600 }, { "action_loss": 0.01675603538751602, "epoch": 32.014388489208635, "step": 35600 }, { "epoch": 32.014388489208635, "step": 35600, "torque_loss": 0.12367454916238785 }, { "epoch": 32.023381294964025, "grad_norm": 0.28709882497787476, "learning_rate": 3.8773034455409096e-05, "loss": 0.0597, "step": 35610 }, { "action_loss": 0.010551069863140583, "epoch": 32.023381294964025, "step": 35610 }, { "epoch": 32.023381294964025, "step": 35610, "torque_loss": 0.15305458009243011 }, { "epoch": 32.03237410071942, "grad_norm": 0.3234317898750305, "learning_rate": 3.8746182020026904e-05, "loss": 0.0613, "step": 35620 }, { "action_loss": 0.005535243544727564, "epoch": 32.03237410071942, "step": 35620 }, { "epoch": 32.03237410071942, "step": 35620, "torque_loss": 0.09212088584899902 }, { "epoch": 32.04136690647482, "grad_norm": 0.3160717785358429, "learning_rate": 3.871933300325745e-05, "loss": 0.0529, "step": 35630 }, { "action_loss": 0.004500210750848055, "epoch": 32.04136690647482, "step": 35630 }, { "epoch": 32.04136690647482, "step": 35630, "torque_loss": 0.10059712082147598 }, { "epoch": 32.05035971223022, "grad_norm": 0.38315898180007935, "learning_rate": 3.869248741325679e-05, "loss": 0.0642, "step": 35640 }, { "action_loss": 0.004927635658532381, "epoch": 32.05035971223022, "step": 35640 }, { "epoch": 32.05035971223022, "step": 35640, "torque_loss": 0.09322161227464676 }, { "epoch": 32.05935251798561, "grad_norm": 0.27592602372169495, "learning_rate": 3.866564525817992e-05, "loss": 0.0526, "step": 35650 }, { "action_loss": 0.006050774361938238, "epoch": 32.05935251798561, "step": 35650 }, { "epoch": 32.05935251798561, "step": 35650, "torque_loss": 0.09293007850646973 }, { "epoch": 32.068345323741006, "grad_norm": 0.22634729743003845, "learning_rate": 3.8638806546180725e-05, "loss": 0.058, "step": 35660 }, { "action_loss": 0.0022437297739088535, "epoch": 32.068345323741006, "step": 35660 }, { "epoch": 32.068345323741006, "step": 35660, "torque_loss": 0.0815233364701271 }, { "epoch": 32.077338129496404, "grad_norm": 0.3500893712043762, "learning_rate": 3.861197128541213e-05, "loss": 0.0684, "step": 35670 }, { "action_loss": 0.007673824671655893, "epoch": 32.077338129496404, "step": 35670 }, { "epoch": 32.077338129496404, "step": 35670, "torque_loss": 0.09772006422281265 }, { "epoch": 32.0863309352518, "grad_norm": 0.21908894181251526, "learning_rate": 3.858513948402599e-05, "loss": 0.0651, "step": 35680 }, { "action_loss": 0.0054310038685798645, "epoch": 32.0863309352518, "step": 35680 }, { "epoch": 32.0863309352518, "step": 35680, "torque_loss": 0.10643590241670609 }, { "epoch": 32.09532374100719, "grad_norm": 0.29895949363708496, "learning_rate": 3.8558311150173077e-05, "loss": 0.0835, "step": 35690 }, { "action_loss": 0.004206227604299784, "epoch": 32.09532374100719, "step": 35690 }, { "epoch": 32.09532374100719, "step": 35690, "torque_loss": 0.10960084199905396 }, { "epoch": 32.10431654676259, "grad_norm": 0.2651638090610504, "learning_rate": 3.853148629200312e-05, "loss": 0.0681, "step": 35700 }, { "action_loss": 0.0020580824930220842, "epoch": 32.10431654676259, "step": 35700 }, { "epoch": 32.10431654676259, "step": 35700, "torque_loss": 0.06510689854621887 }, { "epoch": 32.11330935251799, "grad_norm": 0.2797759771347046, "learning_rate": 3.850466491766482e-05, "loss": 0.0602, "step": 35710 }, { "action_loss": 0.005464175716042519, "epoch": 32.11330935251799, "step": 35710 }, { "epoch": 32.11330935251799, "step": 35710, "torque_loss": 0.07410752773284912 }, { "epoch": 32.12230215827338, "grad_norm": 0.37195277214050293, "learning_rate": 3.847784703530583e-05, "loss": 0.0509, "step": 35720 }, { "action_loss": 0.00397942028939724, "epoch": 32.12230215827338, "step": 35720 }, { "epoch": 32.12230215827338, "step": 35720, "torque_loss": 0.08147724717855453 }, { "epoch": 32.131294964028775, "grad_norm": 0.2609516978263855, "learning_rate": 3.845103265307266e-05, "loss": 0.0594, "step": 35730 }, { "action_loss": 0.001990770921111107, "epoch": 32.131294964028775, "step": 35730 }, { "epoch": 32.131294964028775, "step": 35730, "torque_loss": 0.0841192901134491 }, { "epoch": 32.14028776978417, "grad_norm": 0.27439069747924805, "learning_rate": 3.842422177911086e-05, "loss": 0.053, "step": 35740 }, { "action_loss": 0.005345831159502268, "epoch": 32.14028776978417, "step": 35740 }, { "epoch": 32.14028776978417, "step": 35740, "torque_loss": 0.07961393147706985 }, { "epoch": 32.14928057553957, "grad_norm": 0.2726941704750061, "learning_rate": 3.8397414421564826e-05, "loss": 0.0671, "step": 35750 }, { "action_loss": 0.004341829102486372, "epoch": 32.14928057553957, "step": 35750 }, { "epoch": 32.14928057553957, "step": 35750, "torque_loss": 0.08356201648712158 }, { "epoch": 32.15827338129496, "grad_norm": 0.3610464632511139, "learning_rate": 3.8370610588577935e-05, "loss": 0.0627, "step": 35760 }, { "action_loss": 0.0054461113177239895, "epoch": 32.15827338129496, "step": 35760 }, { "epoch": 32.15827338129496, "step": 35760, "torque_loss": 0.09354569762945175 }, { "epoch": 32.16726618705036, "grad_norm": 0.2528301477432251, "learning_rate": 3.834381028829251e-05, "loss": 0.0536, "step": 35770 }, { "action_loss": 0.005276754032820463, "epoch": 32.16726618705036, "step": 35770 }, { "epoch": 32.16726618705036, "step": 35770, "torque_loss": 0.09157904982566833 }, { "epoch": 32.17625899280576, "grad_norm": 0.19315695762634277, "learning_rate": 3.8317013528849745e-05, "loss": 0.0516, "step": 35780 }, { "action_loss": 0.004053207114338875, "epoch": 32.17625899280576, "step": 35780 }, { "epoch": 32.17625899280576, "step": 35780, "torque_loss": 0.09890294075012207 }, { "epoch": 32.185251798561154, "grad_norm": 0.33175817131996155, "learning_rate": 3.8290220318389815e-05, "loss": 0.0784, "step": 35790 }, { "action_loss": 0.00211123819462955, "epoch": 32.185251798561154, "step": 35790 }, { "epoch": 32.185251798561154, "step": 35790, "torque_loss": 0.07684404402971268 }, { "epoch": 32.194244604316545, "grad_norm": 0.27749043703079224, "learning_rate": 3.8263430665051746e-05, "loss": 0.0495, "step": 35800 }, { "action_loss": 0.00318780355155468, "epoch": 32.194244604316545, "step": 35800 }, { "epoch": 32.194244604316545, "step": 35800, "torque_loss": 0.09279713779687881 }, { "epoch": 32.20323741007194, "grad_norm": 0.28206560015678406, "learning_rate": 3.8236644576973554e-05, "loss": 0.0535, "step": 35810 }, { "action_loss": 0.003263951977714896, "epoch": 32.20323741007194, "step": 35810 }, { "epoch": 32.20323741007194, "step": 35810, "torque_loss": 0.07581052929162979 }, { "epoch": 32.21223021582734, "grad_norm": 0.20962217450141907, "learning_rate": 3.820986206229217e-05, "loss": 0.0572, "step": 35820 }, { "action_loss": 0.006685425061732531, "epoch": 32.21223021582734, "step": 35820 }, { "epoch": 32.21223021582734, "step": 35820, "torque_loss": 0.12075433880090714 }, { "epoch": 32.22122302158273, "grad_norm": 0.3077782690525055, "learning_rate": 3.8183083129143384e-05, "loss": 0.0531, "step": 35830 }, { "action_loss": 0.0030366850551217794, "epoch": 32.22122302158273, "step": 35830 }, { "epoch": 32.22122302158273, "step": 35830, "torque_loss": 0.0709196999669075 }, { "epoch": 32.23021582733813, "grad_norm": 0.24274948239326477, "learning_rate": 3.815630778566193e-05, "loss": 0.0801, "step": 35840 }, { "action_loss": 0.007901065982878208, "epoch": 32.23021582733813, "step": 35840 }, { "epoch": 32.23021582733813, "step": 35840, "torque_loss": 0.11350494623184204 }, { "epoch": 32.239208633093526, "grad_norm": 0.2690768241882324, "learning_rate": 3.812953603998145e-05, "loss": 0.065, "step": 35850 }, { "action_loss": 0.0021226259414106607, "epoch": 32.239208633093526, "step": 35850 }, { "epoch": 32.239208633093526, "step": 35850, "torque_loss": 0.05414343997836113 }, { "epoch": 32.24820143884892, "grad_norm": 0.3140333294868469, "learning_rate": 3.8102767900234504e-05, "loss": 0.0563, "step": 35860 }, { "action_loss": 0.0023765244986861944, "epoch": 32.24820143884892, "step": 35860 }, { "epoch": 32.24820143884892, "step": 35860, "torque_loss": 0.08057127147912979 }, { "epoch": 32.257194244604314, "grad_norm": 0.29232653975486755, "learning_rate": 3.807600337455256e-05, "loss": 0.0618, "step": 35870 }, { "action_loss": 0.01035669818520546, "epoch": 32.257194244604314, "step": 35870 }, { "epoch": 32.257194244604314, "step": 35870, "torque_loss": 0.12166806310415268 }, { "epoch": 32.26618705035971, "grad_norm": 0.31599944829940796, "learning_rate": 3.804924247106593e-05, "loss": 0.0634, "step": 35880 }, { "action_loss": 0.007596049923449755, "epoch": 32.26618705035971, "step": 35880 }, { "epoch": 32.26618705035971, "step": 35880, "torque_loss": 0.11065343022346497 }, { "epoch": 32.27517985611511, "grad_norm": 0.36169663071632385, "learning_rate": 3.8022485197903925e-05, "loss": 0.0606, "step": 35890 }, { "action_loss": 0.0013914518058300018, "epoch": 32.27517985611511, "step": 35890 }, { "epoch": 32.27517985611511, "step": 35890, "torque_loss": 0.04738990589976311 }, { "epoch": 32.28417266187051, "grad_norm": 0.2603940963745117, "learning_rate": 3.799573156319464e-05, "loss": 0.0559, "step": 35900 }, { "action_loss": 0.008018615655601025, "epoch": 32.28417266187051, "step": 35900 }, { "epoch": 32.28417266187051, "step": 35900, "torque_loss": 0.12455106526613235 }, { "epoch": 32.2931654676259, "grad_norm": 0.30838844180107117, "learning_rate": 3.796898157506515e-05, "loss": 0.0599, "step": 35910 }, { "action_loss": 0.0027411512564867735, "epoch": 32.2931654676259, "step": 35910 }, { "epoch": 32.2931654676259, "step": 35910, "torque_loss": 0.05903966352343559 }, { "epoch": 32.302158273381295, "grad_norm": 0.28175368905067444, "learning_rate": 3.794223524164143e-05, "loss": 0.0595, "step": 35920 }, { "action_loss": 0.001529472297988832, "epoch": 32.302158273381295, "step": 35920 }, { "epoch": 32.302158273381295, "step": 35920, "torque_loss": 0.05097358301281929 }, { "epoch": 32.31115107913669, "grad_norm": 0.2894609570503235, "learning_rate": 3.7915492571048245e-05, "loss": 0.0595, "step": 35930 }, { "action_loss": 0.004056012257933617, "epoch": 32.31115107913669, "step": 35930 }, { "epoch": 32.31115107913669, "step": 35930, "torque_loss": 0.06094207242131233 }, { "epoch": 32.32014388489208, "grad_norm": 0.3336101174354553, "learning_rate": 3.788875357140937e-05, "loss": 0.068, "step": 35940 }, { "action_loss": 0.010716182179749012, "epoch": 32.32014388489208, "step": 35940 }, { "epoch": 32.32014388489208, "step": 35940, "torque_loss": 0.11790785938501358 }, { "epoch": 32.32913669064748, "grad_norm": 0.3216100335121155, "learning_rate": 3.786201825084736e-05, "loss": 0.0604, "step": 35950 }, { "action_loss": 0.0019886994268745184, "epoch": 32.32913669064748, "step": 35950 }, { "epoch": 32.32913669064748, "step": 35950, "torque_loss": 0.07273276150226593 }, { "epoch": 32.33812949640288, "grad_norm": 0.2147853970527649, "learning_rate": 3.783528661748372e-05, "loss": 0.053, "step": 35960 }, { "action_loss": 0.0036992516834288836, "epoch": 32.33812949640288, "step": 35960 }, { "epoch": 32.33812949640288, "step": 35960, "torque_loss": 0.08059477061033249 }, { "epoch": 32.347122302158276, "grad_norm": 0.3908962905406952, "learning_rate": 3.780855867943882e-05, "loss": 0.0664, "step": 35970 }, { "action_loss": 0.0016517852200195193, "epoch": 32.347122302158276, "step": 35970 }, { "epoch": 32.347122302158276, "step": 35970, "torque_loss": 0.05338958278298378 }, { "epoch": 32.356115107913666, "grad_norm": 0.3038104176521301, "learning_rate": 3.778183444483189e-05, "loss": 0.0559, "step": 35980 }, { "action_loss": 0.006018077488988638, "epoch": 32.356115107913666, "step": 35980 }, { "epoch": 32.356115107913666, "step": 35980, "torque_loss": 0.08633601665496826 }, { "epoch": 32.365107913669064, "grad_norm": 0.2989422380924225, "learning_rate": 3.775511392178108e-05, "loss": 0.0596, "step": 35990 }, { "action_loss": 0.004057139623910189, "epoch": 32.365107913669064, "step": 35990 }, { "epoch": 32.365107913669064, "step": 35990, "torque_loss": 0.07744041085243225 }, { "epoch": 32.37410071942446, "grad_norm": 0.2753714919090271, "learning_rate": 3.772839711840332e-05, "loss": 0.0636, "step": 36000 }, { "action_loss": 0.005376704502850771, "epoch": 32.37410071942446, "step": 36000 }, { "epoch": 32.37410071942446, "step": 36000, "torque_loss": 0.11541193723678589 }, { "epoch": 32.38309352517986, "grad_norm": 0.32054299116134644, "learning_rate": 3.7701684042814515e-05, "loss": 0.0645, "step": 36010 }, { "action_loss": 0.0096841836348176, "epoch": 32.38309352517986, "step": 36010 }, { "epoch": 32.38309352517986, "step": 36010, "torque_loss": 0.09469833225011826 }, { "epoch": 32.39208633093525, "grad_norm": 0.2946270704269409, "learning_rate": 3.76749747031294e-05, "loss": 0.0544, "step": 36020 }, { "action_loss": 0.03822534158825874, "epoch": 32.39208633093525, "step": 36020 }, { "epoch": 32.39208633093525, "step": 36020, "torque_loss": 0.16954898834228516 }, { "epoch": 32.40107913669065, "grad_norm": 0.37994682788848877, "learning_rate": 3.764826910746152e-05, "loss": 0.0762, "step": 36030 }, { "action_loss": 0.002205610042437911, "epoch": 32.40107913669065, "step": 36030 }, { "epoch": 32.40107913669065, "step": 36030, "torque_loss": 0.07842446118593216 }, { "epoch": 32.410071942446045, "grad_norm": 0.2863500118255615, "learning_rate": 3.762156726392338e-05, "loss": 0.0527, "step": 36040 }, { "action_loss": 0.003212983487173915, "epoch": 32.410071942446045, "step": 36040 }, { "epoch": 32.410071942446045, "step": 36040, "torque_loss": 0.06960678100585938 }, { "epoch": 32.419064748201436, "grad_norm": 0.3270380198955536, "learning_rate": 3.759486918062625e-05, "loss": 0.0681, "step": 36050 }, { "action_loss": 0.004639493767172098, "epoch": 32.419064748201436, "step": 36050 }, { "epoch": 32.419064748201436, "step": 36050, "torque_loss": 0.08863082528114319 }, { "epoch": 32.42805755395683, "grad_norm": 0.3766387403011322, "learning_rate": 3.756817486568033e-05, "loss": 0.0621, "step": 36060 }, { "action_loss": 0.0013069696724414825, "epoch": 32.42805755395683, "step": 36060 }, { "epoch": 32.42805755395683, "step": 36060, "torque_loss": 0.06467255204916 }, { "epoch": 32.43705035971223, "grad_norm": 0.3657245337963104, "learning_rate": 3.7541484327194654e-05, "loss": 0.0566, "step": 36070 }, { "action_loss": 0.0017289273673668504, "epoch": 32.43705035971223, "step": 36070 }, { "epoch": 32.43705035971223, "step": 36070, "torque_loss": 0.05289437249302864 }, { "epoch": 32.44604316546763, "grad_norm": 0.22091223299503326, "learning_rate": 3.751479757327707e-05, "loss": 0.0612, "step": 36080 }, { "action_loss": 0.002187491627410054, "epoch": 32.44604316546763, "step": 36080 }, { "epoch": 32.44604316546763, "step": 36080, "torque_loss": 0.0741463154554367 }, { "epoch": 32.45503597122302, "grad_norm": 0.29234156012535095, "learning_rate": 3.7488114612034345e-05, "loss": 0.0604, "step": 36090 }, { "action_loss": 0.001542361336760223, "epoch": 32.45503597122302, "step": 36090 }, { "epoch": 32.45503597122302, "step": 36090, "torque_loss": 0.07953622192144394 }, { "epoch": 32.46402877697842, "grad_norm": 0.3126681447029114, "learning_rate": 3.7461435451572044e-05, "loss": 0.065, "step": 36100 }, { "action_loss": 0.008211719803512096, "epoch": 32.46402877697842, "step": 36100 }, { "epoch": 32.46402877697842, "step": 36100, "torque_loss": 0.1395581215620041 }, { "epoch": 32.473021582733814, "grad_norm": 0.23177523910999298, "learning_rate": 3.743476009999459e-05, "loss": 0.0585, "step": 36110 }, { "action_loss": 0.004306246992200613, "epoch": 32.473021582733814, "step": 36110 }, { "epoch": 32.473021582733814, "step": 36110, "torque_loss": 0.07016569375991821 }, { "epoch": 32.48201438848921, "grad_norm": 0.2219744175672531, "learning_rate": 3.7408088565405245e-05, "loss": 0.0621, "step": 36120 }, { "action_loss": 0.0026952214539051056, "epoch": 32.48201438848921, "step": 36120 }, { "epoch": 32.48201438848921, "step": 36120, "torque_loss": 0.09220924228429794 }, { "epoch": 32.4910071942446, "grad_norm": 0.2056090086698532, "learning_rate": 3.738142085590612e-05, "loss": 0.0542, "step": 36130 }, { "action_loss": 0.0023073998745530844, "epoch": 32.4910071942446, "step": 36130 }, { "epoch": 32.4910071942446, "step": 36130, "torque_loss": 0.071440190076828 }, { "epoch": 32.5, "grad_norm": 0.250857412815094, "learning_rate": 3.7354756979598194e-05, "loss": 0.0505, "step": 36140 }, { "action_loss": 0.0037971045821905136, "epoch": 32.5, "step": 36140 }, { "epoch": 32.5, "step": 36140, "torque_loss": 0.1020142212510109 }, { "epoch": 32.5089928057554, "grad_norm": 0.30570000410079956, "learning_rate": 3.7328096944581187e-05, "loss": 0.0532, "step": 36150 }, { "action_loss": 0.001231937319971621, "epoch": 32.5089928057554, "step": 36150 }, { "epoch": 32.5089928057554, "step": 36150, "torque_loss": 0.04615561291575432 }, { "epoch": 32.51798561151079, "grad_norm": 0.356900155544281, "learning_rate": 3.730144075895377e-05, "loss": 0.0636, "step": 36160 }, { "action_loss": 0.01476981956511736, "epoch": 32.51798561151079, "step": 36160 }, { "epoch": 32.51798561151079, "step": 36160, "torque_loss": 0.15486548840999603 }, { "epoch": 32.526978417266186, "grad_norm": 0.36462926864624023, "learning_rate": 3.727478843081335e-05, "loss": 0.0556, "step": 36170 }, { "action_loss": 0.0016950791468843818, "epoch": 32.526978417266186, "step": 36170 }, { "epoch": 32.526978417266186, "step": 36170, "torque_loss": 0.06485513597726822 }, { "epoch": 32.53597122302158, "grad_norm": 0.2794135808944702, "learning_rate": 3.72481399682562e-05, "loss": 0.0496, "step": 36180 }, { "action_loss": 0.04228697344660759, "epoch": 32.53597122302158, "step": 36180 }, { "epoch": 32.53597122302158, "step": 36180, "torque_loss": 0.2339072823524475 }, { "epoch": 32.54496402877698, "grad_norm": 0.3161049783229828, "learning_rate": 3.722149537937747e-05, "loss": 0.0646, "step": 36190 }, { "action_loss": 0.0032651189249008894, "epoch": 32.54496402877698, "step": 36190 }, { "epoch": 32.54496402877698, "step": 36190, "torque_loss": 0.07076802104711533 }, { "epoch": 32.55395683453237, "grad_norm": 0.35013043880462646, "learning_rate": 3.7194854672271015e-05, "loss": 0.0519, "step": 36200 }, { "action_loss": 0.012702112086117268, "epoch": 32.55395683453237, "step": 36200 }, { "epoch": 32.55395683453237, "step": 36200, "torque_loss": 0.11672037839889526 }, { "epoch": 32.56294964028777, "grad_norm": 0.29825523495674133, "learning_rate": 3.7168217855029644e-05, "loss": 0.0649, "step": 36210 }, { "action_loss": 0.004653651267290115, "epoch": 32.56294964028777, "step": 36210 }, { "epoch": 32.56294964028777, "step": 36210, "torque_loss": 0.12487256526947021 }, { "epoch": 32.57194244604317, "grad_norm": 0.3297024071216583, "learning_rate": 3.7141584935744856e-05, "loss": 0.06, "step": 36220 }, { "action_loss": 0.006387906614691019, "epoch": 32.57194244604317, "step": 36220 }, { "epoch": 32.57194244604317, "step": 36220, "torque_loss": 0.1302887350320816 }, { "epoch": 32.580935251798564, "grad_norm": 0.3198273181915283, "learning_rate": 3.7114955922507055e-05, "loss": 0.0532, "step": 36230 }, { "action_loss": 0.001019024639390409, "epoch": 32.580935251798564, "step": 36230 }, { "epoch": 32.580935251798564, "step": 36230, "torque_loss": 0.03975117951631546 }, { "epoch": 32.589928057553955, "grad_norm": 0.41955307126045227, "learning_rate": 3.708833082340545e-05, "loss": 0.0695, "step": 36240 }, { "action_loss": 0.0015011145733296871, "epoch": 32.589928057553955, "step": 36240 }, { "epoch": 32.589928057553955, "step": 36240, "torque_loss": 0.03897419571876526 }, { "epoch": 32.59892086330935, "grad_norm": 0.3846559226512909, "learning_rate": 3.7061709646528034e-05, "loss": 0.044, "step": 36250 }, { "action_loss": 0.007803704123944044, "epoch": 32.59892086330935, "step": 36250 }, { "epoch": 32.59892086330935, "step": 36250, "torque_loss": 0.11791666597127914 }, { "epoch": 32.60791366906475, "grad_norm": 0.3056507110595703, "learning_rate": 3.7035092399961604e-05, "loss": 0.0568, "step": 36260 }, { "action_loss": 0.0042247348465025425, "epoch": 32.60791366906475, "step": 36260 }, { "epoch": 32.60791366906475, "step": 36260, "torque_loss": 0.06757650524377823 }, { "epoch": 32.61690647482014, "grad_norm": 0.2941071689128876, "learning_rate": 3.700847909179177e-05, "loss": 0.0612, "step": 36270 }, { "action_loss": 0.00704210763797164, "epoch": 32.61690647482014, "step": 36270 }, { "epoch": 32.61690647482014, "step": 36270, "torque_loss": 0.12353792041540146 }, { "epoch": 32.62589928057554, "grad_norm": 0.3905763328075409, "learning_rate": 3.698186973010297e-05, "loss": 0.069, "step": 36280 }, { "action_loss": 0.008031453937292099, "epoch": 32.62589928057554, "step": 36280 }, { "epoch": 32.62589928057554, "step": 36280, "torque_loss": 0.11706504970788956 }, { "epoch": 32.634892086330936, "grad_norm": 0.28283312916755676, "learning_rate": 3.695526432297844e-05, "loss": 0.0545, "step": 36290 }, { "action_loss": 0.0046074590645730495, "epoch": 32.634892086330936, "step": 36290 }, { "epoch": 32.634892086330936, "step": 36290, "torque_loss": 0.127401202917099 }, { "epoch": 32.643884892086334, "grad_norm": 0.2827574908733368, "learning_rate": 3.692866287850017e-05, "loss": 0.0493, "step": 36300 }, { "action_loss": 0.008419301360845566, "epoch": 32.643884892086334, "step": 36300 }, { "epoch": 32.643884892086334, "step": 36300, "torque_loss": 0.1059398278594017 }, { "epoch": 32.652877697841724, "grad_norm": 0.2872828543186188, "learning_rate": 3.6902065404749006e-05, "loss": 0.0566, "step": 36310 }, { "action_loss": 0.01213775109499693, "epoch": 32.652877697841724, "step": 36310 }, { "epoch": 32.652877697841724, "step": 36310, "torque_loss": 0.12507615983486176 }, { "epoch": 32.66187050359712, "grad_norm": 0.37361690402030945, "learning_rate": 3.6875471909804516e-05, "loss": 0.0705, "step": 36320 }, { "action_loss": 0.0011489694006741047, "epoch": 32.66187050359712, "step": 36320 }, { "epoch": 32.66187050359712, "step": 36320, "torque_loss": 0.04929736256599426 }, { "epoch": 32.67086330935252, "grad_norm": 0.22583742439746857, "learning_rate": 3.6848882401745135e-05, "loss": 0.0404, "step": 36330 }, { "action_loss": 0.01160389930009842, "epoch": 32.67086330935252, "step": 36330 }, { "epoch": 32.67086330935252, "step": 36330, "torque_loss": 0.16064776480197906 }, { "epoch": 32.67985611510792, "grad_norm": 0.27345189452171326, "learning_rate": 3.682229688864806e-05, "loss": 0.0648, "step": 36340 }, { "action_loss": 0.0016327761113643646, "epoch": 32.67985611510792, "step": 36340 }, { "epoch": 32.67985611510792, "step": 36340, "torque_loss": 0.06059728190302849 }, { "epoch": 32.68884892086331, "grad_norm": 0.26430389285087585, "learning_rate": 3.6795715378589235e-05, "loss": 0.053, "step": 36350 }, { "action_loss": 0.0014630391960963607, "epoch": 32.68884892086331, "step": 36350 }, { "epoch": 32.68884892086331, "step": 36350, "torque_loss": 0.07496315985918045 }, { "epoch": 32.697841726618705, "grad_norm": 0.2878018617630005, "learning_rate": 3.676913787964345e-05, "loss": 0.0695, "step": 36360 }, { "action_loss": 0.0022253040224313736, "epoch": 32.697841726618705, "step": 36360 }, { "epoch": 32.697841726618705, "step": 36360, "torque_loss": 0.05956282094120979 }, { "epoch": 32.7068345323741, "grad_norm": 0.2980155050754547, "learning_rate": 3.674256439988423e-05, "loss": 0.0664, "step": 36370 }, { "action_loss": 0.003621201729401946, "epoch": 32.7068345323741, "step": 36370 }, { "epoch": 32.7068345323741, "step": 36370, "torque_loss": 0.066524937748909 }, { "epoch": 32.71582733812949, "grad_norm": 0.3134368062019348, "learning_rate": 3.6715994947383904e-05, "loss": 0.0558, "step": 36380 }, { "action_loss": 0.0016210906906053424, "epoch": 32.71582733812949, "step": 36380 }, { "epoch": 32.71582733812949, "step": 36380, "torque_loss": 0.06372586637735367 }, { "epoch": 32.72482014388489, "grad_norm": 0.30455368757247925, "learning_rate": 3.668942953021357e-05, "loss": 0.0487, "step": 36390 }, { "action_loss": 0.012240457348525524, "epoch": 32.72482014388489, "step": 36390 }, { "epoch": 32.72482014388489, "step": 36390, "torque_loss": 0.11704951524734497 }, { "epoch": 32.73381294964029, "grad_norm": 0.2633321285247803, "learning_rate": 3.66628681564431e-05, "loss": 0.053, "step": 36400 }, { "action_loss": 0.0019357810961082578, "epoch": 32.73381294964029, "step": 36400 }, { "epoch": 32.73381294964029, "step": 36400, "torque_loss": 0.05715416371822357 }, { "epoch": 32.742805755395686, "grad_norm": 0.2842896580696106, "learning_rate": 3.663631083414114e-05, "loss": 0.0457, "step": 36410 }, { "action_loss": 0.0022762161679565907, "epoch": 32.742805755395686, "step": 36410 }, { "epoch": 32.742805755395686, "step": 36410, "torque_loss": 0.08425746113061905 }, { "epoch": 32.75179856115108, "grad_norm": 0.30106887221336365, "learning_rate": 3.660975757137509e-05, "loss": 0.0535, "step": 36420 }, { "action_loss": 0.0017126001184806228, "epoch": 32.75179856115108, "step": 36420 }, { "epoch": 32.75179856115108, "step": 36420, "torque_loss": 0.057321902364492416 }, { "epoch": 32.760791366906474, "grad_norm": 0.2702959179878235, "learning_rate": 3.658320837621114e-05, "loss": 0.053, "step": 36430 }, { "action_loss": 0.001684873946942389, "epoch": 32.760791366906474, "step": 36430 }, { "epoch": 32.760791366906474, "step": 36430, "torque_loss": 0.04298076033592224 }, { "epoch": 32.76978417266187, "grad_norm": 0.3744123578071594, "learning_rate": 3.655666325671426e-05, "loss": 0.0597, "step": 36440 }, { "action_loss": 0.0022958454210311174, "epoch": 32.76978417266187, "step": 36440 }, { "epoch": 32.76978417266187, "step": 36440, "torque_loss": 0.06823164224624634 }, { "epoch": 32.77877697841727, "grad_norm": 0.27826154232025146, "learning_rate": 3.65301222209481e-05, "loss": 0.052, "step": 36450 }, { "action_loss": 0.006129346787929535, "epoch": 32.77877697841727, "step": 36450 }, { "epoch": 32.77877697841727, "step": 36450, "torque_loss": 0.11861220747232437 }, { "epoch": 32.78776978417266, "grad_norm": 0.3396196663379669, "learning_rate": 3.650358527697519e-05, "loss": 0.0663, "step": 36460 }, { "action_loss": 0.0040786066092550755, "epoch": 32.78776978417266, "step": 36460 }, { "epoch": 32.78776978417266, "step": 36460, "torque_loss": 0.08230864256620407 }, { "epoch": 32.79676258992806, "grad_norm": 0.3630322515964508, "learning_rate": 3.64770524328567e-05, "loss": 0.0578, "step": 36470 }, { "action_loss": 0.010228767059743404, "epoch": 32.79676258992806, "step": 36470 }, { "epoch": 32.79676258992806, "step": 36470, "torque_loss": 0.12248816341161728 }, { "epoch": 32.805755395683455, "grad_norm": 0.2714793086051941, "learning_rate": 3.645052369665265e-05, "loss": 0.0604, "step": 36480 }, { "action_loss": 0.013707637786865234, "epoch": 32.805755395683455, "step": 36480 }, { "epoch": 32.805755395683455, "step": 36480, "torque_loss": 0.1801542043685913 }, { "epoch": 32.814748201438846, "grad_norm": 0.2604643702507019, "learning_rate": 3.6423999076421724e-05, "loss": 0.0631, "step": 36490 }, { "action_loss": 0.004012505058199167, "epoch": 32.814748201438846, "step": 36490 }, { "epoch": 32.814748201438846, "step": 36490, "torque_loss": 0.09598908573389053 }, { "epoch": 32.82374100719424, "grad_norm": 0.30558544397354126, "learning_rate": 3.639747858022142e-05, "loss": 0.0703, "step": 36500 }, { "action_loss": 0.0033761656377464533, "epoch": 32.82374100719424, "step": 36500 }, { "epoch": 32.82374100719424, "step": 36500, "torque_loss": 0.08981496840715408 }, { "epoch": 32.83273381294964, "grad_norm": 0.27503764629364014, "learning_rate": 3.637096221610799e-05, "loss": 0.0561, "step": 36510 }, { "action_loss": 0.0017052688635885715, "epoch": 32.83273381294964, "step": 36510 }, { "epoch": 32.83273381294964, "step": 36510, "torque_loss": 0.05328337475657463 }, { "epoch": 32.84172661870504, "grad_norm": 0.3536607325077057, "learning_rate": 3.634444999213638e-05, "loss": 0.0458, "step": 36520 }, { "action_loss": 0.002188299549743533, "epoch": 32.84172661870504, "step": 36520 }, { "epoch": 32.84172661870504, "step": 36520, "torque_loss": 0.0916023924946785 }, { "epoch": 32.85071942446043, "grad_norm": 0.3369126617908478, "learning_rate": 3.6317941916360296e-05, "loss": 0.064, "step": 36530 }, { "action_loss": 0.0026575420051813126, "epoch": 32.85071942446043, "step": 36530 }, { "epoch": 32.85071942446043, "step": 36530, "torque_loss": 0.0760810375213623 }, { "epoch": 32.85971223021583, "grad_norm": 0.2649470865726471, "learning_rate": 3.629143799683221e-05, "loss": 0.0506, "step": 36540 }, { "action_loss": 0.003119681030511856, "epoch": 32.85971223021583, "step": 36540 }, { "epoch": 32.85971223021583, "step": 36540, "torque_loss": 0.057361382991075516 }, { "epoch": 32.868705035971225, "grad_norm": 0.24570588767528534, "learning_rate": 3.626493824160331e-05, "loss": 0.0603, "step": 36550 }, { "action_loss": 0.00426449254155159, "epoch": 32.868705035971225, "step": 36550 }, { "epoch": 32.868705035971225, "step": 36550, "torque_loss": 0.08251482248306274 }, { "epoch": 32.87769784172662, "grad_norm": 0.23225735127925873, "learning_rate": 3.623844265872352e-05, "loss": 0.041, "step": 36560 }, { "action_loss": 0.00202669994905591, "epoch": 32.87769784172662, "step": 36560 }, { "epoch": 32.87769784172662, "step": 36560, "torque_loss": 0.04117592051625252 }, { "epoch": 32.88669064748201, "grad_norm": 0.2706722617149353, "learning_rate": 3.621195125624149e-05, "loss": 0.0598, "step": 36570 }, { "action_loss": 0.0049363733269274235, "epoch": 32.88669064748201, "step": 36570 }, { "epoch": 32.88669064748201, "step": 36570, "torque_loss": 0.10933848470449448 }, { "epoch": 32.89568345323741, "grad_norm": 0.34811335802078247, "learning_rate": 3.618546404220463e-05, "loss": 0.0657, "step": 36580 }, { "action_loss": 0.007562959101051092, "epoch": 32.89568345323741, "step": 36580 }, { "epoch": 32.89568345323741, "step": 36580, "torque_loss": 0.11469987034797668 }, { "epoch": 32.90467625899281, "grad_norm": 0.24207672476768494, "learning_rate": 3.615898102465903e-05, "loss": 0.0655, "step": 36590 }, { "action_loss": 0.004899099934846163, "epoch": 32.90467625899281, "step": 36590 }, { "epoch": 32.90467625899281, "step": 36590, "torque_loss": 0.09915494918823242 }, { "epoch": 32.9136690647482, "grad_norm": 0.2987949252128601, "learning_rate": 3.6132502211649544e-05, "loss": 0.0637, "step": 36600 }, { "action_loss": 0.0037115884479135275, "epoch": 32.9136690647482, "step": 36600 }, { "epoch": 32.9136690647482, "step": 36600, "torque_loss": 0.050602417439222336 }, { "epoch": 32.922661870503596, "grad_norm": 0.27422717213630676, "learning_rate": 3.610602761121975e-05, "loss": 0.0557, "step": 36610 }, { "action_loss": 0.0033440596889704466, "epoch": 32.922661870503596, "step": 36610 }, { "epoch": 32.922661870503596, "step": 36610, "torque_loss": 0.1207445040345192 }, { "epoch": 32.931654676258994, "grad_norm": 0.20140542089939117, "learning_rate": 3.6079557231411897e-05, "loss": 0.0574, "step": 36620 }, { "action_loss": 0.0022721216082572937, "epoch": 32.931654676258994, "step": 36620 }, { "epoch": 32.931654676258994, "step": 36620, "torque_loss": 0.0744888186454773 }, { "epoch": 32.94064748201439, "grad_norm": 0.17267370223999023, "learning_rate": 3.6053091080267035e-05, "loss": 0.0441, "step": 36630 }, { "action_loss": 0.005438920110464096, "epoch": 32.94064748201439, "step": 36630 }, { "epoch": 32.94064748201439, "step": 36630, "torque_loss": 0.09333425760269165 }, { "epoch": 32.94964028776978, "grad_norm": 0.2947958707809448, "learning_rate": 3.602662916582483e-05, "loss": 0.0459, "step": 36640 }, { "action_loss": 0.004243556875735521, "epoch": 32.94964028776978, "step": 36640 }, { "epoch": 32.94964028776978, "step": 36640, "torque_loss": 0.0968356505036354 }, { "epoch": 32.95863309352518, "grad_norm": 0.21169143915176392, "learning_rate": 3.600017149612375e-05, "loss": 0.0565, "step": 36650 }, { "action_loss": 0.001408012118190527, "epoch": 32.95863309352518, "step": 36650 }, { "epoch": 32.95863309352518, "step": 36650, "torque_loss": 0.053786102682352066 }, { "epoch": 32.96762589928058, "grad_norm": 0.27043938636779785, "learning_rate": 3.5973718079200935e-05, "loss": 0.0588, "step": 36660 }, { "action_loss": 0.0012168491957709193, "epoch": 32.96762589928058, "step": 36660 }, { "epoch": 32.96762589928058, "step": 36660, "torque_loss": 0.05194646120071411 }, { "epoch": 32.976618705035975, "grad_norm": 0.30052751302719116, "learning_rate": 3.5947268923092216e-05, "loss": 0.0512, "step": 36670 }, { "action_loss": 0.002042414853349328, "epoch": 32.976618705035975, "step": 36670 }, { "epoch": 32.976618705035975, "step": 36670, "torque_loss": 0.06165614724159241 }, { "epoch": 32.985611510791365, "grad_norm": 0.33347561955451965, "learning_rate": 3.592082403583216e-05, "loss": 0.0565, "step": 36680 }, { "action_loss": 0.0011630243388935924, "epoch": 32.985611510791365, "step": 36680 }, { "epoch": 32.985611510791365, "step": 36680, "torque_loss": 0.04807698726654053 }, { "epoch": 32.99460431654676, "grad_norm": 0.26899582147598267, "learning_rate": 3.5894383425454004e-05, "loss": 0.0546, "step": 36690 }, { "action_loss": 0.0029720377642661333, "epoch": 32.99460431654676, "step": 36690 }, { "epoch": 32.99460431654676, "step": 36690, "torque_loss": 0.0648357942700386 }, { "epoch": 33.00359712230216, "grad_norm": 0.25615203380584717, "learning_rate": 3.586794709998975e-05, "loss": 0.0416, "step": 36700 }, { "action_loss": 0.003101033391430974, "epoch": 33.00359712230216, "step": 36700 }, { "epoch": 33.00359712230216, "step": 36700, "torque_loss": 0.09014272689819336 }, { "epoch": 33.01258992805755, "grad_norm": 0.24139560759067535, "learning_rate": 3.584151506747002e-05, "loss": 0.0572, "step": 36710 }, { "action_loss": 0.0015490766381844878, "epoch": 33.01258992805755, "step": 36710 }, { "epoch": 33.01258992805755, "step": 36710, "torque_loss": 0.06072153523564339 }, { "epoch": 33.02158273381295, "grad_norm": 0.2363813817501068, "learning_rate": 3.581508733592418e-05, "loss": 0.0614, "step": 36720 }, { "action_loss": 0.014375182799994946, "epoch": 33.02158273381295, "step": 36720 }, { "epoch": 33.02158273381295, "step": 36720, "torque_loss": 0.11038383841514587 }, { "epoch": 33.030575539568346, "grad_norm": 0.23529978096485138, "learning_rate": 3.5788663913380297e-05, "loss": 0.0571, "step": 36730 }, { "action_loss": 0.0062383138574659824, "epoch": 33.030575539568346, "step": 36730 }, { "epoch": 33.030575539568346, "step": 36730, "torque_loss": 0.10670939087867737 }, { "epoch": 33.039568345323744, "grad_norm": 0.2512344419956207, "learning_rate": 3.576224480786506e-05, "loss": 0.0698, "step": 36740 }, { "action_loss": 0.003904009936377406, "epoch": 33.039568345323744, "step": 36740 }, { "epoch": 33.039568345323744, "step": 36740, "torque_loss": 0.06500057876110077 }, { "epoch": 33.048561151079134, "grad_norm": 0.27947086095809937, "learning_rate": 3.573583002740393e-05, "loss": 0.0531, "step": 36750 }, { "action_loss": 0.010288065299391747, "epoch": 33.048561151079134, "step": 36750 }, { "epoch": 33.048561151079134, "step": 36750, "torque_loss": 0.11606823652982712 }, { "epoch": 33.05755395683453, "grad_norm": 0.3089178502559662, "learning_rate": 3.570941958002103e-05, "loss": 0.0645, "step": 36760 }, { "action_loss": 0.0014816070906817913, "epoch": 33.05755395683453, "step": 36760 }, { "epoch": 33.05755395683453, "step": 36760, "torque_loss": 0.05328604206442833 }, { "epoch": 33.06654676258993, "grad_norm": 0.262704998254776, "learning_rate": 3.568301347373912e-05, "loss": 0.0513, "step": 36770 }, { "action_loss": 0.0018236618489027023, "epoch": 33.06654676258993, "step": 36770 }, { "epoch": 33.06654676258993, "step": 36770, "torque_loss": 0.05487732216715813 }, { "epoch": 33.07553956834533, "grad_norm": 0.2679978311061859, "learning_rate": 3.5656611716579726e-05, "loss": 0.0544, "step": 36780 }, { "action_loss": 0.002899470506235957, "epoch": 33.07553956834533, "step": 36780 }, { "epoch": 33.07553956834533, "step": 36780, "torque_loss": 0.07322753220796585 }, { "epoch": 33.08453237410072, "grad_norm": 0.25997090339660645, "learning_rate": 3.5630214316562946e-05, "loss": 0.0749, "step": 36790 }, { "action_loss": 0.003192286938428879, "epoch": 33.08453237410072, "step": 36790 }, { "epoch": 33.08453237410072, "step": 36790, "torque_loss": 0.08959728479385376 }, { "epoch": 33.093525179856115, "grad_norm": 0.21907176077365875, "learning_rate": 3.560382128170766e-05, "loss": 0.0496, "step": 36800 }, { "action_loss": 0.054670076817274094, "epoch": 33.093525179856115, "step": 36800 }, { "epoch": 33.093525179856115, "step": 36800, "torque_loss": 0.1166720762848854 }, { "epoch": 33.10251798561151, "grad_norm": 0.23938961327075958, "learning_rate": 3.5577432620031374e-05, "loss": 0.0607, "step": 36810 }, { "action_loss": 0.003184010274708271, "epoch": 33.10251798561151, "step": 36810 }, { "epoch": 33.10251798561151, "step": 36810, "torque_loss": 0.08658963441848755 }, { "epoch": 33.111510791366904, "grad_norm": 0.293136328458786, "learning_rate": 3.5551048339550216e-05, "loss": 0.063, "step": 36820 }, { "action_loss": 0.011178155429661274, "epoch": 33.111510791366904, "step": 36820 }, { "epoch": 33.111510791366904, "step": 36820, "torque_loss": 0.10814318805932999 }, { "epoch": 33.1205035971223, "grad_norm": 0.3234040439128876, "learning_rate": 3.55246684482791e-05, "loss": 0.0633, "step": 36830 }, { "action_loss": 0.005275169387459755, "epoch": 33.1205035971223, "step": 36830 }, { "epoch": 33.1205035971223, "step": 36830, "torque_loss": 0.09241718053817749 }, { "epoch": 33.1294964028777, "grad_norm": 0.401597261428833, "learning_rate": 3.5498292954231496e-05, "loss": 0.0524, "step": 36840 }, { "action_loss": 0.0018937239656224847, "epoch": 33.1294964028777, "step": 36840 }, { "epoch": 33.1294964028777, "step": 36840, "torque_loss": 0.06612800806760788 }, { "epoch": 33.138489208633096, "grad_norm": 0.35346633195877075, "learning_rate": 3.54719218654196e-05, "loss": 0.0574, "step": 36850 }, { "action_loss": 0.017683017998933792, "epoch": 33.138489208633096, "step": 36850 }, { "epoch": 33.138489208633096, "step": 36850, "torque_loss": 0.14216814935207367 }, { "epoch": 33.14748201438849, "grad_norm": 0.32332339882850647, "learning_rate": 3.544555518985425e-05, "loss": 0.0593, "step": 36860 }, { "action_loss": 0.0027862731367349625, "epoch": 33.14748201438849, "step": 36860 }, { "epoch": 33.14748201438849, "step": 36860, "torque_loss": 0.08079109340906143 }, { "epoch": 33.156474820143885, "grad_norm": 0.2771643102169037, "learning_rate": 3.541919293554494e-05, "loss": 0.0549, "step": 36870 }, { "action_loss": 0.0024867334868758917, "epoch": 33.156474820143885, "step": 36870 }, { "epoch": 33.156474820143885, "step": 36870, "torque_loss": 0.07365840673446655 }, { "epoch": 33.16546762589928, "grad_norm": 0.35152721405029297, "learning_rate": 3.539283511049985e-05, "loss": 0.0587, "step": 36880 }, { "action_loss": 0.008174959570169449, "epoch": 33.16546762589928, "step": 36880 }, { "epoch": 33.16546762589928, "step": 36880, "torque_loss": 0.12530146539211273 }, { "epoch": 33.17446043165467, "grad_norm": 0.35666218400001526, "learning_rate": 3.5366481722725755e-05, "loss": 0.0567, "step": 36890 }, { "action_loss": 0.014554495923221111, "epoch": 33.17446043165467, "step": 36890 }, { "epoch": 33.17446043165467, "step": 36890, "torque_loss": 0.12195201963186264 }, { "epoch": 33.18345323741007, "grad_norm": 0.3256581723690033, "learning_rate": 3.534013278022816e-05, "loss": 0.0565, "step": 36900 }, { "action_loss": 0.020276471972465515, "epoch": 33.18345323741007, "step": 36900 }, { "epoch": 33.18345323741007, "step": 36900, "torque_loss": 0.14728225767612457 }, { "epoch": 33.19244604316547, "grad_norm": 0.3444722294807434, "learning_rate": 3.531378829101113e-05, "loss": 0.0602, "step": 36910 }, { "action_loss": 0.006174067500978708, "epoch": 33.19244604316547, "step": 36910 }, { "epoch": 33.19244604316547, "step": 36910, "torque_loss": 0.07917722314596176 }, { "epoch": 33.201438848920866, "grad_norm": 0.3164653182029724, "learning_rate": 3.528744826307746e-05, "loss": 0.0627, "step": 36920 }, { "action_loss": 0.0033422072883695364, "epoch": 33.201438848920866, "step": 36920 }, { "epoch": 33.201438848920866, "step": 36920, "torque_loss": 0.09416977316141129 }, { "epoch": 33.210431654676256, "grad_norm": 0.29929640889167786, "learning_rate": 3.5261112704428554e-05, "loss": 0.0555, "step": 36930 }, { "action_loss": 0.001874165260232985, "epoch": 33.210431654676256, "step": 36930 }, { "epoch": 33.210431654676256, "step": 36930, "torque_loss": 0.07994759827852249 }, { "epoch": 33.219424460431654, "grad_norm": 0.2733328342437744, "learning_rate": 3.523478162306443e-05, "loss": 0.052, "step": 36940 }, { "action_loss": 0.0026903010439127684, "epoch": 33.219424460431654, "step": 36940 }, { "epoch": 33.219424460431654, "step": 36940, "torque_loss": 0.08953364938497543 }, { "epoch": 33.22841726618705, "grad_norm": 0.23541375994682312, "learning_rate": 3.520845502698381e-05, "loss": 0.0474, "step": 36950 }, { "action_loss": 0.0014111846685409546, "epoch": 33.22841726618705, "step": 36950 }, { "epoch": 33.22841726618705, "step": 36950, "torque_loss": 0.053791988641023636 }, { "epoch": 33.23741007194245, "grad_norm": 0.35081976652145386, "learning_rate": 3.5182132924184005e-05, "loss": 0.0538, "step": 36960 }, { "action_loss": 0.0064592622220516205, "epoch": 33.23741007194245, "step": 36960 }, { "epoch": 33.23741007194245, "step": 36960, "torque_loss": 0.09353271871805191 }, { "epoch": 33.24640287769784, "grad_norm": 0.3484102487564087, "learning_rate": 3.5155815322660966e-05, "loss": 0.061, "step": 36970 }, { "action_loss": 0.011097974143922329, "epoch": 33.24640287769784, "step": 36970 }, { "epoch": 33.24640287769784, "step": 36970, "torque_loss": 0.11181912571191788 }, { "epoch": 33.25539568345324, "grad_norm": 0.35762861371040344, "learning_rate": 3.512950223040931e-05, "loss": 0.0532, "step": 36980 }, { "action_loss": 0.007937815971672535, "epoch": 33.25539568345324, "step": 36980 }, { "epoch": 33.25539568345324, "step": 36980, "torque_loss": 0.13292069733142853 }, { "epoch": 33.264388489208635, "grad_norm": 0.2583511471748352, "learning_rate": 3.5103193655422216e-05, "loss": 0.0573, "step": 36990 }, { "action_loss": 0.004702199716120958, "epoch": 33.264388489208635, "step": 36990 }, { "epoch": 33.264388489208635, "step": 36990, "torque_loss": 0.0878707692027092 }, { "epoch": 33.273381294964025, "grad_norm": 0.2702264189720154, "learning_rate": 3.5076889605691596e-05, "loss": 0.0535, "step": 37000 }, { "action_loss": 0.0025377331767231226, "epoch": 33.273381294964025, "step": 37000 }, { "epoch": 33.273381294964025, "step": 37000, "torque_loss": 0.08933863788843155 }, { "epoch": 33.28237410071942, "grad_norm": 0.30465075373649597, "learning_rate": 3.505059008920787e-05, "loss": 0.0648, "step": 37010 }, { "action_loss": 0.004466598387807608, "epoch": 33.28237410071942, "step": 37010 }, { "epoch": 33.28237410071942, "step": 37010, "torque_loss": 0.07928436249494553 }, { "epoch": 33.29136690647482, "grad_norm": 0.3110445439815521, "learning_rate": 3.502429511396016e-05, "loss": 0.061, "step": 37020 }, { "action_loss": 0.002268859650939703, "epoch": 33.29136690647482, "step": 37020 }, { "epoch": 33.29136690647482, "step": 37020, "torque_loss": 0.0800376608967781 }, { "epoch": 33.30035971223022, "grad_norm": 0.2014431208372116, "learning_rate": 3.4998004687936196e-05, "loss": 0.0573, "step": 37030 }, { "action_loss": 0.001102806651033461, "epoch": 33.30035971223022, "step": 37030 }, { "epoch": 33.30035971223022, "step": 37030, "torque_loss": 0.04886804521083832 }, { "epoch": 33.30935251798561, "grad_norm": 0.27407652139663696, "learning_rate": 3.497171881912229e-05, "loss": 0.0506, "step": 37040 }, { "action_loss": 0.0035424986854195595, "epoch": 33.30935251798561, "step": 37040 }, { "epoch": 33.30935251798561, "step": 37040, "torque_loss": 0.089542455971241 }, { "epoch": 33.318345323741006, "grad_norm": 0.23158708214759827, "learning_rate": 3.494543751550342e-05, "loss": 0.0534, "step": 37050 }, { "action_loss": 0.003868843661621213, "epoch": 33.318345323741006, "step": 37050 }, { "epoch": 33.318345323741006, "step": 37050, "torque_loss": 0.10792569071054459 }, { "epoch": 33.327338129496404, "grad_norm": 0.24131686985492706, "learning_rate": 3.491916078506313e-05, "loss": 0.0629, "step": 37060 }, { "action_loss": 0.001974719576537609, "epoch": 33.327338129496404, "step": 37060 }, { "epoch": 33.327338129496404, "step": 37060, "torque_loss": 0.06452658027410507 }, { "epoch": 33.3363309352518, "grad_norm": 0.2858755588531494, "learning_rate": 3.489288863578361e-05, "loss": 0.0692, "step": 37070 }, { "action_loss": 0.01026849914342165, "epoch": 33.3363309352518, "step": 37070 }, { "epoch": 33.3363309352518, "step": 37070, "torque_loss": 0.1443767547607422 }, { "epoch": 33.34532374100719, "grad_norm": 0.3119972348213196, "learning_rate": 3.4866621075645646e-05, "loss": 0.0683, "step": 37080 }, { "action_loss": 0.0019226297736167908, "epoch": 33.34532374100719, "step": 37080 }, { "epoch": 33.34532374100719, "step": 37080, "torque_loss": 0.06624343246221542 }, { "epoch": 33.35431654676259, "grad_norm": 0.27960431575775146, "learning_rate": 3.4840358112628614e-05, "loss": 0.055, "step": 37090 }, { "action_loss": 0.00894214492291212, "epoch": 33.35431654676259, "step": 37090 }, { "epoch": 33.35431654676259, "step": 37090, "torque_loss": 0.106178879737854 }, { "epoch": 33.36330935251799, "grad_norm": 0.28987225890159607, "learning_rate": 3.481409975471053e-05, "loss": 0.0481, "step": 37100 }, { "action_loss": 0.0013026585802435875, "epoch": 33.36330935251799, "step": 37100 }, { "epoch": 33.36330935251799, "step": 37100, "torque_loss": 0.05408094450831413 }, { "epoch": 33.37230215827338, "grad_norm": 0.33820462226867676, "learning_rate": 3.4787846009867986e-05, "loss": 0.0626, "step": 37110 }, { "action_loss": 0.006757061928510666, "epoch": 33.37230215827338, "step": 37110 }, { "epoch": 33.37230215827338, "step": 37110, "torque_loss": 0.10280927270650864 }, { "epoch": 33.381294964028775, "grad_norm": 0.33411335945129395, "learning_rate": 3.476159688607615e-05, "loss": 0.064, "step": 37120 }, { "action_loss": 0.0021123208571225405, "epoch": 33.381294964028775, "step": 37120 }, { "epoch": 33.381294964028775, "step": 37120, "torque_loss": 0.07901335507631302 }, { "epoch": 33.39028776978417, "grad_norm": 0.2726345360279083, "learning_rate": 3.4735352391308854e-05, "loss": 0.0528, "step": 37130 }, { "action_loss": 0.0038750257808715105, "epoch": 33.39028776978417, "step": 37130 }, { "epoch": 33.39028776978417, "step": 37130, "torque_loss": 0.0923149362206459 }, { "epoch": 33.39928057553957, "grad_norm": 0.25428682565689087, "learning_rate": 3.4709112533538446e-05, "loss": 0.0605, "step": 37140 }, { "action_loss": 0.001968022668734193, "epoch": 33.39928057553957, "step": 37140 }, { "epoch": 33.39928057553957, "step": 37140, "torque_loss": 0.05023103952407837 }, { "epoch": 33.40827338129496, "grad_norm": 0.2301008552312851, "learning_rate": 3.4682877320735934e-05, "loss": 0.0371, "step": 37150 }, { "action_loss": 0.0026219747960567474, "epoch": 33.40827338129496, "step": 37150 }, { "epoch": 33.40827338129496, "step": 37150, "torque_loss": 0.1092928946018219 }, { "epoch": 33.41726618705036, "grad_norm": 0.2738284468650818, "learning_rate": 3.465664676087085e-05, "loss": 0.0677, "step": 37160 }, { "action_loss": 0.008607697673141956, "epoch": 33.41726618705036, "step": 37160 }, { "epoch": 33.41726618705036, "step": 37160, "torque_loss": 0.11534786224365234 }, { "epoch": 33.42625899280576, "grad_norm": 0.22069774568080902, "learning_rate": 3.463042086191136e-05, "loss": 0.054, "step": 37170 }, { "action_loss": 0.005434510763734579, "epoch": 33.42625899280576, "step": 37170 }, { "epoch": 33.42625899280576, "step": 37170, "torque_loss": 0.1232428178191185 }, { "epoch": 33.435251798561154, "grad_norm": 0.41143038868904114, "learning_rate": 3.460419963182423e-05, "loss": 0.062, "step": 37180 }, { "action_loss": 0.004426969215273857, "epoch": 33.435251798561154, "step": 37180 }, { "epoch": 33.435251798561154, "step": 37180, "torque_loss": 0.1000814437866211 }, { "epoch": 33.444244604316545, "grad_norm": 0.27655625343322754, "learning_rate": 3.457798307857473e-05, "loss": 0.0611, "step": 37190 }, { "action_loss": 0.001188861788250506, "epoch": 33.444244604316545, "step": 37190 }, { "epoch": 33.444244604316545, "step": 37190, "torque_loss": 0.0559866726398468 }, { "epoch": 33.45323741007194, "grad_norm": 0.30455926060676575, "learning_rate": 3.455177121012678e-05, "loss": 0.0597, "step": 37200 }, { "action_loss": 0.0022769819479435682, "epoch": 33.45323741007194, "step": 37200 }, { "epoch": 33.45323741007194, "step": 37200, "torque_loss": 0.0545378178358078 }, { "epoch": 33.46223021582734, "grad_norm": 0.2828160524368286, "learning_rate": 3.452556403444285e-05, "loss": 0.0608, "step": 37210 }, { "action_loss": 0.0019505760865285993, "epoch": 33.46223021582734, "step": 37210 }, { "epoch": 33.46223021582734, "step": 37210, "torque_loss": 0.0646144226193428 }, { "epoch": 33.47122302158273, "grad_norm": 0.33284929394721985, "learning_rate": 3.4499361559483975e-05, "loss": 0.0584, "step": 37220 }, { "action_loss": 0.0020524447318166494, "epoch": 33.47122302158273, "step": 37220 }, { "epoch": 33.47122302158273, "step": 37220, "torque_loss": 0.0712059959769249 }, { "epoch": 33.48021582733813, "grad_norm": 0.27965840697288513, "learning_rate": 3.44731637932098e-05, "loss": 0.0482, "step": 37230 }, { "action_loss": 0.0019654312636703253, "epoch": 33.48021582733813, "step": 37230 }, { "epoch": 33.48021582733813, "step": 37230, "torque_loss": 0.08152401447296143 }, { "epoch": 33.489208633093526, "grad_norm": 0.44091740250587463, "learning_rate": 3.44469707435785e-05, "loss": 0.0534, "step": 37240 }, { "action_loss": 0.0029514965135604143, "epoch": 33.489208633093526, "step": 37240 }, { "epoch": 33.489208633093526, "step": 37240, "torque_loss": 0.10787595063447952 }, { "epoch": 33.49820143884892, "grad_norm": 0.32924115657806396, "learning_rate": 3.4420782418546835e-05, "loss": 0.0552, "step": 37250 }, { "action_loss": 0.005926153156906366, "epoch": 33.49820143884892, "step": 37250 }, { "epoch": 33.49820143884892, "step": 37250, "torque_loss": 0.10797403007745743 }, { "epoch": 33.507194244604314, "grad_norm": 0.23756080865859985, "learning_rate": 3.439459882607012e-05, "loss": 0.0562, "step": 37260 }, { "action_loss": 0.002351502887904644, "epoch": 33.507194244604314, "step": 37260 }, { "epoch": 33.507194244604314, "step": 37260, "torque_loss": 0.08093033730983734 }, { "epoch": 33.51618705035971, "grad_norm": 0.39906036853790283, "learning_rate": 3.436841997410225e-05, "loss": 0.0494, "step": 37270 }, { "action_loss": 0.0016397886211052537, "epoch": 33.51618705035971, "step": 37270 }, { "epoch": 33.51618705035971, "step": 37270, "torque_loss": 0.05211116746068001 }, { "epoch": 33.52517985611511, "grad_norm": 0.21491007506847382, "learning_rate": 3.434224587059567e-05, "loss": 0.0505, "step": 37280 }, { "action_loss": 0.009628486819565296, "epoch": 33.52517985611511, "step": 37280 }, { "epoch": 33.52517985611511, "step": 37280, "torque_loss": 0.1188640370965004 }, { "epoch": 33.53417266187051, "grad_norm": 0.2522684931755066, "learning_rate": 3.431607652350136e-05, "loss": 0.0582, "step": 37290 }, { "action_loss": 0.0054841251112520695, "epoch": 33.53417266187051, "step": 37290 }, { "epoch": 33.53417266187051, "step": 37290, "torque_loss": 0.11271490901708603 }, { "epoch": 33.5431654676259, "grad_norm": 0.3959525525569916, "learning_rate": 3.428991194076891e-05, "loss": 0.0565, "step": 37300 }, { "action_loss": 0.0020403247326612473, "epoch": 33.5431654676259, "step": 37300 }, { "epoch": 33.5431654676259, "step": 37300, "torque_loss": 0.072377048432827 }, { "epoch": 33.552158273381295, "grad_norm": 0.20375168323516846, "learning_rate": 3.4263752130346394e-05, "loss": 0.052, "step": 37310 }, { "action_loss": 0.005095877218991518, "epoch": 33.552158273381295, "step": 37310 }, { "epoch": 33.552158273381295, "step": 37310, "torque_loss": 0.0660669133067131 }, { "epoch": 33.56115107913669, "grad_norm": 0.27819177508354187, "learning_rate": 3.4237597100180515e-05, "loss": 0.0605, "step": 37320 }, { "action_loss": 0.0010237363167107105, "epoch": 33.56115107913669, "step": 37320 }, { "epoch": 33.56115107913669, "step": 37320, "torque_loss": 0.03465039283037186 }, { "epoch": 33.57014388489208, "grad_norm": 0.29729363322257996, "learning_rate": 3.4211446858216427e-05, "loss": 0.0689, "step": 37330 }, { "action_loss": 0.00801481306552887, "epoch": 33.57014388489208, "step": 37330 }, { "epoch": 33.57014388489208, "step": 37330, "torque_loss": 0.12272224575281143 }, { "epoch": 33.57913669064748, "grad_norm": 0.3828751742839813, "learning_rate": 3.4185301412397915e-05, "loss": 0.0733, "step": 37340 }, { "action_loss": 0.005285175982862711, "epoch": 33.57913669064748, "step": 37340 }, { "epoch": 33.57913669064748, "step": 37340, "torque_loss": 0.06922837346792221 }, { "epoch": 33.58812949640288, "grad_norm": 0.29912739992141724, "learning_rate": 3.415916077066729e-05, "loss": 0.0619, "step": 37350 }, { "action_loss": 0.0022708948235958815, "epoch": 33.58812949640288, "step": 37350 }, { "epoch": 33.58812949640288, "step": 37350, "torque_loss": 0.07124527543783188 }, { "epoch": 33.597122302158276, "grad_norm": 0.2527572512626648, "learning_rate": 3.413302494096535e-05, "loss": 0.046, "step": 37360 }, { "action_loss": 0.007209406699985266, "epoch": 33.597122302158276, "step": 37360 }, { "epoch": 33.597122302158276, "step": 37360, "torque_loss": 0.11031421273946762 }, { "epoch": 33.606115107913666, "grad_norm": 0.2624550759792328, "learning_rate": 3.410689393123151e-05, "loss": 0.0592, "step": 37370 }, { "action_loss": 0.005031134467571974, "epoch": 33.606115107913666, "step": 37370 }, { "epoch": 33.606115107913666, "step": 37370, "torque_loss": 0.07891971617937088 }, { "epoch": 33.615107913669064, "grad_norm": 0.2912076711654663, "learning_rate": 3.408076774940364e-05, "loss": 0.0605, "step": 37380 }, { "action_loss": 0.0070844064466655254, "epoch": 33.615107913669064, "step": 37380 }, { "epoch": 33.615107913669064, "step": 37380, "torque_loss": 0.09607044607400894 }, { "epoch": 33.62410071942446, "grad_norm": 0.3028906285762787, "learning_rate": 3.40546464034182e-05, "loss": 0.0546, "step": 37390 }, { "action_loss": 0.0034726292360574007, "epoch": 33.62410071942446, "step": 37390 }, { "epoch": 33.62410071942446, "step": 37390, "torque_loss": 0.09162608534097672 }, { "epoch": 33.63309352517986, "grad_norm": 0.17688459157943726, "learning_rate": 3.4028529901210185e-05, "loss": 0.0622, "step": 37400 }, { "action_loss": 0.008478695526719093, "epoch": 33.63309352517986, "step": 37400 }, { "epoch": 33.63309352517986, "step": 37400, "torque_loss": 0.10146617889404297 }, { "epoch": 33.64208633093525, "grad_norm": 0.19823019206523895, "learning_rate": 3.4002418250713086e-05, "loss": 0.0644, "step": 37410 }, { "action_loss": 0.0029285706114023924, "epoch": 33.64208633093525, "step": 37410 }, { "epoch": 33.64208633093525, "step": 37410, "torque_loss": 0.07672133296728134 }, { "epoch": 33.65107913669065, "grad_norm": 0.27872234582901, "learning_rate": 3.3976311459858936e-05, "loss": 0.0504, "step": 37420 }, { "action_loss": 0.004005079623311758, "epoch": 33.65107913669065, "step": 37420 }, { "epoch": 33.65107913669065, "step": 37420, "torque_loss": 0.11706298589706421 }, { "epoch": 33.660071942446045, "grad_norm": 0.3007199466228485, "learning_rate": 3.395020953657826e-05, "loss": 0.0588, "step": 37430 }, { "action_loss": 0.002268593991175294, "epoch": 33.660071942446045, "step": 37430 }, { "epoch": 33.660071942446045, "step": 37430, "torque_loss": 0.07844044268131256 }, { "epoch": 33.669064748201436, "grad_norm": 0.34049445390701294, "learning_rate": 3.3924112488800165e-05, "loss": 0.0482, "step": 37440 }, { "action_loss": 0.004173099063336849, "epoch": 33.669064748201436, "step": 37440 }, { "epoch": 33.669064748201436, "step": 37440, "torque_loss": 0.09743207693099976 }, { "epoch": 33.67805755395683, "grad_norm": 0.32311221957206726, "learning_rate": 3.389802032445225e-05, "loss": 0.0488, "step": 37450 }, { "action_loss": 0.0024960434529930353, "epoch": 33.67805755395683, "step": 37450 }, { "epoch": 33.67805755395683, "step": 37450, "torque_loss": 0.07539161294698715 }, { "epoch": 33.68705035971223, "grad_norm": 0.18136346340179443, "learning_rate": 3.38719330514606e-05, "loss": 0.0537, "step": 37460 }, { "action_loss": 0.003817471442744136, "epoch": 33.68705035971223, "step": 37460 }, { "epoch": 33.68705035971223, "step": 37460, "torque_loss": 0.06712379306554794 }, { "epoch": 33.69604316546763, "grad_norm": 0.2911567986011505, "learning_rate": 3.3845850677749866e-05, "loss": 0.0595, "step": 37470 }, { "action_loss": 0.012142769992351532, "epoch": 33.69604316546763, "step": 37470 }, { "epoch": 33.69604316546763, "step": 37470, "torque_loss": 0.16335968673229218 }, { "epoch": 33.70503597122302, "grad_norm": 0.3009502589702606, "learning_rate": 3.3819773211243157e-05, "loss": 0.0509, "step": 37480 }, { "action_loss": 0.002940460341051221, "epoch": 33.70503597122302, "step": 37480 }, { "epoch": 33.70503597122302, "step": 37480, "torque_loss": 0.08045411854982376 }, { "epoch": 33.71402877697842, "grad_norm": 0.3112732470035553, "learning_rate": 3.379370065986213e-05, "loss": 0.0492, "step": 37490 }, { "action_loss": 0.0029694789554923773, "epoch": 33.71402877697842, "step": 37490 }, { "epoch": 33.71402877697842, "step": 37490, "torque_loss": 0.07059658318758011 }, { "epoch": 33.723021582733814, "grad_norm": 0.3250070810317993, "learning_rate": 3.3767633031526955e-05, "loss": 0.0515, "step": 37500 }, { "action_loss": 0.0044365813955664635, "epoch": 33.723021582733814, "step": 37500 }, { "epoch": 33.723021582733814, "step": 37500, "torque_loss": 0.10788267850875854 }, { "epoch": 33.73201438848921, "grad_norm": 0.22350116074085236, "learning_rate": 3.374157033415626e-05, "loss": 0.0519, "step": 37510 }, { "action_loss": 0.010748873464763165, "epoch": 33.73201438848921, "step": 37510 }, { "epoch": 33.73201438848921, "step": 37510, "torque_loss": 0.13603322207927704 }, { "epoch": 33.7410071942446, "grad_norm": 0.2477906346321106, "learning_rate": 3.371551257566723e-05, "loss": 0.0527, "step": 37520 }, { "action_loss": 0.006939686834812164, "epoch": 33.7410071942446, "step": 37520 }, { "epoch": 33.7410071942446, "step": 37520, "torque_loss": 0.15307724475860596 }, { "epoch": 33.75, "grad_norm": 0.3594174087047577, "learning_rate": 3.36894597639755e-05, "loss": 0.0579, "step": 37530 }, { "action_loss": 0.007322041783481836, "epoch": 33.75, "step": 37530 }, { "epoch": 33.75, "step": 37530, "torque_loss": 0.10660991817712784 }, { "epoch": 33.7589928057554, "grad_norm": 0.2083999663591385, "learning_rate": 3.366341190699523e-05, "loss": 0.0537, "step": 37540 }, { "action_loss": 0.0035717973951250315, "epoch": 33.7589928057554, "step": 37540 }, { "epoch": 33.7589928057554, "step": 37540, "torque_loss": 0.08811676502227783 }, { "epoch": 33.76798561151079, "grad_norm": 0.43623653054237366, "learning_rate": 3.36373690126391e-05, "loss": 0.0543, "step": 37550 }, { "action_loss": 0.0013508191332221031, "epoch": 33.76798561151079, "step": 37550 }, { "epoch": 33.76798561151079, "step": 37550, "torque_loss": 0.06115543469786644 }, { "epoch": 33.776978417266186, "grad_norm": 0.2846590280532837, "learning_rate": 3.3611331088818234e-05, "loss": 0.0551, "step": 37560 }, { "action_loss": 0.004277973901480436, "epoch": 33.776978417266186, "step": 37560 }, { "epoch": 33.776978417266186, "step": 37560, "torque_loss": 0.08404501527547836 }, { "epoch": 33.78597122302158, "grad_norm": 0.36195650696754456, "learning_rate": 3.3585298143442265e-05, "loss": 0.0615, "step": 37570 }, { "action_loss": 0.0009776052320376039, "epoch": 33.78597122302158, "step": 37570 }, { "epoch": 33.78597122302158, "step": 37570, "torque_loss": 0.06893008202314377 }, { "epoch": 33.79496402877698, "grad_norm": 0.24593372642993927, "learning_rate": 3.35592701844193e-05, "loss": 0.057, "step": 37580 }, { "action_loss": 0.0017167730256915092, "epoch": 33.79496402877698, "step": 37580 }, { "epoch": 33.79496402877698, "step": 37580, "torque_loss": 0.0752708688378334 }, { "epoch": 33.80395683453237, "grad_norm": 0.27799907326698303, "learning_rate": 3.353324721965596e-05, "loss": 0.0619, "step": 37590 }, { "action_loss": 0.0014481717953458428, "epoch": 33.80395683453237, "step": 37590 }, { "epoch": 33.80395683453237, "step": 37590, "torque_loss": 0.05803672969341278 }, { "epoch": 33.81294964028777, "grad_norm": 0.29750582575798035, "learning_rate": 3.350722925705736e-05, "loss": 0.0493, "step": 37600 }, { "action_loss": 0.014469262212514877, "epoch": 33.81294964028777, "step": 37600 }, { "epoch": 33.81294964028777, "step": 37600, "torque_loss": 0.10490325093269348 }, { "epoch": 33.82194244604317, "grad_norm": 0.31820645928382874, "learning_rate": 3.348121630452703e-05, "loss": 0.0702, "step": 37610 }, { "action_loss": 0.01053574774414301, "epoch": 33.82194244604317, "step": 37610 }, { "epoch": 33.82194244604317, "step": 37610, "torque_loss": 0.10295286029577255 }, { "epoch": 33.830935251798564, "grad_norm": 0.2495049238204956, "learning_rate": 3.3455208369967044e-05, "loss": 0.0514, "step": 37620 }, { "action_loss": 0.0019301855936646461, "epoch": 33.830935251798564, "step": 37620 }, { "epoch": 33.830935251798564, "step": 37620, "torque_loss": 0.06677908450365067 }, { "epoch": 33.839928057553955, "grad_norm": 0.22886218130588531, "learning_rate": 3.34292054612779e-05, "loss": 0.0483, "step": 37630 }, { "action_loss": 0.0036857977975159883, "epoch": 33.839928057553955, "step": 37630 }, { "epoch": 33.839928057553955, "step": 37630, "torque_loss": 0.07540524750947952 }, { "epoch": 33.84892086330935, "grad_norm": 0.25252604484558105, "learning_rate": 3.340320758635861e-05, "loss": 0.0571, "step": 37640 }, { "action_loss": 0.001564274076372385, "epoch": 33.84892086330935, "step": 37640 }, { "epoch": 33.84892086330935, "step": 37640, "torque_loss": 0.05790714547038078 }, { "epoch": 33.85791366906475, "grad_norm": 0.25947105884552, "learning_rate": 3.337721475310666e-05, "loss": 0.0515, "step": 37650 }, { "action_loss": 0.003242895007133484, "epoch": 33.85791366906475, "step": 37650 }, { "epoch": 33.85791366906475, "step": 37650, "torque_loss": 0.11070368438959122 }, { "epoch": 33.86690647482014, "grad_norm": 0.2509274184703827, "learning_rate": 3.335122696941795e-05, "loss": 0.0521, "step": 37660 }, { "action_loss": 0.0027782784309238195, "epoch": 33.86690647482014, "step": 37660 }, { "epoch": 33.86690647482014, "step": 37660, "torque_loss": 0.07400012761354446 }, { "epoch": 33.87589928057554, "grad_norm": 0.39752471446990967, "learning_rate": 3.332524424318692e-05, "loss": 0.0519, "step": 37670 }, { "action_loss": 0.0010843113996088505, "epoch": 33.87589928057554, "step": 37670 }, { "epoch": 33.87589928057554, "step": 37670, "torque_loss": 0.046145182102918625 }, { "epoch": 33.884892086330936, "grad_norm": 0.28616270422935486, "learning_rate": 3.32992665823064e-05, "loss": 0.0485, "step": 37680 }, { "action_loss": 0.008432804606854916, "epoch": 33.884892086330936, "step": 37680 }, { "epoch": 33.884892086330936, "step": 37680, "torque_loss": 0.08386864513158798 }, { "epoch": 33.893884892086334, "grad_norm": 0.32998138666152954, "learning_rate": 3.327329399466774e-05, "loss": 0.0599, "step": 37690 }, { "action_loss": 0.0017241016030311584, "epoch": 33.893884892086334, "step": 37690 }, { "epoch": 33.893884892086334, "step": 37690, "torque_loss": 0.04347914084792137 }, { "epoch": 33.902877697841724, "grad_norm": 0.31352320313453674, "learning_rate": 3.324732648816072e-05, "loss": 0.0548, "step": 37700 }, { "action_loss": 0.002535861451178789, "epoch": 33.902877697841724, "step": 37700 }, { "epoch": 33.902877697841724, "step": 37700, "torque_loss": 0.07087687402963638 }, { "epoch": 33.91187050359712, "grad_norm": 0.3188319504261017, "learning_rate": 3.322136407067358e-05, "loss": 0.0486, "step": 37710 }, { "action_loss": 0.007994130253791809, "epoch": 33.91187050359712, "step": 37710 }, { "epoch": 33.91187050359712, "step": 37710, "torque_loss": 0.11264649033546448 }, { "epoch": 33.92086330935252, "grad_norm": 0.19783523678779602, "learning_rate": 3.3195406750093036e-05, "loss": 0.0544, "step": 37720 }, { "action_loss": 0.0019341070437803864, "epoch": 33.92086330935252, "step": 37720 }, { "epoch": 33.92086330935252, "step": 37720, "torque_loss": 0.04344714805483818 }, { "epoch": 33.92985611510792, "grad_norm": 0.2770645022392273, "learning_rate": 3.3169454534304205e-05, "loss": 0.0463, "step": 37730 }, { "action_loss": 0.0018922453746199608, "epoch": 33.92985611510792, "step": 37730 }, { "epoch": 33.92985611510792, "step": 37730, "torque_loss": 0.05900077894330025 }, { "epoch": 33.93884892086331, "grad_norm": 0.20815549790859222, "learning_rate": 3.3143507431190725e-05, "loss": 0.0598, "step": 37740 }, { "action_loss": 0.005012002307921648, "epoch": 33.93884892086331, "step": 37740 }, { "epoch": 33.93884892086331, "step": 37740, "torque_loss": 0.09849957376718521 }, { "epoch": 33.947841726618705, "grad_norm": 0.3113410174846649, "learning_rate": 3.311756544863459e-05, "loss": 0.0494, "step": 37750 }, { "action_loss": 0.002394674113020301, "epoch": 33.947841726618705, "step": 37750 }, { "epoch": 33.947841726618705, "step": 37750, "torque_loss": 0.06911910325288773 }, { "epoch": 33.9568345323741, "grad_norm": 0.1998601108789444, "learning_rate": 3.309162859451633e-05, "loss": 0.0628, "step": 37760 }, { "action_loss": 0.004886744078248739, "epoch": 33.9568345323741, "step": 37760 }, { "epoch": 33.9568345323741, "step": 37760, "torque_loss": 0.06834455579519272 }, { "epoch": 33.96582733812949, "grad_norm": 0.23077891767024994, "learning_rate": 3.306569687671487e-05, "loss": 0.0479, "step": 37770 }, { "action_loss": 0.011107418686151505, "epoch": 33.96582733812949, "step": 37770 }, { "epoch": 33.96582733812949, "step": 37770, "torque_loss": 0.14682726562023163 }, { "epoch": 33.97482014388489, "grad_norm": 0.3978191912174225, "learning_rate": 3.303977030310756e-05, "loss": 0.0575, "step": 37780 }, { "action_loss": 0.00672430545091629, "epoch": 33.97482014388489, "step": 37780 }, { "epoch": 33.97482014388489, "step": 37780, "torque_loss": 0.09862902760505676 }, { "epoch": 33.98381294964029, "grad_norm": 0.3080693781375885, "learning_rate": 3.3013848881570245e-05, "loss": 0.048, "step": 37790 }, { "action_loss": 0.002875323174521327, "epoch": 33.98381294964029, "step": 37790 }, { "epoch": 33.98381294964029, "step": 37790, "torque_loss": 0.05449589714407921 }, { "epoch": 33.992805755395686, "grad_norm": 0.2127414345741272, "learning_rate": 3.298793261997712e-05, "loss": 0.0614, "step": 37800 }, { "action_loss": 0.0012289034202694893, "epoch": 33.992805755395686, "step": 37800 }, { "epoch": 33.992805755395686, "step": 37800, "torque_loss": 0.052993688732385635 }, { "epoch": 34.00179856115108, "grad_norm": 0.3054056763648987, "learning_rate": 3.2962021526200893e-05, "loss": 0.0495, "step": 37810 }, { "action_loss": 0.004578463267534971, "epoch": 34.00179856115108, "step": 37810 }, { "epoch": 34.00179856115108, "step": 37810, "torque_loss": 0.07003771513700485 }, { "epoch": 34.010791366906474, "grad_norm": 0.2694181501865387, "learning_rate": 3.293611560811268e-05, "loss": 0.05, "step": 37820 }, { "action_loss": 0.005045953672379255, "epoch": 34.010791366906474, "step": 37820 }, { "epoch": 34.010791366906474, "step": 37820, "torque_loss": 0.08290624618530273 }, { "epoch": 34.01978417266187, "grad_norm": 0.3673964738845825, "learning_rate": 3.291021487358199e-05, "loss": 0.0646, "step": 37830 }, { "action_loss": 0.009315635077655315, "epoch": 34.01978417266187, "step": 37830 }, { "epoch": 34.01978417266187, "step": 37830, "torque_loss": 0.12945912778377533 }, { "epoch": 34.02877697841727, "grad_norm": 0.2996341586112976, "learning_rate": 3.28843193304768e-05, "loss": 0.0644, "step": 37840 }, { "action_loss": 0.010733458213508129, "epoch": 34.02877697841727, "step": 37840 }, { "epoch": 34.02877697841727, "step": 37840, "torque_loss": 0.11061901599168777 }, { "epoch": 34.03776978417266, "grad_norm": 0.3322160542011261, "learning_rate": 3.2858428986663456e-05, "loss": 0.0668, "step": 37850 }, { "action_loss": 0.0034387882333248854, "epoch": 34.03776978417266, "step": 37850 }, { "epoch": 34.03776978417266, "step": 37850, "torque_loss": 0.06505057960748672 }, { "epoch": 34.04676258992806, "grad_norm": 0.23232221603393555, "learning_rate": 3.283254385000681e-05, "loss": 0.0478, "step": 37860 }, { "action_loss": 0.006880661007016897, "epoch": 34.04676258992806, "step": 37860 }, { "epoch": 34.04676258992806, "step": 37860, "torque_loss": 0.14095641672611237 }, { "epoch": 34.055755395683455, "grad_norm": 0.251394659280777, "learning_rate": 3.2806663928370076e-05, "loss": 0.0572, "step": 37870 }, { "action_loss": 0.0046509685926139355, "epoch": 34.055755395683455, "step": 37870 }, { "epoch": 34.055755395683455, "step": 37870, "torque_loss": 0.09688965231180191 }, { "epoch": 34.064748201438846, "grad_norm": 0.27845698595046997, "learning_rate": 3.278078922961485e-05, "loss": 0.0549, "step": 37880 }, { "action_loss": 0.0033191998954862356, "epoch": 34.064748201438846, "step": 37880 }, { "epoch": 34.064748201438846, "step": 37880, "torque_loss": 0.050061631947755814 }, { "epoch": 34.07374100719424, "grad_norm": 0.1995571106672287, "learning_rate": 3.275491976160123e-05, "loss": 0.0507, "step": 37890 }, { "action_loss": 0.0070935399271547794, "epoch": 34.07374100719424, "step": 37890 }, { "epoch": 34.07374100719424, "step": 37890, "torque_loss": 0.11911070346832275 }, { "epoch": 34.08273381294964, "grad_norm": 0.30012276768684387, "learning_rate": 3.2729055532187645e-05, "loss": 0.0465, "step": 37900 }, { "action_loss": 0.0031794484239071608, "epoch": 34.08273381294964, "step": 37900 }, { "epoch": 34.08273381294964, "step": 37900, "torque_loss": 0.13336236774921417 }, { "epoch": 34.09172661870504, "grad_norm": 0.2823377251625061, "learning_rate": 3.270319654923097e-05, "loss": 0.064, "step": 37910 }, { "action_loss": 0.011932525783777237, "epoch": 34.09172661870504, "step": 37910 }, { "epoch": 34.09172661870504, "step": 37910, "torque_loss": 0.1919134259223938 }, { "epoch": 34.10071942446043, "grad_norm": 0.2703757584095001, "learning_rate": 3.2677342820586506e-05, "loss": 0.0605, "step": 37920 }, { "action_loss": 0.006650824099779129, "epoch": 34.10071942446043, "step": 37920 }, { "epoch": 34.10071942446043, "step": 37920, "torque_loss": 0.0793154239654541 }, { "epoch": 34.10971223021583, "grad_norm": 0.3426964282989502, "learning_rate": 3.2651494354107905e-05, "loss": 0.0598, "step": 37930 }, { "action_loss": 0.0008919609826989472, "epoch": 34.10971223021583, "step": 37930 }, { "epoch": 34.10971223021583, "step": 37930, "torque_loss": 0.037677112966775894 }, { "epoch": 34.118705035971225, "grad_norm": 0.3030616343021393, "learning_rate": 3.2625651157647266e-05, "loss": 0.0439, "step": 37940 }, { "action_loss": 0.0019626037683337927, "epoch": 34.118705035971225, "step": 37940 }, { "epoch": 34.118705035971225, "step": 37940, "torque_loss": 0.06582195311784744 }, { "epoch": 34.12769784172662, "grad_norm": 0.27795523405075073, "learning_rate": 3.259981323905505e-05, "loss": 0.0547, "step": 37950 }, { "action_loss": 0.0017319634789600968, "epoch": 34.12769784172662, "step": 37950 }, { "epoch": 34.12769784172662, "step": 37950, "torque_loss": 0.052278339862823486 }, { "epoch": 34.13669064748201, "grad_norm": 0.4290396273136139, "learning_rate": 3.257398060618014e-05, "loss": 0.0527, "step": 37960 }, { "action_loss": 0.003403909271582961, "epoch": 34.13669064748201, "step": 37960 }, { "epoch": 34.13669064748201, "step": 37960, "torque_loss": 0.09536242485046387 }, { "epoch": 34.14568345323741, "grad_norm": 0.29497382044792175, "learning_rate": 3.254815326686983e-05, "loss": 0.0451, "step": 37970 }, { "action_loss": 0.0022255154326558113, "epoch": 34.14568345323741, "step": 37970 }, { "epoch": 34.14568345323741, "step": 37970, "torque_loss": 0.07749443501234055 }, { "epoch": 34.15467625899281, "grad_norm": 0.206138014793396, "learning_rate": 3.2522331228969774e-05, "loss": 0.044, "step": 37980 }, { "action_loss": 0.0017386260442435741, "epoch": 34.15467625899281, "step": 37980 }, { "epoch": 34.15467625899281, "step": 37980, "torque_loss": 0.03764783963561058 }, { "epoch": 34.1636690647482, "grad_norm": 0.24417556822299957, "learning_rate": 3.2496514500324006e-05, "loss": 0.0538, "step": 37990 }, { "action_loss": 0.001734288758598268, "epoch": 34.1636690647482, "step": 37990 }, { "epoch": 34.1636690647482, "step": 37990, "torque_loss": 0.0653471127152443 }, { "epoch": 34.172661870503596, "grad_norm": 0.2270416021347046, "learning_rate": 3.247070308877498e-05, "loss": 0.0419, "step": 38000 }, { "action_loss": 0.008202153258025646, "epoch": 34.172661870503596, "step": 38000 }, { "epoch": 34.172661870503596, "step": 38000, "torque_loss": 0.09228383749723434 }, { "epoch": 34.181654676258994, "grad_norm": 0.3254702091217041, "learning_rate": 3.2444897002163515e-05, "loss": 0.0589, "step": 38010 }, { "action_loss": 0.001377831562422216, "epoch": 34.181654676258994, "step": 38010 }, { "epoch": 34.181654676258994, "step": 38010, "torque_loss": 0.04511584714055061 }, { "epoch": 34.19064748201439, "grad_norm": 0.34799081087112427, "learning_rate": 3.241909624832885e-05, "loss": 0.0497, "step": 38020 }, { "action_loss": 0.002285820199176669, "epoch": 34.19064748201439, "step": 38020 }, { "epoch": 34.19064748201439, "step": 38020, "torque_loss": 0.05610109865665436 }, { "epoch": 34.19964028776978, "grad_norm": 0.24067513644695282, "learning_rate": 3.239330083510852e-05, "loss": 0.063, "step": 38030 }, { "action_loss": 0.00420875521376729, "epoch": 34.19964028776978, "step": 38030 }, { "epoch": 34.19964028776978, "step": 38030, "torque_loss": 0.08454015105962753 }, { "epoch": 34.20863309352518, "grad_norm": 0.26688137650489807, "learning_rate": 3.236751077033855e-05, "loss": 0.0525, "step": 38040 }, { "action_loss": 0.0017139998963102698, "epoch": 34.20863309352518, "step": 38040 }, { "epoch": 34.20863309352518, "step": 38040, "torque_loss": 0.07742791622877121 }, { "epoch": 34.21762589928058, "grad_norm": 0.30242908000946045, "learning_rate": 3.234172606185322e-05, "loss": 0.061, "step": 38050 }, { "action_loss": 0.0021435243543237448, "epoch": 34.21762589928058, "step": 38050 }, { "epoch": 34.21762589928058, "step": 38050, "torque_loss": 0.09821665287017822 }, { "epoch": 34.226618705035975, "grad_norm": 0.4272827208042145, "learning_rate": 3.231594671748528e-05, "loss": 0.0623, "step": 38060 }, { "action_loss": 0.004930725786834955, "epoch": 34.226618705035975, "step": 38060 }, { "epoch": 34.226618705035975, "step": 38060, "torque_loss": 0.12629219889640808 }, { "epoch": 34.235611510791365, "grad_norm": 0.31309106945991516, "learning_rate": 3.2290172745065815e-05, "loss": 0.0489, "step": 38070 }, { "action_loss": 0.0038254850078374147, "epoch": 34.235611510791365, "step": 38070 }, { "epoch": 34.235611510791365, "step": 38070, "torque_loss": 0.09094556421041489 }, { "epoch": 34.24460431654676, "grad_norm": 0.3296452462673187, "learning_rate": 3.226440415242426e-05, "loss": 0.0546, "step": 38080 }, { "action_loss": 0.002041042782366276, "epoch": 34.24460431654676, "step": 38080 }, { "epoch": 34.24460431654676, "step": 38080, "torque_loss": 0.07136210799217224 }, { "epoch": 34.25359712230216, "grad_norm": 0.22441139817237854, "learning_rate": 3.223864094738846e-05, "loss": 0.0499, "step": 38090 }, { "action_loss": 0.0016322970623150468, "epoch": 34.25359712230216, "step": 38090 }, { "epoch": 34.25359712230216, "step": 38090, "torque_loss": 0.06191481649875641 }, { "epoch": 34.26258992805755, "grad_norm": 0.2498627007007599, "learning_rate": 3.221288313778456e-05, "loss": 0.044, "step": 38100 }, { "action_loss": 0.00168722087983042, "epoch": 34.26258992805755, "step": 38100 }, { "epoch": 34.26258992805755, "step": 38100, "torque_loss": 0.05682830139994621 }, { "epoch": 34.27158273381295, "grad_norm": 0.20701415836811066, "learning_rate": 3.2187130731437125e-05, "loss": 0.0399, "step": 38110 }, { "action_loss": 0.007923792116343975, "epoch": 34.27158273381295, "step": 38110 }, { "epoch": 34.27158273381295, "step": 38110, "torque_loss": 0.1031932607293129 }, { "epoch": 34.280575539568346, "grad_norm": 0.3202216923236847, "learning_rate": 3.216138373616905e-05, "loss": 0.0574, "step": 38120 }, { "action_loss": 0.0025006637442857027, "epoch": 34.280575539568346, "step": 38120 }, { "epoch": 34.280575539568346, "step": 38120, "torque_loss": 0.08382993191480637 }, { "epoch": 34.289568345323744, "grad_norm": 0.3115246593952179, "learning_rate": 3.21356421598016e-05, "loss": 0.0549, "step": 38130 }, { "action_loss": 0.004024407360702753, "epoch": 34.289568345323744, "step": 38130 }, { "epoch": 34.289568345323744, "step": 38130, "torque_loss": 0.11392968893051147 }, { "epoch": 34.298561151079134, "grad_norm": 0.3122250437736511, "learning_rate": 3.210990601015438e-05, "loss": 0.055, "step": 38140 }, { "action_loss": 0.001774271484464407, "epoch": 34.298561151079134, "step": 38140 }, { "epoch": 34.298561151079134, "step": 38140, "torque_loss": 0.05614476278424263 }, { "epoch": 34.30755395683453, "grad_norm": 0.37306204438209534, "learning_rate": 3.208417529504535e-05, "loss": 0.0463, "step": 38150 }, { "action_loss": 0.0036741800140589476, "epoch": 34.30755395683453, "step": 38150 }, { "epoch": 34.30755395683453, "step": 38150, "torque_loss": 0.10798487067222595 }, { "epoch": 34.31654676258993, "grad_norm": 0.31935903429985046, "learning_rate": 3.205845002229084e-05, "loss": 0.0565, "step": 38160 }, { "action_loss": 0.0023110939655452967, "epoch": 34.31654676258993, "step": 38160 }, { "epoch": 34.31654676258993, "step": 38160, "torque_loss": 0.0604291707277298 }, { "epoch": 34.32553956834532, "grad_norm": 0.27267274260520935, "learning_rate": 3.203273019970547e-05, "loss": 0.0446, "step": 38170 }, { "action_loss": 0.017285266891121864, "epoch": 34.32553956834532, "step": 38170 }, { "epoch": 34.32553956834532, "step": 38170, "torque_loss": 0.151328444480896 }, { "epoch": 34.33453237410072, "grad_norm": 0.2958511412143707, "learning_rate": 3.200701583510227e-05, "loss": 0.0577, "step": 38180 }, { "action_loss": 0.001955469837412238, "epoch": 34.33453237410072, "step": 38180 }, { "epoch": 34.33453237410072, "step": 38180, "torque_loss": 0.05513252690434456 }, { "epoch": 34.343525179856115, "grad_norm": 0.22207185626029968, "learning_rate": 3.198130693629261e-05, "loss": 0.0517, "step": 38190 }, { "action_loss": 0.001175880548544228, "epoch": 34.343525179856115, "step": 38190 }, { "epoch": 34.343525179856115, "step": 38190, "torque_loss": 0.05631871148943901 }, { "epoch": 34.35251798561151, "grad_norm": 0.27181103825569153, "learning_rate": 3.195560351108612e-05, "loss": 0.0575, "step": 38200 }, { "action_loss": 0.002148186322301626, "epoch": 34.35251798561151, "step": 38200 }, { "epoch": 34.35251798561151, "step": 38200, "torque_loss": 0.06993600726127625 }, { "epoch": 34.361510791366904, "grad_norm": 0.3584538996219635, "learning_rate": 3.1929905567290865e-05, "loss": 0.0503, "step": 38210 }, { "action_loss": 0.004286851733922958, "epoch": 34.361510791366904, "step": 38210 }, { "epoch": 34.361510791366904, "step": 38210, "torque_loss": 0.09408938139677048 }, { "epoch": 34.3705035971223, "grad_norm": 0.22973497211933136, "learning_rate": 3.1904213112713164e-05, "loss": 0.0515, "step": 38220 }, { "action_loss": 0.0050764745101332664, "epoch": 34.3705035971223, "step": 38220 }, { "epoch": 34.3705035971223, "step": 38220, "torque_loss": 0.10848388075828552 }, { "epoch": 34.3794964028777, "grad_norm": 0.2854699492454529, "learning_rate": 3.187852615515774e-05, "loss": 0.0561, "step": 38230 }, { "action_loss": 0.001610254985280335, "epoch": 34.3794964028777, "step": 38230 }, { "epoch": 34.3794964028777, "step": 38230, "torque_loss": 0.054266367107629776 }, { "epoch": 34.388489208633096, "grad_norm": 0.24385353922843933, "learning_rate": 3.1852844702427606e-05, "loss": 0.0418, "step": 38240 }, { "action_loss": 0.003549326444044709, "epoch": 34.388489208633096, "step": 38240 }, { "epoch": 34.388489208633096, "step": 38240, "torque_loss": 0.07724455744028091 }, { "epoch": 34.39748201438849, "grad_norm": 0.2522512972354889, "learning_rate": 3.18271687623241e-05, "loss": 0.0544, "step": 38250 }, { "action_loss": 0.00246493355371058, "epoch": 34.39748201438849, "step": 38250 }, { "epoch": 34.39748201438849, "step": 38250, "torque_loss": 0.06705322861671448 }, { "epoch": 34.406474820143885, "grad_norm": 0.24996976554393768, "learning_rate": 3.1801498342646896e-05, "loss": 0.0629, "step": 38260 }, { "action_loss": 0.0022079881746321917, "epoch": 34.406474820143885, "step": 38260 }, { "epoch": 34.406474820143885, "step": 38260, "torque_loss": 0.057330965995788574 }, { "epoch": 34.41546762589928, "grad_norm": 0.2864677906036377, "learning_rate": 3.177583345119398e-05, "loss": 0.055, "step": 38270 }, { "action_loss": 0.002011645818129182, "epoch": 34.41546762589928, "step": 38270 }, { "epoch": 34.41546762589928, "step": 38270, "torque_loss": 0.06761328130960464 }, { "epoch": 34.42446043165468, "grad_norm": 0.313671737909317, "learning_rate": 3.17501740957617e-05, "loss": 0.0443, "step": 38280 }, { "action_loss": 0.0015542618930339813, "epoch": 34.42446043165468, "step": 38280 }, { "epoch": 34.42446043165468, "step": 38280, "torque_loss": 0.038836266845464706 }, { "epoch": 34.43345323741007, "grad_norm": 0.31120648980140686, "learning_rate": 3.172452028414467e-05, "loss": 0.0527, "step": 38290 }, { "action_loss": 0.0024952711537480354, "epoch": 34.43345323741007, "step": 38290 }, { "epoch": 34.43345323741007, "step": 38290, "torque_loss": 0.07486492395401001 }, { "epoch": 34.44244604316547, "grad_norm": 0.3807530105113983, "learning_rate": 3.169887202413583e-05, "loss": 0.0584, "step": 38300 }, { "action_loss": 0.0030910249333828688, "epoch": 34.44244604316547, "step": 38300 }, { "epoch": 34.44244604316547, "step": 38300, "torque_loss": 0.060010362416505814 }, { "epoch": 34.451438848920866, "grad_norm": 0.33567866683006287, "learning_rate": 3.167322932352646e-05, "loss": 0.0591, "step": 38310 }, { "action_loss": 0.002499769674614072, "epoch": 34.451438848920866, "step": 38310 }, { "epoch": 34.451438848920866, "step": 38310, "torque_loss": 0.06106550619006157 }, { "epoch": 34.460431654676256, "grad_norm": 0.2295835018157959, "learning_rate": 3.164759219010613e-05, "loss": 0.0555, "step": 38320 }, { "action_loss": 0.00440480699762702, "epoch": 34.460431654676256, "step": 38320 }, { "epoch": 34.460431654676256, "step": 38320, "torque_loss": 0.0696580782532692 }, { "epoch": 34.469424460431654, "grad_norm": 0.2857149839401245, "learning_rate": 3.1621960631662725e-05, "loss": 0.0503, "step": 38330 }, { "action_loss": 0.006606314796954393, "epoch": 34.469424460431654, "step": 38330 }, { "epoch": 34.469424460431654, "step": 38330, "torque_loss": 0.06903756409883499 }, { "epoch": 34.47841726618705, "grad_norm": 0.31949424743652344, "learning_rate": 3.159633465598245e-05, "loss": 0.0643, "step": 38340 }, { "action_loss": 0.0033596239518374205, "epoch": 34.47841726618705, "step": 38340 }, { "epoch": 34.47841726618705, "step": 38340, "torque_loss": 0.12150028347969055 }, { "epoch": 34.48741007194245, "grad_norm": 0.26234522461891174, "learning_rate": 3.1570714270849767e-05, "loss": 0.0531, "step": 38350 }, { "action_loss": 0.0017751340055838227, "epoch": 34.48741007194245, "step": 38350 }, { "epoch": 34.48741007194245, "step": 38350, "torque_loss": 0.0576714389026165 }, { "epoch": 34.49640287769784, "grad_norm": 0.3870868980884552, "learning_rate": 3.1545099484047516e-05, "loss": 0.0534, "step": 38360 }, { "action_loss": 0.003070131642743945, "epoch": 34.49640287769784, "step": 38360 }, { "epoch": 34.49640287769784, "step": 38360, "torque_loss": 0.08530431985855103 }, { "epoch": 34.50539568345324, "grad_norm": 0.36740967631340027, "learning_rate": 3.151949030335674e-05, "loss": 0.0491, "step": 38370 }, { "action_loss": 0.002765512093901634, "epoch": 34.50539568345324, "step": 38370 }, { "epoch": 34.50539568345324, "step": 38370, "torque_loss": 0.09235131740570068 }, { "epoch": 34.514388489208635, "grad_norm": 0.23413190245628357, "learning_rate": 3.149388673655687e-05, "loss": 0.0625, "step": 38380 }, { "action_loss": 0.004500462207943201, "epoch": 34.514388489208635, "step": 38380 }, { "epoch": 34.514388489208635, "step": 38380, "torque_loss": 0.09061109274625778 }, { "epoch": 34.523381294964025, "grad_norm": 0.3468685746192932, "learning_rate": 3.146828879142559e-05, "loss": 0.059, "step": 38390 }, { "action_loss": 0.0012393933720886707, "epoch": 34.523381294964025, "step": 38390 }, { "epoch": 34.523381294964025, "step": 38390, "torque_loss": 0.04671938717365265 }, { "epoch": 34.53237410071942, "grad_norm": 0.27864938974380493, "learning_rate": 3.1442696475738866e-05, "loss": 0.0456, "step": 38400 }, { "action_loss": 0.0025607671122998, "epoch": 34.53237410071942, "step": 38400 }, { "epoch": 34.53237410071942, "step": 38400, "torque_loss": 0.07662006467580795 }, { "epoch": 34.54136690647482, "grad_norm": 0.30066680908203125, "learning_rate": 3.141710979727098e-05, "loss": 0.0494, "step": 38410 }, { "action_loss": 0.002034207107499242, "epoch": 34.54136690647482, "step": 38410 }, { "epoch": 34.54136690647482, "step": 38410, "torque_loss": 0.06000642105937004 }, { "epoch": 34.55035971223022, "grad_norm": 0.2657882571220398, "learning_rate": 3.139152876379447e-05, "loss": 0.0498, "step": 38420 }, { "action_loss": 0.002788831712678075, "epoch": 34.55035971223022, "step": 38420 }, { "epoch": 34.55035971223022, "step": 38420, "torque_loss": 0.0652516633272171 }, { "epoch": 34.55935251798561, "grad_norm": 0.23877167701721191, "learning_rate": 3.1365953383080214e-05, "loss": 0.0564, "step": 38430 }, { "action_loss": 0.0030814725905656815, "epoch": 34.55935251798561, "step": 38430 }, { "epoch": 34.55935251798561, "step": 38430, "torque_loss": 0.07545076310634613 }, { "epoch": 34.568345323741006, "grad_norm": 0.33889153599739075, "learning_rate": 3.134038366289731e-05, "loss": 0.0527, "step": 38440 }, { "action_loss": 0.0023057928774505854, "epoch": 34.568345323741006, "step": 38440 }, { "epoch": 34.568345323741006, "step": 38440, "torque_loss": 0.05812200531363487 }, { "epoch": 34.577338129496404, "grad_norm": 0.3007315397262573, "learning_rate": 3.131481961101317e-05, "loss": 0.0576, "step": 38450 }, { "action_loss": 0.003738507628440857, "epoch": 34.577338129496404, "step": 38450 }, { "epoch": 34.577338129496404, "step": 38450, "torque_loss": 0.07411739975214005 }, { "epoch": 34.5863309352518, "grad_norm": 0.2725914418697357, "learning_rate": 3.128926123519349e-05, "loss": 0.0583, "step": 38460 }, { "action_loss": 0.001572663546539843, "epoch": 34.5863309352518, "step": 38460 }, { "epoch": 34.5863309352518, "step": 38460, "torque_loss": 0.051688533276319504 }, { "epoch": 34.59532374100719, "grad_norm": 0.18214206397533417, "learning_rate": 3.1263708543202194e-05, "loss": 0.041, "step": 38470 }, { "action_loss": 0.0032720293384045362, "epoch": 34.59532374100719, "step": 38470 }, { "epoch": 34.59532374100719, "step": 38470, "torque_loss": 0.08228351920843124 }, { "epoch": 34.60431654676259, "grad_norm": 0.27113115787506104, "learning_rate": 3.123816154280155e-05, "loss": 0.0616, "step": 38480 }, { "action_loss": 0.004181068856269121, "epoch": 34.60431654676259, "step": 38480 }, { "epoch": 34.60431654676259, "step": 38480, "torque_loss": 0.09741243720054626 }, { "epoch": 34.61330935251799, "grad_norm": 0.2606617510318756, "learning_rate": 3.121262024175207e-05, "loss": 0.0568, "step": 38490 }, { "action_loss": 0.006382405757904053, "epoch": 34.61330935251799, "step": 38490 }, { "epoch": 34.61330935251799, "step": 38490, "torque_loss": 0.07982641458511353 }, { "epoch": 34.62230215827338, "grad_norm": 0.26984703540802, "learning_rate": 3.118708464781248e-05, "loss": 0.0573, "step": 38500 }, { "action_loss": 0.0028731580823659897, "epoch": 34.62230215827338, "step": 38500 }, { "epoch": 34.62230215827338, "step": 38500, "torque_loss": 0.07023695111274719 }, { "epoch": 34.631294964028775, "grad_norm": 0.2802664041519165, "learning_rate": 3.116155476873987e-05, "loss": 0.0591, "step": 38510 }, { "action_loss": 0.0016165176639333367, "epoch": 34.631294964028775, "step": 38510 }, { "epoch": 34.631294964028775, "step": 38510, "torque_loss": 0.04895497485995293 }, { "epoch": 34.64028776978417, "grad_norm": 0.3627713918685913, "learning_rate": 3.11360306122895e-05, "loss": 0.0547, "step": 38520 }, { "action_loss": 0.007515324279665947, "epoch": 34.64028776978417, "step": 38520 }, { "epoch": 34.64028776978417, "step": 38520, "torque_loss": 0.08323487639427185 }, { "epoch": 34.64928057553957, "grad_norm": 0.3157113194465637, "learning_rate": 3.1110512186214975e-05, "loss": 0.0519, "step": 38530 }, { "action_loss": 0.0016718622064217925, "epoch": 34.64928057553957, "step": 38530 }, { "epoch": 34.64928057553957, "step": 38530, "torque_loss": 0.06725048273801804 }, { "epoch": 34.65827338129496, "grad_norm": 0.2979092299938202, "learning_rate": 3.1084999498268095e-05, "loss": 0.0518, "step": 38540 }, { "action_loss": 0.010454676114022732, "epoch": 34.65827338129496, "step": 38540 }, { "epoch": 34.65827338129496, "step": 38540, "torque_loss": 0.10201100260019302 }, { "epoch": 34.66726618705036, "grad_norm": 0.3535771071910858, "learning_rate": 3.1059492556198934e-05, "loss": 0.0583, "step": 38550 }, { "action_loss": 0.0038590419571846724, "epoch": 34.66726618705036, "step": 38550 }, { "epoch": 34.66726618705036, "step": 38550, "torque_loss": 0.09129321575164795 }, { "epoch": 34.67625899280576, "grad_norm": 0.3120175004005432, "learning_rate": 3.103399136775586e-05, "loss": 0.0578, "step": 38560 }, { "action_loss": 0.006797898560762405, "epoch": 34.67625899280576, "step": 38560 }, { "epoch": 34.67625899280576, "step": 38560, "torque_loss": 0.10978253930807114 }, { "epoch": 34.685251798561154, "grad_norm": 0.5327492952346802, "learning_rate": 3.100849594068541e-05, "loss": 0.0646, "step": 38570 }, { "action_loss": 0.001547367312014103, "epoch": 34.685251798561154, "step": 38570 }, { "epoch": 34.685251798561154, "step": 38570, "torque_loss": 0.05124242976307869 }, { "epoch": 34.694244604316545, "grad_norm": 0.3057284653186798, "learning_rate": 3.0983006282732484e-05, "loss": 0.0526, "step": 38580 }, { "action_loss": 0.0021866855677217245, "epoch": 34.694244604316545, "step": 38580 }, { "epoch": 34.694244604316545, "step": 38580, "torque_loss": 0.08130775392055511 }, { "epoch": 34.70323741007194, "grad_norm": 0.2794843912124634, "learning_rate": 3.0957522401640116e-05, "loss": 0.0546, "step": 38590 }, { "action_loss": 0.001964566297829151, "epoch": 34.70323741007194, "step": 38590 }, { "epoch": 34.70323741007194, "step": 38590, "torque_loss": 0.048683252185583115 }, { "epoch": 34.71223021582734, "grad_norm": 0.2996554970741272, "learning_rate": 3.0932044305149645e-05, "loss": 0.045, "step": 38600 }, { "action_loss": 0.0023852342274039984, "epoch": 34.71223021582734, "step": 38600 }, { "epoch": 34.71223021582734, "step": 38600, "torque_loss": 0.06799871474504471 }, { "epoch": 34.72122302158273, "grad_norm": 0.3081621527671814, "learning_rate": 3.090657200100068e-05, "loss": 0.0494, "step": 38610 }, { "action_loss": 0.0032036944758147, "epoch": 34.72122302158273, "step": 38610 }, { "epoch": 34.72122302158273, "step": 38610, "torque_loss": 0.07668334990739822 }, { "epoch": 34.73021582733813, "grad_norm": 0.2992580235004425, "learning_rate": 3.088110549693099e-05, "loss": 0.0534, "step": 38620 }, { "action_loss": 0.0022156520280987024, "epoch": 34.73021582733813, "step": 38620 }, { "epoch": 34.73021582733813, "step": 38620, "torque_loss": 0.08156847953796387 }, { "epoch": 34.739208633093526, "grad_norm": 0.3013323247432709, "learning_rate": 3.085564480067667e-05, "loss": 0.0479, "step": 38630 }, { "action_loss": 0.0012966833310201764, "epoch": 34.739208633093526, "step": 38630 }, { "epoch": 34.739208633093526, "step": 38630, "torque_loss": 0.05807584524154663 }, { "epoch": 34.74820143884892, "grad_norm": 0.34622272849082947, "learning_rate": 3.0830189919971955e-05, "loss": 0.0433, "step": 38640 }, { "action_loss": 0.001347453915514052, "epoch": 34.74820143884892, "step": 38640 }, { "epoch": 34.74820143884892, "step": 38640, "torque_loss": 0.049444448202848434 }, { "epoch": 34.757194244604314, "grad_norm": 0.39167529344558716, "learning_rate": 3.080474086254939e-05, "loss": 0.0516, "step": 38650 }, { "action_loss": 0.0021096384152770042, "epoch": 34.757194244604314, "step": 38650 }, { "epoch": 34.757194244604314, "step": 38650, "torque_loss": 0.05581964552402496 }, { "epoch": 34.76618705035971, "grad_norm": 0.2505802810192108, "learning_rate": 3.077929763613975e-05, "loss": 0.0538, "step": 38660 }, { "action_loss": 0.002220290480181575, "epoch": 34.76618705035971, "step": 38660 }, { "epoch": 34.76618705035971, "step": 38660, "torque_loss": 0.0734400674700737 }, { "epoch": 34.77517985611511, "grad_norm": 0.23799997568130493, "learning_rate": 3.075386024847198e-05, "loss": 0.058, "step": 38670 }, { "action_loss": 0.004777266178280115, "epoch": 34.77517985611511, "step": 38670 }, { "epoch": 34.77517985611511, "step": 38670, "torque_loss": 0.10725895315408707 }, { "epoch": 34.78417266187051, "grad_norm": 0.2568604052066803, "learning_rate": 3.072842870727331e-05, "loss": 0.0498, "step": 38680 }, { "action_loss": 0.001143697532825172, "epoch": 34.78417266187051, "step": 38680 }, { "epoch": 34.78417266187051, "step": 38680, "torque_loss": 0.04767514392733574 }, { "epoch": 34.7931654676259, "grad_norm": 0.23969541490077972, "learning_rate": 3.070300302026916e-05, "loss": 0.0481, "step": 38690 }, { "action_loss": 0.0035183809231966734, "epoch": 34.7931654676259, "step": 38690 }, { "epoch": 34.7931654676259, "step": 38690, "torque_loss": 0.05847584083676338 }, { "epoch": 34.802158273381295, "grad_norm": 0.35971856117248535, "learning_rate": 3.067758319518318e-05, "loss": 0.0596, "step": 38700 }, { "action_loss": 0.013200894929468632, "epoch": 34.802158273381295, "step": 38700 }, { "epoch": 34.802158273381295, "step": 38700, "torque_loss": 0.12978792190551758 }, { "epoch": 34.81115107913669, "grad_norm": 0.24976935982704163, "learning_rate": 3.065216923973725e-05, "loss": 0.0494, "step": 38710 }, { "action_loss": 0.0023246898781508207, "epoch": 34.81115107913669, "step": 38710 }, { "epoch": 34.81115107913669, "step": 38710, "torque_loss": 0.09353815764188766 }, { "epoch": 34.82014388489208, "grad_norm": 0.33583784103393555, "learning_rate": 3.062676116165145e-05, "loss": 0.0467, "step": 38720 }, { "action_loss": 0.0036099704448133707, "epoch": 34.82014388489208, "step": 38720 }, { "epoch": 34.82014388489208, "step": 38720, "torque_loss": 0.09338852018117905 }, { "epoch": 34.82913669064748, "grad_norm": 0.2990938723087311, "learning_rate": 3.06013589686441e-05, "loss": 0.0491, "step": 38730 }, { "action_loss": 0.0015563304768875241, "epoch": 34.82913669064748, "step": 38730 }, { "epoch": 34.82913669064748, "step": 38730, "torque_loss": 0.04197133705019951 }, { "epoch": 34.83812949640288, "grad_norm": 0.19474823772907257, "learning_rate": 3.05759626684317e-05, "loss": 0.0525, "step": 38740 }, { "action_loss": 0.004877304192632437, "epoch": 34.83812949640288, "step": 38740 }, { "epoch": 34.83812949640288, "step": 38740, "torque_loss": 0.11896101385354996 }, { "epoch": 34.847122302158276, "grad_norm": 0.24160122871398926, "learning_rate": 3.055057226872896e-05, "loss": 0.0505, "step": 38750 }, { "action_loss": 0.010972167365252972, "epoch": 34.847122302158276, "step": 38750 }, { "epoch": 34.847122302158276, "step": 38750, "torque_loss": 0.16986744105815887 }, { "epoch": 34.856115107913666, "grad_norm": 0.33486276865005493, "learning_rate": 3.052518777724887e-05, "loss": 0.0561, "step": 38760 }, { "action_loss": 0.003235258860513568, "epoch": 34.856115107913666, "step": 38760 }, { "epoch": 34.856115107913666, "step": 38760, "torque_loss": 0.07118397206068039 }, { "epoch": 34.865107913669064, "grad_norm": 0.2198241651058197, "learning_rate": 3.04998092017025e-05, "loss": 0.0476, "step": 38770 }, { "action_loss": 0.012340432964265347, "epoch": 34.865107913669064, "step": 38770 }, { "epoch": 34.865107913669064, "step": 38770, "torque_loss": 0.13467474281787872 }, { "epoch": 34.87410071942446, "grad_norm": 0.29888996481895447, "learning_rate": 3.0474436549799246e-05, "loss": 0.0631, "step": 38780 }, { "action_loss": 0.0023053076583892107, "epoch": 34.87410071942446, "step": 38780 }, { "epoch": 34.87410071942446, "step": 38780, "torque_loss": 0.0722636952996254 }, { "epoch": 34.88309352517986, "grad_norm": 0.26408651471138, "learning_rate": 3.044906982924661e-05, "loss": 0.0561, "step": 38790 }, { "action_loss": 0.001690192031674087, "epoch": 34.88309352517986, "step": 38790 }, { "epoch": 34.88309352517986, "step": 38790, "torque_loss": 0.0662146583199501 }, { "epoch": 34.89208633093525, "grad_norm": 0.2669190466403961, "learning_rate": 3.0423709047750337e-05, "loss": 0.0505, "step": 38800 }, { "action_loss": 0.0017946549924090505, "epoch": 34.89208633093525, "step": 38800 }, { "epoch": 34.89208633093525, "step": 38800, "torque_loss": 0.040657129138708115 }, { "epoch": 34.90107913669065, "grad_norm": 0.37812933325767517, "learning_rate": 3.03983542130144e-05, "loss": 0.0532, "step": 38810 }, { "action_loss": 0.0037097346503287554, "epoch": 34.90107913669065, "step": 38810 }, { "epoch": 34.90107913669065, "step": 38810, "torque_loss": 0.10589317232370377 }, { "epoch": 34.910071942446045, "grad_norm": 0.3293079137802124, "learning_rate": 3.0373005332740877e-05, "loss": 0.0657, "step": 38820 }, { "action_loss": 0.0039398702792823315, "epoch": 34.910071942446045, "step": 38820 }, { "epoch": 34.910071942446045, "step": 38820, "torque_loss": 0.09099968522787094 }, { "epoch": 34.919064748201436, "grad_norm": 0.24090509116649628, "learning_rate": 3.034766241463013e-05, "loss": 0.053, "step": 38830 }, { "action_loss": 0.001289478619582951, "epoch": 34.919064748201436, "step": 38830 }, { "epoch": 34.919064748201436, "step": 38830, "torque_loss": 0.04339016601443291 }, { "epoch": 34.92805755395683, "grad_norm": 0.28679075837135315, "learning_rate": 3.032232546638064e-05, "loss": 0.045, "step": 38840 }, { "action_loss": 0.004168445710092783, "epoch": 34.92805755395683, "step": 38840 }, { "epoch": 34.92805755395683, "step": 38840, "torque_loss": 0.09062165766954422 }, { "epoch": 34.93705035971223, "grad_norm": 0.3355144262313843, "learning_rate": 3.0296994495689114e-05, "loss": 0.0475, "step": 38850 }, { "action_loss": 0.009954315610229969, "epoch": 34.93705035971223, "step": 38850 }, { "epoch": 34.93705035971223, "step": 38850, "torque_loss": 0.1604011058807373 }, { "epoch": 34.94604316546763, "grad_norm": 0.2800748348236084, "learning_rate": 3.0271669510250444e-05, "loss": 0.0571, "step": 38860 }, { "action_loss": 0.0016661611152812839, "epoch": 34.94604316546763, "step": 38860 }, { "epoch": 34.94604316546763, "step": 38860, "torque_loss": 0.04229286313056946 }, { "epoch": 34.95503597122302, "grad_norm": 0.2197420299053192, "learning_rate": 3.024635051775766e-05, "loss": 0.0479, "step": 38870 }, { "action_loss": 0.0019368970533832908, "epoch": 34.95503597122302, "step": 38870 }, { "epoch": 34.95503597122302, "step": 38870, "torque_loss": 0.05749806761741638 }, { "epoch": 34.96402877697842, "grad_norm": 0.2809690237045288, "learning_rate": 3.022103752590205e-05, "loss": 0.0558, "step": 38880 }, { "action_loss": 0.003845377592369914, "epoch": 34.96402877697842, "step": 38880 }, { "epoch": 34.96402877697842, "step": 38880, "torque_loss": 0.058661241084337234 }, { "epoch": 34.973021582733814, "grad_norm": 0.22352029383182526, "learning_rate": 3.0195730542372992e-05, "loss": 0.0558, "step": 38890 }, { "action_loss": 0.0065893749706447124, "epoch": 34.973021582733814, "step": 38890 }, { "epoch": 34.973021582733814, "step": 38890, "torque_loss": 0.12281852215528488 }, { "epoch": 34.98201438848921, "grad_norm": 0.2678719460964203, "learning_rate": 3.0170429574858084e-05, "loss": 0.0448, "step": 38900 }, { "action_loss": 0.007010933477431536, "epoch": 34.98201438848921, "step": 38900 }, { "epoch": 34.98201438848921, "step": 38900, "torque_loss": 0.09836983680725098 }, { "epoch": 34.9910071942446, "grad_norm": 0.20518237352371216, "learning_rate": 3.0145134631043127e-05, "loss": 0.048, "step": 38910 }, { "action_loss": 0.0026785265654325485, "epoch": 34.9910071942446, "step": 38910 }, { "epoch": 34.9910071942446, "step": 38910, "torque_loss": 0.06681307405233383 }, { "epoch": 35.0, "grad_norm": 0.33923155069351196, "learning_rate": 3.0119845718612018e-05, "loss": 0.0468, "step": 38920 }, { "action_loss": 0.0023083521518856287, "epoch": 35.0, "step": 38920 }, { "epoch": 35.0, "step": 38920, "torque_loss": 0.0618797205388546 }, { "epoch": 35.0089928057554, "grad_norm": 0.4110211133956909, "learning_rate": 3.009456284524688e-05, "loss": 0.0597, "step": 38930 }, { "action_loss": 0.005606494378298521, "epoch": 35.0089928057554, "step": 38930 }, { "epoch": 35.0089928057554, "step": 38930, "torque_loss": 0.09061869233846664 }, { "epoch": 35.01798561151079, "grad_norm": 0.21688592433929443, "learning_rate": 3.0069286018627967e-05, "loss": 0.0545, "step": 38940 }, { "action_loss": 0.005285936873406172, "epoch": 35.01798561151079, "step": 38940 }, { "epoch": 35.01798561151079, "step": 38940, "torque_loss": 0.10877611488103867 }, { "epoch": 35.026978417266186, "grad_norm": 0.23105934262275696, "learning_rate": 3.0044015246433743e-05, "loss": 0.0663, "step": 38950 }, { "action_loss": 0.010837581939995289, "epoch": 35.026978417266186, "step": 38950 }, { "epoch": 35.026978417266186, "step": 38950, "torque_loss": 0.12946277856826782 }, { "epoch": 35.03597122302158, "grad_norm": 0.32567575573921204, "learning_rate": 3.0018750536340755e-05, "loss": 0.0535, "step": 38960 }, { "action_loss": 0.021463856101036072, "epoch": 35.03597122302158, "step": 38960 }, { "epoch": 35.03597122302158, "step": 38960, "torque_loss": 0.18757779896259308 }, { "epoch": 35.04496402877698, "grad_norm": 0.2596427798271179, "learning_rate": 2.999349189602378e-05, "loss": 0.0859, "step": 38970 }, { "action_loss": 0.0018907474586740136, "epoch": 35.04496402877698, "step": 38970 }, { "epoch": 35.04496402877698, "step": 38970, "torque_loss": 0.04068891704082489 }, { "epoch": 35.05395683453237, "grad_norm": 0.27674704790115356, "learning_rate": 2.9968239333155733e-05, "loss": 0.0462, "step": 38980 }, { "action_loss": 0.010119394399225712, "epoch": 35.05395683453237, "step": 38980 }, { "epoch": 35.05395683453237, "step": 38980, "torque_loss": 0.09141095727682114 }, { "epoch": 35.06294964028777, "grad_norm": 0.2225031554698944, "learning_rate": 2.994299285540767e-05, "loss": 0.0521, "step": 38990 }, { "action_loss": 0.0011943575227633119, "epoch": 35.06294964028777, "step": 38990 }, { "epoch": 35.06294964028777, "step": 38990, "torque_loss": 0.041404519230127335 }, { "epoch": 35.07194244604317, "grad_norm": 0.3242126405239105, "learning_rate": 2.9917752470448813e-05, "loss": 0.0544, "step": 39000 }, { "action_loss": 0.0018637696048244834, "epoch": 35.07194244604317, "step": 39000 }, { "epoch": 35.07194244604317, "step": 39000, "torque_loss": 0.050159797072410583 }, { "epoch": 35.080935251798564, "grad_norm": 0.2363380640745163, "learning_rate": 2.9892518185946495e-05, "loss": 0.0468, "step": 39010 }, { "action_loss": 0.003272888483479619, "epoch": 35.080935251798564, "step": 39010 }, { "epoch": 35.080935251798564, "step": 39010, "torque_loss": 0.06901510804891586 }, { "epoch": 35.089928057553955, "grad_norm": 0.2781916856765747, "learning_rate": 2.986729000956624e-05, "loss": 0.0568, "step": 39020 }, { "action_loss": 0.005960026755928993, "epoch": 35.089928057553955, "step": 39020 }, { "epoch": 35.089928057553955, "step": 39020, "torque_loss": 0.08732350915670395 }, { "epoch": 35.09892086330935, "grad_norm": 0.28933387994766235, "learning_rate": 2.9842067948971736e-05, "loss": 0.055, "step": 39030 }, { "action_loss": 0.001988996984437108, "epoch": 35.09892086330935, "step": 39030 }, { "epoch": 35.09892086330935, "step": 39030, "torque_loss": 0.07515210658311844 }, { "epoch": 35.10791366906475, "grad_norm": 0.2521302103996277, "learning_rate": 2.9816852011824727e-05, "loss": 0.0441, "step": 39040 }, { "action_loss": 0.0016894437139853835, "epoch": 35.10791366906475, "step": 39040 }, { "epoch": 35.10791366906475, "step": 39040, "torque_loss": 0.08108695596456528 }, { "epoch": 35.11690647482014, "grad_norm": 0.21283851563930511, "learning_rate": 2.979164220578519e-05, "loss": 0.0567, "step": 39050 }, { "action_loss": 0.0013162953546270728, "epoch": 35.11690647482014, "step": 39050 }, { "epoch": 35.11690647482014, "step": 39050, "torque_loss": 0.05106915161013603 }, { "epoch": 35.12589928057554, "grad_norm": 0.34390076994895935, "learning_rate": 2.9766438538511165e-05, "loss": 0.0548, "step": 39060 }, { "action_loss": 0.0023420408833771944, "epoch": 35.12589928057554, "step": 39060 }, { "epoch": 35.12589928057554, "step": 39060, "torque_loss": 0.05675308406352997 }, { "epoch": 35.134892086330936, "grad_norm": 0.2743828594684601, "learning_rate": 2.9741241017658873e-05, "loss": 0.0491, "step": 39070 }, { "action_loss": 0.0029054787009954453, "epoch": 35.134892086330936, "step": 39070 }, { "epoch": 35.134892086330936, "step": 39070, "torque_loss": 0.08222448825836182 }, { "epoch": 35.143884892086334, "grad_norm": 0.2878972291946411, "learning_rate": 2.971604965088267e-05, "loss": 0.0523, "step": 39080 }, { "action_loss": 0.0036751972511410713, "epoch": 35.143884892086334, "step": 39080 }, { "epoch": 35.143884892086334, "step": 39080, "torque_loss": 0.053437769412994385 }, { "epoch": 35.152877697841724, "grad_norm": 0.2281772494316101, "learning_rate": 2.9690864445835008e-05, "loss": 0.0462, "step": 39090 }, { "action_loss": 0.007134587038308382, "epoch": 35.152877697841724, "step": 39090 }, { "epoch": 35.152877697841724, "step": 39090, "torque_loss": 0.07925034314393997 }, { "epoch": 35.16187050359712, "grad_norm": 0.41007092595100403, "learning_rate": 2.966568541016651e-05, "loss": 0.0665, "step": 39100 }, { "action_loss": 0.0013106971746310592, "epoch": 35.16187050359712, "step": 39100 }, { "epoch": 35.16187050359712, "step": 39100, "torque_loss": 0.044105369597673416 }, { "epoch": 35.17086330935252, "grad_norm": 0.29020464420318604, "learning_rate": 2.9640512551525867e-05, "loss": 0.0556, "step": 39110 }, { "action_loss": 0.0024503336753696203, "epoch": 35.17086330935252, "step": 39110 }, { "epoch": 35.17086330935252, "step": 39110, "torque_loss": 0.08698854595422745 }, { "epoch": 35.17985611510792, "grad_norm": 0.2974553108215332, "learning_rate": 2.961534587755995e-05, "loss": 0.0546, "step": 39120 }, { "action_loss": 0.0030016840901225805, "epoch": 35.17985611510792, "step": 39120 }, { "epoch": 35.17985611510792, "step": 39120, "torque_loss": 0.08910578489303589 }, { "epoch": 35.18884892086331, "grad_norm": 0.30446434020996094, "learning_rate": 2.959018539591375e-05, "loss": 0.0639, "step": 39130 }, { "action_loss": 0.004359475802630186, "epoch": 35.18884892086331, "step": 39130 }, { "epoch": 35.18884892086331, "step": 39130, "torque_loss": 0.08890210837125778 }, { "epoch": 35.197841726618705, "grad_norm": 0.3289213478565216, "learning_rate": 2.9565031114230325e-05, "loss": 0.0549, "step": 39140 }, { "action_loss": 0.003926787059754133, "epoch": 35.197841726618705, "step": 39140 }, { "epoch": 35.197841726618705, "step": 39140, "torque_loss": 0.07387750595808029 }, { "epoch": 35.2068345323741, "grad_norm": 0.3056769073009491, "learning_rate": 2.9539883040150895e-05, "loss": 0.0507, "step": 39150 }, { "action_loss": 0.0035679449792951345, "epoch": 35.2068345323741, "step": 39150 }, { "epoch": 35.2068345323741, "step": 39150, "torque_loss": 0.09238455444574356 }, { "epoch": 35.21582733812949, "grad_norm": 0.25227952003479004, "learning_rate": 2.9514741181314774e-05, "loss": 0.0572, "step": 39160 }, { "action_loss": 0.004289575386792421, "epoch": 35.21582733812949, "step": 39160 }, { "epoch": 35.21582733812949, "step": 39160, "torque_loss": 0.09730847924947739 }, { "epoch": 35.22482014388489, "grad_norm": 0.28557685017585754, "learning_rate": 2.94896055453594e-05, "loss": 0.0396, "step": 39170 }, { "action_loss": 0.0019596421625465155, "epoch": 35.22482014388489, "step": 39170 }, { "epoch": 35.22482014388489, "step": 39170, "torque_loss": 0.058082204312086105 }, { "epoch": 35.23381294964029, "grad_norm": 0.23024237155914307, "learning_rate": 2.9464476139920332e-05, "loss": 0.0488, "step": 39180 }, { "action_loss": 0.005247674882411957, "epoch": 35.23381294964029, "step": 39180 }, { "epoch": 35.23381294964029, "step": 39180, "torque_loss": 0.0967283844947815 }, { "epoch": 35.242805755395686, "grad_norm": 0.3448805809020996, "learning_rate": 2.9439352972631186e-05, "loss": 0.0406, "step": 39190 }, { "action_loss": 0.005998195614665747, "epoch": 35.242805755395686, "step": 39190 }, { "epoch": 35.242805755395686, "step": 39190, "torque_loss": 0.11218118667602539 }, { "epoch": 35.25179856115108, "grad_norm": 0.24258919060230255, "learning_rate": 2.9414236051123757e-05, "loss": 0.0528, "step": 39200 }, { "action_loss": 0.0017895487835630774, "epoch": 35.25179856115108, "step": 39200 }, { "epoch": 35.25179856115108, "step": 39200, "torque_loss": 0.06704065203666687 }, { "epoch": 35.260791366906474, "grad_norm": 0.27932196855545044, "learning_rate": 2.938912538302785e-05, "loss": 0.0481, "step": 39210 }, { "action_loss": 0.003033614717423916, "epoch": 35.260791366906474, "step": 39210 }, { "epoch": 35.260791366906474, "step": 39210, "torque_loss": 0.0940064787864685 }, { "epoch": 35.26978417266187, "grad_norm": 0.4576978385448456, "learning_rate": 2.9364020975971464e-05, "loss": 0.0434, "step": 39220 }, { "action_loss": 0.001869362429715693, "epoch": 35.26978417266187, "step": 39220 }, { "epoch": 35.26978417266187, "step": 39220, "torque_loss": 0.08103983849287033 }, { "epoch": 35.27877697841727, "grad_norm": 0.3051412105560303, "learning_rate": 2.9338922837580657e-05, "loss": 0.0534, "step": 39230 }, { "action_loss": 0.004068082198500633, "epoch": 35.27877697841727, "step": 39230 }, { "epoch": 35.27877697841727, "step": 39230, "torque_loss": 0.07556071132421494 }, { "epoch": 35.28776978417266, "grad_norm": 0.3033774495124817, "learning_rate": 2.931383097547955e-05, "loss": 0.0539, "step": 39240 }, { "action_loss": 0.007048665080219507, "epoch": 35.28776978417266, "step": 39240 }, { "epoch": 35.28776978417266, "step": 39240, "torque_loss": 0.12211830168962479 }, { "epoch": 35.29676258992806, "grad_norm": 0.39261990785598755, "learning_rate": 2.928874539729043e-05, "loss": 0.057, "step": 39250 }, { "action_loss": 0.003163845045492053, "epoch": 35.29676258992806, "step": 39250 }, { "epoch": 35.29676258992806, "step": 39250, "torque_loss": 0.06300396472215652 }, { "epoch": 35.305755395683455, "grad_norm": 0.3206816613674164, "learning_rate": 2.926366611063358e-05, "loss": 0.0458, "step": 39260 }, { "action_loss": 0.0053705014288425446, "epoch": 35.305755395683455, "step": 39260 }, { "epoch": 35.305755395683455, "step": 39260, "torque_loss": 0.09789454936981201 }, { "epoch": 35.314748201438846, "grad_norm": 0.24568654596805573, "learning_rate": 2.9238593123127463e-05, "loss": 0.05, "step": 39270 }, { "action_loss": 0.0025020402390509844, "epoch": 35.314748201438846, "step": 39270 }, { "epoch": 35.314748201438846, "step": 39270, "torque_loss": 0.0451844185590744 }, { "epoch": 35.32374100719424, "grad_norm": 0.34569206833839417, "learning_rate": 2.9213526442388583e-05, "loss": 0.0418, "step": 39280 }, { "action_loss": 0.0012809688923880458, "epoch": 35.32374100719424, "step": 39280 }, { "epoch": 35.32374100719424, "step": 39280, "torque_loss": 0.060299623757600784 }, { "epoch": 35.33273381294964, "grad_norm": 0.24298079311847687, "learning_rate": 2.9188466076031545e-05, "loss": 0.0441, "step": 39290 }, { "action_loss": 0.0018490668153390288, "epoch": 35.33273381294964, "step": 39290 }, { "epoch": 35.33273381294964, "step": 39290, "torque_loss": 0.06250990182161331 }, { "epoch": 35.34172661870504, "grad_norm": 0.27215972542762756, "learning_rate": 2.9163412031669012e-05, "loss": 0.0477, "step": 39300 }, { "action_loss": 0.004573144018650055, "epoch": 35.34172661870504, "step": 39300 }, { "epoch": 35.34172661870504, "step": 39300, "torque_loss": 0.0998997688293457 }, { "epoch": 35.35071942446043, "grad_norm": 0.32827481627464294, "learning_rate": 2.913836431691175e-05, "loss": 0.0607, "step": 39310 }, { "action_loss": 0.0008643201435916126, "epoch": 35.35071942446043, "step": 39310 }, { "epoch": 35.35071942446043, "step": 39310, "torque_loss": 0.04410867765545845 }, { "epoch": 35.35971223021583, "grad_norm": 0.2964474856853485, "learning_rate": 2.9113322939368583e-05, "loss": 0.0615, "step": 39320 }, { "action_loss": 0.002544696442782879, "epoch": 35.35971223021583, "step": 39320 }, { "epoch": 35.35971223021583, "step": 39320, "torque_loss": 0.052108000963926315 }, { "epoch": 35.368705035971225, "grad_norm": 0.557138204574585, "learning_rate": 2.9088287906646427e-05, "loss": 0.0626, "step": 39330 }, { "action_loss": 0.005983924958854914, "epoch": 35.368705035971225, "step": 39330 }, { "epoch": 35.368705035971225, "step": 39330, "torque_loss": 0.12492441385984421 }, { "epoch": 35.37769784172662, "grad_norm": 0.3065059185028076, "learning_rate": 2.906325922635024e-05, "loss": 0.0555, "step": 39340 }, { "action_loss": 0.0010930727003142238, "epoch": 35.37769784172662, "step": 39340 }, { "epoch": 35.37769784172662, "step": 39340, "torque_loss": 0.055405180901288986 }, { "epoch": 35.38669064748201, "grad_norm": 0.3170100748538971, "learning_rate": 2.903823690608313e-05, "loss": 0.0401, "step": 39350 }, { "action_loss": 0.003669436788186431, "epoch": 35.38669064748201, "step": 39350 }, { "epoch": 35.38669064748201, "step": 39350, "torque_loss": 0.07924158126115799 }, { "epoch": 35.39568345323741, "grad_norm": 0.2805895507335663, "learning_rate": 2.9013220953446174e-05, "loss": 0.0512, "step": 39360 }, { "action_loss": 0.0023347302339971066, "epoch": 35.39568345323741, "step": 39360 }, { "epoch": 35.39568345323741, "step": 39360, "torque_loss": 0.057221412658691406 }, { "epoch": 35.40467625899281, "grad_norm": 0.3034849762916565, "learning_rate": 2.8988211376038564e-05, "loss": 0.046, "step": 39370 }, { "action_loss": 0.00150105485226959, "epoch": 35.40467625899281, "step": 39370 }, { "epoch": 35.40467625899281, "step": 39370, "torque_loss": 0.0801461860537529 }, { "epoch": 35.4136690647482, "grad_norm": 0.2527206242084503, "learning_rate": 2.8963208181457564e-05, "loss": 0.0534, "step": 39380 }, { "action_loss": 0.02687050960958004, "epoch": 35.4136690647482, "step": 39380 }, { "epoch": 35.4136690647482, "step": 39380, "torque_loss": 0.10441833734512329 }, { "epoch": 35.422661870503596, "grad_norm": 0.2088402807712555, "learning_rate": 2.8938211377298453e-05, "loss": 0.0596, "step": 39390 }, { "action_loss": 0.0037989020347595215, "epoch": 35.422661870503596, "step": 39390 }, { "epoch": 35.422661870503596, "step": 39390, "torque_loss": 0.08969030529260635 }, { "epoch": 35.431654676258994, "grad_norm": 0.34175926446914673, "learning_rate": 2.8913220971154652e-05, "loss": 0.0635, "step": 39400 }, { "action_loss": 0.006289734970778227, "epoch": 35.431654676258994, "step": 39400 }, { "epoch": 35.431654676258994, "step": 39400, "torque_loss": 0.08305517584085464 }, { "epoch": 35.44064748201439, "grad_norm": 0.19793739914894104, "learning_rate": 2.888823697061753e-05, "loss": 0.0449, "step": 39410 }, { "action_loss": 0.003231540322303772, "epoch": 35.44064748201439, "step": 39410 }, { "epoch": 35.44064748201439, "step": 39410, "torque_loss": 0.0671200156211853 }, { "epoch": 35.44964028776978, "grad_norm": 0.24881984293460846, "learning_rate": 2.8863259383276618e-05, "loss": 0.0523, "step": 39420 }, { "action_loss": 0.005634593311697245, "epoch": 35.44964028776978, "step": 39420 }, { "epoch": 35.44964028776978, "step": 39420, "torque_loss": 0.09448012709617615 }, { "epoch": 35.45863309352518, "grad_norm": 0.4860468804836273, "learning_rate": 2.8838288216719395e-05, "loss": 0.0616, "step": 39430 }, { "action_loss": 0.005086854565888643, "epoch": 35.45863309352518, "step": 39430 }, { "epoch": 35.45863309352518, "step": 39430, "torque_loss": 0.08472660183906555 }, { "epoch": 35.46762589928058, "grad_norm": 0.3677115738391876, "learning_rate": 2.8813323478531484e-05, "loss": 0.0472, "step": 39440 }, { "action_loss": 0.0021078467834740877, "epoch": 35.46762589928058, "step": 39440 }, { "epoch": 35.46762589928058, "step": 39440, "torque_loss": 0.06446368247270584 }, { "epoch": 35.476618705035975, "grad_norm": 0.3234059810638428, "learning_rate": 2.8788365176296496e-05, "loss": 0.0649, "step": 39450 }, { "action_loss": 0.0028734728693962097, "epoch": 35.476618705035975, "step": 39450 }, { "epoch": 35.476618705035975, "step": 39450, "torque_loss": 0.0781412422657013 }, { "epoch": 35.485611510791365, "grad_norm": 0.3224029839038849, "learning_rate": 2.876341331759611e-05, "loss": 0.0436, "step": 39460 }, { "action_loss": 0.004972123075276613, "epoch": 35.485611510791365, "step": 39460 }, { "epoch": 35.485611510791365, "step": 39460, "torque_loss": 0.06509286910295486 }, { "epoch": 35.49460431654676, "grad_norm": 0.23750536143779755, "learning_rate": 2.8738467910010036e-05, "loss": 0.0562, "step": 39470 }, { "action_loss": 0.006143095437437296, "epoch": 35.49460431654676, "step": 39470 }, { "epoch": 35.49460431654676, "step": 39470, "torque_loss": 0.0758875235915184 }, { "epoch": 35.50359712230216, "grad_norm": 0.3400069773197174, "learning_rate": 2.8713528961116032e-05, "loss": 0.0549, "step": 39480 }, { "action_loss": 0.012766629457473755, "epoch": 35.50359712230216, "step": 39480 }, { "epoch": 35.50359712230216, "step": 39480, "torque_loss": 0.10923445224761963 }, { "epoch": 35.51258992805755, "grad_norm": 0.27076154947280884, "learning_rate": 2.8688596478489875e-05, "loss": 0.0629, "step": 39490 }, { "action_loss": 0.00361534277908504, "epoch": 35.51258992805755, "step": 39490 }, { "epoch": 35.51258992805755, "step": 39490, "torque_loss": 0.06795011460781097 }, { "epoch": 35.52158273381295, "grad_norm": 0.2548551559448242, "learning_rate": 2.8663670469705434e-05, "loss": 0.0449, "step": 39500 }, { "action_loss": 0.014938871376216412, "epoch": 35.52158273381295, "step": 39500 }, { "epoch": 35.52158273381295, "step": 39500, "torque_loss": 0.12610460817813873 }, { "epoch": 35.530575539568346, "grad_norm": 0.2219255119562149, "learning_rate": 2.8638750942334546e-05, "loss": 0.0553, "step": 39510 }, { "action_loss": 0.0010457743192091584, "epoch": 35.530575539568346, "step": 39510 }, { "epoch": 35.530575539568346, "step": 39510, "torque_loss": 0.05047034099698067 }, { "epoch": 35.539568345323744, "grad_norm": 0.26296576857566833, "learning_rate": 2.8613837903947115e-05, "loss": 0.0451, "step": 39520 }, { "action_loss": 0.010227108374238014, "epoch": 35.539568345323744, "step": 39520 }, { "epoch": 35.539568345323744, "step": 39520, "torque_loss": 0.13576458394527435 }, { "epoch": 35.548561151079134, "grad_norm": 0.30922290682792664, "learning_rate": 2.858893136211106e-05, "loss": 0.0575, "step": 39530 }, { "action_loss": 0.010550090111792088, "epoch": 35.548561151079134, "step": 39530 }, { "epoch": 35.548561151079134, "step": 39530, "torque_loss": 0.10706377029418945 }, { "epoch": 35.55755395683453, "grad_norm": 0.3183472752571106, "learning_rate": 2.8564031324392315e-05, "loss": 0.0541, "step": 39540 }, { "action_loss": 0.0020321307238191366, "epoch": 35.55755395683453, "step": 39540 }, { "epoch": 35.55755395683453, "step": 39540, "torque_loss": 0.07022081315517426 }, { "epoch": 35.56654676258993, "grad_norm": 0.3215215802192688, "learning_rate": 2.85391377983549e-05, "loss": 0.0558, "step": 39550 }, { "action_loss": 0.0020871541928499937, "epoch": 35.56654676258993, "step": 39550 }, { "epoch": 35.56654676258993, "step": 39550, "torque_loss": 0.07350979000329971 }, { "epoch": 35.57553956834532, "grad_norm": 0.2869607210159302, "learning_rate": 2.851425079156075e-05, "loss": 0.0497, "step": 39560 }, { "action_loss": 0.005214788019657135, "epoch": 35.57553956834532, "step": 39560 }, { "epoch": 35.57553956834532, "step": 39560, "torque_loss": 0.12563945353031158 }, { "epoch": 35.58453237410072, "grad_norm": 0.3436306416988373, "learning_rate": 2.848937031156994e-05, "loss": 0.0539, "step": 39570 }, { "action_loss": 0.0017264533089473844, "epoch": 35.58453237410072, "step": 39570 }, { "epoch": 35.58453237410072, "step": 39570, "torque_loss": 0.04041733965277672 }, { "epoch": 35.593525179856115, "grad_norm": 0.29979845881462097, "learning_rate": 2.846449636594044e-05, "loss": 0.0579, "step": 39580 }, { "action_loss": 0.00956295058131218, "epoch": 35.593525179856115, "step": 39580 }, { "epoch": 35.593525179856115, "step": 39580, "torque_loss": 0.09863210469484329 }, { "epoch": 35.60251798561151, "grad_norm": 0.3589041531085968, "learning_rate": 2.843962896222836e-05, "loss": 0.0484, "step": 39590 }, { "action_loss": 0.0025563451927155256, "epoch": 35.60251798561151, "step": 39590 }, { "epoch": 35.60251798561151, "step": 39590, "torque_loss": 0.06457792967557907 }, { "epoch": 35.611510791366904, "grad_norm": 0.252210795879364, "learning_rate": 2.8414768107987722e-05, "loss": 0.0553, "step": 39600 }, { "action_loss": 0.005316292401403189, "epoch": 35.611510791366904, "step": 39600 }, { "epoch": 35.611510791366904, "step": 39600, "torque_loss": 0.059460800141096115 }, { "epoch": 35.6205035971223, "grad_norm": 0.271744966506958, "learning_rate": 2.838991381077061e-05, "loss": 0.0595, "step": 39610 }, { "action_loss": 0.006635343190282583, "epoch": 35.6205035971223, "step": 39610 }, { "epoch": 35.6205035971223, "step": 39610, "torque_loss": 0.08689885586500168 }, { "epoch": 35.6294964028777, "grad_norm": 0.3865700662136078, "learning_rate": 2.83650660781271e-05, "loss": 0.0489, "step": 39620 }, { "action_loss": 0.001965229632332921, "epoch": 35.6294964028777, "step": 39620 }, { "epoch": 35.6294964028777, "step": 39620, "torque_loss": 0.045382797718048096 }, { "epoch": 35.638489208633096, "grad_norm": 0.3120872676372528, "learning_rate": 2.8340224917605285e-05, "loss": 0.0444, "step": 39630 }, { "action_loss": 0.0034629330039024353, "epoch": 35.638489208633096, "step": 39630 }, { "epoch": 35.638489208633096, "step": 39630, "torque_loss": 0.10302868485450745 }, { "epoch": 35.64748201438849, "grad_norm": 0.28015467524528503, "learning_rate": 2.831539033675122e-05, "loss": 0.0579, "step": 39640 }, { "action_loss": 0.0011526461457833648, "epoch": 35.64748201438849, "step": 39640 }, { "epoch": 35.64748201438849, "step": 39640, "torque_loss": 0.04232148453593254 }, { "epoch": 35.656474820143885, "grad_norm": 0.23134773969650269, "learning_rate": 2.8290562343109038e-05, "loss": 0.0645, "step": 39650 }, { "action_loss": 0.00861335638910532, "epoch": 35.656474820143885, "step": 39650 }, { "epoch": 35.656474820143885, "step": 39650, "torque_loss": 0.09665101766586304 }, { "epoch": 35.66546762589928, "grad_norm": 0.3580179214477539, "learning_rate": 2.826574094422082e-05, "loss": 0.0435, "step": 39660 }, { "action_loss": 0.002409568289294839, "epoch": 35.66546762589928, "step": 39660 }, { "epoch": 35.66546762589928, "step": 39660, "torque_loss": 0.08891254663467407 }, { "epoch": 35.67446043165468, "grad_norm": 0.20993465185165405, "learning_rate": 2.8240926147626645e-05, "loss": 0.0607, "step": 39670 }, { "action_loss": 0.0020322203636169434, "epoch": 35.67446043165468, "step": 39670 }, { "epoch": 35.67446043165468, "step": 39670, "torque_loss": 0.05065414309501648 }, { "epoch": 35.68345323741007, "grad_norm": 0.2814153730869293, "learning_rate": 2.8216117960864586e-05, "loss": 0.0555, "step": 39680 }, { "action_loss": 0.006416769232600927, "epoch": 35.68345323741007, "step": 39680 }, { "epoch": 35.68345323741007, "step": 39680, "torque_loss": 0.13756276667118073 }, { "epoch": 35.69244604316547, "grad_norm": 0.2961360514163971, "learning_rate": 2.8191316391470703e-05, "loss": 0.0587, "step": 39690 }, { "action_loss": 0.005075001623481512, "epoch": 35.69244604316547, "step": 39690 }, { "epoch": 35.69244604316547, "step": 39690, "torque_loss": 0.08623813837766647 }, { "epoch": 35.701438848920866, "grad_norm": 0.34289178252220154, "learning_rate": 2.816652144697911e-05, "loss": 0.0713, "step": 39700 }, { "action_loss": 0.0013888954417780042, "epoch": 35.701438848920866, "step": 39700 }, { "epoch": 35.701438848920866, "step": 39700, "torque_loss": 0.049900203943252563 }, { "epoch": 35.710431654676256, "grad_norm": 0.2255123108625412, "learning_rate": 2.8141733134921783e-05, "loss": 0.062, "step": 39710 }, { "action_loss": 0.0026976726949214935, "epoch": 35.710431654676256, "step": 39710 }, { "epoch": 35.710431654676256, "step": 39710, "torque_loss": 0.07462779432535172 }, { "epoch": 35.719424460431654, "grad_norm": 0.32960045337677, "learning_rate": 2.811695146282884e-05, "loss": 0.0592, "step": 39720 }, { "action_loss": 0.0028112970758229494, "epoch": 35.719424460431654, "step": 39720 }, { "epoch": 35.719424460431654, "step": 39720, "torque_loss": 0.06344179064035416 }, { "epoch": 35.72841726618705, "grad_norm": 0.3065878450870514, "learning_rate": 2.8092176438228212e-05, "loss": 0.0504, "step": 39730 }, { "action_loss": 0.0034583155065774918, "epoch": 35.72841726618705, "step": 39730 }, { "epoch": 35.72841726618705, "step": 39730, "torque_loss": 0.07153698056936264 }, { "epoch": 35.73741007194245, "grad_norm": 0.22907477617263794, "learning_rate": 2.806740806864598e-05, "loss": 0.0571, "step": 39740 }, { "action_loss": 0.006973743438720703, "epoch": 35.73741007194245, "step": 39740 }, { "epoch": 35.73741007194245, "step": 39740, "torque_loss": 0.13871540129184723 }, { "epoch": 35.74640287769784, "grad_norm": 0.27405500411987305, "learning_rate": 2.804264636160604e-05, "loss": 0.0492, "step": 39750 }, { "action_loss": 0.006654342170804739, "epoch": 35.74640287769784, "step": 39750 }, { "epoch": 35.74640287769784, "step": 39750, "torque_loss": 0.11758449673652649 }, { "epoch": 35.75539568345324, "grad_norm": 0.27996891736984253, "learning_rate": 2.8017891324630402e-05, "loss": 0.0491, "step": 39760 }, { "action_loss": 0.004938945639878511, "epoch": 35.75539568345324, "step": 39760 }, { "epoch": 35.75539568345324, "step": 39760, "torque_loss": 0.09470636397600174 }, { "epoch": 35.764388489208635, "grad_norm": 0.41773712635040283, "learning_rate": 2.7993142965238976e-05, "loss": 0.0548, "step": 39770 }, { "action_loss": 0.0022397746797651052, "epoch": 35.764388489208635, "step": 39770 }, { "epoch": 35.764388489208635, "step": 39770, "torque_loss": 0.05422748997807503 }, { "epoch": 35.773381294964025, "grad_norm": 0.4172411561012268, "learning_rate": 2.7968401290949665e-05, "loss": 0.0531, "step": 39780 }, { "action_loss": 0.007150414865463972, "epoch": 35.773381294964025, "step": 39780 }, { "epoch": 35.773381294964025, "step": 39780, "torque_loss": 0.12319376319646835 }, { "epoch": 35.78237410071942, "grad_norm": 0.22979363799095154, "learning_rate": 2.7943666309278328e-05, "loss": 0.0499, "step": 39790 }, { "action_loss": 0.003334498731419444, "epoch": 35.78237410071942, "step": 39790 }, { "epoch": 35.78237410071942, "step": 39790, "torque_loss": 0.07424802333116531 }, { "epoch": 35.79136690647482, "grad_norm": 0.2620571255683899, "learning_rate": 2.7918938027738783e-05, "loss": 0.0568, "step": 39800 }, { "action_loss": 0.0016169935697689652, "epoch": 35.79136690647482, "step": 39800 }, { "epoch": 35.79136690647482, "step": 39800, "torque_loss": 0.06668872386217117 }, { "epoch": 35.80035971223022, "grad_norm": 0.2505740821361542, "learning_rate": 2.789421645384287e-05, "loss": 0.0403, "step": 39810 }, { "action_loss": 0.0474495105445385, "epoch": 35.80035971223022, "step": 39810 }, { "epoch": 35.80035971223022, "step": 39810, "torque_loss": 0.16158294677734375 }, { "epoch": 35.80935251798561, "grad_norm": 0.24452057480812073, "learning_rate": 2.786950159510032e-05, "loss": 0.0555, "step": 39820 }, { "action_loss": 0.0025357739068567753, "epoch": 35.80935251798561, "step": 39820 }, { "epoch": 35.80935251798561, "step": 39820, "torque_loss": 0.06945682317018509 }, { "epoch": 35.818345323741006, "grad_norm": 0.3000852167606354, "learning_rate": 2.7844793459018876e-05, "loss": 0.0517, "step": 39830 }, { "action_loss": 0.0021735166665166616, "epoch": 35.818345323741006, "step": 39830 }, { "epoch": 35.818345323741006, "step": 39830, "torque_loss": 0.05134619399905205 }, { "epoch": 35.827338129496404, "grad_norm": 0.2540571987628937, "learning_rate": 2.7820092053104195e-05, "loss": 0.0473, "step": 39840 }, { "action_loss": 0.0036756836343556643, "epoch": 35.827338129496404, "step": 39840 }, { "epoch": 35.827338129496404, "step": 39840, "torque_loss": 0.08697953075170517 }, { "epoch": 35.8363309352518, "grad_norm": 0.24116703867912292, "learning_rate": 2.7795397384859933e-05, "loss": 0.0429, "step": 39850 }, { "action_loss": 0.00705656036734581, "epoch": 35.8363309352518, "step": 39850 }, { "epoch": 35.8363309352518, "step": 39850, "torque_loss": 0.11384273320436478 }, { "epoch": 35.84532374100719, "grad_norm": 0.28967830538749695, "learning_rate": 2.7770709461787638e-05, "loss": 0.0458, "step": 39860 }, { "action_loss": 0.0016835728893056512, "epoch": 35.84532374100719, "step": 39860 }, { "epoch": 35.84532374100719, "step": 39860, "torque_loss": 0.03367292881011963 }, { "epoch": 35.85431654676259, "grad_norm": 0.2437746822834015, "learning_rate": 2.7746028291386915e-05, "loss": 0.0434, "step": 39870 }, { "action_loss": 0.0016744928434491158, "epoch": 35.85431654676259, "step": 39870 }, { "epoch": 35.85431654676259, "step": 39870, "torque_loss": 0.05869213119149208 }, { "epoch": 35.86330935251799, "grad_norm": 0.33335521817207336, "learning_rate": 2.772135388115519e-05, "loss": 0.052, "step": 39880 }, { "action_loss": 0.002350383438169956, "epoch": 35.86330935251799, "step": 39880 }, { "epoch": 35.86330935251799, "step": 39880, "torque_loss": 0.09792587906122208 }, { "epoch": 35.87230215827338, "grad_norm": 0.29550400376319885, "learning_rate": 2.7696686238587945e-05, "loss": 0.0476, "step": 39890 }, { "action_loss": 0.008746772073209286, "epoch": 35.87230215827338, "step": 39890 }, { "epoch": 35.87230215827338, "step": 39890, "torque_loss": 0.09638889878988266 }, { "epoch": 35.881294964028775, "grad_norm": 0.3712659478187561, "learning_rate": 2.7672025371178505e-05, "loss": 0.0592, "step": 39900 }, { "action_loss": 0.0012956345453858376, "epoch": 35.881294964028775, "step": 39900 }, { "epoch": 35.881294964028775, "step": 39900, "torque_loss": 0.0535832941532135 }, { "epoch": 35.89028776978417, "grad_norm": 0.2985726296901703, "learning_rate": 2.7647371286418238e-05, "loss": 0.0437, "step": 39910 }, { "action_loss": 0.005041013937443495, "epoch": 35.89028776978417, "step": 39910 }, { "epoch": 35.89028776978417, "step": 39910, "torque_loss": 0.08920028060674667 }, { "epoch": 35.89928057553957, "grad_norm": 0.23325149714946747, "learning_rate": 2.762272399179639e-05, "loss": 0.0521, "step": 39920 }, { "action_loss": 0.007838386110961437, "epoch": 35.89928057553957, "step": 39920 }, { "epoch": 35.89928057553957, "step": 39920, "torque_loss": 0.09586736559867859 }, { "epoch": 35.90827338129496, "grad_norm": 0.24637407064437866, "learning_rate": 2.7598083494800154e-05, "loss": 0.0517, "step": 39930 }, { "action_loss": 0.012690332718193531, "epoch": 35.90827338129496, "step": 39930 }, { "epoch": 35.90827338129496, "step": 39930, "torque_loss": 0.1213272213935852 }, { "epoch": 35.91726618705036, "grad_norm": 0.344461590051651, "learning_rate": 2.7573449802914664e-05, "loss": 0.0591, "step": 39940 }, { "action_loss": 0.0024823390413075686, "epoch": 35.91726618705036, "step": 39940 }, { "epoch": 35.91726618705036, "step": 39940, "torque_loss": 0.07592912018299103 }, { "epoch": 35.92625899280576, "grad_norm": 0.3549812436103821, "learning_rate": 2.7548822923622964e-05, "loss": 0.0429, "step": 39950 }, { "action_loss": 0.0015605208463966846, "epoch": 35.92625899280576, "step": 39950 }, { "epoch": 35.92625899280576, "step": 39950, "torque_loss": 0.05959552526473999 }, { "epoch": 35.935251798561154, "grad_norm": 0.3000171482563019, "learning_rate": 2.752420286440609e-05, "loss": 0.051, "step": 39960 }, { "action_loss": 0.0008055532234720886, "epoch": 35.935251798561154, "step": 39960 }, { "epoch": 35.935251798561154, "step": 39960, "torque_loss": 0.05820358917117119 }, { "epoch": 35.944244604316545, "grad_norm": 0.28951495885849, "learning_rate": 2.749958963274295e-05, "loss": 0.0519, "step": 39970 }, { "action_loss": 0.005666801240295172, "epoch": 35.944244604316545, "step": 39970 }, { "epoch": 35.944244604316545, "step": 39970, "torque_loss": 0.1030765250325203 }, { "epoch": 35.95323741007194, "grad_norm": 0.3269881308078766, "learning_rate": 2.747498323611039e-05, "loss": 0.0592, "step": 39980 }, { "action_loss": 0.0010796411661431193, "epoch": 35.95323741007194, "step": 39980 }, { "epoch": 35.95323741007194, "step": 39980, "torque_loss": 0.03995769843459129 }, { "epoch": 35.96223021582734, "grad_norm": 0.19677650928497314, "learning_rate": 2.7450383681983184e-05, "loss": 0.0418, "step": 39990 }, { "action_loss": 0.0036199346650391817, "epoch": 35.96223021582734, "step": 39990 }, { "epoch": 35.96223021582734, "step": 39990, "torque_loss": 0.11065011471509933 }, { "epoch": 35.97122302158273, "grad_norm": 0.32281750440597534, "learning_rate": 2.742579097783403e-05, "loss": 0.0521, "step": 40000 } ], "logging_steps": 10, "max_steps": 60000, "num_input_tokens_seen": 0, "num_train_epochs": 54, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }