diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,110034 @@ +{ + "best_global_step": null, + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 44.96402877697842, + "eval_steps": 500, + "global_step": 50000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "action_loss": 3.6376631259918213, + "epoch": 0, + "step": 0 + }, + { + "epoch": 0, + "step": 0, + "torque_loss": 0.1310535967350006 + }, + { + "epoch": 0, + "force_loss": 0.02128283679485321, + "step": 0 + }, + { + "epoch": 0.008992805755395683, + "grad_norm": 9.891151428222656, + "learning_rate": 3.0000000000000004e-07, + "loss": 2.1385, + "step": 10 + }, + { + "action_loss": 1.5190473794937134, + "epoch": 0.008992805755395683, + "step": 10 + }, + { + "epoch": 0.008992805755395683, + "step": 10, + "torque_loss": 0.12258154153823853 + }, + { + "epoch": 0.008992805755395683, + "force_loss": 0.004153921268880367, + "step": 10 + }, + { + "epoch": 0.017985611510791366, + "grad_norm": 27.13469696044922, + "learning_rate": 6.333333333333333e-07, + "loss": 2.2744, + "step": 20 + }, + { + "action_loss": 1.9660362005233765, + "epoch": 0.017985611510791366, + "step": 20 + }, + { + "epoch": 0.017985611510791366, + "step": 20, + "torque_loss": 0.11698909848928452 + }, + { + "epoch": 0.017985611510791366, + "force_loss": 0.005360100418329239, + "step": 20 + }, + { + "epoch": 0.02697841726618705, + "grad_norm": 8.638763427734375, + "learning_rate": 9.666666666666668e-07, + "loss": 1.9938, + "step": 30 + }, + { + "action_loss": 2.620020866394043, + "epoch": 0.02697841726618705, + "step": 30 + }, + { + "epoch": 0.02697841726618705, + "step": 30, + "torque_loss": 0.1213768944144249 + }, + { + "epoch": 0.02697841726618705, + "force_loss": 0.005952438339591026, + "step": 30 + }, + { + "epoch": 0.03597122302158273, + "grad_norm": 7.296123504638672, + "learning_rate": 1.3e-06, + "loss": 1.8309, + "step": 40 + }, + { + "action_loss": 2.0353288650512695, + "epoch": 0.03597122302158273, + "step": 40 + }, + { + "epoch": 0.03597122302158273, + "step": 40, + "torque_loss": 0.10332077741622925 + }, + { + "epoch": 0.03597122302158273, + "force_loss": 0.004071989096701145, + "step": 40 + }, + { + "epoch": 0.044964028776978415, + "grad_norm": 9.959613800048828, + "learning_rate": 1.6333333333333333e-06, + "loss": 1.5416, + "step": 50 + }, + { + "action_loss": 1.45693838596344, + "epoch": 0.044964028776978415, + "step": 50 + }, + { + "epoch": 0.044964028776978415, + "step": 50, + "torque_loss": 0.15176773071289062 + }, + { + "epoch": 0.044964028776978415, + "force_loss": 0.0103449122980237, + "step": 50 + }, + { + "epoch": 0.0539568345323741, + "grad_norm": 9.581283569335938, + "learning_rate": 1.9666666666666668e-06, + "loss": 1.304, + "step": 60 + }, + { + "action_loss": 0.7612078785896301, + "epoch": 0.0539568345323741, + "step": 60 + }, + { + "epoch": 0.0539568345323741, + "step": 60, + "torque_loss": 0.12856867909431458 + }, + { + "epoch": 0.0539568345323741, + "force_loss": 0.008561134338378906, + "step": 60 + }, + { + "epoch": 0.06294964028776978, + "grad_norm": 2.9715723991394043, + "learning_rate": 2.3e-06, + "loss": 0.6737, + "step": 70 + }, + { + "action_loss": 0.5321075916290283, + "epoch": 0.06294964028776978, + "step": 70 + }, + { + "epoch": 0.06294964028776978, + "step": 70, + "torque_loss": 0.1495532989501953 + }, + { + "epoch": 0.06294964028776978, + "force_loss": 0.009209034964442253, + "step": 70 + }, + { + "epoch": 0.07194244604316546, + "grad_norm": 1.6039245128631592, + "learning_rate": 2.6333333333333337e-06, + "loss": 0.5001, + "step": 80 + }, + { + "action_loss": 0.3438389301300049, + "epoch": 0.07194244604316546, + "step": 80 + }, + { + "epoch": 0.07194244604316546, + "step": 80, + "torque_loss": 0.1306103616952896 + }, + { + "epoch": 0.07194244604316546, + "force_loss": 0.0066747204400599, + "step": 80 + }, + { + "epoch": 0.08093525179856115, + "grad_norm": 1.0983778238296509, + "learning_rate": 2.966666666666667e-06, + "loss": 0.3208, + "step": 90 + }, + { + "action_loss": 0.20547671616077423, + "epoch": 0.08093525179856115, + "step": 90 + }, + { + "epoch": 0.08093525179856115, + "step": 90, + "torque_loss": 0.0799812451004982 + }, + { + "epoch": 0.08093525179856115, + "force_loss": 0.010391616262495518, + "step": 90 + }, + { + "epoch": 0.08992805755395683, + "grad_norm": 1.26064133644104, + "learning_rate": 3.3e-06, + "loss": 0.2245, + "step": 100 + }, + { + "action_loss": 0.21050791442394257, + "epoch": 0.08992805755395683, + "step": 100 + }, + { + "epoch": 0.08992805755395683, + "step": 100, + "torque_loss": 0.12303809076547623 + }, + { + "epoch": 0.08992805755395683, + "force_loss": 0.03322964906692505, + "step": 100 + }, + { + "epoch": 0.09892086330935251, + "grad_norm": 1.037462830543518, + "learning_rate": 3.633333333333334e-06, + "loss": 0.1602, + "step": 110 + }, + { + "action_loss": 0.1163199171423912, + "epoch": 0.09892086330935251, + "step": 110 + }, + { + "epoch": 0.09892086330935251, + "step": 110, + "torque_loss": 0.13896340131759644 + }, + { + "epoch": 0.09892086330935251, + "force_loss": 0.012037482112646103, + "step": 110 + }, + { + "epoch": 0.1079136690647482, + "grad_norm": 0.969456136226654, + "learning_rate": 3.966666666666667e-06, + "loss": 0.141, + "step": 120 + }, + { + "action_loss": 0.08728659152984619, + "epoch": 0.1079136690647482, + "step": 120 + }, + { + "epoch": 0.1079136690647482, + "step": 120, + "torque_loss": 0.16430842876434326 + }, + { + "epoch": 0.1079136690647482, + "force_loss": 0.008811325766146183, + "step": 120 + }, + { + "epoch": 0.11690647482014388, + "grad_norm": 0.6086524724960327, + "learning_rate": 4.2999999999999995e-06, + "loss": 0.0974, + "step": 130 + }, + { + "action_loss": 0.0822930559515953, + "epoch": 0.11690647482014388, + "step": 130 + }, + { + "epoch": 0.11690647482014388, + "step": 130, + "torque_loss": 0.11346596479415894 + }, + { + "epoch": 0.11690647482014388, + "force_loss": 0.007961176335811615, + "step": 130 + }, + { + "epoch": 0.12589928057553956, + "grad_norm": 0.8939685225486755, + "learning_rate": 4.633333333333334e-06, + "loss": 0.098, + "step": 140 + }, + { + "action_loss": 0.0769037976861, + "epoch": 0.12589928057553956, + "step": 140 + }, + { + "epoch": 0.12589928057553956, + "step": 140, + "torque_loss": 0.09801127761602402 + }, + { + "epoch": 0.12589928057553956, + "force_loss": 0.012784930877387524, + "step": 140 + }, + { + "epoch": 0.13489208633093525, + "grad_norm": 0.6305805444717407, + "learning_rate": 4.966666666666667e-06, + "loss": 0.1025, + "step": 150 + }, + { + "action_loss": 0.1941053420305252, + "epoch": 0.13489208633093525, + "step": 150 + }, + { + "epoch": 0.13489208633093525, + "step": 150, + "torque_loss": 0.10263881832361221 + }, + { + "epoch": 0.13489208633093525, + "force_loss": 0.02107587456703186, + "step": 150 + }, + { + "epoch": 0.14388489208633093, + "grad_norm": 0.4861755669116974, + "learning_rate": 5.3e-06, + "loss": 0.0901, + "step": 160 + }, + { + "action_loss": 0.09890081733465195, + "epoch": 0.14388489208633093, + "step": 160 + }, + { + "epoch": 0.14388489208633093, + "step": 160, + "torque_loss": 0.17149204015731812 + }, + { + "epoch": 0.14388489208633093, + "force_loss": 0.022561684250831604, + "step": 160 + }, + { + "epoch": 0.1528776978417266, + "grad_norm": 0.6252902150154114, + "learning_rate": 5.633333333333333e-06, + "loss": 0.083, + "step": 170 + }, + { + "action_loss": 0.05647003650665283, + "epoch": 0.1528776978417266, + "step": 170 + }, + { + "epoch": 0.1528776978417266, + "step": 170, + "torque_loss": 0.08202075213193893 + }, + { + "epoch": 0.1528776978417266, + "force_loss": 0.006358828395605087, + "step": 170 + }, + { + "epoch": 0.1618705035971223, + "grad_norm": 0.5395506620407104, + "learning_rate": 5.9666666666666666e-06, + "loss": 0.0722, + "step": 180 + }, + { + "action_loss": 0.06473840028047562, + "epoch": 0.1618705035971223, + "step": 180 + }, + { + "epoch": 0.1618705035971223, + "step": 180, + "torque_loss": 0.15107816457748413 + }, + { + "epoch": 0.1618705035971223, + "force_loss": 0.008181636221706867, + "step": 180 + }, + { + "epoch": 0.17086330935251798, + "grad_norm": 0.5020890831947327, + "learning_rate": 6.300000000000001e-06, + "loss": 0.0903, + "step": 190 + }, + { + "action_loss": 0.04997554421424866, + "epoch": 0.17086330935251798, + "step": 190 + }, + { + "epoch": 0.17086330935251798, + "step": 190, + "torque_loss": 0.1321963667869568 + }, + { + "epoch": 0.17086330935251798, + "force_loss": 0.005873364862054586, + "step": 190 + }, + { + "epoch": 0.17985611510791366, + "grad_norm": 0.748670220375061, + "learning_rate": 6.633333333333333e-06, + "loss": 0.0817, + "step": 200 + }, + { + "action_loss": 0.05113643407821655, + "epoch": 0.17985611510791366, + "step": 200 + }, + { + "epoch": 0.17985611510791366, + "step": 200, + "torque_loss": 0.08320024609565735 + }, + { + "epoch": 0.17985611510791366, + "force_loss": 0.004743098746985197, + "step": 200 + }, + { + "epoch": 0.18884892086330934, + "grad_norm": 0.6618114113807678, + "learning_rate": 6.966666666666667e-06, + "loss": 0.0751, + "step": 210 + }, + { + "action_loss": 0.10174226760864258, + "epoch": 0.18884892086330934, + "step": 210 + }, + { + "epoch": 0.18884892086330934, + "step": 210, + "torque_loss": 0.14920921623706818 + }, + { + "epoch": 0.18884892086330934, + "force_loss": 0.026553340256214142, + "step": 210 + }, + { + "epoch": 0.19784172661870503, + "grad_norm": 0.9313657283782959, + "learning_rate": 7.2999999999999996e-06, + "loss": 0.0785, + "step": 220 + }, + { + "action_loss": 0.047935888171195984, + "epoch": 0.19784172661870503, + "step": 220 + }, + { + "epoch": 0.19784172661870503, + "step": 220, + "torque_loss": 0.10199981182813644 + }, + { + "epoch": 0.19784172661870503, + "force_loss": 0.006586765870451927, + "step": 220 + }, + { + "epoch": 0.2068345323741007, + "grad_norm": 0.6217069625854492, + "learning_rate": 7.633333333333334e-06, + "loss": 0.0726, + "step": 230 + }, + { + "action_loss": 0.06976015865802765, + "epoch": 0.2068345323741007, + "step": 230 + }, + { + "epoch": 0.2068345323741007, + "step": 230, + "torque_loss": 0.1708124876022339 + }, + { + "epoch": 0.2068345323741007, + "force_loss": 0.009130173362791538, + "step": 230 + }, + { + "epoch": 0.2158273381294964, + "grad_norm": 0.5605801939964294, + "learning_rate": 7.966666666666666e-06, + "loss": 0.0661, + "step": 240 + }, + { + "action_loss": 0.05596521869301796, + "epoch": 0.2158273381294964, + "step": 240 + }, + { + "epoch": 0.2158273381294964, + "step": 240, + "torque_loss": 0.1426011323928833 + }, + { + "epoch": 0.2158273381294964, + "force_loss": 0.02059830166399479, + "step": 240 + }, + { + "epoch": 0.22482014388489208, + "grad_norm": 0.8638727068901062, + "learning_rate": 8.3e-06, + "loss": 0.0681, + "step": 250 + }, + { + "action_loss": 0.040239691734313965, + "epoch": 0.22482014388489208, + "step": 250 + }, + { + "epoch": 0.22482014388489208, + "step": 250, + "torque_loss": 0.12120440602302551 + }, + { + "epoch": 0.22482014388489208, + "force_loss": 0.007772465702146292, + "step": 250 + }, + { + "epoch": 0.23381294964028776, + "grad_norm": 0.871470034122467, + "learning_rate": 8.633333333333334e-06, + "loss": 0.0692, + "step": 260 + }, + { + "action_loss": 0.03378848358988762, + "epoch": 0.23381294964028776, + "step": 260 + }, + { + "epoch": 0.23381294964028776, + "step": 260, + "torque_loss": 0.11716548353433609 + }, + { + "epoch": 0.23381294964028776, + "force_loss": 0.008904024958610535, + "step": 260 + }, + { + "epoch": 0.24280575539568344, + "grad_norm": 0.7010048031806946, + "learning_rate": 8.966666666666668e-06, + "loss": 0.0616, + "step": 270 + }, + { + "action_loss": 0.05553820729255676, + "epoch": 0.24280575539568344, + "step": 270 + }, + { + "epoch": 0.24280575539568344, + "step": 270, + "torque_loss": 0.12629489600658417 + }, + { + "epoch": 0.24280575539568344, + "force_loss": 0.011371352709829807, + "step": 270 + }, + { + "epoch": 0.2517985611510791, + "grad_norm": 0.967866063117981, + "learning_rate": 9.3e-06, + "loss": 0.0667, + "step": 280 + }, + { + "action_loss": 0.03285503759980202, + "epoch": 0.2517985611510791, + "step": 280 + }, + { + "epoch": 0.2517985611510791, + "step": 280, + "torque_loss": 0.13254724442958832 + }, + { + "epoch": 0.2517985611510791, + "force_loss": 0.007841754704713821, + "step": 280 + }, + { + "epoch": 0.2607913669064748, + "grad_norm": 0.917709231376648, + "learning_rate": 9.633333333333335e-06, + "loss": 0.0565, + "step": 290 + }, + { + "action_loss": 0.05310952663421631, + "epoch": 0.2607913669064748, + "step": 290 + }, + { + "epoch": 0.2607913669064748, + "step": 290, + "torque_loss": 0.12525951862335205 + }, + { + "epoch": 0.2607913669064748, + "force_loss": 0.015282098203897476, + "step": 290 + }, + { + "epoch": 0.2697841726618705, + "grad_norm": 0.8989470601081848, + "learning_rate": 9.966666666666667e-06, + "loss": 0.0683, + "step": 300 + }, + { + "action_loss": 0.05016603693366051, + "epoch": 0.2697841726618705, + "step": 300 + }, + { + "epoch": 0.2697841726618705, + "step": 300, + "torque_loss": 0.15589874982833862 + }, + { + "epoch": 0.2697841726618705, + "force_loss": 0.00809991080313921, + "step": 300 + }, + { + "epoch": 0.2787769784172662, + "grad_norm": 0.6818510890007019, + "learning_rate": 1.03e-05, + "loss": 0.0609, + "step": 310 + }, + { + "action_loss": 0.07677297294139862, + "epoch": 0.2787769784172662, + "step": 310 + }, + { + "epoch": 0.2787769784172662, + "step": 310, + "torque_loss": 0.14252005517482758 + }, + { + "epoch": 0.2787769784172662, + "force_loss": 0.016483528539538383, + "step": 310 + }, + { + "epoch": 0.28776978417266186, + "grad_norm": 0.7003334164619446, + "learning_rate": 1.0633333333333334e-05, + "loss": 0.0673, + "step": 320 + }, + { + "action_loss": 0.034718334674835205, + "epoch": 0.28776978417266186, + "step": 320 + }, + { + "epoch": 0.28776978417266186, + "step": 320, + "torque_loss": 0.1005847230553627 + }, + { + "epoch": 0.28776978417266186, + "force_loss": 0.007835912518203259, + "step": 320 + }, + { + "epoch": 0.29676258992805754, + "grad_norm": 1.7371386289596558, + "learning_rate": 1.0966666666666666e-05, + "loss": 0.0613, + "step": 330 + }, + { + "action_loss": 0.02670879103243351, + "epoch": 0.29676258992805754, + "step": 330 + }, + { + "epoch": 0.29676258992805754, + "step": 330, + "torque_loss": 0.12702833116054535 + }, + { + "epoch": 0.29676258992805754, + "force_loss": 0.006304746028035879, + "step": 330 + }, + { + "epoch": 0.3057553956834532, + "grad_norm": 0.8466010689735413, + "learning_rate": 1.13e-05, + "loss": 0.05, + "step": 340 + }, + { + "action_loss": 0.03459268435835838, + "epoch": 0.3057553956834532, + "step": 340 + }, + { + "epoch": 0.3057553956834532, + "step": 340, + "torque_loss": 0.16588880121707916 + }, + { + "epoch": 0.3057553956834532, + "force_loss": 0.005099931266158819, + "step": 340 + }, + { + "epoch": 0.3147482014388489, + "grad_norm": 0.8969944715499878, + "learning_rate": 1.1633333333333334e-05, + "loss": 0.0595, + "step": 350 + }, + { + "action_loss": 0.04767943546175957, + "epoch": 0.3147482014388489, + "step": 350 + }, + { + "epoch": 0.3147482014388489, + "step": 350, + "torque_loss": 0.11656659841537476 + }, + { + "epoch": 0.3147482014388489, + "force_loss": 0.010912281461060047, + "step": 350 + }, + { + "epoch": 0.3237410071942446, + "grad_norm": 1.1630769968032837, + "learning_rate": 1.1966666666666668e-05, + "loss": 0.0616, + "step": 360 + }, + { + "action_loss": 0.03671069070696831, + "epoch": 0.3237410071942446, + "step": 360 + }, + { + "epoch": 0.3237410071942446, + "step": 360, + "torque_loss": 0.1766238659620285 + }, + { + "epoch": 0.3237410071942446, + "force_loss": 0.005978018045425415, + "step": 360 + }, + { + "epoch": 0.3327338129496403, + "grad_norm": 1.4596916437149048, + "learning_rate": 1.23e-05, + "loss": 0.0484, + "step": 370 + }, + { + "action_loss": 0.051470864564180374, + "epoch": 0.3327338129496403, + "step": 370 + }, + { + "epoch": 0.3327338129496403, + "step": 370, + "torque_loss": 0.17661406099796295 + }, + { + "epoch": 0.3327338129496403, + "force_loss": 0.01126351673156023, + "step": 370 + }, + { + "epoch": 0.34172661870503596, + "grad_norm": 0.7545153498649597, + "learning_rate": 1.2633333333333333e-05, + "loss": 0.052, + "step": 380 + }, + { + "action_loss": 0.025191066786646843, + "epoch": 0.34172661870503596, + "step": 380 + }, + { + "epoch": 0.34172661870503596, + "step": 380, + "torque_loss": 0.1213822141289711 + }, + { + "epoch": 0.34172661870503596, + "force_loss": 0.004954246804118156, + "step": 380 + }, + { + "epoch": 0.35071942446043164, + "grad_norm": 0.7178869247436523, + "learning_rate": 1.2966666666666669e-05, + "loss": 0.0615, + "step": 390 + }, + { + "action_loss": 0.013403424061834812, + "epoch": 0.35071942446043164, + "step": 390 + }, + { + "epoch": 0.35071942446043164, + "step": 390, + "torque_loss": 0.11117445677518845 + }, + { + "epoch": 0.35071942446043164, + "force_loss": 0.004236349370330572, + "step": 390 + }, + { + "epoch": 0.3597122302158273, + "grad_norm": 0.997737467288971, + "learning_rate": 1.3300000000000001e-05, + "loss": 0.0447, + "step": 400 + }, + { + "action_loss": 0.045525435358285904, + "epoch": 0.3597122302158273, + "step": 400 + }, + { + "epoch": 0.3597122302158273, + "step": 400, + "torque_loss": 0.1176399514079094 + }, + { + "epoch": 0.3597122302158273, + "force_loss": 0.019651243463158607, + "step": 400 + }, + { + "epoch": 0.368705035971223, + "grad_norm": 0.7645737528800964, + "learning_rate": 1.3633333333333334e-05, + "loss": 0.0545, + "step": 410 + }, + { + "action_loss": 0.03362283110618591, + "epoch": 0.368705035971223, + "step": 410 + }, + { + "epoch": 0.368705035971223, + "step": 410, + "torque_loss": 0.16924066841602325 + }, + { + "epoch": 0.368705035971223, + "force_loss": 0.005955790635198355, + "step": 410 + }, + { + "epoch": 0.3776978417266187, + "grad_norm": 0.99172443151474, + "learning_rate": 1.3966666666666666e-05, + "loss": 0.0612, + "step": 420 + }, + { + "action_loss": 0.046323012560606, + "epoch": 0.3776978417266187, + "step": 420 + }, + { + "epoch": 0.3776978417266187, + "step": 420, + "torque_loss": 0.13669370114803314 + }, + { + "epoch": 0.3776978417266187, + "force_loss": 0.008809066377580166, + "step": 420 + }, + { + "epoch": 0.38669064748201437, + "grad_norm": 0.7131702899932861, + "learning_rate": 1.43e-05, + "loss": 0.0557, + "step": 430 + }, + { + "action_loss": 0.06484134495258331, + "epoch": 0.38669064748201437, + "step": 430 + }, + { + "epoch": 0.38669064748201437, + "step": 430, + "torque_loss": 0.1730773001909256 + }, + { + "epoch": 0.38669064748201437, + "force_loss": 0.022902347147464752, + "step": 430 + }, + { + "epoch": 0.39568345323741005, + "grad_norm": 0.9485829472541809, + "learning_rate": 1.4633333333333334e-05, + "loss": 0.0576, + "step": 440 + }, + { + "action_loss": 0.0656992644071579, + "epoch": 0.39568345323741005, + "step": 440 + }, + { + "epoch": 0.39568345323741005, + "step": 440, + "torque_loss": 0.14429962635040283 + }, + { + "epoch": 0.39568345323741005, + "force_loss": 0.020188216120004654, + "step": 440 + }, + { + "epoch": 0.40467625899280574, + "grad_norm": 1.7684407234191895, + "learning_rate": 1.4966666666666668e-05, + "loss": 0.0572, + "step": 450 + }, + { + "action_loss": 0.03498392179608345, + "epoch": 0.40467625899280574, + "step": 450 + }, + { + "epoch": 0.40467625899280574, + "step": 450, + "torque_loss": 0.12798847258090973 + }, + { + "epoch": 0.40467625899280574, + "force_loss": 0.007528254296630621, + "step": 450 + }, + { + "epoch": 0.4136690647482014, + "grad_norm": 1.1263477802276611, + "learning_rate": 1.53e-05, + "loss": 0.0498, + "step": 460 + }, + { + "action_loss": 0.03849872946739197, + "epoch": 0.4136690647482014, + "step": 460 + }, + { + "epoch": 0.4136690647482014, + "step": 460, + "torque_loss": 0.1360778659582138 + }, + { + "epoch": 0.4136690647482014, + "force_loss": 0.015374623239040375, + "step": 460 + }, + { + "epoch": 0.4226618705035971, + "grad_norm": 0.7700405716896057, + "learning_rate": 1.563333333333333e-05, + "loss": 0.0518, + "step": 470 + }, + { + "action_loss": 0.019634079188108444, + "epoch": 0.4226618705035971, + "step": 470 + }, + { + "epoch": 0.4226618705035971, + "step": 470, + "torque_loss": 0.09089621156454086 + }, + { + "epoch": 0.4226618705035971, + "force_loss": 0.004453516099601984, + "step": 470 + }, + { + "epoch": 0.4316546762589928, + "grad_norm": 1.2913683652877808, + "learning_rate": 1.5966666666666667e-05, + "loss": 0.0433, + "step": 480 + }, + { + "action_loss": 0.02762473188340664, + "epoch": 0.4316546762589928, + "step": 480 + }, + { + "epoch": 0.4316546762589928, + "step": 480, + "torque_loss": 0.14060725271701813 + }, + { + "epoch": 0.4316546762589928, + "force_loss": 0.007750004529953003, + "step": 480 + }, + { + "epoch": 0.44064748201438847, + "grad_norm": 1.2174705266952515, + "learning_rate": 1.63e-05, + "loss": 0.052, + "step": 490 + }, + { + "action_loss": 0.04802360013127327, + "epoch": 0.44064748201438847, + "step": 490 + }, + { + "epoch": 0.44064748201438847, + "step": 490, + "torque_loss": 0.14837199449539185 + }, + { + "epoch": 0.44064748201438847, + "force_loss": 0.016758328303694725, + "step": 490 + }, + { + "epoch": 0.44964028776978415, + "grad_norm": 1.0038282871246338, + "learning_rate": 1.6633333333333336e-05, + "loss": 0.0426, + "step": 500 + }, + { + "action_loss": 0.03698794171214104, + "epoch": 0.44964028776978415, + "step": 500 + }, + { + "epoch": 0.44964028776978415, + "step": 500, + "torque_loss": 0.11550172418355942 + }, + { + "epoch": 0.44964028776978415, + "force_loss": 0.009756727144122124, + "step": 500 + }, + { + "epoch": 0.45863309352517984, + "grad_norm": 1.243269443511963, + "learning_rate": 1.6966666666666668e-05, + "loss": 0.048, + "step": 510 + }, + { + "action_loss": 0.028438905254006386, + "epoch": 0.45863309352517984, + "step": 510 + }, + { + "epoch": 0.45863309352517984, + "step": 510, + "torque_loss": 0.10196426510810852 + }, + { + "epoch": 0.45863309352517984, + "force_loss": 0.005013458896428347, + "step": 510 + }, + { + "epoch": 0.4676258992805755, + "grad_norm": 1.2900280952453613, + "learning_rate": 1.73e-05, + "loss": 0.0418, + "step": 520 + }, + { + "action_loss": 0.034887660294771194, + "epoch": 0.4676258992805755, + "step": 520 + }, + { + "epoch": 0.4676258992805755, + "step": 520, + "torque_loss": 0.12301575392484665 + }, + { + "epoch": 0.4676258992805755, + "force_loss": 0.008527296595275402, + "step": 520 + }, + { + "epoch": 0.4766187050359712, + "grad_norm": 0.5747576951980591, + "learning_rate": 1.7633333333333336e-05, + "loss": 0.05, + "step": 530 + }, + { + "action_loss": 0.05602313578128815, + "epoch": 0.4766187050359712, + "step": 530 + }, + { + "epoch": 0.4766187050359712, + "step": 530, + "torque_loss": 0.15821845829486847 + }, + { + "epoch": 0.4766187050359712, + "force_loss": 0.01597823202610016, + "step": 530 + }, + { + "epoch": 0.4856115107913669, + "grad_norm": 0.7638629674911499, + "learning_rate": 1.796666666666667e-05, + "loss": 0.0564, + "step": 540 + }, + { + "action_loss": 0.04084310308098793, + "epoch": 0.4856115107913669, + "step": 540 + }, + { + "epoch": 0.4856115107913669, + "step": 540, + "torque_loss": 0.1660570353269577 + }, + { + "epoch": 0.4856115107913669, + "force_loss": 0.01596181094646454, + "step": 540 + }, + { + "epoch": 0.49460431654676257, + "grad_norm": 1.6049985885620117, + "learning_rate": 1.83e-05, + "loss": 0.0453, + "step": 550 + }, + { + "action_loss": 0.045941516757011414, + "epoch": 0.49460431654676257, + "step": 550 + }, + { + "epoch": 0.49460431654676257, + "step": 550, + "torque_loss": 0.13734887540340424 + }, + { + "epoch": 0.49460431654676257, + "force_loss": 0.02087695710361004, + "step": 550 + }, + { + "epoch": 0.5035971223021583, + "grad_norm": 0.9019855260848999, + "learning_rate": 1.8633333333333333e-05, + "loss": 0.048, + "step": 560 + }, + { + "action_loss": 0.017615145072340965, + "epoch": 0.5035971223021583, + "step": 560 + }, + { + "epoch": 0.5035971223021583, + "step": 560, + "torque_loss": 0.16029873490333557 + }, + { + "epoch": 0.5035971223021583, + "force_loss": 0.005696360487490892, + "step": 560 + }, + { + "epoch": 0.512589928057554, + "grad_norm": 0.8535636067390442, + "learning_rate": 1.896666666666667e-05, + "loss": 0.0563, + "step": 570 + }, + { + "action_loss": 0.031458910554647446, + "epoch": 0.512589928057554, + "step": 570 + }, + { + "epoch": 0.512589928057554, + "step": 570, + "torque_loss": 0.14906558394432068 + }, + { + "epoch": 0.512589928057554, + "force_loss": 0.010886810719966888, + "step": 570 + }, + { + "epoch": 0.5215827338129496, + "grad_norm": 0.8024583458900452, + "learning_rate": 1.93e-05, + "loss": 0.0499, + "step": 580 + }, + { + "action_loss": 0.03816935420036316, + "epoch": 0.5215827338129496, + "step": 580 + }, + { + "epoch": 0.5215827338129496, + "step": 580, + "torque_loss": 0.1175684705376625 + }, + { + "epoch": 0.5215827338129496, + "force_loss": 0.010934433899819851, + "step": 580 + }, + { + "epoch": 0.5305755395683454, + "grad_norm": 2.530550956726074, + "learning_rate": 1.9633333333333334e-05, + "loss": 0.0448, + "step": 590 + }, + { + "action_loss": 0.03544415533542633, + "epoch": 0.5305755395683454, + "step": 590 + }, + { + "epoch": 0.5305755395683454, + "step": 590, + "torque_loss": 0.18601089715957642 + }, + { + "epoch": 0.5305755395683454, + "force_loss": 0.007891523651778698, + "step": 590 + }, + { + "epoch": 0.539568345323741, + "grad_norm": 1.0873814821243286, + "learning_rate": 1.9966666666666666e-05, + "loss": 0.0524, + "step": 600 + }, + { + "action_loss": 0.0685107633471489, + "epoch": 0.539568345323741, + "step": 600 + }, + { + "epoch": 0.539568345323741, + "step": 600, + "torque_loss": 0.2041873335838318 + }, + { + "epoch": 0.539568345323741, + "force_loss": 0.051076740026474, + "step": 600 + }, + { + "epoch": 0.5485611510791367, + "grad_norm": 1.2049381732940674, + "learning_rate": 2.0300000000000002e-05, + "loss": 0.0524, + "step": 610 + }, + { + "action_loss": 0.045146096497774124, + "epoch": 0.5485611510791367, + "step": 610 + }, + { + "epoch": 0.5485611510791367, + "step": 610, + "torque_loss": 0.1632339060306549 + }, + { + "epoch": 0.5485611510791367, + "force_loss": 0.019419750198721886, + "step": 610 + }, + { + "epoch": 0.5575539568345323, + "grad_norm": 1.6513888835906982, + "learning_rate": 2.0633333333333335e-05, + "loss": 0.0495, + "step": 620 + }, + { + "action_loss": 0.023775087669491768, + "epoch": 0.5575539568345323, + "step": 620 + }, + { + "epoch": 0.5575539568345323, + "step": 620, + "torque_loss": 0.1636258214712143 + }, + { + "epoch": 0.5575539568345323, + "force_loss": 0.007806839887052774, + "step": 620 + }, + { + "epoch": 0.5665467625899281, + "grad_norm": 1.9310189485549927, + "learning_rate": 2.0966666666666667e-05, + "loss": 0.047, + "step": 630 + }, + { + "action_loss": 0.025371715426445007, + "epoch": 0.5665467625899281, + "step": 630 + }, + { + "epoch": 0.5665467625899281, + "step": 630, + "torque_loss": 0.11403391510248184 + }, + { + "epoch": 0.5665467625899281, + "force_loss": 0.008687383495271206, + "step": 630 + }, + { + "epoch": 0.5755395683453237, + "grad_norm": 0.7792282104492188, + "learning_rate": 2.13e-05, + "loss": 0.0448, + "step": 640 + }, + { + "action_loss": 0.04153499752283096, + "epoch": 0.5755395683453237, + "step": 640 + }, + { + "epoch": 0.5755395683453237, + "step": 640, + "torque_loss": 0.1256234049797058 + }, + { + "epoch": 0.5755395683453237, + "force_loss": 0.02491765283048153, + "step": 640 + }, + { + "epoch": 0.5845323741007195, + "grad_norm": 0.8404738903045654, + "learning_rate": 2.1633333333333332e-05, + "loss": 0.0429, + "step": 650 + }, + { + "action_loss": 0.012981973588466644, + "epoch": 0.5845323741007195, + "step": 650 + }, + { + "epoch": 0.5845323741007195, + "step": 650, + "torque_loss": 0.1623903065919876 + }, + { + "epoch": 0.5845323741007195, + "force_loss": 0.004864631220698357, + "step": 650 + }, + { + "epoch": 0.5935251798561151, + "grad_norm": 1.0602569580078125, + "learning_rate": 2.1966666666666668e-05, + "loss": 0.0444, + "step": 660 + }, + { + "action_loss": 0.02391764521598816, + "epoch": 0.5935251798561151, + "step": 660 + }, + { + "epoch": 0.5935251798561151, + "step": 660, + "torque_loss": 0.10528260469436646 + }, + { + "epoch": 0.5935251798561151, + "force_loss": 0.006534598767757416, + "step": 660 + }, + { + "epoch": 0.6025179856115108, + "grad_norm": 0.9499192833900452, + "learning_rate": 2.23e-05, + "loss": 0.0338, + "step": 670 + }, + { + "action_loss": 0.04245607927441597, + "epoch": 0.6025179856115108, + "step": 670 + }, + { + "epoch": 0.6025179856115108, + "step": 670, + "torque_loss": 0.21461541950702667 + }, + { + "epoch": 0.6025179856115108, + "force_loss": 0.01296437531709671, + "step": 670 + }, + { + "epoch": 0.6115107913669064, + "grad_norm": 2.2330446243286133, + "learning_rate": 2.2633333333333336e-05, + "loss": 0.0505, + "step": 680 + }, + { + "action_loss": 0.03410147503018379, + "epoch": 0.6115107913669064, + "step": 680 + }, + { + "epoch": 0.6115107913669064, + "step": 680, + "torque_loss": 0.1426069587469101 + }, + { + "epoch": 0.6115107913669064, + "force_loss": 0.016774063929915428, + "step": 680 + }, + { + "epoch": 0.6205035971223022, + "grad_norm": 1.304166555404663, + "learning_rate": 2.2966666666666668e-05, + "loss": 0.0391, + "step": 690 + }, + { + "action_loss": 0.030235351994633675, + "epoch": 0.6205035971223022, + "step": 690 + }, + { + "epoch": 0.6205035971223022, + "step": 690, + "torque_loss": 0.10690677165985107 + }, + { + "epoch": 0.6205035971223022, + "force_loss": 0.011885077692568302, + "step": 690 + }, + { + "epoch": 0.6294964028776978, + "grad_norm": 2.1882855892181396, + "learning_rate": 2.3300000000000004e-05, + "loss": 0.0415, + "step": 700 + }, + { + "action_loss": 0.014563643373548985, + "epoch": 0.6294964028776978, + "step": 700 + }, + { + "epoch": 0.6294964028776978, + "step": 700, + "torque_loss": 0.10989886522293091 + }, + { + "epoch": 0.6294964028776978, + "force_loss": 0.005410848185420036, + "step": 700 + }, + { + "epoch": 0.6384892086330936, + "grad_norm": 1.208815097808838, + "learning_rate": 2.3633333333333336e-05, + "loss": 0.0479, + "step": 710 + }, + { + "action_loss": 0.016269544139504433, + "epoch": 0.6384892086330936, + "step": 710 + }, + { + "epoch": 0.6384892086330936, + "step": 710, + "torque_loss": 0.10121416300535202 + }, + { + "epoch": 0.6384892086330936, + "force_loss": 0.005640504416078329, + "step": 710 + }, + { + "epoch": 0.6474820143884892, + "grad_norm": 3.213498830795288, + "learning_rate": 2.396666666666667e-05, + "loss": 0.0416, + "step": 720 + }, + { + "action_loss": 0.06165416166186333, + "epoch": 0.6474820143884892, + "step": 720 + }, + { + "epoch": 0.6474820143884892, + "step": 720, + "torque_loss": 0.22449012100696564 + }, + { + "epoch": 0.6474820143884892, + "force_loss": 0.012106549926102161, + "step": 720 + }, + { + "epoch": 0.6564748201438849, + "grad_norm": 1.0452207326889038, + "learning_rate": 2.43e-05, + "loss": 0.0515, + "step": 730 + }, + { + "action_loss": 0.043337106704711914, + "epoch": 0.6564748201438849, + "step": 730 + }, + { + "epoch": 0.6564748201438849, + "step": 730, + "torque_loss": 0.1278170943260193 + }, + { + "epoch": 0.6564748201438849, + "force_loss": 0.03837411105632782, + "step": 730 + }, + { + "epoch": 0.6654676258992805, + "grad_norm": 1.0941096544265747, + "learning_rate": 2.4633333333333334e-05, + "loss": 0.0446, + "step": 740 + }, + { + "action_loss": 0.02208028733730316, + "epoch": 0.6654676258992805, + "step": 740 + }, + { + "epoch": 0.6654676258992805, + "step": 740, + "torque_loss": 0.09663611650466919 + }, + { + "epoch": 0.6654676258992805, + "force_loss": 0.01026022806763649, + "step": 740 + }, + { + "epoch": 0.6744604316546763, + "grad_norm": 0.7206782698631287, + "learning_rate": 2.496666666666667e-05, + "loss": 0.0433, + "step": 750 + }, + { + "action_loss": 0.016235915943980217, + "epoch": 0.6744604316546763, + "step": 750 + }, + { + "epoch": 0.6744604316546763, + "step": 750, + "torque_loss": 0.12707258760929108 + }, + { + "epoch": 0.6744604316546763, + "force_loss": 0.004836099687963724, + "step": 750 + }, + { + "epoch": 0.6834532374100719, + "grad_norm": 1.1924729347229004, + "learning_rate": 2.5300000000000002e-05, + "loss": 0.0405, + "step": 760 + }, + { + "action_loss": 0.04032515361905098, + "epoch": 0.6834532374100719, + "step": 760 + }, + { + "epoch": 0.6834532374100719, + "step": 760, + "torque_loss": 0.12080534547567368 + }, + { + "epoch": 0.6834532374100719, + "force_loss": 0.009603076614439487, + "step": 760 + }, + { + "epoch": 0.6924460431654677, + "grad_norm": 1.8415714502334595, + "learning_rate": 2.5633333333333338e-05, + "loss": 0.0459, + "step": 770 + }, + { + "action_loss": 0.017865577712655067, + "epoch": 0.6924460431654677, + "step": 770 + }, + { + "epoch": 0.6924460431654677, + "step": 770, + "torque_loss": 0.15025244653224945 + }, + { + "epoch": 0.6924460431654677, + "force_loss": 0.0042395382188260555, + "step": 770 + }, + { + "epoch": 0.7014388489208633, + "grad_norm": 0.8931247591972351, + "learning_rate": 2.5966666666666667e-05, + "loss": 0.0507, + "step": 780 + }, + { + "action_loss": 0.014569099061191082, + "epoch": 0.7014388489208633, + "step": 780 + }, + { + "epoch": 0.7014388489208633, + "step": 780, + "torque_loss": 0.08025429397821426 + }, + { + "epoch": 0.7014388489208633, + "force_loss": 0.004249536897987127, + "step": 780 + }, + { + "epoch": 0.710431654676259, + "grad_norm": 1.091386318206787, + "learning_rate": 2.6300000000000002e-05, + "loss": 0.0392, + "step": 790 + }, + { + "action_loss": 0.03355321288108826, + "epoch": 0.710431654676259, + "step": 790 + }, + { + "epoch": 0.710431654676259, + "step": 790, + "torque_loss": 0.16912581026554108 + }, + { + "epoch": 0.710431654676259, + "force_loss": 0.010022146627306938, + "step": 790 + }, + { + "epoch": 0.7194244604316546, + "grad_norm": 0.7468305826187134, + "learning_rate": 2.663333333333333e-05, + "loss": 0.0461, + "step": 800 + }, + { + "action_loss": 0.01833420991897583, + "epoch": 0.7194244604316546, + "step": 800 + }, + { + "epoch": 0.7194244604316546, + "step": 800, + "torque_loss": 0.08790832757949829 + }, + { + "epoch": 0.7194244604316546, + "force_loss": 0.006794957909733057, + "step": 800 + }, + { + "epoch": 0.7284172661870504, + "grad_norm": 0.9255871772766113, + "learning_rate": 2.6966666666666667e-05, + "loss": 0.0393, + "step": 810 + }, + { + "action_loss": 0.05293245241045952, + "epoch": 0.7284172661870504, + "step": 810 + }, + { + "epoch": 0.7284172661870504, + "step": 810, + "torque_loss": 0.14482195675373077 + }, + { + "epoch": 0.7284172661870504, + "force_loss": 0.023813588544726372, + "step": 810 + }, + { + "epoch": 0.737410071942446, + "grad_norm": 1.567159652709961, + "learning_rate": 2.7300000000000003e-05, + "loss": 0.0581, + "step": 820 + }, + { + "action_loss": 0.009122147224843502, + "epoch": 0.737410071942446, + "step": 820 + }, + { + "epoch": 0.737410071942446, + "step": 820, + "torque_loss": 0.10559693723917007 + }, + { + "epoch": 0.737410071942446, + "force_loss": 0.004394026938825846, + "step": 820 + }, + { + "epoch": 0.7464028776978417, + "grad_norm": 0.9150029420852661, + "learning_rate": 2.7633333333333332e-05, + "loss": 0.0449, + "step": 830 + }, + { + "action_loss": 0.014827902428805828, + "epoch": 0.7464028776978417, + "step": 830 + }, + { + "epoch": 0.7464028776978417, + "step": 830, + "torque_loss": 0.13311408460140228 + }, + { + "epoch": 0.7464028776978417, + "force_loss": 0.005864080041646957, + "step": 830 + }, + { + "epoch": 0.7553956834532374, + "grad_norm": 1.084446668624878, + "learning_rate": 2.7966666666666668e-05, + "loss": 0.0367, + "step": 840 + }, + { + "action_loss": 0.032788172364234924, + "epoch": 0.7553956834532374, + "step": 840 + }, + { + "epoch": 0.7553956834532374, + "step": 840, + "torque_loss": 0.1256210207939148 + }, + { + "epoch": 0.7553956834532374, + "force_loss": 0.012543238699436188, + "step": 840 + }, + { + "epoch": 0.7643884892086331, + "grad_norm": 0.7480943202972412, + "learning_rate": 2.83e-05, + "loss": 0.0408, + "step": 850 + }, + { + "action_loss": 0.027371756732463837, + "epoch": 0.7643884892086331, + "step": 850 + }, + { + "epoch": 0.7643884892086331, + "step": 850, + "torque_loss": 0.11386056989431381 + }, + { + "epoch": 0.7643884892086331, + "force_loss": 0.022196611389517784, + "step": 850 + }, + { + "epoch": 0.7733812949640287, + "grad_norm": 1.3565993309020996, + "learning_rate": 2.8633333333333336e-05, + "loss": 0.0486, + "step": 860 + }, + { + "action_loss": 0.034494709223508835, + "epoch": 0.7733812949640287, + "step": 860 + }, + { + "epoch": 0.7733812949640287, + "step": 860, + "torque_loss": 0.13771109282970428 + }, + { + "epoch": 0.7733812949640287, + "force_loss": 0.01090602669864893, + "step": 860 + }, + { + "epoch": 0.7823741007194245, + "grad_norm": 1.561873435974121, + "learning_rate": 2.8966666666666668e-05, + "loss": 0.0377, + "step": 870 + }, + { + "action_loss": 0.039113353937864304, + "epoch": 0.7823741007194245, + "step": 870 + }, + { + "epoch": 0.7823741007194245, + "step": 870, + "torque_loss": 0.08401709794998169 + }, + { + "epoch": 0.7823741007194245, + "force_loss": 0.025932809337973595, + "step": 870 + }, + { + "epoch": 0.7913669064748201, + "grad_norm": 1.2967007160186768, + "learning_rate": 2.93e-05, + "loss": 0.0427, + "step": 880 + }, + { + "action_loss": 0.04003247246146202, + "epoch": 0.7913669064748201, + "step": 880 + }, + { + "epoch": 0.7913669064748201, + "step": 880, + "torque_loss": 0.10898923873901367 + }, + { + "epoch": 0.7913669064748201, + "force_loss": 0.008176526986062527, + "step": 880 + }, + { + "epoch": 0.8003597122302158, + "grad_norm": 1.5075591802597046, + "learning_rate": 2.9633333333333336e-05, + "loss": 0.0413, + "step": 890 + }, + { + "action_loss": 0.02105006016790867, + "epoch": 0.8003597122302158, + "step": 890 + }, + { + "epoch": 0.8003597122302158, + "step": 890, + "torque_loss": 0.09274104237556458 + }, + { + "epoch": 0.8003597122302158, + "force_loss": 0.004790377337485552, + "step": 890 + }, + { + "epoch": 0.8093525179856115, + "grad_norm": 1.1103328466415405, + "learning_rate": 2.9966666666666672e-05, + "loss": 0.0417, + "step": 900 + }, + { + "action_loss": 0.04317578300833702, + "epoch": 0.8093525179856115, + "step": 900 + }, + { + "epoch": 0.8093525179856115, + "step": 900, + "torque_loss": 0.1761828064918518 + }, + { + "epoch": 0.8093525179856115, + "force_loss": 0.017960799857974052, + "step": 900 + }, + { + "epoch": 0.8183453237410072, + "grad_norm": 0.800326943397522, + "learning_rate": 3.03e-05, + "loss": 0.0418, + "step": 910 + }, + { + "action_loss": 0.02699863910675049, + "epoch": 0.8183453237410072, + "step": 910 + }, + { + "epoch": 0.8183453237410072, + "step": 910, + "torque_loss": 0.14901010692119598 + }, + { + "epoch": 0.8183453237410072, + "force_loss": 0.012698336504399776, + "step": 910 + }, + { + "epoch": 0.8273381294964028, + "grad_norm": 0.9114958643913269, + "learning_rate": 3.063333333333334e-05, + "loss": 0.0504, + "step": 920 + }, + { + "action_loss": 0.0285883080214262, + "epoch": 0.8273381294964028, + "step": 920 + }, + { + "epoch": 0.8273381294964028, + "step": 920, + "torque_loss": 0.1757916361093521 + }, + { + "epoch": 0.8273381294964028, + "force_loss": 0.0068364874459803104, + "step": 920 + }, + { + "epoch": 0.8363309352517986, + "grad_norm": 1.1808520555496216, + "learning_rate": 3.096666666666666e-05, + "loss": 0.0542, + "step": 930 + }, + { + "action_loss": 0.02321232296526432, + "epoch": 0.8363309352517986, + "step": 930 + }, + { + "epoch": 0.8363309352517986, + "step": 930, + "torque_loss": 0.1254875212907791 + }, + { + "epoch": 0.8363309352517986, + "force_loss": 0.012459876947104931, + "step": 930 + }, + { + "epoch": 0.8453237410071942, + "grad_norm": 0.8187349438667297, + "learning_rate": 3.13e-05, + "loss": 0.0386, + "step": 940 + }, + { + "action_loss": 0.042479436844587326, + "epoch": 0.8453237410071942, + "step": 940 + }, + { + "epoch": 0.8453237410071942, + "step": 940, + "torque_loss": 0.1373445987701416 + }, + { + "epoch": 0.8453237410071942, + "force_loss": 0.02711704932153225, + "step": 940 + }, + { + "epoch": 0.85431654676259, + "grad_norm": 1.4400840997695923, + "learning_rate": 3.1633333333333334e-05, + "loss": 0.0377, + "step": 950 + }, + { + "action_loss": 0.039127472788095474, + "epoch": 0.85431654676259, + "step": 950 + }, + { + "epoch": 0.85431654676259, + "step": 950, + "torque_loss": 0.15974800288677216 + }, + { + "epoch": 0.85431654676259, + "force_loss": 0.01687513291835785, + "step": 950 + }, + { + "epoch": 0.8633093525179856, + "grad_norm": 1.0097832679748535, + "learning_rate": 3.196666666666667e-05, + "loss": 0.0455, + "step": 960 + }, + { + "action_loss": 0.02173447608947754, + "epoch": 0.8633093525179856, + "step": 960 + }, + { + "epoch": 0.8633093525179856, + "step": 960, + "torque_loss": 0.14499664306640625 + }, + { + "epoch": 0.8633093525179856, + "force_loss": 0.008056712336838245, + "step": 960 + }, + { + "epoch": 0.8723021582733813, + "grad_norm": 1.1613527536392212, + "learning_rate": 3.2300000000000006e-05, + "loss": 0.037, + "step": 970 + }, + { + "action_loss": 0.01486013364046812, + "epoch": 0.8723021582733813, + "step": 970 + }, + { + "epoch": 0.8723021582733813, + "step": 970, + "torque_loss": 0.13487625122070312 + }, + { + "epoch": 0.8723021582733813, + "force_loss": 0.006477018352597952, + "step": 970 + }, + { + "epoch": 0.8812949640287769, + "grad_norm": 1.1028674840927124, + "learning_rate": 3.263333333333333e-05, + "loss": 0.0403, + "step": 980 + }, + { + "action_loss": 0.015142365358769894, + "epoch": 0.8812949640287769, + "step": 980 + }, + { + "epoch": 0.8812949640287769, + "step": 980, + "torque_loss": 0.11137839406728745 + }, + { + "epoch": 0.8812949640287769, + "force_loss": 0.003947315271943808, + "step": 980 + }, + { + "epoch": 0.8902877697841727, + "grad_norm": 0.8612143993377686, + "learning_rate": 3.296666666666667e-05, + "loss": 0.041, + "step": 990 + }, + { + "action_loss": 0.01873861812055111, + "epoch": 0.8902877697841727, + "step": 990 + }, + { + "epoch": 0.8902877697841727, + "step": 990, + "torque_loss": 0.12820900976657867 + }, + { + "epoch": 0.8902877697841727, + "force_loss": 0.008802324533462524, + "step": 990 + }, + { + "epoch": 0.8992805755395683, + "grad_norm": 1.2536673545837402, + "learning_rate": 3.33e-05, + "loss": 0.0396, + "step": 1000 + }, + { + "action_loss": 0.043822210282087326, + "epoch": 0.8992805755395683, + "step": 1000 + }, + { + "epoch": 0.8992805755395683, + "step": 1000, + "torque_loss": 0.20643587410449982 + }, + { + "epoch": 0.8992805755395683, + "force_loss": 0.014320352114737034, + "step": 1000 + }, + { + "epoch": 0.908273381294964, + "grad_norm": 0.9552024602890015, + "learning_rate": 3.3633333333333335e-05, + "loss": 0.0469, + "step": 1010 + }, + { + "action_loss": 0.017567720264196396, + "epoch": 0.908273381294964, + "step": 1010 + }, + { + "epoch": 0.908273381294964, + "step": 1010, + "torque_loss": 0.11810340732336044 + }, + { + "epoch": 0.908273381294964, + "force_loss": 0.010300123132765293, + "step": 1010 + }, + { + "epoch": 0.9172661870503597, + "grad_norm": 0.49093523621559143, + "learning_rate": 3.396666666666667e-05, + "loss": 0.0375, + "step": 1020 + }, + { + "action_loss": 0.02955763228237629, + "epoch": 0.9172661870503597, + "step": 1020 + }, + { + "epoch": 0.9172661870503597, + "step": 1020, + "torque_loss": 0.11659219115972519 + }, + { + "epoch": 0.9172661870503597, + "force_loss": 0.009206675924360752, + "step": 1020 + }, + { + "epoch": 0.9262589928057554, + "grad_norm": 1.5677725076675415, + "learning_rate": 3.430000000000001e-05, + "loss": 0.0402, + "step": 1030 + }, + { + "action_loss": 0.014339379034936428, + "epoch": 0.9262589928057554, + "step": 1030 + }, + { + "epoch": 0.9262589928057554, + "step": 1030, + "torque_loss": 0.07817431539297104 + }, + { + "epoch": 0.9262589928057554, + "force_loss": 0.008804906159639359, + "step": 1030 + }, + { + "epoch": 0.935251798561151, + "grad_norm": 3.022254705429077, + "learning_rate": 3.463333333333333e-05, + "loss": 0.0382, + "step": 1040 + }, + { + "action_loss": 0.0204868633300066, + "epoch": 0.935251798561151, + "step": 1040 + }, + { + "epoch": 0.935251798561151, + "step": 1040, + "torque_loss": 0.10651201009750366 + }, + { + "epoch": 0.935251798561151, + "force_loss": 0.009721930138766766, + "step": 1040 + }, + { + "epoch": 0.9442446043165468, + "grad_norm": 1.391775369644165, + "learning_rate": 3.496666666666667e-05, + "loss": 0.0402, + "step": 1050 + }, + { + "action_loss": 0.024866757914423943, + "epoch": 0.9442446043165468, + "step": 1050 + }, + { + "epoch": 0.9442446043165468, + "step": 1050, + "torque_loss": 0.17966289818286896 + }, + { + "epoch": 0.9442446043165468, + "force_loss": 0.012933402322232723, + "step": 1050 + }, + { + "epoch": 0.9532374100719424, + "grad_norm": 0.8274032473564148, + "learning_rate": 3.53e-05, + "loss": 0.0477, + "step": 1060 + }, + { + "action_loss": 0.012262926436960697, + "epoch": 0.9532374100719424, + "step": 1060 + }, + { + "epoch": 0.9532374100719424, + "step": 1060, + "torque_loss": 0.09987610578536987 + }, + { + "epoch": 0.9532374100719424, + "force_loss": 0.004758056718856096, + "step": 1060 + }, + { + "epoch": 0.9622302158273381, + "grad_norm": 0.854238748550415, + "learning_rate": 3.563333333333334e-05, + "loss": 0.0356, + "step": 1070 + }, + { + "action_loss": 0.045313760638237, + "epoch": 0.9622302158273381, + "step": 1070 + }, + { + "epoch": 0.9622302158273381, + "step": 1070, + "torque_loss": 0.20544810593128204 + }, + { + "epoch": 0.9622302158273381, + "force_loss": 0.01412518322467804, + "step": 1070 + }, + { + "epoch": 0.9712230215827338, + "grad_norm": 1.3108383417129517, + "learning_rate": 3.596666666666667e-05, + "loss": 0.0462, + "step": 1080 + }, + { + "action_loss": 0.018526015803217888, + "epoch": 0.9712230215827338, + "step": 1080 + }, + { + "epoch": 0.9712230215827338, + "step": 1080, + "torque_loss": 0.12145505100488663 + }, + { + "epoch": 0.9712230215827338, + "force_loss": 0.006285226438194513, + "step": 1080 + }, + { + "epoch": 0.9802158273381295, + "grad_norm": 1.2655175924301147, + "learning_rate": 3.63e-05, + "loss": 0.0434, + "step": 1090 + }, + { + "action_loss": 0.03329155221581459, + "epoch": 0.9802158273381295, + "step": 1090 + }, + { + "epoch": 0.9802158273381295, + "step": 1090, + "torque_loss": 0.1332734078168869 + }, + { + "epoch": 0.9802158273381295, + "force_loss": 0.01068316400051117, + "step": 1090 + }, + { + "epoch": 0.9892086330935251, + "grad_norm": 0.9130733013153076, + "learning_rate": 3.6633333333333334e-05, + "loss": 0.0383, + "step": 1100 + }, + { + "action_loss": 0.0344303734600544, + "epoch": 0.9892086330935251, + "step": 1100 + }, + { + "epoch": 0.9892086330935251, + "step": 1100, + "torque_loss": 0.13855507969856262 + }, + { + "epoch": 0.9892086330935251, + "force_loss": 0.011614271439611912, + "step": 1100 + }, + { + "epoch": 0.9982014388489209, + "grad_norm": 0.6374790072441101, + "learning_rate": 3.6966666666666666e-05, + "loss": 0.0372, + "step": 1110 + }, + { + "action_loss": 0.01947695203125477, + "epoch": 0.9982014388489209, + "step": 1110 + }, + { + "epoch": 0.9982014388489209, + "step": 1110, + "torque_loss": 0.1781420260667801 + }, + { + "epoch": 0.9982014388489209, + "force_loss": 0.004689342807978392, + "step": 1110 + }, + { + "epoch": 1.0071942446043165, + "grad_norm": 1.2495883703231812, + "learning_rate": 3.73e-05, + "loss": 0.0364, + "step": 1120 + }, + { + "action_loss": 0.011348418891429901, + "epoch": 1.0071942446043165, + "step": 1120 + }, + { + "epoch": 1.0071942446043165, + "step": 1120, + "torque_loss": 0.12806175649166107 + }, + { + "epoch": 1.0071942446043165, + "force_loss": 0.005668466445058584, + "step": 1120 + }, + { + "epoch": 1.0161870503597121, + "grad_norm": 1.0541917085647583, + "learning_rate": 3.763333333333334e-05, + "loss": 0.0364, + "step": 1130 + }, + { + "action_loss": 0.017578043043613434, + "epoch": 1.0161870503597121, + "step": 1130 + }, + { + "epoch": 1.0161870503597121, + "step": 1130, + "torque_loss": 0.09509783983230591 + }, + { + "epoch": 1.0161870503597121, + "force_loss": 0.00580967403948307, + "step": 1130 + }, + { + "epoch": 1.025179856115108, + "grad_norm": 1.0330699682235718, + "learning_rate": 3.796666666666667e-05, + "loss": 0.0388, + "step": 1140 + }, + { + "action_loss": 0.039993640035390854, + "epoch": 1.025179856115108, + "step": 1140 + }, + { + "epoch": 1.025179856115108, + "step": 1140, + "torque_loss": 0.13236945867538452 + }, + { + "epoch": 1.025179856115108, + "force_loss": 0.0089992955327034, + "step": 1140 + }, + { + "epoch": 1.0341726618705036, + "grad_norm": 1.6210577487945557, + "learning_rate": 3.83e-05, + "loss": 0.0395, + "step": 1150 + }, + { + "action_loss": 0.020890899002552032, + "epoch": 1.0341726618705036, + "step": 1150 + }, + { + "epoch": 1.0341726618705036, + "step": 1150, + "torque_loss": 0.15326978266239166 + }, + { + "epoch": 1.0341726618705036, + "force_loss": 0.006767446640878916, + "step": 1150 + }, + { + "epoch": 1.0431654676258992, + "grad_norm": 1.3818142414093018, + "learning_rate": 3.8633333333333335e-05, + "loss": 0.0405, + "step": 1160 + }, + { + "action_loss": 0.02695208229124546, + "epoch": 1.0431654676258992, + "step": 1160 + }, + { + "epoch": 1.0431654676258992, + "step": 1160, + "torque_loss": 0.19758886098861694 + }, + { + "epoch": 1.0431654676258992, + "force_loss": 0.0074464925564825535, + "step": 1160 + }, + { + "epoch": 1.0521582733812949, + "grad_norm": 1.2562706470489502, + "learning_rate": 3.896666666666667e-05, + "loss": 0.0461, + "step": 1170 + }, + { + "action_loss": 0.02570805512368679, + "epoch": 1.0521582733812949, + "step": 1170 + }, + { + "epoch": 1.0521582733812949, + "step": 1170, + "torque_loss": 0.161972776055336 + }, + { + "epoch": 1.0521582733812949, + "force_loss": 0.012070474214851856, + "step": 1170 + }, + { + "epoch": 1.0611510791366907, + "grad_norm": 1.1769518852233887, + "learning_rate": 3.9300000000000007e-05, + "loss": 0.0424, + "step": 1180 + }, + { + "action_loss": 0.029856273904442787, + "epoch": 1.0611510791366907, + "step": 1180 + }, + { + "epoch": 1.0611510791366907, + "step": 1180, + "torque_loss": 0.12464544922113419 + }, + { + "epoch": 1.0611510791366907, + "force_loss": 0.010856080800294876, + "step": 1180 + }, + { + "epoch": 1.0701438848920863, + "grad_norm": 0.895358681678772, + "learning_rate": 3.963333333333333e-05, + "loss": 0.0383, + "step": 1190 + }, + { + "action_loss": 0.0406019426882267, + "epoch": 1.0701438848920863, + "step": 1190 + }, + { + "epoch": 1.0701438848920863, + "step": 1190, + "torque_loss": 0.16710002720355988 + }, + { + "epoch": 1.0701438848920863, + "force_loss": 0.016543073579669, + "step": 1190 + }, + { + "epoch": 1.079136690647482, + "grad_norm": 1.3069195747375488, + "learning_rate": 3.996666666666667e-05, + "loss": 0.0414, + "step": 1200 + }, + { + "action_loss": 0.01567600481212139, + "epoch": 1.079136690647482, + "step": 1200 + }, + { + "epoch": 1.079136690647482, + "step": 1200, + "torque_loss": 0.11840472370386124 + }, + { + "epoch": 1.079136690647482, + "force_loss": 0.005812687333673239, + "step": 1200 + }, + { + "epoch": 1.0881294964028776, + "grad_norm": 0.9081102609634399, + "learning_rate": 4.0300000000000004e-05, + "loss": 0.0483, + "step": 1210 + }, + { + "action_loss": 0.025998719036579132, + "epoch": 1.0881294964028776, + "step": 1210 + }, + { + "epoch": 1.0881294964028776, + "step": 1210, + "torque_loss": 0.16842909157276154 + }, + { + "epoch": 1.0881294964028776, + "force_loss": 0.015264018438756466, + "step": 1210 + }, + { + "epoch": 1.0971223021582734, + "grad_norm": 1.0810637474060059, + "learning_rate": 4.0633333333333336e-05, + "loss": 0.0456, + "step": 1220 + }, + { + "action_loss": 0.028165755793452263, + "epoch": 1.0971223021582734, + "step": 1220 + }, + { + "epoch": 1.0971223021582734, + "step": 1220, + "torque_loss": 0.14498767256736755 + }, + { + "epoch": 1.0971223021582734, + "force_loss": 0.010447487235069275, + "step": 1220 + }, + { + "epoch": 1.106115107913669, + "grad_norm": 0.7632102966308594, + "learning_rate": 4.096666666666667e-05, + "loss": 0.037, + "step": 1230 + }, + { + "action_loss": 0.01392114907503128, + "epoch": 1.106115107913669, + "step": 1230 + }, + { + "epoch": 1.106115107913669, + "step": 1230, + "torque_loss": 0.14419786632061005 + }, + { + "epoch": 1.106115107913669, + "force_loss": 0.005224850028753281, + "step": 1230 + }, + { + "epoch": 1.1151079136690647, + "grad_norm": 2.4905953407287598, + "learning_rate": 4.13e-05, + "loss": 0.0433, + "step": 1240 + }, + { + "action_loss": 0.037794772535562515, + "epoch": 1.1151079136690647, + "step": 1240 + }, + { + "epoch": 1.1151079136690647, + "step": 1240, + "torque_loss": 0.14677703380584717 + }, + { + "epoch": 1.1151079136690647, + "force_loss": 0.027140960097312927, + "step": 1240 + }, + { + "epoch": 1.1241007194244603, + "grad_norm": 1.1650500297546387, + "learning_rate": 4.1633333333333333e-05, + "loss": 0.0404, + "step": 1250 + }, + { + "action_loss": 0.02054390124976635, + "epoch": 1.1241007194244603, + "step": 1250 + }, + { + "epoch": 1.1241007194244603, + "step": 1250, + "torque_loss": 0.12222862243652344 + }, + { + "epoch": 1.1241007194244603, + "force_loss": 0.006180791649967432, + "step": 1250 + }, + { + "epoch": 1.1330935251798562, + "grad_norm": 1.9320732355117798, + "learning_rate": 4.196666666666667e-05, + "loss": 0.0337, + "step": 1260 + }, + { + "action_loss": 0.022450512275099754, + "epoch": 1.1330935251798562, + "step": 1260 + }, + { + "epoch": 1.1330935251798562, + "step": 1260, + "torque_loss": 0.11523070186376572 + }, + { + "epoch": 1.1330935251798562, + "force_loss": 0.006162143778055906, + "step": 1260 + }, + { + "epoch": 1.1420863309352518, + "grad_norm": 2.0869734287261963, + "learning_rate": 4.23e-05, + "loss": 0.0327, + "step": 1270 + }, + { + "action_loss": 0.034509140998125076, + "epoch": 1.1420863309352518, + "step": 1270 + }, + { + "epoch": 1.1420863309352518, + "step": 1270, + "torque_loss": 0.12448292225599289 + }, + { + "epoch": 1.1420863309352518, + "force_loss": 0.03167206421494484, + "step": 1270 + }, + { + "epoch": 1.1510791366906474, + "grad_norm": 0.9672078490257263, + "learning_rate": 4.263333333333334e-05, + "loss": 0.0393, + "step": 1280 + }, + { + "action_loss": 0.0149543983861804, + "epoch": 1.1510791366906474, + "step": 1280 + }, + { + "epoch": 1.1510791366906474, + "step": 1280, + "torque_loss": 0.12495412677526474 + }, + { + "epoch": 1.1510791366906474, + "force_loss": 0.00579708069562912, + "step": 1280 + }, + { + "epoch": 1.1600719424460433, + "grad_norm": 2.0741238594055176, + "learning_rate": 4.296666666666666e-05, + "loss": 0.041, + "step": 1290 + }, + { + "action_loss": 0.029177134856581688, + "epoch": 1.1600719424460433, + "step": 1290 + }, + { + "epoch": 1.1600719424460433, + "step": 1290, + "torque_loss": 0.18909180164337158 + }, + { + "epoch": 1.1600719424460433, + "force_loss": 0.013418339192867279, + "step": 1290 + }, + { + "epoch": 1.169064748201439, + "grad_norm": 0.8136199712753296, + "learning_rate": 4.33e-05, + "loss": 0.0413, + "step": 1300 + }, + { + "action_loss": 0.011396770365536213, + "epoch": 1.169064748201439, + "step": 1300 + }, + { + "epoch": 1.169064748201439, + "step": 1300, + "torque_loss": 0.09374099224805832 + }, + { + "epoch": 1.169064748201439, + "force_loss": 0.006107666064053774, + "step": 1300 + }, + { + "epoch": 1.1780575539568345, + "grad_norm": 0.7694397568702698, + "learning_rate": 4.3633333333333335e-05, + "loss": 0.0362, + "step": 1310 + }, + { + "action_loss": 0.036509785801172256, + "epoch": 1.1780575539568345, + "step": 1310 + }, + { + "epoch": 1.1780575539568345, + "step": 1310, + "torque_loss": 0.14868922531604767 + }, + { + "epoch": 1.1780575539568345, + "force_loss": 0.009908725507557392, + "step": 1310 + }, + { + "epoch": 1.1870503597122302, + "grad_norm": 1.3490244150161743, + "learning_rate": 4.396666666666667e-05, + "loss": 0.0422, + "step": 1320 + }, + { + "action_loss": 0.03237549960613251, + "epoch": 1.1870503597122302, + "step": 1320 + }, + { + "epoch": 1.1870503597122302, + "step": 1320, + "torque_loss": 0.14881716668605804 + }, + { + "epoch": 1.1870503597122302, + "force_loss": 0.016344182193279266, + "step": 1320 + }, + { + "epoch": 1.1960431654676258, + "grad_norm": 0.9727373719215393, + "learning_rate": 4.43e-05, + "loss": 0.0457, + "step": 1330 + }, + { + "action_loss": 0.015569333918392658, + "epoch": 1.1960431654676258, + "step": 1330 + }, + { + "epoch": 1.1960431654676258, + "step": 1330, + "torque_loss": 0.10138320922851562 + }, + { + "epoch": 1.1960431654676258, + "force_loss": 0.00933428667485714, + "step": 1330 + }, + { + "epoch": 1.2050359712230216, + "grad_norm": 1.123683214187622, + "learning_rate": 4.463333333333334e-05, + "loss": 0.0518, + "step": 1340 + }, + { + "action_loss": 0.03274621441960335, + "epoch": 1.2050359712230216, + "step": 1340 + }, + { + "epoch": 1.2050359712230216, + "step": 1340, + "torque_loss": 0.12057191133499146 + }, + { + "epoch": 1.2050359712230216, + "force_loss": 0.011477436870336533, + "step": 1340 + }, + { + "epoch": 1.2140287769784173, + "grad_norm": 0.5538128018379211, + "learning_rate": 4.496666666666667e-05, + "loss": 0.0405, + "step": 1350 + }, + { + "action_loss": 0.021062130108475685, + "epoch": 1.2140287769784173, + "step": 1350 + }, + { + "epoch": 1.2140287769784173, + "step": 1350, + "torque_loss": 0.146018847823143 + }, + { + "epoch": 1.2140287769784173, + "force_loss": 0.010402261279523373, + "step": 1350 + }, + { + "epoch": 1.223021582733813, + "grad_norm": 1.2760074138641357, + "learning_rate": 4.53e-05, + "loss": 0.0528, + "step": 1360 + }, + { + "action_loss": 0.032370369881391525, + "epoch": 1.223021582733813, + "step": 1360 + }, + { + "epoch": 1.223021582733813, + "step": 1360, + "torque_loss": 0.10648646950721741 + }, + { + "epoch": 1.223021582733813, + "force_loss": 0.01716136746108532, + "step": 1360 + }, + { + "epoch": 1.2320143884892087, + "grad_norm": 1.5575224161148071, + "learning_rate": 4.5633333333333336e-05, + "loss": 0.0412, + "step": 1370 + }, + { + "action_loss": 0.01791442185640335, + "epoch": 1.2320143884892087, + "step": 1370 + }, + { + "epoch": 1.2320143884892087, + "step": 1370, + "torque_loss": 0.11422237753868103 + }, + { + "epoch": 1.2320143884892087, + "force_loss": 0.007272792514413595, + "step": 1370 + }, + { + "epoch": 1.2410071942446044, + "grad_norm": 0.8581523895263672, + "learning_rate": 4.596666666666667e-05, + "loss": 0.0377, + "step": 1380 + }, + { + "action_loss": 0.018249275162816048, + "epoch": 1.2410071942446044, + "step": 1380 + }, + { + "epoch": 1.2410071942446044, + "step": 1380, + "torque_loss": 0.13104642927646637 + }, + { + "epoch": 1.2410071942446044, + "force_loss": 0.006711956113576889, + "step": 1380 + }, + { + "epoch": 1.25, + "grad_norm": 1.0329571962356567, + "learning_rate": 4.630000000000001e-05, + "loss": 0.0397, + "step": 1390 + }, + { + "action_loss": 0.016140621155500412, + "epoch": 1.25, + "step": 1390 + }, + { + "epoch": 1.25, + "step": 1390, + "torque_loss": 0.11730929464101791 + }, + { + "epoch": 1.25, + "force_loss": 0.009129363112151623, + "step": 1390 + }, + { + "epoch": 1.2589928057553956, + "grad_norm": 1.5746816396713257, + "learning_rate": 4.663333333333333e-05, + "loss": 0.038, + "step": 1400 + }, + { + "action_loss": 0.02167298085987568, + "epoch": 1.2589928057553956, + "step": 1400 + }, + { + "epoch": 1.2589928057553956, + "step": 1400, + "torque_loss": 0.10270285606384277 + }, + { + "epoch": 1.2589928057553956, + "force_loss": 0.014194023795425892, + "step": 1400 + }, + { + "epoch": 1.2679856115107913, + "grad_norm": 1.1146483421325684, + "learning_rate": 4.696666666666667e-05, + "loss": 0.0567, + "step": 1410 + }, + { + "action_loss": 0.023223230615258217, + "epoch": 1.2679856115107913, + "step": 1410 + }, + { + "epoch": 1.2679856115107913, + "step": 1410, + "torque_loss": 0.19135254621505737 + }, + { + "epoch": 1.2679856115107913, + "force_loss": 0.011045348830521107, + "step": 1410 + }, + { + "epoch": 1.276978417266187, + "grad_norm": 1.0299912691116333, + "learning_rate": 4.73e-05, + "loss": 0.0457, + "step": 1420 + }, + { + "action_loss": 0.049068063497543335, + "epoch": 1.276978417266187, + "step": 1420 + }, + { + "epoch": 1.276978417266187, + "step": 1420, + "torque_loss": 0.14573934674263 + }, + { + "epoch": 1.276978417266187, + "force_loss": 0.029202574864029884, + "step": 1420 + }, + { + "epoch": 1.2859712230215827, + "grad_norm": 1.1865311861038208, + "learning_rate": 4.763333333333334e-05, + "loss": 0.0432, + "step": 1430 + }, + { + "action_loss": 0.030678488314151764, + "epoch": 1.2859712230215827, + "step": 1430 + }, + { + "epoch": 1.2859712230215827, + "step": 1430, + "torque_loss": 0.1374351531267166 + }, + { + "epoch": 1.2859712230215827, + "force_loss": 0.022890103980898857, + "step": 1430 + }, + { + "epoch": 1.2949640287769784, + "grad_norm": 1.7581628561019897, + "learning_rate": 4.796666666666667e-05, + "loss": 0.0341, + "step": 1440 + }, + { + "action_loss": 0.027712544426321983, + "epoch": 1.2949640287769784, + "step": 1440 + }, + { + "epoch": 1.2949640287769784, + "step": 1440, + "torque_loss": 0.13361425697803497 + }, + { + "epoch": 1.2949640287769784, + "force_loss": 0.014639712870121002, + "step": 1440 + }, + { + "epoch": 1.3039568345323742, + "grad_norm": 0.7644403576850891, + "learning_rate": 4.83e-05, + "loss": 0.0443, + "step": 1450 + }, + { + "action_loss": 0.02581537514925003, + "epoch": 1.3039568345323742, + "step": 1450 + }, + { + "epoch": 1.3039568345323742, + "step": 1450, + "torque_loss": 0.21452778577804565 + }, + { + "epoch": 1.3039568345323742, + "force_loss": 0.014838947914540768, + "step": 1450 + }, + { + "epoch": 1.3129496402877698, + "grad_norm": 0.8814418315887451, + "learning_rate": 4.8633333333333334e-05, + "loss": 0.0404, + "step": 1460 + }, + { + "action_loss": 0.01225553173571825, + "epoch": 1.3129496402877698, + "step": 1460 + }, + { + "epoch": 1.3129496402877698, + "step": 1460, + "torque_loss": 0.1041104793548584 + }, + { + "epoch": 1.3129496402877698, + "force_loss": 0.004430586937814951, + "step": 1460 + }, + { + "epoch": 1.3219424460431655, + "grad_norm": 1.0401335954666138, + "learning_rate": 4.8966666666666667e-05, + "loss": 0.0331, + "step": 1470 + }, + { + "action_loss": 0.014025881886482239, + "epoch": 1.3219424460431655, + "step": 1470 + }, + { + "epoch": 1.3219424460431655, + "step": 1470, + "torque_loss": 0.12877392768859863 + }, + { + "epoch": 1.3219424460431655, + "force_loss": 0.00756074720993638, + "step": 1470 + }, + { + "epoch": 1.330935251798561, + "grad_norm": 1.5328564643859863, + "learning_rate": 4.93e-05, + "loss": 0.0394, + "step": 1480 + }, + { + "action_loss": 0.016159093007445335, + "epoch": 1.330935251798561, + "step": 1480 + }, + { + "epoch": 1.330935251798561, + "step": 1480, + "torque_loss": 0.1405736654996872 + }, + { + "epoch": 1.330935251798561, + "force_loss": 0.004206780809909105, + "step": 1480 + }, + { + "epoch": 1.3399280575539567, + "grad_norm": 0.8034462332725525, + "learning_rate": 4.963333333333334e-05, + "loss": 0.0375, + "step": 1490 + }, + { + "action_loss": 0.03001769818365574, + "epoch": 1.3399280575539567, + "step": 1490 + }, + { + "epoch": 1.3399280575539567, + "step": 1490, + "torque_loss": 0.10380522161722183 + }, + { + "epoch": 1.3399280575539567, + "force_loss": 0.015295598655939102, + "step": 1490 + }, + { + "epoch": 1.3489208633093526, + "grad_norm": 0.6252776980400085, + "learning_rate": 4.996666666666667e-05, + "loss": 0.0436, + "step": 1500 + }, + { + "action_loss": 0.017488082870841026, + "epoch": 1.3489208633093526, + "step": 1500 + }, + { + "epoch": 1.3489208633093526, + "step": 1500, + "torque_loss": 0.13915669918060303 + }, + { + "epoch": 1.3489208633093526, + "force_loss": 0.008006691001355648, + "step": 1500 + }, + { + "epoch": 1.3579136690647482, + "grad_norm": 0.5311513543128967, + "learning_rate": 5.03e-05, + "loss": 0.0308, + "step": 1510 + }, + { + "action_loss": 0.0435921736061573, + "epoch": 1.3579136690647482, + "step": 1510 + }, + { + "epoch": 1.3579136690647482, + "step": 1510, + "torque_loss": 0.16664637625217438 + }, + { + "epoch": 1.3579136690647482, + "force_loss": 0.019571689888834953, + "step": 1510 + }, + { + "epoch": 1.3669064748201438, + "grad_norm": 1.3396062850952148, + "learning_rate": 5.0633333333333335e-05, + "loss": 0.038, + "step": 1520 + }, + { + "action_loss": 0.011165883392095566, + "epoch": 1.3669064748201438, + "step": 1520 + }, + { + "epoch": 1.3669064748201438, + "step": 1520, + "torque_loss": 0.12072101980447769 + }, + { + "epoch": 1.3669064748201438, + "force_loss": 0.0072516812942922115, + "step": 1520 + }, + { + "epoch": 1.3758992805755397, + "grad_norm": 1.793840765953064, + "learning_rate": 5.0966666666666674e-05, + "loss": 0.0385, + "step": 1530 + }, + { + "action_loss": 0.020988086238503456, + "epoch": 1.3758992805755397, + "step": 1530 + }, + { + "epoch": 1.3758992805755397, + "step": 1530, + "torque_loss": 0.14598026871681213 + }, + { + "epoch": 1.3758992805755397, + "force_loss": 0.008936233818531036, + "step": 1530 + }, + { + "epoch": 1.3848920863309353, + "grad_norm": 1.572195291519165, + "learning_rate": 5.130000000000001e-05, + "loss": 0.0363, + "step": 1540 + }, + { + "action_loss": 0.01592131517827511, + "epoch": 1.3848920863309353, + "step": 1540 + }, + { + "epoch": 1.3848920863309353, + "step": 1540, + "torque_loss": 0.1095358356833458 + }, + { + "epoch": 1.3848920863309353, + "force_loss": 0.006344737019389868, + "step": 1540 + }, + { + "epoch": 1.393884892086331, + "grad_norm": 1.034877061843872, + "learning_rate": 5.163333333333333e-05, + "loss": 0.0449, + "step": 1550 + }, + { + "action_loss": 0.016093989834189415, + "epoch": 1.393884892086331, + "step": 1550 + }, + { + "epoch": 1.393884892086331, + "step": 1550, + "torque_loss": 0.1271398961544037 + }, + { + "epoch": 1.393884892086331, + "force_loss": 0.008789844810962677, + "step": 1550 + }, + { + "epoch": 1.4028776978417266, + "grad_norm": 0.6551756262779236, + "learning_rate": 5.196666666666667e-05, + "loss": 0.0325, + "step": 1560 + }, + { + "action_loss": 0.013529728166759014, + "epoch": 1.4028776978417266, + "step": 1560 + }, + { + "epoch": 1.4028776978417266, + "step": 1560, + "torque_loss": 0.12017623335123062 + }, + { + "epoch": 1.4028776978417266, + "force_loss": 0.008128629997372627, + "step": 1560 + }, + { + "epoch": 1.4118705035971222, + "grad_norm": 0.8998020887374878, + "learning_rate": 5.2300000000000004e-05, + "loss": 0.0348, + "step": 1570 + }, + { + "action_loss": 0.022844159975647926, + "epoch": 1.4118705035971222, + "step": 1570 + }, + { + "epoch": 1.4118705035971222, + "step": 1570, + "torque_loss": 0.1208563819527626 + }, + { + "epoch": 1.4118705035971222, + "force_loss": 0.005417071282863617, + "step": 1570 + }, + { + "epoch": 1.420863309352518, + "grad_norm": 1.5591024160385132, + "learning_rate": 5.2633333333333336e-05, + "loss": 0.0384, + "step": 1580 + }, + { + "action_loss": 0.01836899295449257, + "epoch": 1.420863309352518, + "step": 1580 + }, + { + "epoch": 1.420863309352518, + "step": 1580, + "torque_loss": 0.13783617317676544 + }, + { + "epoch": 1.420863309352518, + "force_loss": 0.007158016320317984, + "step": 1580 + }, + { + "epoch": 1.4298561151079137, + "grad_norm": 1.5710378885269165, + "learning_rate": 5.296666666666666e-05, + "loss": 0.0417, + "step": 1590 + }, + { + "action_loss": 0.020005570724606514, + "epoch": 1.4298561151079137, + "step": 1590 + }, + { + "epoch": 1.4298561151079137, + "step": 1590, + "torque_loss": 0.155879944562912 + }, + { + "epoch": 1.4298561151079137, + "force_loss": 0.010471027344465256, + "step": 1590 + }, + { + "epoch": 1.4388489208633093, + "grad_norm": 0.8876373171806335, + "learning_rate": 5.330000000000001e-05, + "loss": 0.0403, + "step": 1600 + }, + { + "action_loss": 0.01961771585047245, + "epoch": 1.4388489208633093, + "step": 1600 + }, + { + "epoch": 1.4388489208633093, + "step": 1600, + "torque_loss": 0.1969851702451706 + }, + { + "epoch": 1.4388489208633093, + "force_loss": 0.010652844794094563, + "step": 1600 + }, + { + "epoch": 1.4478417266187051, + "grad_norm": 1.0628294944763184, + "learning_rate": 5.3633333333333334e-05, + "loss": 0.0416, + "step": 1610 + }, + { + "action_loss": 0.038969844579696655, + "epoch": 1.4478417266187051, + "step": 1610 + }, + { + "epoch": 1.4478417266187051, + "step": 1610, + "torque_loss": 0.13635770976543427 + }, + { + "epoch": 1.4478417266187051, + "force_loss": 0.020792650058865547, + "step": 1610 + }, + { + "epoch": 1.4568345323741008, + "grad_norm": 1.399761438369751, + "learning_rate": 5.3966666666666666e-05, + "loss": 0.0386, + "step": 1620 + }, + { + "action_loss": 0.01814407669007778, + "epoch": 1.4568345323741008, + "step": 1620 + }, + { + "epoch": 1.4568345323741008, + "step": 1620, + "torque_loss": 0.12877829372882843 + }, + { + "epoch": 1.4568345323741008, + "force_loss": 0.007057117763906717, + "step": 1620 + }, + { + "epoch": 1.4658273381294964, + "grad_norm": 1.0869120359420776, + "learning_rate": 5.4300000000000005e-05, + "loss": 0.0456, + "step": 1630 + }, + { + "action_loss": 0.017783258110284805, + "epoch": 1.4658273381294964, + "step": 1630 + }, + { + "epoch": 1.4658273381294964, + "step": 1630, + "torque_loss": 0.11481910943984985 + }, + { + "epoch": 1.4658273381294964, + "force_loss": 0.008570303209125996, + "step": 1630 + }, + { + "epoch": 1.474820143884892, + "grad_norm": 2.3219895362854004, + "learning_rate": 5.463333333333334e-05, + "loss": 0.0406, + "step": 1640 + }, + { + "action_loss": 0.025417575612664223, + "epoch": 1.474820143884892, + "step": 1640 + }, + { + "epoch": 1.474820143884892, + "step": 1640, + "torque_loss": 0.14974556863307953 + }, + { + "epoch": 1.474820143884892, + "force_loss": 0.021351585164666176, + "step": 1640 + }, + { + "epoch": 1.4838129496402876, + "grad_norm": 0.5720558166503906, + "learning_rate": 5.496666666666666e-05, + "loss": 0.0356, + "step": 1650 + }, + { + "action_loss": 0.01504466962069273, + "epoch": 1.4838129496402876, + "step": 1650 + }, + { + "epoch": 1.4838129496402876, + "step": 1650, + "torque_loss": 0.11802510172128677 + }, + { + "epoch": 1.4838129496402876, + "force_loss": 0.006154266651719809, + "step": 1650 + }, + { + "epoch": 1.4928057553956835, + "grad_norm": 0.8320698142051697, + "learning_rate": 5.530000000000001e-05, + "loss": 0.0472, + "step": 1660 + }, + { + "action_loss": 0.028241241350769997, + "epoch": 1.4928057553956835, + "step": 1660 + }, + { + "epoch": 1.4928057553956835, + "step": 1660, + "torque_loss": 0.12319370359182358 + }, + { + "epoch": 1.4928057553956835, + "force_loss": 0.010020174086093903, + "step": 1660 + }, + { + "epoch": 1.5017985611510791, + "grad_norm": 2.339298963546753, + "learning_rate": 5.5633333333333335e-05, + "loss": 0.0451, + "step": 1670 + }, + { + "action_loss": 0.054126013070344925, + "epoch": 1.5017985611510791, + "step": 1670 + }, + { + "epoch": 1.5017985611510791, + "step": 1670, + "torque_loss": 0.16401435434818268 + }, + { + "epoch": 1.5017985611510791, + "force_loss": 0.015665413811802864, + "step": 1670 + }, + { + "epoch": 1.5107913669064748, + "grad_norm": 1.5541661977767944, + "learning_rate": 5.596666666666667e-05, + "loss": 0.0484, + "step": 1680 + }, + { + "action_loss": 0.02101924829185009, + "epoch": 1.5107913669064748, + "step": 1680 + }, + { + "epoch": 1.5107913669064748, + "step": 1680, + "torque_loss": 0.12515272200107574 + }, + { + "epoch": 1.5107913669064748, + "force_loss": 0.007671245839446783, + "step": 1680 + }, + { + "epoch": 1.5197841726618706, + "grad_norm": 1.5375518798828125, + "learning_rate": 5.63e-05, + "loss": 0.0632, + "step": 1690 + }, + { + "action_loss": 0.020116286352276802, + "epoch": 1.5197841726618706, + "step": 1690 + }, + { + "epoch": 1.5197841726618706, + "step": 1690, + "torque_loss": 0.1242685541510582 + }, + { + "epoch": 1.5197841726618706, + "force_loss": 0.012467063032090664, + "step": 1690 + }, + { + "epoch": 1.5287769784172662, + "grad_norm": 1.3778345584869385, + "learning_rate": 5.663333333333334e-05, + "loss": 0.0457, + "step": 1700 + }, + { + "action_loss": 0.030835585668683052, + "epoch": 1.5287769784172662, + "step": 1700 + }, + { + "epoch": 1.5287769784172662, + "step": 1700, + "torque_loss": 0.12443309277296066 + }, + { + "epoch": 1.5287769784172662, + "force_loss": 0.013805332593619823, + "step": 1700 + }, + { + "epoch": 1.5377697841726619, + "grad_norm": 1.384293556213379, + "learning_rate": 5.696666666666667e-05, + "loss": 0.0411, + "step": 1710 + }, + { + "action_loss": 0.02501256950199604, + "epoch": 1.5377697841726619, + "step": 1710 + }, + { + "epoch": 1.5377697841726619, + "step": 1710, + "torque_loss": 0.13218729197978973 + }, + { + "epoch": 1.5377697841726619, + "force_loss": 0.008499592542648315, + "step": 1710 + }, + { + "epoch": 1.5467625899280577, + "grad_norm": 1.01209557056427, + "learning_rate": 5.73e-05, + "loss": 0.0453, + "step": 1720 + }, + { + "action_loss": 0.027362314984202385, + "epoch": 1.5467625899280577, + "step": 1720 + }, + { + "epoch": 1.5467625899280577, + "step": 1720, + "torque_loss": 0.1004047766327858 + }, + { + "epoch": 1.5467625899280577, + "force_loss": 0.01234254240989685, + "step": 1720 + }, + { + "epoch": 1.5557553956834531, + "grad_norm": 1.0071674585342407, + "learning_rate": 5.7633333333333336e-05, + "loss": 0.0384, + "step": 1730 + }, + { + "action_loss": 0.05230461433529854, + "epoch": 1.5557553956834531, + "step": 1730 + }, + { + "epoch": 1.5557553956834531, + "step": 1730, + "torque_loss": 0.22703103721141815 + }, + { + "epoch": 1.5557553956834531, + "force_loss": 0.026303531602025032, + "step": 1730 + }, + { + "epoch": 1.564748201438849, + "grad_norm": 1.1060887575149536, + "learning_rate": 5.796666666666667e-05, + "loss": 0.0437, + "step": 1740 + }, + { + "action_loss": 0.027820652350783348, + "epoch": 1.564748201438849, + "step": 1740 + }, + { + "epoch": 1.564748201438849, + "step": 1740, + "torque_loss": 0.11444851011037827 + }, + { + "epoch": 1.564748201438849, + "force_loss": 0.009924407117068768, + "step": 1740 + }, + { + "epoch": 1.5737410071942446, + "grad_norm": 1.1261448860168457, + "learning_rate": 5.83e-05, + "loss": 0.0391, + "step": 1750 + }, + { + "action_loss": 0.023280911147594452, + "epoch": 1.5737410071942446, + "step": 1750 + }, + { + "epoch": 1.5737410071942446, + "step": 1750, + "torque_loss": 0.16219337284564972 + }, + { + "epoch": 1.5737410071942446, + "force_loss": 0.011544729582965374, + "step": 1750 + }, + { + "epoch": 1.5827338129496402, + "grad_norm": 2.5638270378112793, + "learning_rate": 5.863333333333334e-05, + "loss": 0.0399, + "step": 1760 + }, + { + "action_loss": 0.037021443247795105, + "epoch": 1.5827338129496402, + "step": 1760 + }, + { + "epoch": 1.5827338129496402, + "step": 1760, + "torque_loss": 0.12499305605888367 + }, + { + "epoch": 1.5827338129496402, + "force_loss": 0.01578935794532299, + "step": 1760 + }, + { + "epoch": 1.591726618705036, + "grad_norm": 0.9227173924446106, + "learning_rate": 5.896666666666667e-05, + "loss": 0.0405, + "step": 1770 + }, + { + "action_loss": 0.010805748403072357, + "epoch": 1.591726618705036, + "step": 1770 + }, + { + "epoch": 1.591726618705036, + "step": 1770, + "torque_loss": 0.1569158285856247 + }, + { + "epoch": 1.591726618705036, + "force_loss": 0.007633899804204702, + "step": 1770 + }, + { + "epoch": 1.6007194244604317, + "grad_norm": 0.7877851724624634, + "learning_rate": 5.93e-05, + "loss": 0.0311, + "step": 1780 + }, + { + "action_loss": 0.019057104364037514, + "epoch": 1.6007194244604317, + "step": 1780 + }, + { + "epoch": 1.6007194244604317, + "step": 1780, + "torque_loss": 0.13095499575138092 + }, + { + "epoch": 1.6007194244604317, + "force_loss": 0.007099824491888285, + "step": 1780 + }, + { + "epoch": 1.6097122302158273, + "grad_norm": 1.6048319339752197, + "learning_rate": 5.9633333333333344e-05, + "loss": 0.0518, + "step": 1790 + }, + { + "action_loss": 0.022755520418286324, + "epoch": 1.6097122302158273, + "step": 1790 + }, + { + "epoch": 1.6097122302158273, + "step": 1790, + "torque_loss": 0.12178144603967667 + }, + { + "epoch": 1.6097122302158273, + "force_loss": 0.008644222281873226, + "step": 1790 + }, + { + "epoch": 1.6187050359712232, + "grad_norm": 2.2193334102630615, + "learning_rate": 5.996666666666667e-05, + "loss": 0.0496, + "step": 1800 + }, + { + "action_loss": 0.01914903335273266, + "epoch": 1.6187050359712232, + "step": 1800 + }, + { + "epoch": 1.6187050359712232, + "step": 1800, + "torque_loss": 0.13406257331371307 + }, + { + "epoch": 1.6187050359712232, + "force_loss": 0.009918020106852055, + "step": 1800 + }, + { + "epoch": 1.6276978417266186, + "grad_norm": 1.2487359046936035, + "learning_rate": 6.03e-05, + "loss": 0.0385, + "step": 1810 + }, + { + "action_loss": 0.020536141470074654, + "epoch": 1.6276978417266186, + "step": 1810 + }, + { + "epoch": 1.6276978417266186, + "step": 1810, + "torque_loss": 0.10585105419158936 + }, + { + "epoch": 1.6276978417266186, + "force_loss": 0.007804557215422392, + "step": 1810 + }, + { + "epoch": 1.6366906474820144, + "grad_norm": 0.9121411442756653, + "learning_rate": 6.063333333333333e-05, + "loss": 0.0321, + "step": 1820 + }, + { + "action_loss": 0.012331143952906132, + "epoch": 1.6366906474820144, + "step": 1820 + }, + { + "epoch": 1.6366906474820144, + "step": 1820, + "torque_loss": 0.16718094050884247 + }, + { + "epoch": 1.6366906474820144, + "force_loss": 0.00467007327824831, + "step": 1820 + }, + { + "epoch": 1.64568345323741, + "grad_norm": 1.7599858045578003, + "learning_rate": 6.0966666666666674e-05, + "loss": 0.0384, + "step": 1830 + }, + { + "action_loss": 0.028700992465019226, + "epoch": 1.64568345323741, + "step": 1830 + }, + { + "epoch": 1.64568345323741, + "step": 1830, + "torque_loss": 0.13857920467853546 + }, + { + "epoch": 1.64568345323741, + "force_loss": 0.015253528952598572, + "step": 1830 + }, + { + "epoch": 1.6546762589928057, + "grad_norm": 1.1731390953063965, + "learning_rate": 6.13e-05, + "loss": 0.0441, + "step": 1840 + }, + { + "action_loss": 0.02267463319003582, + "epoch": 1.6546762589928057, + "step": 1840 + }, + { + "epoch": 1.6546762589928057, + "step": 1840, + "torque_loss": 0.12330130487680435 + }, + { + "epoch": 1.6546762589928057, + "force_loss": 0.012602976523339748, + "step": 1840 + }, + { + "epoch": 1.6636690647482015, + "grad_norm": 0.6124871373176575, + "learning_rate": 6.163333333333333e-05, + "loss": 0.0366, + "step": 1850 + }, + { + "action_loss": 0.019395431503653526, + "epoch": 1.6636690647482015, + "step": 1850 + }, + { + "epoch": 1.6636690647482015, + "step": 1850, + "torque_loss": 0.12141523510217667 + }, + { + "epoch": 1.6636690647482015, + "force_loss": 0.007657719310373068, + "step": 1850 + }, + { + "epoch": 1.6726618705035972, + "grad_norm": 1.1004042625427246, + "learning_rate": 6.196666666666668e-05, + "loss": 0.0398, + "step": 1860 + }, + { + "action_loss": 0.021091600880026817, + "epoch": 1.6726618705035972, + "step": 1860 + }, + { + "epoch": 1.6726618705035972, + "step": 1860, + "torque_loss": 0.12089512497186661 + }, + { + "epoch": 1.6726618705035972, + "force_loss": 0.016227731481194496, + "step": 1860 + }, + { + "epoch": 1.6816546762589928, + "grad_norm": 1.5495952367782593, + "learning_rate": 6.23e-05, + "loss": 0.0331, + "step": 1870 + }, + { + "action_loss": 0.022025009617209435, + "epoch": 1.6816546762589928, + "step": 1870 + }, + { + "epoch": 1.6816546762589928, + "step": 1870, + "torque_loss": 0.13842914998531342 + }, + { + "epoch": 1.6816546762589928, + "force_loss": 0.012225431390106678, + "step": 1870 + }, + { + "epoch": 1.6906474820143886, + "grad_norm": 0.9583244323730469, + "learning_rate": 6.263333333333333e-05, + "loss": 0.0362, + "step": 1880 + }, + { + "action_loss": 0.011240922845900059, + "epoch": 1.6906474820143886, + "step": 1880 + }, + { + "epoch": 1.6906474820143886, + "step": 1880, + "torque_loss": 0.14442689716815948 + }, + { + "epoch": 1.6906474820143886, + "force_loss": 0.005097482819110155, + "step": 1880 + }, + { + "epoch": 1.699640287769784, + "grad_norm": 0.9254547953605652, + "learning_rate": 6.296666666666667e-05, + "loss": 0.0426, + "step": 1890 + }, + { + "action_loss": 0.03405832126736641, + "epoch": 1.699640287769784, + "step": 1890 + }, + { + "epoch": 1.699640287769784, + "step": 1890, + "torque_loss": 0.151302307844162 + }, + { + "epoch": 1.699640287769784, + "force_loss": 0.017275933176279068, + "step": 1890 + }, + { + "epoch": 1.70863309352518, + "grad_norm": 1.3274579048156738, + "learning_rate": 6.330000000000001e-05, + "loss": 0.0473, + "step": 1900 + }, + { + "action_loss": 0.026476452127099037, + "epoch": 1.70863309352518, + "step": 1900 + }, + { + "epoch": 1.70863309352518, + "step": 1900, + "torque_loss": 0.14379094541072845 + }, + { + "epoch": 1.70863309352518, + "force_loss": 0.00846109539270401, + "step": 1900 + }, + { + "epoch": 1.7176258992805755, + "grad_norm": 2.355393648147583, + "learning_rate": 6.363333333333334e-05, + "loss": 0.0359, + "step": 1910 + }, + { + "action_loss": 0.016802387312054634, + "epoch": 1.7176258992805755, + "step": 1910 + }, + { + "epoch": 1.7176258992805755, + "step": 1910, + "torque_loss": 0.1140458956360817 + }, + { + "epoch": 1.7176258992805755, + "force_loss": 0.007565314415842295, + "step": 1910 + }, + { + "epoch": 1.7266187050359711, + "grad_norm": 1.7675760984420776, + "learning_rate": 6.396666666666667e-05, + "loss": 0.0324, + "step": 1920 + }, + { + "action_loss": 0.06040431559085846, + "epoch": 1.7266187050359711, + "step": 1920 + }, + { + "epoch": 1.7266187050359711, + "step": 1920, + "torque_loss": 0.1805545836687088 + }, + { + "epoch": 1.7266187050359711, + "force_loss": 0.03229805454611778, + "step": 1920 + }, + { + "epoch": 1.735611510791367, + "grad_norm": 1.8652795553207397, + "learning_rate": 6.43e-05, + "loss": 0.0441, + "step": 1930 + }, + { + "action_loss": 0.030441507697105408, + "epoch": 1.735611510791367, + "step": 1930 + }, + { + "epoch": 1.735611510791367, + "step": 1930, + "torque_loss": 0.13247232139110565 + }, + { + "epoch": 1.735611510791367, + "force_loss": 0.00965114776045084, + "step": 1930 + }, + { + "epoch": 1.7446043165467626, + "grad_norm": 1.0935837030410767, + "learning_rate": 6.463333333333334e-05, + "loss": 0.0319, + "step": 1940 + }, + { + "action_loss": 0.030394727364182472, + "epoch": 1.7446043165467626, + "step": 1940 + }, + { + "epoch": 1.7446043165467626, + "step": 1940, + "torque_loss": 0.14992812275886536 + }, + { + "epoch": 1.7446043165467626, + "force_loss": 0.011297706514596939, + "step": 1940 + }, + { + "epoch": 1.7535971223021583, + "grad_norm": 1.1542115211486816, + "learning_rate": 6.496666666666667e-05, + "loss": 0.0407, + "step": 1950 + }, + { + "action_loss": 0.02443859539926052, + "epoch": 1.7535971223021583, + "step": 1950 + }, + { + "epoch": 1.7535971223021583, + "step": 1950, + "torque_loss": 0.14138247072696686 + }, + { + "epoch": 1.7535971223021583, + "force_loss": 0.012840240262448788, + "step": 1950 + }, + { + "epoch": 1.762589928057554, + "grad_norm": 0.8995655179023743, + "learning_rate": 6.53e-05, + "loss": 0.0394, + "step": 1960 + }, + { + "action_loss": 0.014818400144577026, + "epoch": 1.762589928057554, + "step": 1960 + }, + { + "epoch": 1.762589928057554, + "step": 1960, + "torque_loss": 0.08133085817098618 + }, + { + "epoch": 1.762589928057554, + "force_loss": 0.005469355266541243, + "step": 1960 + }, + { + "epoch": 1.7715827338129495, + "grad_norm": 1.2325860261917114, + "learning_rate": 6.563333333333333e-05, + "loss": 0.0428, + "step": 1970 + }, + { + "action_loss": 0.05178338661789894, + "epoch": 1.7715827338129495, + "step": 1970 + }, + { + "epoch": 1.7715827338129495, + "step": 1970, + "torque_loss": 0.11794144660234451 + }, + { + "epoch": 1.7715827338129495, + "force_loss": 0.03476705029606819, + "step": 1970 + }, + { + "epoch": 1.7805755395683454, + "grad_norm": 1.9361802339553833, + "learning_rate": 6.596666666666667e-05, + "loss": 0.047, + "step": 1980 + }, + { + "action_loss": 0.015835566446185112, + "epoch": 1.7805755395683454, + "step": 1980 + }, + { + "epoch": 1.7805755395683454, + "step": 1980, + "torque_loss": 0.11120063066482544 + }, + { + "epoch": 1.7805755395683454, + "force_loss": 0.007940416224300861, + "step": 1980 + }, + { + "epoch": 1.789568345323741, + "grad_norm": 1.2197179794311523, + "learning_rate": 6.630000000000001e-05, + "loss": 0.0407, + "step": 1990 + }, + { + "action_loss": 0.009139064699411392, + "epoch": 1.789568345323741, + "step": 1990 + }, + { + "epoch": 1.789568345323741, + "step": 1990, + "torque_loss": 0.10894829034805298 + }, + { + "epoch": 1.789568345323741, + "force_loss": 0.006585546303540468, + "step": 1990 + }, + { + "epoch": 1.7985611510791366, + "grad_norm": 1.0811537504196167, + "learning_rate": 6.663333333333333e-05, + "loss": 0.0376, + "step": 2000 + }, + { + "action_loss": 0.01651059091091156, + "epoch": 1.7985611510791366, + "step": 2000 + }, + { + "epoch": 1.7985611510791366, + "step": 2000, + "torque_loss": 0.1283266544342041 + }, + { + "epoch": 1.7985611510791366, + "force_loss": 0.008875512517988682, + "step": 2000 + }, + { + "epoch": 1.8075539568345325, + "grad_norm": 2.1722841262817383, + "learning_rate": 6.696666666666666e-05, + "loss": 0.0382, + "step": 2010 + }, + { + "action_loss": 0.0240608062595129, + "epoch": 1.8075539568345325, + "step": 2010 + }, + { + "epoch": 1.8075539568345325, + "step": 2010, + "torque_loss": 0.12283951044082642 + }, + { + "epoch": 1.8075539568345325, + "force_loss": 0.012677472084760666, + "step": 2010 + }, + { + "epoch": 1.816546762589928, + "grad_norm": 1.5948004722595215, + "learning_rate": 6.730000000000001e-05, + "loss": 0.0372, + "step": 2020 + }, + { + "action_loss": 0.017273152247071266, + "epoch": 1.816546762589928, + "step": 2020 + }, + { + "epoch": 1.816546762589928, + "step": 2020, + "torque_loss": 0.1408601701259613 + }, + { + "epoch": 1.816546762589928, + "force_loss": 0.009422182105481625, + "step": 2020 + }, + { + "epoch": 1.8255395683453237, + "grad_norm": 1.5452513694763184, + "learning_rate": 6.763333333333334e-05, + "loss": 0.0389, + "step": 2030 + }, + { + "action_loss": 0.012955605052411556, + "epoch": 1.8255395683453237, + "step": 2030 + }, + { + "epoch": 1.8255395683453237, + "step": 2030, + "torque_loss": 0.13550597429275513 + }, + { + "epoch": 1.8255395683453237, + "force_loss": 0.00767833786085248, + "step": 2030 + }, + { + "epoch": 1.8345323741007196, + "grad_norm": 1.8239887952804565, + "learning_rate": 6.796666666666666e-05, + "loss": 0.0347, + "step": 2040 + }, + { + "action_loss": 0.016966264694929123, + "epoch": 1.8345323741007196, + "step": 2040 + }, + { + "epoch": 1.8345323741007196, + "step": 2040, + "torque_loss": 0.1556432992219925 + }, + { + "epoch": 1.8345323741007196, + "force_loss": 0.008319436572492123, + "step": 2040 + }, + { + "epoch": 1.843525179856115, + "grad_norm": 0.7103005051612854, + "learning_rate": 6.83e-05, + "loss": 0.0374, + "step": 2050 + }, + { + "action_loss": 0.010314648039638996, + "epoch": 1.843525179856115, + "step": 2050 + }, + { + "epoch": 1.843525179856115, + "step": 2050, + "torque_loss": 0.10243510454893112 + }, + { + "epoch": 1.843525179856115, + "force_loss": 0.006999240722507238, + "step": 2050 + }, + { + "epoch": 1.8525179856115108, + "grad_norm": 1.2091851234436035, + "learning_rate": 6.863333333333334e-05, + "loss": 0.041, + "step": 2060 + }, + { + "action_loss": 0.01964065432548523, + "epoch": 1.8525179856115108, + "step": 2060 + }, + { + "epoch": 1.8525179856115108, + "step": 2060, + "torque_loss": 0.2040068656206131 + }, + { + "epoch": 1.8525179856115108, + "force_loss": 0.011498930864036083, + "step": 2060 + }, + { + "epoch": 1.8615107913669064, + "grad_norm": 0.8997297883033752, + "learning_rate": 6.896666666666667e-05, + "loss": 0.041, + "step": 2070 + }, + { + "action_loss": 0.014001811854541302, + "epoch": 1.8615107913669064, + "step": 2070 + }, + { + "epoch": 1.8615107913669064, + "step": 2070, + "torque_loss": 0.14261697232723236 + }, + { + "epoch": 1.8615107913669064, + "force_loss": 0.013104937970638275, + "step": 2070 + }, + { + "epoch": 1.870503597122302, + "grad_norm": 1.457098364830017, + "learning_rate": 6.93e-05, + "loss": 0.0413, + "step": 2080 + }, + { + "action_loss": 0.02490692026913166, + "epoch": 1.870503597122302, + "step": 2080 + }, + { + "epoch": 1.870503597122302, + "step": 2080, + "torque_loss": 0.13433009386062622 + }, + { + "epoch": 1.870503597122302, + "force_loss": 0.020111190155148506, + "step": 2080 + }, + { + "epoch": 1.879496402877698, + "grad_norm": 1.1311707496643066, + "learning_rate": 6.963333333333334e-05, + "loss": 0.0345, + "step": 2090 + }, + { + "action_loss": 0.016696559265255928, + "epoch": 1.879496402877698, + "step": 2090 + }, + { + "epoch": 1.879496402877698, + "step": 2090, + "torque_loss": 0.12842392921447754 + }, + { + "epoch": 1.879496402877698, + "force_loss": 0.011607293970882893, + "step": 2090 + }, + { + "epoch": 1.8884892086330936, + "grad_norm": 1.4410679340362549, + "learning_rate": 6.996666666666667e-05, + "loss": 0.0369, + "step": 2100 + }, + { + "action_loss": 0.013090457767248154, + "epoch": 1.8884892086330936, + "step": 2100 + }, + { + "epoch": 1.8884892086330936, + "step": 2100, + "torque_loss": 0.11940749734640121 + }, + { + "epoch": 1.8884892086330936, + "force_loss": 0.005699618253856897, + "step": 2100 + }, + { + "epoch": 1.8974820143884892, + "grad_norm": 0.9415628910064697, + "learning_rate": 7.03e-05, + "loss": 0.0432, + "step": 2110 + }, + { + "action_loss": 0.020249422639608383, + "epoch": 1.8974820143884892, + "step": 2110 + }, + { + "epoch": 1.8974820143884892, + "step": 2110, + "torque_loss": 0.14470814168453217 + }, + { + "epoch": 1.8974820143884892, + "force_loss": 0.007382591720670462, + "step": 2110 + }, + { + "epoch": 1.906474820143885, + "grad_norm": 1.7874438762664795, + "learning_rate": 7.063333333333333e-05, + "loss": 0.051, + "step": 2120 + }, + { + "action_loss": 0.016154659911990166, + "epoch": 1.906474820143885, + "step": 2120 + }, + { + "epoch": 1.906474820143885, + "step": 2120, + "torque_loss": 0.18611186742782593 + }, + { + "epoch": 1.906474820143885, + "force_loss": 0.013002969324588776, + "step": 2120 + }, + { + "epoch": 1.9154676258992804, + "grad_norm": 1.8646923303604126, + "learning_rate": 7.096666666666667e-05, + "loss": 0.0476, + "step": 2130 + }, + { + "action_loss": 0.03132406249642372, + "epoch": 1.9154676258992804, + "step": 2130 + }, + { + "epoch": 1.9154676258992804, + "step": 2130, + "torque_loss": 0.1391480565071106 + }, + { + "epoch": 1.9154676258992804, + "force_loss": 0.014118481427431107, + "step": 2130 + }, + { + "epoch": 1.9244604316546763, + "grad_norm": 0.6361132860183716, + "learning_rate": 7.13e-05, + "loss": 0.0414, + "step": 2140 + }, + { + "action_loss": 0.020025895908474922, + "epoch": 1.9244604316546763, + "step": 2140 + }, + { + "epoch": 1.9244604316546763, + "step": 2140, + "torque_loss": 0.17662544548511505 + }, + { + "epoch": 1.9244604316546763, + "force_loss": 0.023267442360520363, + "step": 2140 + }, + { + "epoch": 1.933453237410072, + "grad_norm": 0.46818807721138, + "learning_rate": 7.163333333333334e-05, + "loss": 0.0366, + "step": 2150 + }, + { + "action_loss": 0.0124775106087327, + "epoch": 1.933453237410072, + "step": 2150 + }, + { + "epoch": 1.933453237410072, + "step": 2150, + "torque_loss": 0.13043367862701416 + }, + { + "epoch": 1.933453237410072, + "force_loss": 0.0071367863565683365, + "step": 2150 + }, + { + "epoch": 1.9424460431654675, + "grad_norm": 0.7108094096183777, + "learning_rate": 7.196666666666668e-05, + "loss": 0.0459, + "step": 2160 + }, + { + "action_loss": 0.024643266573548317, + "epoch": 1.9424460431654675, + "step": 2160 + }, + { + "epoch": 1.9424460431654675, + "step": 2160, + "torque_loss": 0.16619999706745148 + }, + { + "epoch": 1.9424460431654675, + "force_loss": 0.009283944964408875, + "step": 2160 + }, + { + "epoch": 1.9514388489208634, + "grad_norm": 0.7339408993721008, + "learning_rate": 7.23e-05, + "loss": 0.0328, + "step": 2170 + }, + { + "action_loss": 0.02632194198668003, + "epoch": 1.9514388489208634, + "step": 2170 + }, + { + "epoch": 1.9514388489208634, + "step": 2170, + "torque_loss": 0.10700962692499161 + }, + { + "epoch": 1.9514388489208634, + "force_loss": 0.022282054647803307, + "step": 2170 + }, + { + "epoch": 1.960431654676259, + "grad_norm": 1.891281008720398, + "learning_rate": 7.263333333333334e-05, + "loss": 0.0284, + "step": 2180 + }, + { + "action_loss": 0.019738411530852318, + "epoch": 1.960431654676259, + "step": 2180 + }, + { + "epoch": 1.960431654676259, + "step": 2180, + "torque_loss": 0.1465553194284439 + }, + { + "epoch": 1.960431654676259, + "force_loss": 0.020632626488804817, + "step": 2180 + }, + { + "epoch": 1.9694244604316546, + "grad_norm": 1.102104663848877, + "learning_rate": 7.296666666666667e-05, + "loss": 0.0365, + "step": 2190 + }, + { + "action_loss": 0.02356117218732834, + "epoch": 1.9694244604316546, + "step": 2190 + }, + { + "epoch": 1.9694244604316546, + "step": 2190, + "torque_loss": 0.14908255636692047 + }, + { + "epoch": 1.9694244604316546, + "force_loss": 0.012325418181717396, + "step": 2190 + }, + { + "epoch": 1.9784172661870505, + "grad_norm": 2.1154210567474365, + "learning_rate": 7.33e-05, + "loss": 0.0421, + "step": 2200 + }, + { + "action_loss": 0.04876772686839104, + "epoch": 1.9784172661870505, + "step": 2200 + }, + { + "epoch": 1.9784172661870505, + "step": 2200, + "torque_loss": 0.15356247127056122 + }, + { + "epoch": 1.9784172661870505, + "force_loss": 0.020470868796110153, + "step": 2200 + }, + { + "epoch": 1.987410071942446, + "grad_norm": 1.734969973564148, + "learning_rate": 7.363333333333334e-05, + "loss": 0.0464, + "step": 2210 + }, + { + "action_loss": 0.029785921797156334, + "epoch": 1.987410071942446, + "step": 2210 + }, + { + "epoch": 1.987410071942446, + "step": 2210, + "torque_loss": 0.18497033417224884 + }, + { + "epoch": 1.987410071942446, + "force_loss": 0.01955021359026432, + "step": 2210 + }, + { + "epoch": 1.9964028776978417, + "grad_norm": 1.0125097036361694, + "learning_rate": 7.396666666666667e-05, + "loss": 0.04, + "step": 2220 + }, + { + "action_loss": 0.02043708600103855, + "epoch": 1.9964028776978417, + "step": 2220 + }, + { + "epoch": 1.9964028776978417, + "step": 2220, + "torque_loss": 0.172343909740448 + }, + { + "epoch": 1.9964028776978417, + "force_loss": 0.016166238114237785, + "step": 2220 + }, + { + "epoch": 2.0053956834532376, + "grad_norm": 0.709406852722168, + "learning_rate": 7.43e-05, + "loss": 0.045, + "step": 2230 + }, + { + "action_loss": 0.03095175139605999, + "epoch": 2.0053956834532376, + "step": 2230 + }, + { + "epoch": 2.0053956834532376, + "step": 2230, + "torque_loss": 0.19164550304412842 + }, + { + "epoch": 2.0053956834532376, + "force_loss": 0.020785285159945488, + "step": 2230 + }, + { + "epoch": 2.014388489208633, + "grad_norm": 0.7892938256263733, + "learning_rate": 7.463333333333334e-05, + "loss": 0.0433, + "step": 2240 + }, + { + "action_loss": 0.034813642501831055, + "epoch": 2.014388489208633, + "step": 2240 + }, + { + "epoch": 2.014388489208633, + "step": 2240, + "torque_loss": 0.17479079961776733 + }, + { + "epoch": 2.014388489208633, + "force_loss": 0.018014518544077873, + "step": 2240 + }, + { + "epoch": 2.023381294964029, + "grad_norm": 0.5668711066246033, + "learning_rate": 7.496666666666667e-05, + "loss": 0.0409, + "step": 2250 + }, + { + "action_loss": 0.021270493045449257, + "epoch": 2.023381294964029, + "step": 2250 + }, + { + "epoch": 2.023381294964029, + "step": 2250, + "torque_loss": 0.10206601768732071 + }, + { + "epoch": 2.023381294964029, + "force_loss": 0.011215499602258205, + "step": 2250 + }, + { + "epoch": 2.0323741007194243, + "grad_norm": 1.3354475498199463, + "learning_rate": 7.53e-05, + "loss": 0.0515, + "step": 2260 + }, + { + "action_loss": 0.016435161232948303, + "epoch": 2.0323741007194243, + "step": 2260 + }, + { + "epoch": 2.0323741007194243, + "step": 2260, + "torque_loss": 0.16192492842674255 + }, + { + "epoch": 2.0323741007194243, + "force_loss": 0.008937232196331024, + "step": 2260 + }, + { + "epoch": 2.04136690647482, + "grad_norm": 1.3260750770568848, + "learning_rate": 7.563333333333333e-05, + "loss": 0.0438, + "step": 2270 + }, + { + "action_loss": 0.05003343150019646, + "epoch": 2.04136690647482, + "step": 2270 + }, + { + "epoch": 2.04136690647482, + "step": 2270, + "torque_loss": 0.15236492455005646 + }, + { + "epoch": 2.04136690647482, + "force_loss": 0.028767839074134827, + "step": 2270 + }, + { + "epoch": 2.050359712230216, + "grad_norm": 1.6289191246032715, + "learning_rate": 7.596666666666668e-05, + "loss": 0.0442, + "step": 2280 + }, + { + "action_loss": 0.032095033675432205, + "epoch": 2.050359712230216, + "step": 2280 + }, + { + "epoch": 2.050359712230216, + "step": 2280, + "torque_loss": 0.15725067257881165 + }, + { + "epoch": 2.050359712230216, + "force_loss": 0.01950998790562153, + "step": 2280 + }, + { + "epoch": 2.0593525179856114, + "grad_norm": 1.321658968925476, + "learning_rate": 7.630000000000001e-05, + "loss": 0.0451, + "step": 2290 + }, + { + "action_loss": 0.01688532717525959, + "epoch": 2.0593525179856114, + "step": 2290 + }, + { + "epoch": 2.0593525179856114, + "step": 2290, + "torque_loss": 0.1754584163427353 + }, + { + "epoch": 2.0593525179856114, + "force_loss": 0.00883524026721716, + "step": 2290 + }, + { + "epoch": 2.068345323741007, + "grad_norm": 1.4887975454330444, + "learning_rate": 7.663333333333333e-05, + "loss": 0.0428, + "step": 2300 + }, + { + "action_loss": 0.010168353095650673, + "epoch": 2.068345323741007, + "step": 2300 + }, + { + "epoch": 2.068345323741007, + "step": 2300, + "torque_loss": 0.13163544237613678 + }, + { + "epoch": 2.068345323741007, + "force_loss": 0.009628458879888058, + "step": 2300 + }, + { + "epoch": 2.077338129496403, + "grad_norm": 2.233642578125, + "learning_rate": 7.696666666666668e-05, + "loss": 0.0363, + "step": 2310 + }, + { + "action_loss": 0.02015838585793972, + "epoch": 2.077338129496403, + "step": 2310 + }, + { + "epoch": 2.077338129496403, + "step": 2310, + "torque_loss": 0.12331744283437729 + }, + { + "epoch": 2.077338129496403, + "force_loss": 0.01339507382363081, + "step": 2310 + }, + { + "epoch": 2.0863309352517985, + "grad_norm": 1.5707989931106567, + "learning_rate": 7.730000000000001e-05, + "loss": 0.0444, + "step": 2320 + }, + { + "action_loss": 0.016280699521303177, + "epoch": 2.0863309352517985, + "step": 2320 + }, + { + "epoch": 2.0863309352517985, + "step": 2320, + "torque_loss": 0.10791879892349243 + }, + { + "epoch": 2.0863309352517985, + "force_loss": 0.007361226249486208, + "step": 2320 + }, + { + "epoch": 2.0953237410071943, + "grad_norm": 1.6029785871505737, + "learning_rate": 7.763333333333334e-05, + "loss": 0.0426, + "step": 2330 + }, + { + "action_loss": 0.02585158683359623, + "epoch": 2.0953237410071943, + "step": 2330 + }, + { + "epoch": 2.0953237410071943, + "step": 2330, + "torque_loss": 0.11488571017980576 + }, + { + "epoch": 2.0953237410071943, + "force_loss": 0.008152122609317303, + "step": 2330 + }, + { + "epoch": 2.1043165467625897, + "grad_norm": 1.4889627695083618, + "learning_rate": 7.796666666666666e-05, + "loss": 0.0398, + "step": 2340 + }, + { + "action_loss": 0.011856448836624622, + "epoch": 2.1043165467625897, + "step": 2340 + }, + { + "epoch": 2.1043165467625897, + "step": 2340, + "torque_loss": 0.18074285984039307 + }, + { + "epoch": 2.1043165467625897, + "force_loss": 0.008535501547157764, + "step": 2340 + }, + { + "epoch": 2.1133093525179856, + "grad_norm": 1.0595521926879883, + "learning_rate": 7.83e-05, + "loss": 0.0382, + "step": 2350 + }, + { + "action_loss": 0.015816258266568184, + "epoch": 2.1133093525179856, + "step": 2350 + }, + { + "epoch": 2.1133093525179856, + "step": 2350, + "torque_loss": 0.1994006186723709 + }, + { + "epoch": 2.1133093525179856, + "force_loss": 0.008162776939570904, + "step": 2350 + }, + { + "epoch": 2.1223021582733814, + "grad_norm": 2.5009734630584717, + "learning_rate": 7.863333333333334e-05, + "loss": 0.0371, + "step": 2360 + }, + { + "action_loss": 0.019855961203575134, + "epoch": 2.1223021582733814, + "step": 2360 + }, + { + "epoch": 2.1223021582733814, + "step": 2360, + "torque_loss": 0.09618862718343735 + }, + { + "epoch": 2.1223021582733814, + "force_loss": 0.027574585750699043, + "step": 2360 + }, + { + "epoch": 2.131294964028777, + "grad_norm": 1.38224196434021, + "learning_rate": 7.896666666666667e-05, + "loss": 0.0413, + "step": 2370 + }, + { + "action_loss": 0.012773166410624981, + "epoch": 2.131294964028777, + "step": 2370 + }, + { + "epoch": 2.131294964028777, + "step": 2370, + "torque_loss": 0.14643804728984833 + }, + { + "epoch": 2.131294964028777, + "force_loss": 0.00693405419588089, + "step": 2370 + }, + { + "epoch": 2.1402877697841727, + "grad_norm": 2.8662095069885254, + "learning_rate": 7.93e-05, + "loss": 0.0382, + "step": 2380 + }, + { + "action_loss": 0.019567472860217094, + "epoch": 2.1402877697841727, + "step": 2380 + }, + { + "epoch": 2.1402877697841727, + "step": 2380, + "torque_loss": 0.10939296334981918 + }, + { + "epoch": 2.1402877697841727, + "force_loss": 0.011409125290811062, + "step": 2380 + }, + { + "epoch": 2.1492805755395685, + "grad_norm": 0.5628333687782288, + "learning_rate": 7.963333333333334e-05, + "loss": 0.0443, + "step": 2390 + }, + { + "action_loss": 0.015659721568226814, + "epoch": 2.1492805755395685, + "step": 2390 + }, + { + "epoch": 2.1492805755395685, + "step": 2390, + "torque_loss": 0.1429305225610733 + }, + { + "epoch": 2.1492805755395685, + "force_loss": 0.013227318413555622, + "step": 2390 + }, + { + "epoch": 2.158273381294964, + "grad_norm": 1.2696839570999146, + "learning_rate": 7.996666666666667e-05, + "loss": 0.0361, + "step": 2400 + }, + { + "action_loss": 0.014651473611593246, + "epoch": 2.158273381294964, + "step": 2400 + }, + { + "epoch": 2.158273381294964, + "step": 2400, + "torque_loss": 0.12815704941749573 + }, + { + "epoch": 2.158273381294964, + "force_loss": 0.010313412174582481, + "step": 2400 + }, + { + "epoch": 2.16726618705036, + "grad_norm": 1.1857028007507324, + "learning_rate": 8.030000000000001e-05, + "loss": 0.0399, + "step": 2410 + }, + { + "action_loss": 0.020552780479192734, + "epoch": 2.16726618705036, + "step": 2410 + }, + { + "epoch": 2.16726618705036, + "step": 2410, + "torque_loss": 0.08756649494171143 + }, + { + "epoch": 2.16726618705036, + "force_loss": 0.01110069826245308, + "step": 2410 + }, + { + "epoch": 2.176258992805755, + "grad_norm": 1.1977838277816772, + "learning_rate": 8.063333333333333e-05, + "loss": 0.0384, + "step": 2420 + }, + { + "action_loss": 0.03344753757119179, + "epoch": 2.176258992805755, + "step": 2420 + }, + { + "epoch": 2.176258992805755, + "step": 2420, + "torque_loss": 0.18582697212696075 + }, + { + "epoch": 2.176258992805755, + "force_loss": 0.020389042794704437, + "step": 2420 + }, + { + "epoch": 2.185251798561151, + "grad_norm": 0.6409598588943481, + "learning_rate": 8.096666666666667e-05, + "loss": 0.0352, + "step": 2430 + }, + { + "action_loss": 0.021592723205685616, + "epoch": 2.185251798561151, + "step": 2430 + }, + { + "epoch": 2.185251798561151, + "step": 2430, + "torque_loss": 0.11224746704101562 + }, + { + "epoch": 2.185251798561151, + "force_loss": 0.029283076524734497, + "step": 2430 + }, + { + "epoch": 2.194244604316547, + "grad_norm": 1.0430068969726562, + "learning_rate": 8.13e-05, + "loss": 0.0359, + "step": 2440 + }, + { + "action_loss": 0.021318459883332253, + "epoch": 2.194244604316547, + "step": 2440 + }, + { + "epoch": 2.194244604316547, + "step": 2440, + "torque_loss": 0.14711694419384003 + }, + { + "epoch": 2.194244604316547, + "force_loss": 0.020500699058175087, + "step": 2440 + }, + { + "epoch": 2.2032374100719423, + "grad_norm": 0.7934525012969971, + "learning_rate": 8.163333333333334e-05, + "loss": 0.0459, + "step": 2450 + }, + { + "action_loss": 0.014271169900894165, + "epoch": 2.2032374100719423, + "step": 2450 + }, + { + "epoch": 2.2032374100719423, + "step": 2450, + "torque_loss": 0.10306980460882187 + }, + { + "epoch": 2.2032374100719423, + "force_loss": 0.007952618412673473, + "step": 2450 + }, + { + "epoch": 2.212230215827338, + "grad_norm": 0.858775794506073, + "learning_rate": 8.196666666666668e-05, + "loss": 0.0423, + "step": 2460 + }, + { + "action_loss": 0.02108023501932621, + "epoch": 2.212230215827338, + "step": 2460 + }, + { + "epoch": 2.212230215827338, + "step": 2460, + "torque_loss": 0.19167710840702057 + }, + { + "epoch": 2.212230215827338, + "force_loss": 0.012655667029321194, + "step": 2460 + }, + { + "epoch": 2.221223021582734, + "grad_norm": 0.8969601988792419, + "learning_rate": 8.23e-05, + "loss": 0.0351, + "step": 2470 + }, + { + "action_loss": 0.012416555546224117, + "epoch": 2.221223021582734, + "step": 2470 + }, + { + "epoch": 2.221223021582734, + "step": 2470, + "torque_loss": 0.1196342408657074 + }, + { + "epoch": 2.221223021582734, + "force_loss": 0.008173524402081966, + "step": 2470 + }, + { + "epoch": 2.2302158273381294, + "grad_norm": 1.3362475633621216, + "learning_rate": 8.263333333333334e-05, + "loss": 0.0419, + "step": 2480 + }, + { + "action_loss": 0.048754189163446426, + "epoch": 2.2302158273381294, + "step": 2480 + }, + { + "epoch": 2.2302158273381294, + "step": 2480, + "torque_loss": 0.20010678470134735 + }, + { + "epoch": 2.2302158273381294, + "force_loss": 0.06305330246686935, + "step": 2480 + }, + { + "epoch": 2.2392086330935252, + "grad_norm": 1.9638789892196655, + "learning_rate": 8.296666666666667e-05, + "loss": 0.0458, + "step": 2490 + }, + { + "action_loss": 0.021870682016015053, + "epoch": 2.2392086330935252, + "step": 2490 + }, + { + "epoch": 2.2392086330935252, + "step": 2490, + "torque_loss": 0.11235183477401733 + }, + { + "epoch": 2.2392086330935252, + "force_loss": 0.011008105240762234, + "step": 2490 + }, + { + "epoch": 2.2482014388489207, + "grad_norm": 1.2399091720581055, + "learning_rate": 8.33e-05, + "loss": 0.0437, + "step": 2500 + }, + { + "action_loss": 0.036117155104875565, + "epoch": 2.2482014388489207, + "step": 2500 + }, + { + "epoch": 2.2482014388489207, + "step": 2500, + "torque_loss": 0.1411147117614746 + }, + { + "epoch": 2.2482014388489207, + "force_loss": 0.027891939505934715, + "step": 2500 + }, + { + "epoch": 2.2571942446043165, + "grad_norm": 1.6755642890930176, + "learning_rate": 8.363333333333334e-05, + "loss": 0.0407, + "step": 2510 + }, + { + "action_loss": 0.024394290521740913, + "epoch": 2.2571942446043165, + "step": 2510 + }, + { + "epoch": 2.2571942446043165, + "step": 2510, + "torque_loss": 0.1530729979276657 + }, + { + "epoch": 2.2571942446043165, + "force_loss": 0.016769492998719215, + "step": 2510 + }, + { + "epoch": 2.2661870503597124, + "grad_norm": 0.5203109979629517, + "learning_rate": 8.396666666666667e-05, + "loss": 0.0356, + "step": 2520 + }, + { + "action_loss": 0.029405226930975914, + "epoch": 2.2661870503597124, + "step": 2520 + }, + { + "epoch": 2.2661870503597124, + "step": 2520, + "torque_loss": 0.12017150968313217 + }, + { + "epoch": 2.2661870503597124, + "force_loss": 0.019859282299876213, + "step": 2520 + }, + { + "epoch": 2.2751798561151078, + "grad_norm": 1.0105420351028442, + "learning_rate": 8.43e-05, + "loss": 0.0331, + "step": 2530 + }, + { + "action_loss": 0.03128426522016525, + "epoch": 2.2751798561151078, + "step": 2530 + }, + { + "epoch": 2.2751798561151078, + "step": 2530, + "torque_loss": 0.1860082596540451 + }, + { + "epoch": 2.2751798561151078, + "force_loss": 0.021311545744538307, + "step": 2530 + }, + { + "epoch": 2.2841726618705036, + "grad_norm": 0.5562395453453064, + "learning_rate": 8.463333333333335e-05, + "loss": 0.0478, + "step": 2540 + }, + { + "action_loss": 0.030484000220894814, + "epoch": 2.2841726618705036, + "step": 2540 + }, + { + "epoch": 2.2841726618705036, + "step": 2540, + "torque_loss": 0.14260172843933105 + }, + { + "epoch": 2.2841726618705036, + "force_loss": 0.026872314512729645, + "step": 2540 + }, + { + "epoch": 2.2931654676258995, + "grad_norm": 1.1357439756393433, + "learning_rate": 8.496666666666667e-05, + "loss": 0.0358, + "step": 2550 + }, + { + "action_loss": 0.011914070695638657, + "epoch": 2.2931654676258995, + "step": 2550 + }, + { + "epoch": 2.2931654676258995, + "step": 2550, + "torque_loss": 0.13292258977890015 + }, + { + "epoch": 2.2931654676258995, + "force_loss": 0.009125075303018093, + "step": 2550 + }, + { + "epoch": 2.302158273381295, + "grad_norm": 1.1095728874206543, + "learning_rate": 8.53e-05, + "loss": 0.0458, + "step": 2560 + }, + { + "action_loss": 0.026217376813292503, + "epoch": 2.302158273381295, + "step": 2560 + }, + { + "epoch": 2.302158273381295, + "step": 2560, + "torque_loss": 0.1510026752948761 + }, + { + "epoch": 2.302158273381295, + "force_loss": 0.031473398208618164, + "step": 2560 + }, + { + "epoch": 2.3111510791366907, + "grad_norm": 1.512447476387024, + "learning_rate": 8.563333333333333e-05, + "loss": 0.0442, + "step": 2570 + }, + { + "action_loss": 0.02989950217306614, + "epoch": 2.3111510791366907, + "step": 2570 + }, + { + "epoch": 2.3111510791366907, + "step": 2570, + "torque_loss": 0.11103609949350357 + }, + { + "epoch": 2.3111510791366907, + "force_loss": 0.015186742879450321, + "step": 2570 + }, + { + "epoch": 2.3201438848920866, + "grad_norm": 0.44876179099082947, + "learning_rate": 8.596666666666668e-05, + "loss": 0.0451, + "step": 2580 + }, + { + "action_loss": 0.0363553948700428, + "epoch": 2.3201438848920866, + "step": 2580 + }, + { + "epoch": 2.3201438848920866, + "step": 2580, + "torque_loss": 0.16425319015979767 + }, + { + "epoch": 2.3201438848920866, + "force_loss": 0.02153819240629673, + "step": 2580 + }, + { + "epoch": 2.329136690647482, + "grad_norm": 1.0684629678726196, + "learning_rate": 8.63e-05, + "loss": 0.0386, + "step": 2590 + }, + { + "action_loss": 0.01931898109614849, + "epoch": 2.329136690647482, + "step": 2590 + }, + { + "epoch": 2.329136690647482, + "step": 2590, + "torque_loss": 0.13259916007518768 + }, + { + "epoch": 2.329136690647482, + "force_loss": 0.013353735208511353, + "step": 2590 + }, + { + "epoch": 2.338129496402878, + "grad_norm": 2.289297580718994, + "learning_rate": 8.663333333333333e-05, + "loss": 0.04, + "step": 2600 + }, + { + "action_loss": 0.020353885367512703, + "epoch": 2.338129496402878, + "step": 2600 + }, + { + "epoch": 2.338129496402878, + "step": 2600, + "torque_loss": 0.12929637730121613 + }, + { + "epoch": 2.338129496402878, + "force_loss": 0.015994930639863014, + "step": 2600 + }, + { + "epoch": 2.347122302158273, + "grad_norm": 1.6838279962539673, + "learning_rate": 8.696666666666668e-05, + "loss": 0.0422, + "step": 2610 + }, + { + "action_loss": 0.01813647337257862, + "epoch": 2.347122302158273, + "step": 2610 + }, + { + "epoch": 2.347122302158273, + "step": 2610, + "torque_loss": 0.10282281041145325 + }, + { + "epoch": 2.347122302158273, + "force_loss": 0.010300427675247192, + "step": 2610 + }, + { + "epoch": 2.356115107913669, + "grad_norm": 0.9916293621063232, + "learning_rate": 8.730000000000001e-05, + "loss": 0.0379, + "step": 2620 + }, + { + "action_loss": 0.038559798151254654, + "epoch": 2.356115107913669, + "step": 2620 + }, + { + "epoch": 2.356115107913669, + "step": 2620, + "torque_loss": 0.15211163461208344 + }, + { + "epoch": 2.356115107913669, + "force_loss": 0.020414920523762703, + "step": 2620 + }, + { + "epoch": 2.365107913669065, + "grad_norm": 0.9382750391960144, + "learning_rate": 8.763333333333334e-05, + "loss": 0.0407, + "step": 2630 + }, + { + "action_loss": 0.03999155014753342, + "epoch": 2.365107913669065, + "step": 2630 + }, + { + "epoch": 2.365107913669065, + "step": 2630, + "torque_loss": 0.1645486205816269 + }, + { + "epoch": 2.365107913669065, + "force_loss": 0.023098522797226906, + "step": 2630 + }, + { + "epoch": 2.3741007194244603, + "grad_norm": 0.8879019618034363, + "learning_rate": 8.796666666666667e-05, + "loss": 0.0409, + "step": 2640 + }, + { + "action_loss": 0.04192976653575897, + "epoch": 2.3741007194244603, + "step": 2640 + }, + { + "epoch": 2.3741007194244603, + "step": 2640, + "torque_loss": 0.1851922869682312 + }, + { + "epoch": 2.3741007194244603, + "force_loss": 0.02950838953256607, + "step": 2640 + }, + { + "epoch": 2.383093525179856, + "grad_norm": 0.6435950398445129, + "learning_rate": 8.83e-05, + "loss": 0.0391, + "step": 2650 + }, + { + "action_loss": 0.015594280324876308, + "epoch": 2.383093525179856, + "step": 2650 + }, + { + "epoch": 2.383093525179856, + "step": 2650, + "torque_loss": 0.18892382085323334 + }, + { + "epoch": 2.383093525179856, + "force_loss": 0.010612313635647297, + "step": 2650 + }, + { + "epoch": 2.3920863309352516, + "grad_norm": 1.2399665117263794, + "learning_rate": 8.863333333333334e-05, + "loss": 0.0405, + "step": 2660 + }, + { + "action_loss": 0.06340255588293076, + "epoch": 2.3920863309352516, + "step": 2660 + }, + { + "epoch": 2.3920863309352516, + "step": 2660, + "torque_loss": 0.181207537651062 + }, + { + "epoch": 2.3920863309352516, + "force_loss": 0.05579102039337158, + "step": 2660 + }, + { + "epoch": 2.4010791366906474, + "grad_norm": 0.8568428158760071, + "learning_rate": 8.896666666666667e-05, + "loss": 0.0457, + "step": 2670 + }, + { + "action_loss": 0.019339805468916893, + "epoch": 2.4010791366906474, + "step": 2670 + }, + { + "epoch": 2.4010791366906474, + "step": 2670, + "torque_loss": 0.13437801599502563 + }, + { + "epoch": 2.4010791366906474, + "force_loss": 0.008796132169663906, + "step": 2670 + }, + { + "epoch": 2.4100719424460433, + "grad_norm": 0.705787718296051, + "learning_rate": 8.93e-05, + "loss": 0.0337, + "step": 2680 + }, + { + "action_loss": 0.016918500885367393, + "epoch": 2.4100719424460433, + "step": 2680 + }, + { + "epoch": 2.4100719424460433, + "step": 2680, + "torque_loss": 0.09708541631698608 + }, + { + "epoch": 2.4100719424460433, + "force_loss": 0.009710412472486496, + "step": 2680 + }, + { + "epoch": 2.4190647482014387, + "grad_norm": 0.9660235047340393, + "learning_rate": 8.963333333333333e-05, + "loss": 0.0326, + "step": 2690 + }, + { + "action_loss": 0.01665041781961918, + "epoch": 2.4190647482014387, + "step": 2690 + }, + { + "epoch": 2.4190647482014387, + "step": 2690, + "torque_loss": 0.18289561569690704 + }, + { + "epoch": 2.4190647482014387, + "force_loss": 0.012846916913986206, + "step": 2690 + }, + { + "epoch": 2.4280575539568345, + "grad_norm": 1.450756549835205, + "learning_rate": 8.996666666666667e-05, + "loss": 0.0353, + "step": 2700 + }, + { + "action_loss": 0.012606129981577396, + "epoch": 2.4280575539568345, + "step": 2700 + }, + { + "epoch": 2.4280575539568345, + "step": 2700, + "torque_loss": 0.10766936093568802 + }, + { + "epoch": 2.4280575539568345, + "force_loss": 0.0076093655079603195, + "step": 2700 + }, + { + "epoch": 2.4370503597122304, + "grad_norm": 1.1657781600952148, + "learning_rate": 9.030000000000001e-05, + "loss": 0.0362, + "step": 2710 + }, + { + "action_loss": 0.02287411130964756, + "epoch": 2.4370503597122304, + "step": 2710 + }, + { + "epoch": 2.4370503597122304, + "step": 2710, + "torque_loss": 0.122210793197155 + }, + { + "epoch": 2.4370503597122304, + "force_loss": 0.015399813652038574, + "step": 2710 + }, + { + "epoch": 2.446043165467626, + "grad_norm": 1.0665206909179688, + "learning_rate": 9.063333333333333e-05, + "loss": 0.0421, + "step": 2720 + }, + { + "action_loss": 0.009829004295170307, + "epoch": 2.446043165467626, + "step": 2720 + }, + { + "epoch": 2.446043165467626, + "step": 2720, + "torque_loss": 0.11033079028129578 + }, + { + "epoch": 2.446043165467626, + "force_loss": 0.007507554721087217, + "step": 2720 + }, + { + "epoch": 2.4550359712230216, + "grad_norm": 0.8438506126403809, + "learning_rate": 9.096666666666666e-05, + "loss": 0.0314, + "step": 2730 + }, + { + "action_loss": 0.01683415099978447, + "epoch": 2.4550359712230216, + "step": 2730 + }, + { + "epoch": 2.4550359712230216, + "step": 2730, + "torque_loss": 0.15783236920833588 + }, + { + "epoch": 2.4550359712230216, + "force_loss": 0.024075910449028015, + "step": 2730 + }, + { + "epoch": 2.4640287769784175, + "grad_norm": 2.404555082321167, + "learning_rate": 9.130000000000001e-05, + "loss": 0.0345, + "step": 2740 + }, + { + "action_loss": 0.02086368203163147, + "epoch": 2.4640287769784175, + "step": 2740 + }, + { + "epoch": 2.4640287769784175, + "step": 2740, + "torque_loss": 0.16255338490009308 + }, + { + "epoch": 2.4640287769784175, + "force_loss": 0.011286056600511074, + "step": 2740 + }, + { + "epoch": 2.473021582733813, + "grad_norm": 1.7568706274032593, + "learning_rate": 9.163333333333334e-05, + "loss": 0.0456, + "step": 2750 + }, + { + "action_loss": 0.026560992002487183, + "epoch": 2.473021582733813, + "step": 2750 + }, + { + "epoch": 2.473021582733813, + "step": 2750, + "torque_loss": 0.15280811488628387 + }, + { + "epoch": 2.473021582733813, + "force_loss": 0.01929291896522045, + "step": 2750 + }, + { + "epoch": 2.4820143884892087, + "grad_norm": 0.8318678736686707, + "learning_rate": 9.196666666666666e-05, + "loss": 0.0404, + "step": 2760 + }, + { + "action_loss": 0.01715129427611828, + "epoch": 2.4820143884892087, + "step": 2760 + }, + { + "epoch": 2.4820143884892087, + "step": 2760, + "torque_loss": 0.12959398329257965 + }, + { + "epoch": 2.4820143884892087, + "force_loss": 0.011284488253295422, + "step": 2760 + }, + { + "epoch": 2.491007194244604, + "grad_norm": 1.5090669393539429, + "learning_rate": 9.230000000000001e-05, + "loss": 0.0334, + "step": 2770 + }, + { + "action_loss": 0.03781120851635933, + "epoch": 2.491007194244604, + "step": 2770 + }, + { + "epoch": 2.491007194244604, + "step": 2770, + "torque_loss": 0.14982609450817108 + }, + { + "epoch": 2.491007194244604, + "force_loss": 0.01831093803048134, + "step": 2770 + }, + { + "epoch": 2.5, + "grad_norm": 2.4214015007019043, + "learning_rate": 9.263333333333334e-05, + "loss": 0.0355, + "step": 2780 + }, + { + "action_loss": 0.02332303486764431, + "epoch": 2.5, + "step": 2780 + }, + { + "epoch": 2.5, + "step": 2780, + "torque_loss": 0.11137477308511734 + }, + { + "epoch": 2.5, + "force_loss": 0.013371962122619152, + "step": 2780 + }, + { + "epoch": 2.508992805755396, + "grad_norm": 1.7286295890808105, + "learning_rate": 9.296666666666667e-05, + "loss": 0.0421, + "step": 2790 + }, + { + "action_loss": 0.01142234355211258, + "epoch": 2.508992805755396, + "step": 2790 + }, + { + "epoch": 2.508992805755396, + "step": 2790, + "torque_loss": 0.09397289156913757 + }, + { + "epoch": 2.508992805755396, + "force_loss": 0.009112835861742496, + "step": 2790 + }, + { + "epoch": 2.5179856115107913, + "grad_norm": 0.696000337600708, + "learning_rate": 9.33e-05, + "loss": 0.0371, + "step": 2800 + }, + { + "action_loss": 0.02488979883491993, + "epoch": 2.5179856115107913, + "step": 2800 + }, + { + "epoch": 2.5179856115107913, + "step": 2800, + "torque_loss": 0.17817990481853485 + }, + { + "epoch": 2.5179856115107913, + "force_loss": 0.02367742918431759, + "step": 2800 + }, + { + "epoch": 2.526978417266187, + "grad_norm": 2.2079663276672363, + "learning_rate": 9.363333333333334e-05, + "loss": 0.0344, + "step": 2810 + }, + { + "action_loss": 0.016026629135012627, + "epoch": 2.526978417266187, + "step": 2810 + }, + { + "epoch": 2.526978417266187, + "step": 2810, + "torque_loss": 0.13567723333835602 + }, + { + "epoch": 2.526978417266187, + "force_loss": 0.01275847852230072, + "step": 2810 + }, + { + "epoch": 2.5359712230215825, + "grad_norm": 1.2285265922546387, + "learning_rate": 9.396666666666667e-05, + "loss": 0.0332, + "step": 2820 + }, + { + "action_loss": 0.014813491143286228, + "epoch": 2.5359712230215825, + "step": 2820 + }, + { + "epoch": 2.5359712230215825, + "step": 2820, + "torque_loss": 0.12180599570274353 + }, + { + "epoch": 2.5359712230215825, + "force_loss": 0.010316254571080208, + "step": 2820 + }, + { + "epoch": 2.5449640287769784, + "grad_norm": 1.1561086177825928, + "learning_rate": 9.43e-05, + "loss": 0.0453, + "step": 2830 + }, + { + "action_loss": 0.025606466457247734, + "epoch": 2.5449640287769784, + "step": 2830 + }, + { + "epoch": 2.5449640287769784, + "step": 2830, + "torque_loss": 0.11959757655858994 + }, + { + "epoch": 2.5449640287769784, + "force_loss": 0.02054104395210743, + "step": 2830 + }, + { + "epoch": 2.553956834532374, + "grad_norm": 0.8689306378364563, + "learning_rate": 9.463333333333333e-05, + "loss": 0.0383, + "step": 2840 + }, + { + "action_loss": 0.01377404760569334, + "epoch": 2.553956834532374, + "step": 2840 + }, + { + "epoch": 2.553956834532374, + "step": 2840, + "torque_loss": 0.13466019928455353 + }, + { + "epoch": 2.553956834532374, + "force_loss": 0.010488275438547134, + "step": 2840 + }, + { + "epoch": 2.56294964028777, + "grad_norm": 0.4736124873161316, + "learning_rate": 9.496666666666667e-05, + "loss": 0.0377, + "step": 2850 + }, + { + "action_loss": 0.019194582477211952, + "epoch": 2.56294964028777, + "step": 2850 + }, + { + "epoch": 2.56294964028777, + "step": 2850, + "torque_loss": 0.1544509381055832 + }, + { + "epoch": 2.56294964028777, + "force_loss": 0.011677307076752186, + "step": 2850 + }, + { + "epoch": 2.5719424460431655, + "grad_norm": 1.1699731349945068, + "learning_rate": 9.53e-05, + "loss": 0.0431, + "step": 2860 + }, + { + "action_loss": 0.015994906425476074, + "epoch": 2.5719424460431655, + "step": 2860 + }, + { + "epoch": 2.5719424460431655, + "step": 2860, + "torque_loss": 0.13511356711387634 + }, + { + "epoch": 2.5719424460431655, + "force_loss": 0.011160057969391346, + "step": 2860 + }, + { + "epoch": 2.5809352517985613, + "grad_norm": 2.9630091190338135, + "learning_rate": 9.563333333333334e-05, + "loss": 0.0434, + "step": 2870 + }, + { + "action_loss": 0.02055782824754715, + "epoch": 2.5809352517985613, + "step": 2870 + }, + { + "epoch": 2.5809352517985613, + "step": 2870, + "torque_loss": 0.16927486658096313 + }, + { + "epoch": 2.5809352517985613, + "force_loss": 0.012406043708324432, + "step": 2870 + }, + { + "epoch": 2.5899280575539567, + "grad_norm": 0.8095554113388062, + "learning_rate": 9.596666666666668e-05, + "loss": 0.0338, + "step": 2880 + }, + { + "action_loss": 0.022499138489365578, + "epoch": 2.5899280575539567, + "step": 2880 + }, + { + "epoch": 2.5899280575539567, + "step": 2880, + "torque_loss": 0.12810824811458588 + }, + { + "epoch": 2.5899280575539567, + "force_loss": 0.016522439196705818, + "step": 2880 + }, + { + "epoch": 2.5989208633093526, + "grad_norm": 1.1473280191421509, + "learning_rate": 9.63e-05, + "loss": 0.0396, + "step": 2890 + }, + { + "action_loss": 0.026943305507302284, + "epoch": 2.5989208633093526, + "step": 2890 + }, + { + "epoch": 2.5989208633093526, + "step": 2890, + "torque_loss": 0.1600051373243332 + }, + { + "epoch": 2.5989208633093526, + "force_loss": 0.016423838213086128, + "step": 2890 + }, + { + "epoch": 2.6079136690647484, + "grad_norm": 1.2999749183654785, + "learning_rate": 9.663333333333334e-05, + "loss": 0.0442, + "step": 2900 + }, + { + "action_loss": 0.017461325973272324, + "epoch": 2.6079136690647484, + "step": 2900 + }, + { + "epoch": 2.6079136690647484, + "step": 2900, + "torque_loss": 0.11378884315490723 + }, + { + "epoch": 2.6079136690647484, + "force_loss": 0.010942693799734116, + "step": 2900 + }, + { + "epoch": 2.616906474820144, + "grad_norm": 0.8496351838111877, + "learning_rate": 9.696666666666667e-05, + "loss": 0.0454, + "step": 2910 + }, + { + "action_loss": 0.018060246482491493, + "epoch": 2.616906474820144, + "step": 2910 + }, + { + "epoch": 2.616906474820144, + "step": 2910, + "torque_loss": 0.16233943402767181 + }, + { + "epoch": 2.616906474820144, + "force_loss": 0.013489101082086563, + "step": 2910 + }, + { + "epoch": 2.6258992805755397, + "grad_norm": 1.1536489725112915, + "learning_rate": 9.730000000000001e-05, + "loss": 0.0407, + "step": 2920 + }, + { + "action_loss": 0.0315416194498539, + "epoch": 2.6258992805755397, + "step": 2920 + }, + { + "epoch": 2.6258992805755397, + "step": 2920, + "torque_loss": 0.12135192006826401 + }, + { + "epoch": 2.6258992805755397, + "force_loss": 0.018383117392659187, + "step": 2920 + }, + { + "epoch": 2.634892086330935, + "grad_norm": 0.6894007325172424, + "learning_rate": 9.763333333333334e-05, + "loss": 0.0374, + "step": 2930 + }, + { + "action_loss": 0.022216318175196648, + "epoch": 2.634892086330935, + "step": 2930 + }, + { + "epoch": 2.634892086330935, + "step": 2930, + "torque_loss": 0.15010647475719452 + }, + { + "epoch": 2.634892086330935, + "force_loss": 0.018463822081685066, + "step": 2930 + }, + { + "epoch": 2.643884892086331, + "grad_norm": 2.491455554962158, + "learning_rate": 9.796666666666667e-05, + "loss": 0.0439, + "step": 2940 + }, + { + "action_loss": 0.039160724729299545, + "epoch": 2.643884892086331, + "step": 2940 + }, + { + "epoch": 2.643884892086331, + "step": 2940, + "torque_loss": 0.2687141001224518 + }, + { + "epoch": 2.643884892086331, + "force_loss": 0.024796312674880028, + "step": 2940 + }, + { + "epoch": 2.652877697841727, + "grad_norm": 1.2095692157745361, + "learning_rate": 9.83e-05, + "loss": 0.0505, + "step": 2950 + }, + { + "action_loss": 0.02901189960539341, + "epoch": 2.652877697841727, + "step": 2950 + }, + { + "epoch": 2.652877697841727, + "step": 2950, + "torque_loss": 0.18363797664642334 + }, + { + "epoch": 2.652877697841727, + "force_loss": 0.014832672663033009, + "step": 2950 + }, + { + "epoch": 2.661870503597122, + "grad_norm": 1.1525763273239136, + "learning_rate": 9.863333333333334e-05, + "loss": 0.0428, + "step": 2960 + }, + { + "action_loss": 0.01046085823327303, + "epoch": 2.661870503597122, + "step": 2960 + }, + { + "epoch": 2.661870503597122, + "step": 2960, + "torque_loss": 0.11623861640691757 + }, + { + "epoch": 2.661870503597122, + "force_loss": 0.007993306033313274, + "step": 2960 + }, + { + "epoch": 2.670863309352518, + "grad_norm": 2.065459728240967, + "learning_rate": 9.896666666666667e-05, + "loss": 0.0374, + "step": 2970 + }, + { + "action_loss": 0.02344103343784809, + "epoch": 2.670863309352518, + "step": 2970 + }, + { + "epoch": 2.670863309352518, + "step": 2970, + "torque_loss": 0.15691407024860382 + }, + { + "epoch": 2.670863309352518, + "force_loss": 0.015483230352401733, + "step": 2970 + }, + { + "epoch": 2.6798561151079134, + "grad_norm": 1.7671762704849243, + "learning_rate": 9.93e-05, + "loss": 0.0386, + "step": 2980 + }, + { + "action_loss": 0.020954905077815056, + "epoch": 2.6798561151079134, + "step": 2980 + }, + { + "epoch": 2.6798561151079134, + "step": 2980, + "torque_loss": 0.12083371728658676 + }, + { + "epoch": 2.6798561151079134, + "force_loss": 0.009894735179841518, + "step": 2980 + }, + { + "epoch": 2.6888489208633093, + "grad_norm": 1.8522980213165283, + "learning_rate": 9.963333333333333e-05, + "loss": 0.0451, + "step": 2990 + }, + { + "action_loss": 0.02034606784582138, + "epoch": 2.6888489208633093, + "step": 2990 + }, + { + "epoch": 2.6888489208633093, + "step": 2990, + "torque_loss": 0.19639647006988525 + }, + { + "epoch": 2.6888489208633093, + "force_loss": 0.016407953575253487, + "step": 2990 + }, + { + "epoch": 2.697841726618705, + "grad_norm": 0.999684751033783, + "learning_rate": 9.996666666666668e-05, + "loss": 0.051, + "step": 3000 + }, + { + "action_loss": 0.018691303208470345, + "epoch": 2.697841726618705, + "step": 3000 + }, + { + "epoch": 2.697841726618705, + "step": 3000, + "torque_loss": 0.10870843380689621 + }, + { + "epoch": 2.697841726618705, + "force_loss": 0.01539594680070877, + "step": 3000 + }, + { + "epoch": 2.706834532374101, + "grad_norm": 0.994063675403595, + "learning_rate": 9.999999384858465e-05, + "loss": 0.038, + "step": 3010 + }, + { + "action_loss": 0.016244731843471527, + "epoch": 2.706834532374101, + "step": 3010 + }, + { + "epoch": 2.706834532374101, + "step": 3010, + "torque_loss": 0.10822829604148865 + }, + { + "epoch": 2.706834532374101, + "force_loss": 0.011545299552381039, + "step": 3010 + }, + { + "epoch": 2.7158273381294964, + "grad_norm": 1.5519417524337769, + "learning_rate": 9.999997258443473e-05, + "loss": 0.0401, + "step": 3020 + }, + { + "action_loss": 0.033651623874902725, + "epoch": 2.7158273381294964, + "step": 3020 + }, + { + "epoch": 2.7158273381294964, + "step": 3020, + "torque_loss": 0.16406987607479095 + }, + { + "epoch": 2.7158273381294964, + "force_loss": 0.02589082531630993, + "step": 3020 + }, + { + "epoch": 2.7248201438848922, + "grad_norm": 1.5173958539962769, + "learning_rate": 9.999993613161331e-05, + "loss": 0.042, + "step": 3030 + }, + { + "action_loss": 0.03164878487586975, + "epoch": 2.7248201438848922, + "step": 3030 + }, + { + "epoch": 2.7248201438848922, + "step": 3030, + "torque_loss": 0.15949127078056335 + }, + { + "epoch": 2.7248201438848922, + "force_loss": 0.021960146725177765, + "step": 3030 + }, + { + "epoch": 2.7338129496402876, + "grad_norm": 1.3673168420791626, + "learning_rate": 9.999988449013146e-05, + "loss": 0.0433, + "step": 3040 + }, + { + "action_loss": 0.026378417387604713, + "epoch": 2.7338129496402876, + "step": 3040 + }, + { + "epoch": 2.7338129496402876, + "step": 3040, + "torque_loss": 0.16195188462734222 + }, + { + "epoch": 2.7338129496402876, + "force_loss": 0.01337435096502304, + "step": 3040 + }, + { + "epoch": 2.7428057553956835, + "grad_norm": 0.9938601851463318, + "learning_rate": 9.99998176600049e-05, + "loss": 0.0489, + "step": 3050 + }, + { + "action_loss": 0.011601743288338184, + "epoch": 2.7428057553956835, + "step": 3050 + }, + { + "epoch": 2.7428057553956835, + "step": 3050, + "torque_loss": 0.15897555649280548 + }, + { + "epoch": 2.7428057553956835, + "force_loss": 0.01026233658194542, + "step": 3050 + }, + { + "epoch": 2.7517985611510793, + "grad_norm": 1.2809144258499146, + "learning_rate": 9.999973564125389e-05, + "loss": 0.0361, + "step": 3060 + }, + { + "action_loss": 0.01920217089354992, + "epoch": 2.7517985611510793, + "step": 3060 + }, + { + "epoch": 2.7517985611510793, + "step": 3060, + "torque_loss": 0.13005442917346954 + }, + { + "epoch": 2.7517985611510793, + "force_loss": 0.012239661999046803, + "step": 3060 + }, + { + "epoch": 2.7607913669064748, + "grad_norm": 1.192671298980713, + "learning_rate": 9.999963843390335e-05, + "loss": 0.0373, + "step": 3070 + }, + { + "action_loss": 0.04148460552096367, + "epoch": 2.7607913669064748, + "step": 3070 + }, + { + "epoch": 2.7607913669064748, + "step": 3070, + "torque_loss": 0.1638404279947281 + }, + { + "epoch": 2.7607913669064748, + "force_loss": 0.025303175672888756, + "step": 3070 + }, + { + "epoch": 2.7697841726618706, + "grad_norm": 2.369666814804077, + "learning_rate": 9.999952603798282e-05, + "loss": 0.0562, + "step": 3080 + }, + { + "action_loss": 0.047801535576581955, + "epoch": 2.7697841726618706, + "step": 3080 + }, + { + "epoch": 2.7697841726618706, + "step": 3080, + "torque_loss": 0.20009560883045197 + }, + { + "epoch": 2.7697841726618706, + "force_loss": 0.06334620714187622, + "step": 3080 + }, + { + "epoch": 2.778776978417266, + "grad_norm": 1.7056851387023926, + "learning_rate": 9.999939845352646e-05, + "loss": 0.0467, + "step": 3090 + }, + { + "action_loss": 0.024913745000958443, + "epoch": 2.778776978417266, + "step": 3090 + }, + { + "epoch": 2.778776978417266, + "step": 3090, + "torque_loss": 0.19030427932739258 + }, + { + "epoch": 2.778776978417266, + "force_loss": 0.014128959737718105, + "step": 3090 + }, + { + "epoch": 2.787769784172662, + "grad_norm": 1.7222480773925781, + "learning_rate": 9.999925568057298e-05, + "loss": 0.0455, + "step": 3100 + }, + { + "action_loss": 0.04152792692184448, + "epoch": 2.787769784172662, + "step": 3100 + }, + { + "epoch": 2.787769784172662, + "step": 3100, + "torque_loss": 0.15055231750011444 + }, + { + "epoch": 2.787769784172662, + "force_loss": 0.01842602528631687, + "step": 3100 + }, + { + "epoch": 2.7967625899280577, + "grad_norm": 0.839058518409729, + "learning_rate": 9.999909771916578e-05, + "loss": 0.0426, + "step": 3110 + }, + { + "action_loss": 0.03791915252804756, + "epoch": 2.7967625899280577, + "step": 3110 + }, + { + "epoch": 2.7967625899280577, + "step": 3110, + "torque_loss": 0.13088136911392212 + }, + { + "epoch": 2.7967625899280577, + "force_loss": 0.019136101007461548, + "step": 3110 + }, + { + "epoch": 2.805755395683453, + "grad_norm": 1.6002094745635986, + "learning_rate": 9.999892456935285e-05, + "loss": 0.0456, + "step": 3120 + }, + { + "action_loss": 0.05152573063969612, + "epoch": 2.805755395683453, + "step": 3120 + }, + { + "epoch": 2.805755395683453, + "step": 3120, + "torque_loss": 0.1504654586315155 + }, + { + "epoch": 2.805755395683453, + "force_loss": 0.03299170359969139, + "step": 3120 + }, + { + "epoch": 2.814748201438849, + "grad_norm": 0.798694908618927, + "learning_rate": 9.999873623118679e-05, + "loss": 0.0433, + "step": 3130 + }, + { + "action_loss": 0.013080894947052002, + "epoch": 2.814748201438849, + "step": 3130 + }, + { + "epoch": 2.814748201438849, + "step": 3130, + "torque_loss": 0.16791932284832 + }, + { + "epoch": 2.814748201438849, + "force_loss": 0.012379822321236134, + "step": 3130 + }, + { + "epoch": 2.8237410071942444, + "grad_norm": 0.9472343921661377, + "learning_rate": 9.999853270472479e-05, + "loss": 0.0408, + "step": 3140 + }, + { + "action_loss": 0.02110356092453003, + "epoch": 2.8237410071942444, + "step": 3140 + }, + { + "epoch": 2.8237410071942444, + "step": 3140, + "torque_loss": 0.15463107824325562 + }, + { + "epoch": 2.8237410071942444, + "force_loss": 0.02173072099685669, + "step": 3140 + }, + { + "epoch": 2.83273381294964, + "grad_norm": 1.3234020471572876, + "learning_rate": 9.999831399002871e-05, + "loss": 0.0421, + "step": 3150 + }, + { + "action_loss": 0.01979002356529236, + "epoch": 2.83273381294964, + "step": 3150 + }, + { + "epoch": 2.83273381294964, + "step": 3150, + "torque_loss": 0.12477517127990723 + }, + { + "epoch": 2.83273381294964, + "force_loss": 0.012610559351742268, + "step": 3150 + }, + { + "epoch": 2.841726618705036, + "grad_norm": 1.0811312198638916, + "learning_rate": 9.999808008716494e-05, + "loss": 0.0384, + "step": 3160 + }, + { + "action_loss": 0.03057975135743618, + "epoch": 2.841726618705036, + "step": 3160 + }, + { + "epoch": 2.841726618705036, + "step": 3160, + "torque_loss": 0.2436930537223816 + }, + { + "epoch": 2.841726618705036, + "force_loss": 0.014245248399674892, + "step": 3160 + }, + { + "epoch": 2.850719424460432, + "grad_norm": 0.8396542072296143, + "learning_rate": 9.999783099620459e-05, + "loss": 0.0432, + "step": 3170 + }, + { + "action_loss": 0.013761981390416622, + "epoch": 2.850719424460432, + "step": 3170 + }, + { + "epoch": 2.850719424460432, + "step": 3170, + "torque_loss": 0.12116435915231705 + }, + { + "epoch": 2.850719424460432, + "force_loss": 0.010504312813282013, + "step": 3170 + }, + { + "epoch": 2.8597122302158273, + "grad_norm": 0.9482388496398926, + "learning_rate": 9.999756671722328e-05, + "loss": 0.0393, + "step": 3180 + }, + { + "action_loss": 0.012676995247602463, + "epoch": 2.8597122302158273, + "step": 3180 + }, + { + "epoch": 2.8597122302158273, + "step": 3180, + "torque_loss": 0.1399737149477005 + }, + { + "epoch": 2.8597122302158273, + "force_loss": 0.009577300399541855, + "step": 3180 + }, + { + "epoch": 2.868705035971223, + "grad_norm": 2.415886878967285, + "learning_rate": 9.99972872503013e-05, + "loss": 0.0388, + "step": 3190 + }, + { + "action_loss": 0.029606467112898827, + "epoch": 2.868705035971223, + "step": 3190 + }, + { + "epoch": 2.868705035971223, + "step": 3190, + "torque_loss": 0.15825851261615753 + }, + { + "epoch": 2.868705035971223, + "force_loss": 0.018408508971333504, + "step": 3190 + }, + { + "epoch": 2.8776978417266186, + "grad_norm": 0.8147604465484619, + "learning_rate": 9.999699259552359e-05, + "loss": 0.0422, + "step": 3200 + }, + { + "action_loss": 0.010048252530395985, + "epoch": 2.8776978417266186, + "step": 3200 + }, + { + "epoch": 2.8776978417266186, + "step": 3200, + "torque_loss": 0.12385464459657669 + }, + { + "epoch": 2.8776978417266186, + "force_loss": 0.0070101716555655, + "step": 3200 + }, + { + "epoch": 2.8866906474820144, + "grad_norm": 0.8463675379753113, + "learning_rate": 9.99966827529796e-05, + "loss": 0.0362, + "step": 3210 + }, + { + "action_loss": 0.020685601979494095, + "epoch": 2.8866906474820144, + "step": 3210 + }, + { + "epoch": 2.8866906474820144, + "step": 3210, + "torque_loss": 0.18193601071834564 + }, + { + "epoch": 2.8866906474820144, + "force_loss": 0.012167420238256454, + "step": 3210 + }, + { + "epoch": 2.8956834532374103, + "grad_norm": 1.0764975547790527, + "learning_rate": 9.999635772276348e-05, + "loss": 0.0377, + "step": 3220 + }, + { + "action_loss": 0.010956761427223682, + "epoch": 2.8956834532374103, + "step": 3220 + }, + { + "epoch": 2.8956834532374103, + "step": 3220, + "torque_loss": 0.12044543772935867 + }, + { + "epoch": 2.8956834532374103, + "force_loss": 0.0089794360101223, + "step": 3220 + }, + { + "epoch": 2.9046762589928057, + "grad_norm": 0.772395670413971, + "learning_rate": 9.999601750497396e-05, + "loss": 0.0387, + "step": 3230 + }, + { + "action_loss": 0.017194250598549843, + "epoch": 2.9046762589928057, + "step": 3230 + }, + { + "epoch": 2.9046762589928057, + "step": 3230, + "torque_loss": 0.16221556067466736 + }, + { + "epoch": 2.9046762589928057, + "force_loss": 0.013018782250583172, + "step": 3230 + }, + { + "epoch": 2.9136690647482015, + "grad_norm": 0.7906728386878967, + "learning_rate": 9.99956620997144e-05, + "loss": 0.0349, + "step": 3240 + }, + { + "action_loss": 0.0322260856628418, + "epoch": 2.9136690647482015, + "step": 3240 + }, + { + "epoch": 2.9136690647482015, + "step": 3240, + "torque_loss": 0.14162257313728333 + }, + { + "epoch": 2.9136690647482015, + "force_loss": 0.017613641917705536, + "step": 3240 + }, + { + "epoch": 2.922661870503597, + "grad_norm": 1.1173397302627563, + "learning_rate": 9.999529150709275e-05, + "loss": 0.0415, + "step": 3250 + }, + { + "action_loss": 0.020265748724341393, + "epoch": 2.922661870503597, + "step": 3250 + }, + { + "epoch": 2.922661870503597, + "step": 3250, + "torque_loss": 0.1406986564397812 + }, + { + "epoch": 2.922661870503597, + "force_loss": 0.009460187517106533, + "step": 3250 + }, + { + "epoch": 2.931654676258993, + "grad_norm": 0.6254609823226929, + "learning_rate": 9.999490572722158e-05, + "loss": 0.0372, + "step": 3260 + }, + { + "action_loss": 0.025432869791984558, + "epoch": 2.931654676258993, + "step": 3260 + }, + { + "epoch": 2.931654676258993, + "step": 3260, + "torque_loss": 0.14959515631198883 + }, + { + "epoch": 2.931654676258993, + "force_loss": 0.014176805503666401, + "step": 3260 + }, + { + "epoch": 2.9406474820143886, + "grad_norm": 0.8070449233055115, + "learning_rate": 9.99945047602181e-05, + "loss": 0.0408, + "step": 3270 + }, + { + "action_loss": 0.01896510273218155, + "epoch": 2.9406474820143886, + "step": 3270 + }, + { + "epoch": 2.9406474820143886, + "step": 3270, + "torque_loss": 0.11664503812789917 + }, + { + "epoch": 2.9406474820143886, + "force_loss": 0.011725406162440777, + "step": 3270 + }, + { + "epoch": 2.949640287769784, + "grad_norm": 1.269662857055664, + "learning_rate": 9.99940886062041e-05, + "loss": 0.0389, + "step": 3280 + }, + { + "action_loss": 0.016527635976672173, + "epoch": 2.949640287769784, + "step": 3280 + }, + { + "epoch": 2.949640287769784, + "step": 3280, + "torque_loss": 0.12627343833446503 + }, + { + "epoch": 2.949640287769784, + "force_loss": 0.011279787868261337, + "step": 3280 + }, + { + "epoch": 2.95863309352518, + "grad_norm": 1.1411021947860718, + "learning_rate": 9.999365726530599e-05, + "loss": 0.04, + "step": 3290 + }, + { + "action_loss": 0.01918291114270687, + "epoch": 2.95863309352518, + "step": 3290 + }, + { + "epoch": 2.95863309352518, + "step": 3290, + "torque_loss": 0.16686545312404633 + }, + { + "epoch": 2.95863309352518, + "force_loss": 0.012200385332107544, + "step": 3290 + }, + { + "epoch": 2.9676258992805753, + "grad_norm": 0.8965969085693359, + "learning_rate": 9.999321073765481e-05, + "loss": 0.0462, + "step": 3300 + }, + { + "action_loss": 0.017377378419041634, + "epoch": 2.9676258992805753, + "step": 3300 + }, + { + "epoch": 2.9676258992805753, + "step": 3300, + "torque_loss": 0.1282060742378235 + }, + { + "epoch": 2.9676258992805753, + "force_loss": 0.017754575237631798, + "step": 3300 + }, + { + "epoch": 2.976618705035971, + "grad_norm": 0.48440292477607727, + "learning_rate": 9.99927490233862e-05, + "loss": 0.0331, + "step": 3310 + }, + { + "action_loss": 0.02285308577120304, + "epoch": 2.976618705035971, + "step": 3310 + }, + { + "epoch": 2.976618705035971, + "step": 3310, + "torque_loss": 0.22604024410247803 + }, + { + "epoch": 2.976618705035971, + "force_loss": 0.019879691302776337, + "step": 3310 + }, + { + "epoch": 2.985611510791367, + "grad_norm": 1.7107858657836914, + "learning_rate": 9.999227212264043e-05, + "loss": 0.0364, + "step": 3320 + }, + { + "action_loss": 0.01286298781633377, + "epoch": 2.985611510791367, + "step": 3320 + }, + { + "epoch": 2.985611510791367, + "step": 3320, + "torque_loss": 0.0966697558760643 + }, + { + "epoch": 2.985611510791367, + "force_loss": 0.01715177856385708, + "step": 3320 + }, + { + "epoch": 2.994604316546763, + "grad_norm": 0.9540174007415771, + "learning_rate": 9.999178003556236e-05, + "loss": 0.0386, + "step": 3330 + }, + { + "action_loss": 0.020100312307476997, + "epoch": 2.994604316546763, + "step": 3330 + }, + { + "epoch": 2.994604316546763, + "step": 3330, + "torque_loss": 0.11698877811431885 + }, + { + "epoch": 2.994604316546763, + "force_loss": 0.019933760166168213, + "step": 3330 + }, + { + "epoch": 3.0035971223021583, + "grad_norm": 0.68222975730896, + "learning_rate": 9.999127276230146e-05, + "loss": 0.0321, + "step": 3340 + }, + { + "action_loss": 0.020617427304387093, + "epoch": 3.0035971223021583, + "step": 3340 + }, + { + "epoch": 3.0035971223021583, + "step": 3340, + "torque_loss": 0.12337452173233032 + }, + { + "epoch": 3.0035971223021583, + "force_loss": 0.01989404298365116, + "step": 3340 + }, + { + "epoch": 3.012589928057554, + "grad_norm": 2.0390818119049072, + "learning_rate": 9.999075030301184e-05, + "loss": 0.0391, + "step": 3350 + }, + { + "action_loss": 0.016074737533926964, + "epoch": 3.012589928057554, + "step": 3350 + }, + { + "epoch": 3.012589928057554, + "step": 3350, + "torque_loss": 0.13831709325313568 + }, + { + "epoch": 3.012589928057554, + "force_loss": 0.015261041931807995, + "step": 3350 + }, + { + "epoch": 3.0215827338129495, + "grad_norm": 0.5331881642341614, + "learning_rate": 9.999021265785221e-05, + "loss": 0.043, + "step": 3360 + }, + { + "action_loss": 0.020711304619908333, + "epoch": 3.0215827338129495, + "step": 3360 + }, + { + "epoch": 3.0215827338129495, + "step": 3360, + "torque_loss": 0.215559720993042 + }, + { + "epoch": 3.0215827338129495, + "force_loss": 0.01075026299804449, + "step": 3360 + }, + { + "epoch": 3.0305755395683454, + "grad_norm": 2.314973831176758, + "learning_rate": 9.998965982698589e-05, + "loss": 0.045, + "step": 3370 + }, + { + "action_loss": 0.02324887178838253, + "epoch": 3.0305755395683454, + "step": 3370 + }, + { + "epoch": 3.0305755395683454, + "step": 3370, + "torque_loss": 0.13435320556163788 + }, + { + "epoch": 3.0305755395683454, + "force_loss": 0.01448883581906557, + "step": 3370 + }, + { + "epoch": 3.039568345323741, + "grad_norm": 1.7778946161270142, + "learning_rate": 9.998909181058082e-05, + "loss": 0.0364, + "step": 3380 + }, + { + "action_loss": 0.023925187066197395, + "epoch": 3.039568345323741, + "step": 3380 + }, + { + "epoch": 3.039568345323741, + "step": 3380, + "torque_loss": 0.12586790323257446 + }, + { + "epoch": 3.039568345323741, + "force_loss": 0.026657452806830406, + "step": 3380 + }, + { + "epoch": 3.0485611510791366, + "grad_norm": 1.2528610229492188, + "learning_rate": 9.998850860880953e-05, + "loss": 0.0421, + "step": 3390 + }, + { + "action_loss": 0.02990814298391342, + "epoch": 3.0485611510791366, + "step": 3390 + }, + { + "epoch": 3.0485611510791366, + "step": 3390, + "torque_loss": 0.21017348766326904 + }, + { + "epoch": 3.0485611510791366, + "force_loss": 0.012373861856758595, + "step": 3390 + }, + { + "epoch": 3.0575539568345325, + "grad_norm": 1.3309173583984375, + "learning_rate": 9.998791022184922e-05, + "loss": 0.0418, + "step": 3400 + }, + { + "action_loss": 0.014552815817296505, + "epoch": 3.0575539568345325, + "step": 3400 + }, + { + "epoch": 3.0575539568345325, + "step": 3400, + "torque_loss": 0.11067936569452286 + }, + { + "epoch": 3.0575539568345325, + "force_loss": 0.009075048379600048, + "step": 3400 + }, + { + "epoch": 3.066546762589928, + "grad_norm": 2.535957098007202, + "learning_rate": 9.99872966498816e-05, + "loss": 0.0373, + "step": 3410 + }, + { + "action_loss": 0.03187643364071846, + "epoch": 3.066546762589928, + "step": 3410 + }, + { + "epoch": 3.066546762589928, + "step": 3410, + "torque_loss": 0.1743755340576172 + }, + { + "epoch": 3.066546762589928, + "force_loss": 0.0168390404433012, + "step": 3410 + }, + { + "epoch": 3.0755395683453237, + "grad_norm": 0.8553867340087891, + "learning_rate": 9.998666789309313e-05, + "loss": 0.0424, + "step": 3420 + }, + { + "action_loss": 0.007862004451453686, + "epoch": 3.0755395683453237, + "step": 3420 + }, + { + "epoch": 3.0755395683453237, + "step": 3420, + "torque_loss": 0.14355872571468353 + }, + { + "epoch": 3.0755395683453237, + "force_loss": 0.007574799004942179, + "step": 3420 + }, + { + "epoch": 3.0845323741007196, + "grad_norm": 0.7984017729759216, + "learning_rate": 9.998602395167475e-05, + "loss": 0.0343, + "step": 3430 + }, + { + "action_loss": 0.03197667375206947, + "epoch": 3.0845323741007196, + "step": 3430 + }, + { + "epoch": 3.0845323741007196, + "step": 3430, + "torque_loss": 0.15288887917995453 + }, + { + "epoch": 3.0845323741007196, + "force_loss": 0.02210446633398533, + "step": 3430 + }, + { + "epoch": 3.093525179856115, + "grad_norm": 0.7792773842811584, + "learning_rate": 9.998536482582213e-05, + "loss": 0.0443, + "step": 3440 + }, + { + "action_loss": 0.02684091031551361, + "epoch": 3.093525179856115, + "step": 3440 + }, + { + "epoch": 3.093525179856115, + "step": 3440, + "torque_loss": 0.14769357442855835 + }, + { + "epoch": 3.093525179856115, + "force_loss": 0.013106010854244232, + "step": 3440 + }, + { + "epoch": 3.102517985611511, + "grad_norm": 0.9977942109107971, + "learning_rate": 9.998469051573544e-05, + "loss": 0.0436, + "step": 3450 + }, + { + "action_loss": 0.03798346221446991, + "epoch": 3.102517985611511, + "step": 3450 + }, + { + "epoch": 3.102517985611511, + "step": 3450, + "torque_loss": 0.23463428020477295 + }, + { + "epoch": 3.102517985611511, + "force_loss": 0.0281215887516737, + "step": 3450 + }, + { + "epoch": 3.1115107913669067, + "grad_norm": 1.1203683614730835, + "learning_rate": 9.998400102161954e-05, + "loss": 0.0419, + "step": 3460 + }, + { + "action_loss": 0.01838778890669346, + "epoch": 3.1115107913669067, + "step": 3460 + }, + { + "epoch": 3.1115107913669067, + "step": 3460, + "torque_loss": 0.169475719332695 + }, + { + "epoch": 3.1115107913669067, + "force_loss": 0.010304216295480728, + "step": 3460 + }, + { + "epoch": 3.120503597122302, + "grad_norm": 1.2310011386871338, + "learning_rate": 9.998329634368388e-05, + "loss": 0.0401, + "step": 3470 + }, + { + "action_loss": 0.02858925424516201, + "epoch": 3.120503597122302, + "step": 3470 + }, + { + "epoch": 3.120503597122302, + "step": 3470, + "torque_loss": 0.13004864752292633 + }, + { + "epoch": 3.120503597122302, + "force_loss": 0.014659442007541656, + "step": 3470 + }, + { + "epoch": 3.129496402877698, + "grad_norm": 0.9335276484489441, + "learning_rate": 9.998257648214253e-05, + "loss": 0.0355, + "step": 3480 + }, + { + "action_loss": 0.039215151220560074, + "epoch": 3.129496402877698, + "step": 3480 + }, + { + "epoch": 3.129496402877698, + "step": 3480, + "torque_loss": 0.22563539445400238 + }, + { + "epoch": 3.129496402877698, + "force_loss": 0.028997493907809258, + "step": 3480 + }, + { + "epoch": 3.1384892086330933, + "grad_norm": 1.2678771018981934, + "learning_rate": 9.998184143721417e-05, + "loss": 0.0467, + "step": 3490 + }, + { + "action_loss": 0.016788633540272713, + "epoch": 3.1384892086330933, + "step": 3490 + }, + { + "epoch": 3.1384892086330933, + "step": 3490, + "torque_loss": 0.11538002640008926 + }, + { + "epoch": 3.1384892086330933, + "force_loss": 0.016048191115260124, + "step": 3490 + }, + { + "epoch": 3.147482014388489, + "grad_norm": 0.9799803495407104, + "learning_rate": 9.998109120912206e-05, + "loss": 0.0353, + "step": 3500 + }, + { + "action_loss": 0.02866787649691105, + "epoch": 3.147482014388489, + "step": 3500 + }, + { + "epoch": 3.147482014388489, + "step": 3500, + "torque_loss": 0.15908962488174438 + }, + { + "epoch": 3.147482014388489, + "force_loss": 0.020994337275624275, + "step": 3500 + }, + { + "epoch": 3.156474820143885, + "grad_norm": 1.8498404026031494, + "learning_rate": 9.998032579809411e-05, + "loss": 0.0427, + "step": 3510 + }, + { + "action_loss": 0.014884372241795063, + "epoch": 3.156474820143885, + "step": 3510 + }, + { + "epoch": 3.156474820143885, + "step": 3510, + "torque_loss": 0.20385859906673431 + }, + { + "epoch": 3.156474820143885, + "force_loss": 0.014859344810247421, + "step": 3510 + }, + { + "epoch": 3.1654676258992804, + "grad_norm": 0.9601037502288818, + "learning_rate": 9.997954520436286e-05, + "loss": 0.0447, + "step": 3520 + }, + { + "action_loss": 0.021648511290550232, + "epoch": 3.1654676258992804, + "step": 3520 + }, + { + "epoch": 3.1654676258992804, + "step": 3520, + "torque_loss": 0.1645248681306839 + }, + { + "epoch": 3.1654676258992804, + "force_loss": 0.016823599115014076, + "step": 3520 + }, + { + "epoch": 3.1744604316546763, + "grad_norm": 1.0865265130996704, + "learning_rate": 9.997874942816538e-05, + "loss": 0.0363, + "step": 3530 + }, + { + "action_loss": 0.034875307232141495, + "epoch": 3.1744604316546763, + "step": 3530 + }, + { + "epoch": 3.1744604316546763, + "step": 3530, + "torque_loss": 0.13772688806056976 + }, + { + "epoch": 3.1744604316546763, + "force_loss": 0.02724367380142212, + "step": 3530 + }, + { + "epoch": 3.183453237410072, + "grad_norm": 0.9987258911132812, + "learning_rate": 9.997793846974345e-05, + "loss": 0.0481, + "step": 3540 + }, + { + "action_loss": 0.013754952698946, + "epoch": 3.183453237410072, + "step": 3540 + }, + { + "epoch": 3.183453237410072, + "step": 3540, + "torque_loss": 0.11794767528772354 + }, + { + "epoch": 3.183453237410072, + "force_loss": 0.008873325772583485, + "step": 3540 + }, + { + "epoch": 3.1924460431654675, + "grad_norm": 1.811292052268982, + "learning_rate": 9.997711232934341e-05, + "loss": 0.0422, + "step": 3550 + }, + { + "action_loss": 0.04341394826769829, + "epoch": 3.1924460431654675, + "step": 3550 + }, + { + "epoch": 3.1924460431654675, + "step": 3550, + "torque_loss": 0.17486751079559326 + }, + { + "epoch": 3.1924460431654675, + "force_loss": 0.03140212595462799, + "step": 3550 + }, + { + "epoch": 3.2014388489208634, + "grad_norm": 0.49703332781791687, + "learning_rate": 9.99762710072162e-05, + "loss": 0.0408, + "step": 3560 + }, + { + "action_loss": 0.023574858903884888, + "epoch": 3.2014388489208634, + "step": 3560 + }, + { + "epoch": 3.2014388489208634, + "step": 3560, + "torque_loss": 0.17619562149047852 + }, + { + "epoch": 3.2014388489208634, + "force_loss": 0.02313327044248581, + "step": 3560 + }, + { + "epoch": 3.210431654676259, + "grad_norm": 0.9942551255226135, + "learning_rate": 9.997541450361743e-05, + "loss": 0.0471, + "step": 3570 + }, + { + "action_loss": 0.0332251638174057, + "epoch": 3.210431654676259, + "step": 3570 + }, + { + "epoch": 3.210431654676259, + "step": 3570, + "torque_loss": 0.2173522710800171 + }, + { + "epoch": 3.210431654676259, + "force_loss": 0.03791753575205803, + "step": 3570 + }, + { + "epoch": 3.2194244604316546, + "grad_norm": 0.9259369373321533, + "learning_rate": 9.997454281880723e-05, + "loss": 0.0405, + "step": 3580 + }, + { + "action_loss": 0.014682799577713013, + "epoch": 3.2194244604316546, + "step": 3580 + }, + { + "epoch": 3.2194244604316546, + "step": 3580, + "torque_loss": 0.12946762144565582 + }, + { + "epoch": 3.2194244604316546, + "force_loss": 0.007857296615839005, + "step": 3580 + }, + { + "epoch": 3.2284172661870505, + "grad_norm": 1.395731806755066, + "learning_rate": 9.997365595305044e-05, + "loss": 0.0395, + "step": 3590 + }, + { + "action_loss": 0.03349921479821205, + "epoch": 3.2284172661870505, + "step": 3590 + }, + { + "epoch": 3.2284172661870505, + "step": 3590, + "torque_loss": 0.22216928005218506 + }, + { + "epoch": 3.2284172661870505, + "force_loss": 0.029241399839520454, + "step": 3590 + }, + { + "epoch": 3.237410071942446, + "grad_norm": 1.07401442527771, + "learning_rate": 9.997275390661644e-05, + "loss": 0.0382, + "step": 3600 + }, + { + "action_loss": 0.013655010610818863, + "epoch": 3.237410071942446, + "step": 3600 + }, + { + "epoch": 3.237410071942446, + "step": 3600, + "torque_loss": 0.1119321957230568 + }, + { + "epoch": 3.237410071942446, + "force_loss": 0.01100183930248022, + "step": 3600 + }, + { + "epoch": 3.2464028776978417, + "grad_norm": 1.507638692855835, + "learning_rate": 9.997183667977926e-05, + "loss": 0.0392, + "step": 3610 + }, + { + "action_loss": 0.01712990552186966, + "epoch": 3.2464028776978417, + "step": 3610 + }, + { + "epoch": 3.2464028776978417, + "step": 3610, + "torque_loss": 0.15502284467220306 + }, + { + "epoch": 3.2464028776978417, + "force_loss": 0.011407186277210712, + "step": 3610 + }, + { + "epoch": 3.2553956834532376, + "grad_norm": 1.8317286968231201, + "learning_rate": 9.997090427281752e-05, + "loss": 0.0379, + "step": 3620 + }, + { + "action_loss": 0.014633041806519032, + "epoch": 3.2553956834532376, + "step": 3620 + }, + { + "epoch": 3.2553956834532376, + "step": 3620, + "torque_loss": 0.10888916999101639 + }, + { + "epoch": 3.2553956834532376, + "force_loss": 0.014046691358089447, + "step": 3620 + }, + { + "epoch": 3.264388489208633, + "grad_norm": 0.5321849584579468, + "learning_rate": 9.996995668601448e-05, + "loss": 0.0357, + "step": 3630 + }, + { + "action_loss": 0.008210469037294388, + "epoch": 3.264388489208633, + "step": 3630 + }, + { + "epoch": 3.264388489208633, + "step": 3630, + "torque_loss": 0.07508663088083267 + }, + { + "epoch": 3.264388489208633, + "force_loss": 0.006780738476663828, + "step": 3630 + }, + { + "epoch": 3.273381294964029, + "grad_norm": 0.8848887085914612, + "learning_rate": 9.996899391965798e-05, + "loss": 0.0296, + "step": 3640 + }, + { + "action_loss": 0.028002088889479637, + "epoch": 3.273381294964029, + "step": 3640 + }, + { + "epoch": 3.273381294964029, + "step": 3640, + "torque_loss": 0.18522708117961884 + }, + { + "epoch": 3.273381294964029, + "force_loss": 0.024815693497657776, + "step": 3640 + }, + { + "epoch": 3.2823741007194247, + "grad_norm": 1.1505377292633057, + "learning_rate": 9.996801597404048e-05, + "loss": 0.042, + "step": 3650 + }, + { + "action_loss": 0.016801102086901665, + "epoch": 3.2823741007194247, + "step": 3650 + }, + { + "epoch": 3.2823741007194247, + "step": 3650, + "torque_loss": 0.16668272018432617 + }, + { + "epoch": 3.2823741007194247, + "force_loss": 0.012274055741727352, + "step": 3650 + }, + { + "epoch": 3.29136690647482, + "grad_norm": 1.0085569620132446, + "learning_rate": 9.996702284945905e-05, + "loss": 0.0342, + "step": 3660 + }, + { + "action_loss": 0.023353412747383118, + "epoch": 3.29136690647482, + "step": 3660 + }, + { + "epoch": 3.29136690647482, + "step": 3660, + "torque_loss": 0.12883375585079193 + }, + { + "epoch": 3.29136690647482, + "force_loss": 0.021165847778320312, + "step": 3660 + }, + { + "epoch": 3.300359712230216, + "grad_norm": 1.063624620437622, + "learning_rate": 9.996601454621539e-05, + "loss": 0.0396, + "step": 3670 + }, + { + "action_loss": 0.01499168947339058, + "epoch": 3.300359712230216, + "step": 3670 + }, + { + "epoch": 3.300359712230216, + "step": 3670, + "torque_loss": 0.15017037093639374 + }, + { + "epoch": 3.300359712230216, + "force_loss": 0.011859290301799774, + "step": 3670 + }, + { + "epoch": 3.3093525179856114, + "grad_norm": 0.9896354079246521, + "learning_rate": 9.996499106461577e-05, + "loss": 0.0348, + "step": 3680 + }, + { + "action_loss": 0.01218866091221571, + "epoch": 3.3093525179856114, + "step": 3680 + }, + { + "epoch": 3.3093525179856114, + "step": 3680, + "torque_loss": 0.1389753669500351 + }, + { + "epoch": 3.3093525179856114, + "force_loss": 0.011266622692346573, + "step": 3680 + }, + { + "epoch": 3.318345323741007, + "grad_norm": 1.788463830947876, + "learning_rate": 9.996395240497112e-05, + "loss": 0.0368, + "step": 3690 + }, + { + "action_loss": 0.03510531410574913, + "epoch": 3.318345323741007, + "step": 3690 + }, + { + "epoch": 3.318345323741007, + "step": 3690, + "torque_loss": 0.1701621562242508 + }, + { + "epoch": 3.318345323741007, + "force_loss": 0.02181488275527954, + "step": 3690 + }, + { + "epoch": 3.327338129496403, + "grad_norm": 1.2068116664886475, + "learning_rate": 9.996289856759696e-05, + "loss": 0.0397, + "step": 3700 + }, + { + "action_loss": 0.019518474116921425, + "epoch": 3.327338129496403, + "step": 3700 + }, + { + "epoch": 3.327338129496403, + "step": 3700, + "torque_loss": 0.11307797580957413 + }, + { + "epoch": 3.327338129496403, + "force_loss": 0.013637223280966282, + "step": 3700 + }, + { + "epoch": 3.3363309352517985, + "grad_norm": 1.6817306280136108, + "learning_rate": 9.996182955281342e-05, + "loss": 0.0374, + "step": 3710 + }, + { + "action_loss": 0.03645653650164604, + "epoch": 3.3363309352517985, + "step": 3710 + }, + { + "epoch": 3.3363309352517985, + "step": 3710, + "torque_loss": 0.18721990287303925 + }, + { + "epoch": 3.3363309352517985, + "force_loss": 0.023440003395080566, + "step": 3710 + }, + { + "epoch": 3.3453237410071943, + "grad_norm": 1.4603242874145508, + "learning_rate": 9.996074536094519e-05, + "loss": 0.0406, + "step": 3720 + }, + { + "action_loss": 0.03198450058698654, + "epoch": 3.3453237410071943, + "step": 3720 + }, + { + "epoch": 3.3453237410071943, + "step": 3720, + "torque_loss": 0.1488967388868332 + }, + { + "epoch": 3.3453237410071943, + "force_loss": 0.02351897396147251, + "step": 3720 + }, + { + "epoch": 3.3543165467625897, + "grad_norm": 0.5646918416023254, + "learning_rate": 9.995964599232168e-05, + "loss": 0.0365, + "step": 3730 + }, + { + "action_loss": 0.03587939590215683, + "epoch": 3.3543165467625897, + "step": 3730 + }, + { + "epoch": 3.3543165467625897, + "step": 3730, + "torque_loss": 0.14359591901302338 + }, + { + "epoch": 3.3543165467625897, + "force_loss": 0.026941703632473946, + "step": 3730 + }, + { + "epoch": 3.3633093525179856, + "grad_norm": 0.924182116985321, + "learning_rate": 9.995853144727683e-05, + "loss": 0.0409, + "step": 3740 + }, + { + "action_loss": 0.029336510226130486, + "epoch": 3.3633093525179856, + "step": 3740 + }, + { + "epoch": 3.3633093525179856, + "step": 3740, + "torque_loss": 0.16399581730365753 + }, + { + "epoch": 3.3633093525179856, + "force_loss": 0.018774529919028282, + "step": 3740 + }, + { + "epoch": 3.3723021582733814, + "grad_norm": 2.190215587615967, + "learning_rate": 9.99574017261492e-05, + "loss": 0.0464, + "step": 3750 + }, + { + "action_loss": 0.01895342394709587, + "epoch": 3.3723021582733814, + "step": 3750 + }, + { + "epoch": 3.3723021582733814, + "step": 3750, + "torque_loss": 0.09289685636758804 + }, + { + "epoch": 3.3723021582733814, + "force_loss": 0.015037271194159985, + "step": 3750 + }, + { + "epoch": 3.381294964028777, + "grad_norm": 0.9864612221717834, + "learning_rate": 9.995625682928198e-05, + "loss": 0.0378, + "step": 3760 + }, + { + "action_loss": 0.01902839168906212, + "epoch": 3.381294964028777, + "step": 3760 + }, + { + "epoch": 3.381294964028777, + "step": 3760, + "torque_loss": 0.15829142928123474 + }, + { + "epoch": 3.381294964028777, + "force_loss": 0.013129468075931072, + "step": 3760 + }, + { + "epoch": 3.3902877697841727, + "grad_norm": 1.2213791608810425, + "learning_rate": 9.995509675702295e-05, + "loss": 0.045, + "step": 3770 + }, + { + "action_loss": 0.01340919453650713, + "epoch": 3.3902877697841727, + "step": 3770 + }, + { + "epoch": 3.3902877697841727, + "step": 3770, + "torque_loss": 0.14504216611385345 + }, + { + "epoch": 3.3902877697841727, + "force_loss": 0.009189505130052567, + "step": 3770 + }, + { + "epoch": 3.3992805755395685, + "grad_norm": 0.8528039455413818, + "learning_rate": 9.995392150972451e-05, + "loss": 0.0436, + "step": 3780 + }, + { + "action_loss": 0.07091962546110153, + "epoch": 3.3992805755395685, + "step": 3780 + }, + { + "epoch": 3.3992805755395685, + "step": 3780, + "torque_loss": 0.14519180357456207 + }, + { + "epoch": 3.3992805755395685, + "force_loss": 0.0425095297396183, + "step": 3780 + }, + { + "epoch": 3.408273381294964, + "grad_norm": 0.9560016393661499, + "learning_rate": 9.995273108774366e-05, + "loss": 0.0528, + "step": 3790 + }, + { + "action_loss": 0.016869833692908287, + "epoch": 3.408273381294964, + "step": 3790 + }, + { + "epoch": 3.408273381294964, + "step": 3790, + "torque_loss": 0.12144602090120316 + }, + { + "epoch": 3.408273381294964, + "force_loss": 0.00925482902675867, + "step": 3790 + }, + { + "epoch": 3.41726618705036, + "grad_norm": 1.135102391242981, + "learning_rate": 9.995152549144205e-05, + "loss": 0.0495, + "step": 3800 + }, + { + "action_loss": 0.020546669140458107, + "epoch": 3.41726618705036, + "step": 3800 + }, + { + "epoch": 3.41726618705036, + "step": 3800, + "torque_loss": 0.20160607993602753 + }, + { + "epoch": 3.41726618705036, + "force_loss": 0.015021462924778461, + "step": 3800 + }, + { + "epoch": 3.4262589928057556, + "grad_norm": 0.9326383471488953, + "learning_rate": 9.995030472118587e-05, + "loss": 0.0426, + "step": 3810 + }, + { + "action_loss": 0.04457582160830498, + "epoch": 3.4262589928057556, + "step": 3810 + }, + { + "epoch": 3.4262589928057556, + "step": 3810, + "torque_loss": 0.17118744552135468 + }, + { + "epoch": 3.4262589928057556, + "force_loss": 0.026535334065556526, + "step": 3810 + }, + { + "epoch": 3.435251798561151, + "grad_norm": 1.203792691230774, + "learning_rate": 9.9949068777346e-05, + "loss": 0.0442, + "step": 3820 + }, + { + "action_loss": 0.0204255823045969, + "epoch": 3.435251798561151, + "step": 3820 + }, + { + "epoch": 3.435251798561151, + "step": 3820, + "torque_loss": 0.18842995166778564 + }, + { + "epoch": 3.435251798561151, + "force_loss": 0.01134722400456667, + "step": 3820 + }, + { + "epoch": 3.444244604316547, + "grad_norm": 1.5818146467208862, + "learning_rate": 9.994781766029786e-05, + "loss": 0.0392, + "step": 3830 + }, + { + "action_loss": 0.041504666209220886, + "epoch": 3.444244604316547, + "step": 3830 + }, + { + "epoch": 3.444244604316547, + "step": 3830, + "torque_loss": 0.18697874248027802 + }, + { + "epoch": 3.444244604316547, + "force_loss": 0.03712286055088043, + "step": 3830 + }, + { + "epoch": 3.4532374100719423, + "grad_norm": 0.9084063768386841, + "learning_rate": 9.994655137042151e-05, + "loss": 0.0465, + "step": 3840 + }, + { + "action_loss": 0.04241631552577019, + "epoch": 3.4532374100719423, + "step": 3840 + }, + { + "epoch": 3.4532374100719423, + "step": 3840, + "torque_loss": 0.16232672333717346 + }, + { + "epoch": 3.4532374100719423, + "force_loss": 0.022606873884797096, + "step": 3840 + }, + { + "epoch": 3.462230215827338, + "grad_norm": 1.1572521924972534, + "learning_rate": 9.99452699081016e-05, + "loss": 0.0365, + "step": 3850 + }, + { + "action_loss": 0.02277149260044098, + "epoch": 3.462230215827338, + "step": 3850 + }, + { + "epoch": 3.462230215827338, + "step": 3850, + "torque_loss": 0.17342841625213623 + }, + { + "epoch": 3.462230215827338, + "force_loss": 0.011866346001625061, + "step": 3850 + }, + { + "epoch": 3.471223021582734, + "grad_norm": 1.7111402750015259, + "learning_rate": 9.994397327372743e-05, + "loss": 0.0443, + "step": 3860 + }, + { + "action_loss": 0.033042121678590775, + "epoch": 3.471223021582734, + "step": 3860 + }, + { + "epoch": 3.471223021582734, + "step": 3860, + "torque_loss": 0.21560950577259064 + }, + { + "epoch": 3.471223021582734, + "force_loss": 0.035754211246967316, + "step": 3860 + }, + { + "epoch": 3.4802158273381294, + "grad_norm": 1.9151628017425537, + "learning_rate": 9.994266146769286e-05, + "loss": 0.045, + "step": 3870 + }, + { + "action_loss": 0.01448530051857233, + "epoch": 3.4802158273381294, + "step": 3870 + }, + { + "epoch": 3.4802158273381294, + "step": 3870, + "torque_loss": 0.15126045048236847 + }, + { + "epoch": 3.4802158273381294, + "force_loss": 0.012448366731405258, + "step": 3870 + }, + { + "epoch": 3.4892086330935252, + "grad_norm": 1.1126052141189575, + "learning_rate": 9.994133449039642e-05, + "loss": 0.041, + "step": 3880 + }, + { + "action_loss": 0.02015937864780426, + "epoch": 3.4892086330935252, + "step": 3880 + }, + { + "epoch": 3.4892086330935252, + "step": 3880, + "torque_loss": 0.1298368126153946 + }, + { + "epoch": 3.4892086330935252, + "force_loss": 0.012545089237391949, + "step": 3880 + }, + { + "epoch": 3.4982014388489207, + "grad_norm": 1.0569645166397095, + "learning_rate": 9.993999234224118e-05, + "loss": 0.0429, + "step": 3890 + }, + { + "action_loss": 0.026209423318505287, + "epoch": 3.4982014388489207, + "step": 3890 + }, + { + "epoch": 3.4982014388489207, + "step": 3890, + "torque_loss": 0.1408567875623703 + }, + { + "epoch": 3.4982014388489207, + "force_loss": 0.022115493193268776, + "step": 3890 + }, + { + "epoch": 3.5071942446043165, + "grad_norm": 1.0926611423492432, + "learning_rate": 9.993863502363485e-05, + "loss": 0.0374, + "step": 3900 + }, + { + "action_loss": 0.02498788945376873, + "epoch": 3.5071942446043165, + "step": 3900 + }, + { + "epoch": 3.5071942446043165, + "step": 3900, + "torque_loss": 0.15101857483386993 + }, + { + "epoch": 3.5071942446043165, + "force_loss": 0.02080947905778885, + "step": 3900 + }, + { + "epoch": 3.5161870503597124, + "grad_norm": 0.7063518762588501, + "learning_rate": 9.993726253498976e-05, + "loss": 0.034, + "step": 3910 + }, + { + "action_loss": 0.02012750692665577, + "epoch": 3.5161870503597124, + "step": 3910 + }, + { + "epoch": 3.5161870503597124, + "step": 3910, + "torque_loss": 0.11606822162866592 + }, + { + "epoch": 3.5161870503597124, + "force_loss": 0.017499500885605812, + "step": 3910 + }, + { + "epoch": 3.5251798561151078, + "grad_norm": 1.7970179319381714, + "learning_rate": 9.993587487672282e-05, + "loss": 0.0349, + "step": 3920 + }, + { + "action_loss": 0.025140687823295593, + "epoch": 3.5251798561151078, + "step": 3920 + }, + { + "epoch": 3.5251798561151078, + "step": 3920, + "torque_loss": 0.12630312144756317 + }, + { + "epoch": 3.5251798561151078, + "force_loss": 0.01934153400361538, + "step": 3920 + }, + { + "epoch": 3.5341726618705036, + "grad_norm": 1.2401013374328613, + "learning_rate": 9.993447204925558e-05, + "loss": 0.0422, + "step": 3930 + }, + { + "action_loss": 0.03816840425133705, + "epoch": 3.5341726618705036, + "step": 3930 + }, + { + "epoch": 3.5341726618705036, + "step": 3930, + "torque_loss": 0.22597800195217133 + }, + { + "epoch": 3.5341726618705036, + "force_loss": 0.0375300869345665, + "step": 3930 + }, + { + "epoch": 3.543165467625899, + "grad_norm": 0.7885603308677673, + "learning_rate": 9.993305405301416e-05, + "loss": 0.0448, + "step": 3940 + }, + { + "action_loss": 0.020705319941043854, + "epoch": 3.543165467625899, + "step": 3940 + }, + { + "epoch": 3.543165467625899, + "step": 3940, + "torque_loss": 0.11626722663640976 + }, + { + "epoch": 3.543165467625899, + "force_loss": 0.02008006162941456, + "step": 3940 + }, + { + "epoch": 3.552158273381295, + "grad_norm": 0.9565855264663696, + "learning_rate": 9.993162088842935e-05, + "loss": 0.0412, + "step": 3950 + }, + { + "action_loss": 0.04231289029121399, + "epoch": 3.552158273381295, + "step": 3950 + }, + { + "epoch": 3.552158273381295, + "step": 3950, + "torque_loss": 0.18785852193832397 + }, + { + "epoch": 3.552158273381295, + "force_loss": 0.022834552451968193, + "step": 3950 + }, + { + "epoch": 3.5611510791366907, + "grad_norm": 0.9896112084388733, + "learning_rate": 9.993017255593646e-05, + "loss": 0.048, + "step": 3960 + }, + { + "action_loss": 0.015152017585933208, + "epoch": 3.5611510791366907, + "step": 3960 + }, + { + "epoch": 3.5611510791366907, + "step": 3960, + "torque_loss": 0.16741900146007538 + }, + { + "epoch": 3.5611510791366907, + "force_loss": 0.009725605137646198, + "step": 3960 + }, + { + "epoch": 3.5701438848920866, + "grad_norm": 0.6375613808631897, + "learning_rate": 9.992870905597548e-05, + "loss": 0.0382, + "step": 3970 + }, + { + "action_loss": 0.0428573377430439, + "epoch": 3.5701438848920866, + "step": 3970 + }, + { + "epoch": 3.5701438848920866, + "step": 3970, + "torque_loss": 0.11328927427530289 + }, + { + "epoch": 3.5701438848920866, + "force_loss": 0.03325273469090462, + "step": 3970 + }, + { + "epoch": 3.579136690647482, + "grad_norm": 0.8191521763801575, + "learning_rate": 9.9927230388991e-05, + "loss": 0.0361, + "step": 3980 + }, + { + "action_loss": 0.018099667504429817, + "epoch": 3.579136690647482, + "step": 3980 + }, + { + "epoch": 3.579136690647482, + "step": 3980, + "torque_loss": 0.12714602053165436 + }, + { + "epoch": 3.579136690647482, + "force_loss": 0.015349042601883411, + "step": 3980 + }, + { + "epoch": 3.588129496402878, + "grad_norm": 1.7662588357925415, + "learning_rate": 9.992573655543215e-05, + "loss": 0.0349, + "step": 3990 + }, + { + "action_loss": 0.018912767991423607, + "epoch": 3.588129496402878, + "step": 3990 + }, + { + "epoch": 3.588129496402878, + "step": 3990, + "torque_loss": 0.1362752616405487 + }, + { + "epoch": 3.588129496402878, + "force_loss": 0.011249770410358906, + "step": 3990 + }, + { + "epoch": 3.597122302158273, + "grad_norm": 0.8619410991668701, + "learning_rate": 9.992422755575277e-05, + "loss": 0.034, + "step": 4000 + }, + { + "action_loss": 0.022844454273581505, + "epoch": 3.597122302158273, + "step": 4000 + }, + { + "epoch": 3.597122302158273, + "step": 4000, + "torque_loss": 0.14633597433567047 + }, + { + "epoch": 3.597122302158273, + "force_loss": 0.011328269727528095, + "step": 4000 + }, + { + "epoch": 3.606115107913669, + "grad_norm": 0.6428923606872559, + "learning_rate": 9.992270339041123e-05, + "loss": 0.0381, + "step": 4010 + }, + { + "action_loss": 0.015509418211877346, + "epoch": 3.606115107913669, + "step": 4010 + }, + { + "epoch": 3.606115107913669, + "step": 4010, + "torque_loss": 0.20643214881420135 + }, + { + "epoch": 3.606115107913669, + "force_loss": 0.007344987243413925, + "step": 4010 + }, + { + "epoch": 3.615107913669065, + "grad_norm": 2.0724446773529053, + "learning_rate": 9.992116405987053e-05, + "loss": 0.0369, + "step": 4020 + }, + { + "action_loss": 0.025622619315981865, + "epoch": 3.615107913669065, + "step": 4020 + }, + { + "epoch": 3.615107913669065, + "step": 4020, + "torque_loss": 0.16435176134109497 + }, + { + "epoch": 3.615107913669065, + "force_loss": 0.023499146103858948, + "step": 4020 + }, + { + "epoch": 3.6241007194244603, + "grad_norm": 0.7356473803520203, + "learning_rate": 9.991960956459828e-05, + "loss": 0.0386, + "step": 4030 + }, + { + "action_loss": 0.023059682920575142, + "epoch": 3.6241007194244603, + "step": 4030 + }, + { + "epoch": 3.6241007194244603, + "step": 4030, + "torque_loss": 0.15098251402378082 + }, + { + "epoch": 3.6241007194244603, + "force_loss": 0.01126853097230196, + "step": 4030 + }, + { + "epoch": 3.633093525179856, + "grad_norm": 0.8907721042633057, + "learning_rate": 9.991803990506669e-05, + "loss": 0.0403, + "step": 4040 + }, + { + "action_loss": 0.02048400230705738, + "epoch": 3.633093525179856, + "step": 4040 + }, + { + "epoch": 3.633093525179856, + "step": 4040, + "torque_loss": 0.16468499600887299 + }, + { + "epoch": 3.633093525179856, + "force_loss": 0.021904809400439262, + "step": 4040 + }, + { + "epoch": 3.6420863309352516, + "grad_norm": 1.6261680126190186, + "learning_rate": 9.991645508175258e-05, + "loss": 0.0453, + "step": 4050 + }, + { + "action_loss": 0.03897305205464363, + "epoch": 3.6420863309352516, + "step": 4050 + }, + { + "epoch": 3.6420863309352516, + "step": 4050, + "torque_loss": 0.1823081225156784 + }, + { + "epoch": 3.6420863309352516, + "force_loss": 0.022277308627963066, + "step": 4050 + }, + { + "epoch": 3.6510791366906474, + "grad_norm": 0.8403676152229309, + "learning_rate": 9.99148550951374e-05, + "loss": 0.0349, + "step": 4060 + }, + { + "action_loss": 0.019982775673270226, + "epoch": 3.6510791366906474, + "step": 4060 + }, + { + "epoch": 3.6510791366906474, + "step": 4060, + "torque_loss": 0.14510613679885864 + }, + { + "epoch": 3.6510791366906474, + "force_loss": 0.016272230073809624, + "step": 4060 + }, + { + "epoch": 3.6600719424460433, + "grad_norm": 0.7153682112693787, + "learning_rate": 9.991323994570716e-05, + "loss": 0.0481, + "step": 4070 + }, + { + "action_loss": 0.012759196572005749, + "epoch": 3.6600719424460433, + "step": 4070 + }, + { + "epoch": 3.6600719424460433, + "step": 4070, + "torque_loss": 0.1196979507803917 + }, + { + "epoch": 3.6600719424460433, + "force_loss": 0.011219008825719357, + "step": 4070 + }, + { + "epoch": 3.6690647482014387, + "grad_norm": 1.0684508085250854, + "learning_rate": 9.99116096339525e-05, + "loss": 0.038, + "step": 4080 + }, + { + "action_loss": 0.03012736700475216, + "epoch": 3.6690647482014387, + "step": 4080 + }, + { + "epoch": 3.6690647482014387, + "step": 4080, + "torque_loss": 0.13003218173980713 + }, + { + "epoch": 3.6690647482014387, + "force_loss": 0.020892785862088203, + "step": 4080 + }, + { + "epoch": 3.6780575539568345, + "grad_norm": 1.5951957702636719, + "learning_rate": 9.990996416036869e-05, + "loss": 0.0351, + "step": 4090 + }, + { + "action_loss": 0.01665342226624489, + "epoch": 3.6780575539568345, + "step": 4090 + }, + { + "epoch": 3.6780575539568345, + "step": 4090, + "torque_loss": 0.1305442452430725 + }, + { + "epoch": 3.6780575539568345, + "force_loss": 0.01119800191372633, + "step": 4090 + }, + { + "epoch": 3.68705035971223, + "grad_norm": 1.4955958127975464, + "learning_rate": 9.990830352545555e-05, + "loss": 0.0366, + "step": 4100 + }, + { + "action_loss": 0.013869650661945343, + "epoch": 3.68705035971223, + "step": 4100 + }, + { + "epoch": 3.68705035971223, + "step": 4100, + "torque_loss": 0.10206267982721329 + }, + { + "epoch": 3.68705035971223, + "force_loss": 0.008940272964537144, + "step": 4100 + }, + { + "epoch": 3.696043165467626, + "grad_norm": 0.483769029378891, + "learning_rate": 9.990662772971756e-05, + "loss": 0.0326, + "step": 4110 + }, + { + "action_loss": 0.03507047891616821, + "epoch": 3.696043165467626, + "step": 4110 + }, + { + "epoch": 3.696043165467626, + "step": 4110, + "torque_loss": 0.19386164844036102 + }, + { + "epoch": 3.696043165467626, + "force_loss": 0.02307868003845215, + "step": 4110 + }, + { + "epoch": 3.7050359712230216, + "grad_norm": 1.2861037254333496, + "learning_rate": 9.990493677366376e-05, + "loss": 0.0356, + "step": 4120 + }, + { + "action_loss": 0.018631864339113235, + "epoch": 3.7050359712230216, + "step": 4120 + }, + { + "epoch": 3.7050359712230216, + "step": 4120, + "torque_loss": 0.18866489827632904 + }, + { + "epoch": 3.7050359712230216, + "force_loss": 0.01411637756973505, + "step": 4120 + }, + { + "epoch": 3.7140287769784175, + "grad_norm": 0.7408118844032288, + "learning_rate": 9.990323065780786e-05, + "loss": 0.032, + "step": 4130 + }, + { + "action_loss": 0.022415978834033012, + "epoch": 3.7140287769784175, + "step": 4130 + }, + { + "epoch": 3.7140287769784175, + "step": 4130, + "torque_loss": 0.09953228384256363 + }, + { + "epoch": 3.7140287769784175, + "force_loss": 0.011655923910439014, + "step": 4130 + }, + { + "epoch": 3.723021582733813, + "grad_norm": 1.2279568910598755, + "learning_rate": 9.990150938266808e-05, + "loss": 0.0389, + "step": 4140 + }, + { + "action_loss": 0.012769377790391445, + "epoch": 3.723021582733813, + "step": 4140 + }, + { + "epoch": 3.723021582733813, + "step": 4140, + "torque_loss": 0.1138978898525238 + }, + { + "epoch": 3.723021582733813, + "force_loss": 0.011995532549917698, + "step": 4140 + }, + { + "epoch": 3.7320143884892087, + "grad_norm": 1.4583615064620972, + "learning_rate": 9.989977294876733e-05, + "loss": 0.0369, + "step": 4150 + }, + { + "action_loss": 0.015207845717668533, + "epoch": 3.7320143884892087, + "step": 4150 + }, + { + "epoch": 3.7320143884892087, + "step": 4150, + "torque_loss": 0.11968394368886948 + }, + { + "epoch": 3.7320143884892087, + "force_loss": 0.009818506427109241, + "step": 4150 + }, + { + "epoch": 3.741007194244604, + "grad_norm": 0.9236806631088257, + "learning_rate": 9.989802135663308e-05, + "loss": 0.0294, + "step": 4160 + }, + { + "action_loss": 0.02421175129711628, + "epoch": 3.741007194244604, + "step": 4160 + }, + { + "epoch": 3.741007194244604, + "step": 4160, + "torque_loss": 0.17359507083892822 + }, + { + "epoch": 3.741007194244604, + "force_loss": 0.014731325209140778, + "step": 4160 + }, + { + "epoch": 3.75, + "grad_norm": 0.9404249787330627, + "learning_rate": 9.989625460679743e-05, + "loss": 0.0325, + "step": 4170 + }, + { + "action_loss": 0.031850870698690414, + "epoch": 3.75, + "step": 4170 + }, + { + "epoch": 3.75, + "step": 4170, + "torque_loss": 0.196450337767601 + }, + { + "epoch": 3.75, + "force_loss": 0.036668237298727036, + "step": 4170 + }, + { + "epoch": 3.758992805755396, + "grad_norm": 0.9139700531959534, + "learning_rate": 9.989447269979706e-05, + "loss": 0.0343, + "step": 4180 + }, + { + "action_loss": 0.012918020598590374, + "epoch": 3.758992805755396, + "step": 4180 + }, + { + "epoch": 3.758992805755396, + "step": 4180, + "torque_loss": 0.1268961876630783 + }, + { + "epoch": 3.758992805755396, + "force_loss": 0.008404101245105267, + "step": 4180 + }, + { + "epoch": 3.7679856115107913, + "grad_norm": 0.8955261707305908, + "learning_rate": 9.989267563617328e-05, + "loss": 0.0357, + "step": 4190 + }, + { + "action_loss": 0.009634359739720821, + "epoch": 3.7679856115107913, + "step": 4190 + }, + { + "epoch": 3.7679856115107913, + "step": 4190, + "torque_loss": 0.12917061150074005 + }, + { + "epoch": 3.7679856115107913, + "force_loss": 0.007190661504864693, + "step": 4190 + }, + { + "epoch": 3.776978417266187, + "grad_norm": 1.0073851346969604, + "learning_rate": 9.989086341647198e-05, + "loss": 0.0317, + "step": 4200 + }, + { + "action_loss": 0.033623646944761276, + "epoch": 3.776978417266187, + "step": 4200 + }, + { + "epoch": 3.776978417266187, + "step": 4200, + "torque_loss": 0.12459053844213486 + }, + { + "epoch": 3.776978417266187, + "force_loss": 0.02177286706864834, + "step": 4200 + }, + { + "epoch": 3.7859712230215825, + "grad_norm": 0.9160759449005127, + "learning_rate": 9.988903604124366e-05, + "loss": 0.0321, + "step": 4210 + }, + { + "action_loss": 0.009711216203868389, + "epoch": 3.7859712230215825, + "step": 4210 + }, + { + "epoch": 3.7859712230215825, + "step": 4210, + "torque_loss": 0.174884632229805 + }, + { + "epoch": 3.7859712230215825, + "force_loss": 0.0061207967810332775, + "step": 4210 + }, + { + "epoch": 3.7949640287769784, + "grad_norm": 0.9023813605308533, + "learning_rate": 9.988719351104343e-05, + "loss": 0.0331, + "step": 4220 + }, + { + "action_loss": 0.009800736792385578, + "epoch": 3.7949640287769784, + "step": 4220 + }, + { + "epoch": 3.7949640287769784, + "step": 4220, + "torque_loss": 0.14536048471927643 + }, + { + "epoch": 3.7949640287769784, + "force_loss": 0.008176678791642189, + "step": 4220 + }, + { + "epoch": 3.803956834532374, + "grad_norm": 1.2028447389602661, + "learning_rate": 9.9885335826431e-05, + "loss": 0.0324, + "step": 4230 + }, + { + "action_loss": 0.009005378931760788, + "epoch": 3.803956834532374, + "step": 4230 + }, + { + "epoch": 3.803956834532374, + "step": 4230, + "torque_loss": 0.15913419425487518 + }, + { + "epoch": 3.803956834532374, + "force_loss": 0.006897900719195604, + "step": 4230 + }, + { + "epoch": 3.81294964028777, + "grad_norm": 0.7526970505714417, + "learning_rate": 9.988346298797071e-05, + "loss": 0.03, + "step": 4240 + }, + { + "action_loss": 0.014618687331676483, + "epoch": 3.81294964028777, + "step": 4240 + }, + { + "epoch": 3.81294964028777, + "step": 4240, + "torque_loss": 0.13127528131008148 + }, + { + "epoch": 3.81294964028777, + "force_loss": 0.006682276260107756, + "step": 4240 + }, + { + "epoch": 3.8219424460431655, + "grad_norm": 2.0396652221679688, + "learning_rate": 9.988157499623146e-05, + "loss": 0.0416, + "step": 4250 + }, + { + "action_loss": 0.020371615886688232, + "epoch": 3.8219424460431655, + "step": 4250 + }, + { + "epoch": 3.8219424460431655, + "step": 4250, + "torque_loss": 0.1893899291753769 + }, + { + "epoch": 3.8219424460431655, + "force_loss": 0.012332126498222351, + "step": 4250 + }, + { + "epoch": 3.8309352517985613, + "grad_norm": 1.1619176864624023, + "learning_rate": 9.987967185178677e-05, + "loss": 0.0327, + "step": 4260 + }, + { + "action_loss": 0.01781490258872509, + "epoch": 3.8309352517985613, + "step": 4260 + }, + { + "epoch": 3.8309352517985613, + "step": 4260, + "torque_loss": 0.1966101974248886 + }, + { + "epoch": 3.8309352517985613, + "force_loss": 0.015079325996339321, + "step": 4260 + }, + { + "epoch": 3.8399280575539567, + "grad_norm": 0.9576718807220459, + "learning_rate": 9.987775355521476e-05, + "loss": 0.0387, + "step": 4270 + }, + { + "action_loss": 0.007879335433244705, + "epoch": 3.8399280575539567, + "step": 4270 + }, + { + "epoch": 3.8399280575539567, + "step": 4270, + "torque_loss": 0.12961077690124512 + }, + { + "epoch": 3.8399280575539567, + "force_loss": 0.008596367202699184, + "step": 4270 + }, + { + "epoch": 3.8489208633093526, + "grad_norm": 1.0311295986175537, + "learning_rate": 9.987582010709817e-05, + "loss": 0.0368, + "step": 4280 + }, + { + "action_loss": 0.056443434208631516, + "epoch": 3.8489208633093526, + "step": 4280 + }, + { + "epoch": 3.8489208633093526, + "step": 4280, + "torque_loss": 0.23857808113098145 + }, + { + "epoch": 3.8489208633093526, + "force_loss": 0.051936257630586624, + "step": 4280 + }, + { + "epoch": 3.8579136690647484, + "grad_norm": 0.3889097571372986, + "learning_rate": 9.987387150802431e-05, + "loss": 0.0491, + "step": 4290 + }, + { + "action_loss": 0.010186254046857357, + "epoch": 3.8579136690647484, + "step": 4290 + }, + { + "epoch": 3.8579136690647484, + "step": 4290, + "torque_loss": 0.11526137590408325 + }, + { + "epoch": 3.8579136690647484, + "force_loss": 0.00836224015802145, + "step": 4290 + }, + { + "epoch": 3.866906474820144, + "grad_norm": 0.7624739408493042, + "learning_rate": 9.987190775858517e-05, + "loss": 0.0328, + "step": 4300 + }, + { + "action_loss": 0.021824024617671967, + "epoch": 3.866906474820144, + "step": 4300 + }, + { + "epoch": 3.866906474820144, + "step": 4300, + "torque_loss": 0.11139372736215591 + }, + { + "epoch": 3.866906474820144, + "force_loss": 0.011579248122870922, + "step": 4300 + }, + { + "epoch": 3.8758992805755397, + "grad_norm": 0.6951278448104858, + "learning_rate": 9.98699288593772e-05, + "loss": 0.0351, + "step": 4310 + }, + { + "action_loss": 0.009745229966938496, + "epoch": 3.8758992805755397, + "step": 4310 + }, + { + "epoch": 3.8758992805755397, + "step": 4310, + "torque_loss": 0.1391468197107315 + }, + { + "epoch": 3.8758992805755397, + "force_loss": 0.01467133779078722, + "step": 4310 + }, + { + "epoch": 3.884892086330935, + "grad_norm": 1.9014315605163574, + "learning_rate": 9.986793481100161e-05, + "loss": 0.0376, + "step": 4320 + }, + { + "action_loss": 0.024018773809075356, + "epoch": 3.884892086330935, + "step": 4320 + }, + { + "epoch": 3.884892086330935, + "step": 4320, + "torque_loss": 0.13993610441684723 + }, + { + "epoch": 3.884892086330935, + "force_loss": 0.021737635135650635, + "step": 4320 + }, + { + "epoch": 3.893884892086331, + "grad_norm": 0.5066997408866882, + "learning_rate": 9.986592561406412e-05, + "loss": 0.035, + "step": 4330 + }, + { + "action_loss": 0.01049784291535616, + "epoch": 3.893884892086331, + "step": 4330 + }, + { + "epoch": 3.893884892086331, + "step": 4330, + "torque_loss": 0.16271261870861053 + }, + { + "epoch": 3.893884892086331, + "force_loss": 0.007432944606989622, + "step": 4330 + }, + { + "epoch": 3.902877697841727, + "grad_norm": 0.7239689826965332, + "learning_rate": 9.986390126917503e-05, + "loss": 0.0298, + "step": 4340 + }, + { + "action_loss": 0.01687888614833355, + "epoch": 3.902877697841727, + "step": 4340 + }, + { + "epoch": 3.902877697841727, + "step": 4340, + "torque_loss": 0.20288710296154022 + }, + { + "epoch": 3.902877697841727, + "force_loss": 0.010263652540743351, + "step": 4340 + }, + { + "epoch": 3.911870503597122, + "grad_norm": 0.8635382056236267, + "learning_rate": 9.986186177694933e-05, + "loss": 0.0307, + "step": 4350 + }, + { + "action_loss": 0.02397182583808899, + "epoch": 3.911870503597122, + "step": 4350 + }, + { + "epoch": 3.911870503597122, + "step": 4350, + "torque_loss": 0.14415116608142853 + }, + { + "epoch": 3.911870503597122, + "force_loss": 0.021520474925637245, + "step": 4350 + }, + { + "epoch": 3.920863309352518, + "grad_norm": 1.4948008060455322, + "learning_rate": 9.985980713800656e-05, + "loss": 0.0429, + "step": 4360 + }, + { + "action_loss": 0.008482703007757664, + "epoch": 3.920863309352518, + "step": 4360 + }, + { + "epoch": 3.920863309352518, + "step": 4360, + "torque_loss": 0.11190765351057053 + }, + { + "epoch": 3.920863309352518, + "force_loss": 0.008000750094652176, + "step": 4360 + }, + { + "epoch": 3.9298561151079134, + "grad_norm": 0.8221027255058289, + "learning_rate": 9.985773735297084e-05, + "loss": 0.0305, + "step": 4370 + }, + { + "action_loss": 0.014182105660438538, + "epoch": 3.9298561151079134, + "step": 4370 + }, + { + "epoch": 3.9298561151079134, + "step": 4370, + "torque_loss": 0.179058238863945 + }, + { + "epoch": 3.9298561151079134, + "force_loss": 0.010382688604295254, + "step": 4370 + }, + { + "epoch": 3.9388489208633093, + "grad_norm": 0.8402854204177856, + "learning_rate": 9.985565242247092e-05, + "loss": 0.0347, + "step": 4380 + }, + { + "action_loss": 0.01737954281270504, + "epoch": 3.9388489208633093, + "step": 4380 + }, + { + "epoch": 3.9388489208633093, + "step": 4380, + "torque_loss": 0.16029177606105804 + }, + { + "epoch": 3.9388489208633093, + "force_loss": 0.01366510521620512, + "step": 4380 + }, + { + "epoch": 3.947841726618705, + "grad_norm": 1.3627126216888428, + "learning_rate": 9.985355234714016e-05, + "loss": 0.0421, + "step": 4390 + }, + { + "action_loss": 0.009394989348948002, + "epoch": 3.947841726618705, + "step": 4390 + }, + { + "epoch": 3.947841726618705, + "step": 4390, + "torque_loss": 0.14305204153060913 + }, + { + "epoch": 3.947841726618705, + "force_loss": 0.009510352276265621, + "step": 4390 + }, + { + "epoch": 3.956834532374101, + "grad_norm": 1.1128345727920532, + "learning_rate": 9.985143712761652e-05, + "loss": 0.029, + "step": 4400 + }, + { + "action_loss": 0.011551998555660248, + "epoch": 3.956834532374101, + "step": 4400 + }, + { + "epoch": 3.956834532374101, + "step": 4400, + "torque_loss": 0.07601193338632584 + }, + { + "epoch": 3.956834532374101, + "force_loss": 0.007855924777686596, + "step": 4400 + }, + { + "epoch": 3.9658273381294964, + "grad_norm": 0.510873019695282, + "learning_rate": 9.984930676454252e-05, + "loss": 0.0339, + "step": 4410 + }, + { + "action_loss": 0.012548827566206455, + "epoch": 3.9658273381294964, + "step": 4410 + }, + { + "epoch": 3.9658273381294964, + "step": 4410, + "torque_loss": 0.10738100856542587 + }, + { + "epoch": 3.9658273381294964, + "force_loss": 0.007798769976943731, + "step": 4410 + }, + { + "epoch": 3.9748201438848922, + "grad_norm": 0.6574655771255493, + "learning_rate": 9.984716125856532e-05, + "loss": 0.0325, + "step": 4420 + }, + { + "action_loss": 0.00918615236878395, + "epoch": 3.9748201438848922, + "step": 4420 + }, + { + "epoch": 3.9748201438848922, + "step": 4420, + "torque_loss": 0.12600557506084442 + }, + { + "epoch": 3.9748201438848922, + "force_loss": 0.007819893769919872, + "step": 4420 + }, + { + "epoch": 3.9838129496402876, + "grad_norm": 0.6425493955612183, + "learning_rate": 9.984500061033667e-05, + "loss": 0.0293, + "step": 4430 + }, + { + "action_loss": 0.011316080577671528, + "epoch": 3.9838129496402876, + "step": 4430 + }, + { + "epoch": 3.9838129496402876, + "step": 4430, + "torque_loss": 0.12432371824979782 + }, + { + "epoch": 3.9838129496402876, + "force_loss": 0.007674422115087509, + "step": 4430 + }, + { + "epoch": 3.9928057553956835, + "grad_norm": 1.767102599143982, + "learning_rate": 9.984282482051293e-05, + "loss": 0.0342, + "step": 4440 + }, + { + "action_loss": 0.029515380039811134, + "epoch": 3.9928057553956835, + "step": 4440 + }, + { + "epoch": 3.9928057553956835, + "step": 4440, + "torque_loss": 0.20241832733154297 + }, + { + "epoch": 3.9928057553956835, + "force_loss": 0.03437669947743416, + "step": 4440 + }, + { + "epoch": 4.001798561151079, + "grad_norm": 1.1519076824188232, + "learning_rate": 9.9840633889755e-05, + "loss": 0.0349, + "step": 4450 + }, + { + "action_loss": 0.02478351630270481, + "epoch": 4.001798561151079, + "step": 4450 + }, + { + "epoch": 4.001798561151079, + "step": 4450, + "torque_loss": 0.22265885770320892 + }, + { + "epoch": 4.001798561151079, + "force_loss": 0.023469364270567894, + "step": 4450 + }, + { + "epoch": 4.010791366906475, + "grad_norm": 0.9709997177124023, + "learning_rate": 9.983842781872848e-05, + "loss": 0.0393, + "step": 4460 + }, + { + "action_loss": 0.022763893008232117, + "epoch": 4.010791366906475, + "step": 4460 + }, + { + "epoch": 4.010791366906475, + "step": 4460, + "torque_loss": 0.156441792845726 + }, + { + "epoch": 4.010791366906475, + "force_loss": 0.012009362690150738, + "step": 4460 + }, + { + "epoch": 4.01978417266187, + "grad_norm": 0.885554850101471, + "learning_rate": 9.98362066081035e-05, + "loss": 0.0374, + "step": 4470 + }, + { + "action_loss": 0.025641275569796562, + "epoch": 4.01978417266187, + "step": 4470 + }, + { + "epoch": 4.01978417266187, + "step": 4470, + "torque_loss": 0.16256771981716156 + }, + { + "epoch": 4.01978417266187, + "force_loss": 0.02282743901014328, + "step": 4470 + }, + { + "epoch": 4.028776978417266, + "grad_norm": 1.4418059587478638, + "learning_rate": 9.983397025855479e-05, + "loss": 0.032, + "step": 4480 + }, + { + "action_loss": 0.02729133330285549, + "epoch": 4.028776978417266, + "step": 4480 + }, + { + "epoch": 4.028776978417266, + "step": 4480, + "torque_loss": 0.15347303450107574 + }, + { + "epoch": 4.028776978417266, + "force_loss": 0.02187216840684414, + "step": 4480 + }, + { + "epoch": 4.037769784172662, + "grad_norm": 1.0959330797195435, + "learning_rate": 9.983171877076171e-05, + "loss": 0.034, + "step": 4490 + }, + { + "action_loss": 0.052611928433179855, + "epoch": 4.037769784172662, + "step": 4490 + }, + { + "epoch": 4.037769784172662, + "step": 4490, + "torque_loss": 0.18150770664215088 + }, + { + "epoch": 4.037769784172662, + "force_loss": 0.044015612453222275, + "step": 4490 + }, + { + "epoch": 4.046762589928058, + "grad_norm": 0.8289868831634521, + "learning_rate": 9.98294521454082e-05, + "loss": 0.0457, + "step": 4500 + }, + { + "action_loss": 0.008363536559045315, + "epoch": 4.046762589928058, + "step": 4500 + }, + { + "epoch": 4.046762589928058, + "step": 4500, + "torque_loss": 0.14328737556934357 + }, + { + "epoch": 4.046762589928058, + "force_loss": 0.010264570824801922, + "step": 4500 + }, + { + "epoch": 4.055755395683454, + "grad_norm": 1.583045482635498, + "learning_rate": 9.98271703831828e-05, + "loss": 0.0347, + "step": 4510 + }, + { + "action_loss": 0.045835670083761215, + "epoch": 4.055755395683454, + "step": 4510 + }, + { + "epoch": 4.055755395683454, + "step": 4510, + "torque_loss": 0.19038699567317963 + }, + { + "epoch": 4.055755395683454, + "force_loss": 0.032266441732645035, + "step": 4510 + }, + { + "epoch": 4.0647482014388485, + "grad_norm": 1.173993706703186, + "learning_rate": 9.982487348477865e-05, + "loss": 0.0466, + "step": 4520 + }, + { + "action_loss": 0.02522791363298893, + "epoch": 4.0647482014388485, + "step": 4520 + }, + { + "epoch": 4.0647482014388485, + "step": 4520, + "torque_loss": 0.12414098531007767 + }, + { + "epoch": 4.0647482014388485, + "force_loss": 0.02000529319047928, + "step": 4520 + }, + { + "epoch": 4.073741007194244, + "grad_norm": 0.7844947576522827, + "learning_rate": 9.982256145089347e-05, + "loss": 0.0371, + "step": 4530 + }, + { + "action_loss": 0.009618385694921017, + "epoch": 4.073741007194244, + "step": 4530 + }, + { + "epoch": 4.073741007194244, + "step": 4530, + "torque_loss": 0.10716839879751205 + }, + { + "epoch": 4.073741007194244, + "force_loss": 0.008798129856586456, + "step": 4530 + }, + { + "epoch": 4.08273381294964, + "grad_norm": 1.02362859249115, + "learning_rate": 9.982023428222962e-05, + "loss": 0.0323, + "step": 4540 + }, + { + "action_loss": 0.023549756035208702, + "epoch": 4.08273381294964, + "step": 4540 + }, + { + "epoch": 4.08273381294964, + "step": 4540, + "torque_loss": 0.19426637887954712 + }, + { + "epoch": 4.08273381294964, + "force_loss": 0.014226471073925495, + "step": 4540 + }, + { + "epoch": 4.091726618705036, + "grad_norm": 0.9992864727973938, + "learning_rate": 9.981789197949403e-05, + "loss": 0.0407, + "step": 4550 + }, + { + "action_loss": 0.02116341143846512, + "epoch": 4.091726618705036, + "step": 4550 + }, + { + "epoch": 4.091726618705036, + "step": 4550, + "torque_loss": 0.15985850989818573 + }, + { + "epoch": 4.091726618705036, + "force_loss": 0.02765163965523243, + "step": 4550 + }, + { + "epoch": 4.100719424460432, + "grad_norm": 0.8838006854057312, + "learning_rate": 9.98155345433982e-05, + "loss": 0.0352, + "step": 4560 + }, + { + "action_loss": 0.0556286983191967, + "epoch": 4.100719424460432, + "step": 4560 + }, + { + "epoch": 4.100719424460432, + "step": 4560, + "torque_loss": 0.17061519622802734 + }, + { + "epoch": 4.100719424460432, + "force_loss": 0.03908475860953331, + "step": 4560 + }, + { + "epoch": 4.109712230215828, + "grad_norm": 0.9535768628120422, + "learning_rate": 9.981316197465831e-05, + "loss": 0.0341, + "step": 4570 + }, + { + "action_loss": 0.018756555393338203, + "epoch": 4.109712230215828, + "step": 4570 + }, + { + "epoch": 4.109712230215828, + "step": 4570, + "torque_loss": 0.14983071386814117 + }, + { + "epoch": 4.109712230215828, + "force_loss": 0.018597392365336418, + "step": 4570 + }, + { + "epoch": 4.118705035971223, + "grad_norm": 0.9701385498046875, + "learning_rate": 9.981077427399504e-05, + "loss": 0.0369, + "step": 4580 + }, + { + "action_loss": 0.016903484240174294, + "epoch": 4.118705035971223, + "step": 4580 + }, + { + "epoch": 4.118705035971223, + "step": 4580, + "torque_loss": 0.13766567409038544 + }, + { + "epoch": 4.118705035971223, + "force_loss": 0.009743332862854004, + "step": 4580 + }, + { + "epoch": 4.127697841726619, + "grad_norm": 0.6447258591651917, + "learning_rate": 9.980837144213371e-05, + "loss": 0.0316, + "step": 4590 + }, + { + "action_loss": 0.01526044774800539, + "epoch": 4.127697841726619, + "step": 4590 + }, + { + "epoch": 4.127697841726619, + "step": 4590, + "torque_loss": 0.12720173597335815 + }, + { + "epoch": 4.127697841726619, + "force_loss": 0.013995117507874966, + "step": 4590 + }, + { + "epoch": 4.136690647482014, + "grad_norm": 0.5802412033081055, + "learning_rate": 9.980595347980426e-05, + "loss": 0.0311, + "step": 4600 + }, + { + "action_loss": 0.02217998541891575, + "epoch": 4.136690647482014, + "step": 4600 + }, + { + "epoch": 4.136690647482014, + "step": 4600, + "torque_loss": 0.09590903669595718 + }, + { + "epoch": 4.136690647482014, + "force_loss": 0.016717659309506416, + "step": 4600 + }, + { + "epoch": 4.14568345323741, + "grad_norm": 0.8760179281234741, + "learning_rate": 9.980352038774119e-05, + "loss": 0.0419, + "step": 4610 + }, + { + "action_loss": 0.014122466556727886, + "epoch": 4.14568345323741, + "step": 4610 + }, + { + "epoch": 4.14568345323741, + "step": 4610, + "torque_loss": 0.11563873291015625 + }, + { + "epoch": 4.14568345323741, + "force_loss": 0.007175022270530462, + "step": 4610 + }, + { + "epoch": 4.154676258992806, + "grad_norm": 0.5086389183998108, + "learning_rate": 9.98010721666836e-05, + "loss": 0.0368, + "step": 4620 + }, + { + "action_loss": 0.010292047634720802, + "epoch": 4.154676258992806, + "step": 4620 + }, + { + "epoch": 4.154676258992806, + "step": 4620, + "torque_loss": 0.14402835071086884 + }, + { + "epoch": 4.154676258992806, + "force_loss": 0.018386662006378174, + "step": 4620 + }, + { + "epoch": 4.163669064748201, + "grad_norm": 0.776138186454773, + "learning_rate": 9.979860881737523e-05, + "loss": 0.0281, + "step": 4630 + }, + { + "action_loss": 0.007989101111888885, + "epoch": 4.163669064748201, + "step": 4630 + }, + { + "epoch": 4.163669064748201, + "step": 4630, + "torque_loss": 0.12181323766708374 + }, + { + "epoch": 4.163669064748201, + "force_loss": 0.005654403939843178, + "step": 4630 + }, + { + "epoch": 4.172661870503597, + "grad_norm": 0.9099301099777222, + "learning_rate": 9.979613034056434e-05, + "loss": 0.0417, + "step": 4640 + }, + { + "action_loss": 0.012895102612674236, + "epoch": 4.172661870503597, + "step": 4640 + }, + { + "epoch": 4.172661870503597, + "step": 4640, + "torque_loss": 0.12507624924182892 + }, + { + "epoch": 4.172661870503597, + "force_loss": 0.00915098562836647, + "step": 4640 + }, + { + "epoch": 4.181654676258993, + "grad_norm": 0.9602915644645691, + "learning_rate": 9.979363673700386e-05, + "loss": 0.0369, + "step": 4650 + }, + { + "action_loss": 0.01851240172982216, + "epoch": 4.181654676258993, + "step": 4650 + }, + { + "epoch": 4.181654676258993, + "step": 4650, + "torque_loss": 0.12963305413722992 + }, + { + "epoch": 4.181654676258993, + "force_loss": 0.0167622659355402, + "step": 4650 + }, + { + "epoch": 4.190647482014389, + "grad_norm": 1.073164701461792, + "learning_rate": 9.979112800745124e-05, + "loss": 0.0441, + "step": 4660 + }, + { + "action_loss": 0.03333734720945358, + "epoch": 4.190647482014389, + "step": 4660 + }, + { + "epoch": 4.190647482014389, + "step": 4660, + "torque_loss": 0.14169570803642273 + }, + { + "epoch": 4.190647482014389, + "force_loss": 0.022027716040611267, + "step": 4660 + }, + { + "epoch": 4.1996402877697845, + "grad_norm": 0.4481082558631897, + "learning_rate": 9.978860415266861e-05, + "loss": 0.0384, + "step": 4670 + }, + { + "action_loss": 0.020381757989525795, + "epoch": 4.1996402877697845, + "step": 4670 + }, + { + "epoch": 4.1996402877697845, + "step": 4670, + "torque_loss": 0.14372999966144562 + }, + { + "epoch": 4.1996402877697845, + "force_loss": 0.015198520384728909, + "step": 4670 + }, + { + "epoch": 4.2086330935251794, + "grad_norm": 0.9679866433143616, + "learning_rate": 9.978606517342262e-05, + "loss": 0.0357, + "step": 4680 + }, + { + "action_loss": 0.01251304429024458, + "epoch": 4.2086330935251794, + "step": 4680 + }, + { + "epoch": 4.2086330935251794, + "step": 4680, + "torque_loss": 0.1499743014574051 + }, + { + "epoch": 4.2086330935251794, + "force_loss": 0.010061842389404774, + "step": 4680 + }, + { + "epoch": 4.217625899280575, + "grad_norm": 0.41633597016334534, + "learning_rate": 9.978351107048456e-05, + "loss": 0.0399, + "step": 4690 + }, + { + "action_loss": 0.01703997142612934, + "epoch": 4.217625899280575, + "step": 4690 + }, + { + "epoch": 4.217625899280575, + "step": 4690, + "torque_loss": 0.16055503487586975 + }, + { + "epoch": 4.217625899280575, + "force_loss": 0.02300129272043705, + "step": 4690 + }, + { + "epoch": 4.226618705035971, + "grad_norm": 1.6063692569732666, + "learning_rate": 9.978094184463029e-05, + "loss": 0.0459, + "step": 4700 + }, + { + "action_loss": 0.015734244138002396, + "epoch": 4.226618705035971, + "step": 4700 + }, + { + "epoch": 4.226618705035971, + "step": 4700, + "torque_loss": 0.21210801601409912 + }, + { + "epoch": 4.226618705035971, + "force_loss": 0.016246922314167023, + "step": 4700 + }, + { + "epoch": 4.235611510791367, + "grad_norm": 0.6652126908302307, + "learning_rate": 9.977835749664029e-05, + "loss": 0.0305, + "step": 4710 + }, + { + "action_loss": 0.017241256311535835, + "epoch": 4.235611510791367, + "step": 4710 + }, + { + "epoch": 4.235611510791367, + "step": 4710, + "torque_loss": 0.13138684630393982 + }, + { + "epoch": 4.235611510791367, + "force_loss": 0.009017406962811947, + "step": 4710 + }, + { + "epoch": 4.244604316546763, + "grad_norm": 0.9250365495681763, + "learning_rate": 9.97757580272996e-05, + "loss": 0.0323, + "step": 4720 + }, + { + "action_loss": 0.01591983251273632, + "epoch": 4.244604316546763, + "step": 4720 + }, + { + "epoch": 4.244604316546763, + "step": 4720, + "torque_loss": 0.15921808779239655 + }, + { + "epoch": 4.244604316546763, + "force_loss": 0.00837093684822321, + "step": 4720 + }, + { + "epoch": 4.253597122302159, + "grad_norm": 0.5056507587432861, + "learning_rate": 9.977314343739786e-05, + "loss": 0.0292, + "step": 4730 + }, + { + "action_loss": 0.032968662679195404, + "epoch": 4.253597122302159, + "step": 4730 + }, + { + "epoch": 4.253597122302159, + "step": 4730, + "torque_loss": 0.1851831078529358 + }, + { + "epoch": 4.253597122302159, + "force_loss": 0.03455134853720665, + "step": 4730 + }, + { + "epoch": 4.262589928057554, + "grad_norm": 0.6013080477714539, + "learning_rate": 9.977051372772934e-05, + "loss": 0.0392, + "step": 4740 + }, + { + "action_loss": 0.014050872065126896, + "epoch": 4.262589928057554, + "step": 4740 + }, + { + "epoch": 4.262589928057554, + "step": 4740, + "torque_loss": 0.1577189713716507 + }, + { + "epoch": 4.262589928057554, + "force_loss": 0.012093194760382175, + "step": 4740 + }, + { + "epoch": 4.2715827338129495, + "grad_norm": 1.4672819375991821, + "learning_rate": 9.976786889909286e-05, + "loss": 0.0288, + "step": 4750 + }, + { + "action_loss": 0.015345960855484009, + "epoch": 4.2715827338129495, + "step": 4750 + }, + { + "epoch": 4.2715827338129495, + "step": 4750, + "torque_loss": 0.14214837551116943 + }, + { + "epoch": 4.2715827338129495, + "force_loss": 0.012158502824604511, + "step": 4750 + }, + { + "epoch": 4.280575539568345, + "grad_norm": 0.8512645363807678, + "learning_rate": 9.976520895229185e-05, + "loss": 0.0337, + "step": 4760 + }, + { + "action_loss": 0.018188422545790672, + "epoch": 4.280575539568345, + "step": 4760 + }, + { + "epoch": 4.280575539568345, + "step": 4760, + "torque_loss": 0.13265514373779297 + }, + { + "epoch": 4.280575539568345, + "force_loss": 0.012605768628418446, + "step": 4760 + }, + { + "epoch": 4.289568345323741, + "grad_norm": 0.6212011575698853, + "learning_rate": 9.976253388813433e-05, + "loss": 0.0386, + "step": 4770 + }, + { + "action_loss": 0.014844626188278198, + "epoch": 4.289568345323741, + "step": 4770 + }, + { + "epoch": 4.289568345323741, + "step": 4770, + "torque_loss": 0.13482101261615753 + }, + { + "epoch": 4.289568345323741, + "force_loss": 0.015131277032196522, + "step": 4770 + }, + { + "epoch": 4.298561151079137, + "grad_norm": 0.7584494352340698, + "learning_rate": 9.975984370743293e-05, + "loss": 0.0381, + "step": 4780 + }, + { + "action_loss": 0.014859539456665516, + "epoch": 4.298561151079137, + "step": 4780 + }, + { + "epoch": 4.298561151079137, + "step": 4780, + "torque_loss": 0.17865495383739471 + }, + { + "epoch": 4.298561151079137, + "force_loss": 0.014067028649151325, + "step": 4780 + }, + { + "epoch": 4.307553956834532, + "grad_norm": 1.149087905883789, + "learning_rate": 9.975713841100485e-05, + "loss": 0.0468, + "step": 4790 + }, + { + "action_loss": 0.010169833898544312, + "epoch": 4.307553956834532, + "step": 4790 + }, + { + "epoch": 4.307553956834532, + "step": 4790, + "torque_loss": 0.14036309719085693 + }, + { + "epoch": 4.307553956834532, + "force_loss": 0.010213927365839481, + "step": 4790 + }, + { + "epoch": 4.316546762589928, + "grad_norm": 0.916983962059021, + "learning_rate": 9.975441799967187e-05, + "loss": 0.0324, + "step": 4800 + }, + { + "action_loss": 0.016165580600500107, + "epoch": 4.316546762589928, + "step": 4800 + }, + { + "epoch": 4.316546762589928, + "step": 4800, + "torque_loss": 0.10830014944076538 + }, + { + "epoch": 4.316546762589928, + "force_loss": 0.01386170368641615, + "step": 4800 + }, + { + "epoch": 4.325539568345324, + "grad_norm": 0.9218096137046814, + "learning_rate": 9.975168247426039e-05, + "loss": 0.0344, + "step": 4810 + }, + { + "action_loss": 0.016445549204945564, + "epoch": 4.325539568345324, + "step": 4810 + }, + { + "epoch": 4.325539568345324, + "step": 4810, + "torque_loss": 0.10059714317321777 + }, + { + "epoch": 4.325539568345324, + "force_loss": 0.012037478387355804, + "step": 4810 + }, + { + "epoch": 4.33453237410072, + "grad_norm": 1.8911771774291992, + "learning_rate": 9.974893183560139e-05, + "loss": 0.0306, + "step": 4820 + }, + { + "action_loss": 0.017487460747361183, + "epoch": 4.33453237410072, + "step": 4820 + }, + { + "epoch": 4.33453237410072, + "step": 4820, + "torque_loss": 0.1674678772687912 + }, + { + "epoch": 4.33453237410072, + "force_loss": 0.021262051537632942, + "step": 4820 + }, + { + "epoch": 4.343525179856115, + "grad_norm": 0.8325040340423584, + "learning_rate": 9.974616608453045e-05, + "loss": 0.0337, + "step": 4830 + }, + { + "action_loss": 0.01841500587761402, + "epoch": 4.343525179856115, + "step": 4830 + }, + { + "epoch": 4.343525179856115, + "step": 4830, + "torque_loss": 0.14614729583263397 + }, + { + "epoch": 4.343525179856115, + "force_loss": 0.022032881155610085, + "step": 4830 + }, + { + "epoch": 4.35251798561151, + "grad_norm": 1.0409008264541626, + "learning_rate": 9.974338522188772e-05, + "loss": 0.0332, + "step": 4840 + }, + { + "action_loss": 0.014770823530852795, + "epoch": 4.35251798561151, + "step": 4840 + }, + { + "epoch": 4.35251798561151, + "step": 4840, + "torque_loss": 0.1954604834318161 + }, + { + "epoch": 4.35251798561151, + "force_loss": 0.01183202862739563, + "step": 4840 + }, + { + "epoch": 4.361510791366906, + "grad_norm": 1.3001381158828735, + "learning_rate": 9.974058924851797e-05, + "loss": 0.0319, + "step": 4850 + }, + { + "action_loss": 0.02393859624862671, + "epoch": 4.361510791366906, + "step": 4850 + }, + { + "epoch": 4.361510791366906, + "step": 4850, + "torque_loss": 0.16679538786411285 + }, + { + "epoch": 4.361510791366906, + "force_loss": 0.02487037144601345, + "step": 4850 + }, + { + "epoch": 4.370503597122302, + "grad_norm": 1.2244654893875122, + "learning_rate": 9.973777816527051e-05, + "loss": 0.0361, + "step": 4860 + }, + { + "action_loss": 0.04007313400506973, + "epoch": 4.370503597122302, + "step": 4860 + }, + { + "epoch": 4.370503597122302, + "step": 4860, + "torque_loss": 0.1552257388830185 + }, + { + "epoch": 4.370503597122302, + "force_loss": 0.03166845440864563, + "step": 4860 + }, + { + "epoch": 4.379496402877698, + "grad_norm": 0.8373429179191589, + "learning_rate": 9.973495197299931e-05, + "loss": 0.0366, + "step": 4870 + }, + { + "action_loss": 0.007736609783023596, + "epoch": 4.379496402877698, + "step": 4870 + }, + { + "epoch": 4.379496402877698, + "step": 4870, + "torque_loss": 0.09443923085927963 + }, + { + "epoch": 4.379496402877698, + "force_loss": 0.01111941784620285, + "step": 4870 + }, + { + "epoch": 4.388489208633094, + "grad_norm": 0.5578615069389343, + "learning_rate": 9.973211067256287e-05, + "loss": 0.0338, + "step": 4880 + }, + { + "action_loss": 0.01750328578054905, + "epoch": 4.388489208633094, + "step": 4880 + }, + { + "epoch": 4.388489208633094, + "step": 4880, + "torque_loss": 0.18886463344097137 + }, + { + "epoch": 4.388489208633094, + "force_loss": 0.016042115166783333, + "step": 4880 + }, + { + "epoch": 4.39748201438849, + "grad_norm": 1.8016473054885864, + "learning_rate": 9.97292542648243e-05, + "loss": 0.0381, + "step": 4890 + }, + { + "action_loss": 0.010596145875751972, + "epoch": 4.39748201438849, + "step": 4890 + }, + { + "epoch": 4.39748201438849, + "step": 4890, + "torque_loss": 0.10433429479598999 + }, + { + "epoch": 4.39748201438849, + "force_loss": 0.006710140500217676, + "step": 4890 + }, + { + "epoch": 4.406474820143885, + "grad_norm": 1.21533203125, + "learning_rate": 9.972638275065131e-05, + "loss": 0.0311, + "step": 4900 + }, + { + "action_loss": 0.03270336613059044, + "epoch": 4.406474820143885, + "step": 4900 + }, + { + "epoch": 4.406474820143885, + "step": 4900, + "torque_loss": 0.1301143318414688 + }, + { + "epoch": 4.406474820143885, + "force_loss": 0.0368109829723835, + "step": 4900 + }, + { + "epoch": 4.41546762589928, + "grad_norm": 0.7246043682098389, + "learning_rate": 9.972349613091621e-05, + "loss": 0.0358, + "step": 4910 + }, + { + "action_loss": 0.01927509531378746, + "epoch": 4.41546762589928, + "step": 4910 + }, + { + "epoch": 4.41546762589928, + "step": 4910, + "torque_loss": 0.1635270118713379 + }, + { + "epoch": 4.41546762589928, + "force_loss": 0.011132900603115559, + "step": 4910 + }, + { + "epoch": 4.424460431654676, + "grad_norm": 1.5251998901367188, + "learning_rate": 9.972059440649584e-05, + "loss": 0.0402, + "step": 4920 + }, + { + "action_loss": 0.01019121054559946, + "epoch": 4.424460431654676, + "step": 4920 + }, + { + "epoch": 4.424460431654676, + "step": 4920, + "torque_loss": 0.12546344101428986 + }, + { + "epoch": 4.424460431654676, + "force_loss": 0.008014569990336895, + "step": 4920 + }, + { + "epoch": 4.433453237410072, + "grad_norm": 0.8194612264633179, + "learning_rate": 9.971767757827168e-05, + "loss": 0.032, + "step": 4930 + }, + { + "action_loss": 0.024361327290534973, + "epoch": 4.433453237410072, + "step": 4930 + }, + { + "epoch": 4.433453237410072, + "step": 4930, + "torque_loss": 0.11441129446029663 + }, + { + "epoch": 4.433453237410072, + "force_loss": 0.022045262157917023, + "step": 4930 + }, + { + "epoch": 4.442446043165468, + "grad_norm": 1.2900441884994507, + "learning_rate": 9.971474564712982e-05, + "loss": 0.0359, + "step": 4940 + }, + { + "action_loss": 0.016533298417925835, + "epoch": 4.442446043165468, + "step": 4940 + }, + { + "epoch": 4.442446043165468, + "step": 4940, + "torque_loss": 0.15621978044509888 + }, + { + "epoch": 4.442446043165468, + "force_loss": 0.011242390610277653, + "step": 4940 + }, + { + "epoch": 4.451438848920863, + "grad_norm": 0.8452239632606506, + "learning_rate": 9.971179861396084e-05, + "loss": 0.0329, + "step": 4950 + }, + { + "action_loss": 0.024112576618790627, + "epoch": 4.451438848920863, + "step": 4950 + }, + { + "epoch": 4.451438848920863, + "step": 4950, + "torque_loss": 0.12866103649139404 + }, + { + "epoch": 4.451438848920863, + "force_loss": 0.018411686643958092, + "step": 4950 + }, + { + "epoch": 4.460431654676259, + "grad_norm": 0.6208613514900208, + "learning_rate": 9.970883647966003e-05, + "loss": 0.0326, + "step": 4960 + }, + { + "action_loss": 0.036774277687072754, + "epoch": 4.460431654676259, + "step": 4960 + }, + { + "epoch": 4.460431654676259, + "step": 4960, + "torque_loss": 0.23626530170440674 + }, + { + "epoch": 4.460431654676259, + "force_loss": 0.01701635681092739, + "step": 4960 + }, + { + "epoch": 4.469424460431655, + "grad_norm": 0.982664167881012, + "learning_rate": 9.970585924512717e-05, + "loss": 0.0345, + "step": 4970 + }, + { + "action_loss": 0.016286931931972504, + "epoch": 4.469424460431655, + "step": 4970 + }, + { + "epoch": 4.469424460431655, + "step": 4970, + "torque_loss": 0.1323186755180359 + }, + { + "epoch": 4.469424460431655, + "force_loss": 0.017630256712436676, + "step": 4970 + }, + { + "epoch": 4.4784172661870505, + "grad_norm": 0.6811734437942505, + "learning_rate": 9.970286691126669e-05, + "loss": 0.033, + "step": 4980 + }, + { + "action_loss": 0.0160505548119545, + "epoch": 4.4784172661870505, + "step": 4980 + }, + { + "epoch": 4.4784172661870505, + "step": 4980, + "torque_loss": 0.12203121185302734 + }, + { + "epoch": 4.4784172661870505, + "force_loss": 0.007708249147981405, + "step": 4980 + }, + { + "epoch": 4.487410071942446, + "grad_norm": 0.8383340239524841, + "learning_rate": 9.969985947898756e-05, + "loss": 0.0386, + "step": 4990 + }, + { + "action_loss": 0.03993428871035576, + "epoch": 4.487410071942446, + "step": 4990 + }, + { + "epoch": 4.487410071942446, + "step": 4990, + "torque_loss": 0.20964229106903076 + }, + { + "epoch": 4.487410071942446, + "force_loss": 0.014576214365661144, + "step": 4990 + }, + { + "epoch": 4.496402877697841, + "grad_norm": 0.8964179754257202, + "learning_rate": 9.969683694920337e-05, + "loss": 0.0396, + "step": 5000 + }, + { + "action_loss": 0.03832182660698891, + "epoch": 4.496402877697841, + "step": 5000 + }, + { + "epoch": 4.496402877697841, + "step": 5000, + "torque_loss": 0.1555413454771042 + }, + { + "epoch": 4.496402877697841, + "force_loss": 0.02865142934024334, + "step": 5000 + }, + { + "epoch": 4.505395683453237, + "grad_norm": 0.6202208399772644, + "learning_rate": 9.969379932283228e-05, + "loss": 0.0387, + "step": 5010 + }, + { + "action_loss": 0.012801670469343662, + "epoch": 4.505395683453237, + "step": 5010 + }, + { + "epoch": 4.505395683453237, + "step": 5010, + "torque_loss": 0.15284575521945953 + }, + { + "epoch": 4.505395683453237, + "force_loss": 0.01691793091595173, + "step": 5010 + }, + { + "epoch": 4.514388489208633, + "grad_norm": 0.9778483510017395, + "learning_rate": 9.969074660079704e-05, + "loss": 0.0351, + "step": 5020 + }, + { + "action_loss": 0.045336682349443436, + "epoch": 4.514388489208633, + "step": 5020 + }, + { + "epoch": 4.514388489208633, + "step": 5020, + "torque_loss": 0.21425507962703705 + }, + { + "epoch": 4.514388489208633, + "force_loss": 0.04962506890296936, + "step": 5020 + }, + { + "epoch": 4.523381294964029, + "grad_norm": 1.2425585985183716, + "learning_rate": 9.968767878402501e-05, + "loss": 0.0371, + "step": 5030 + }, + { + "action_loss": 0.011635159142315388, + "epoch": 4.523381294964029, + "step": 5030 + }, + { + "epoch": 4.523381294964029, + "step": 5030, + "torque_loss": 0.11857563257217407 + }, + { + "epoch": 4.523381294964029, + "force_loss": 0.012137492187321186, + "step": 5030 + }, + { + "epoch": 4.532374100719425, + "grad_norm": 0.9975318312644958, + "learning_rate": 9.968459587344808e-05, + "loss": 0.0305, + "step": 5040 + }, + { + "action_loss": 0.012168730609118938, + "epoch": 4.532374100719425, + "step": 5040 + }, + { + "epoch": 4.532374100719425, + "step": 5040, + "torque_loss": 0.1289518177509308 + }, + { + "epoch": 4.532374100719425, + "force_loss": 0.007105143740773201, + "step": 5040 + }, + { + "epoch": 4.5413669064748206, + "grad_norm": 0.7822269201278687, + "learning_rate": 9.968149787000278e-05, + "loss": 0.0339, + "step": 5050 + }, + { + "action_loss": 0.010288389399647713, + "epoch": 4.5413669064748206, + "step": 5050 + }, + { + "epoch": 4.5413669064748206, + "step": 5050, + "torque_loss": 0.16448652744293213 + }, + { + "epoch": 4.5413669064748206, + "force_loss": 0.007236870471388102, + "step": 5050 + }, + { + "epoch": 4.5503597122302155, + "grad_norm": 0.6134659647941589, + "learning_rate": 9.967838477463018e-05, + "loss": 0.0282, + "step": 5060 + }, + { + "action_loss": 0.008654690347611904, + "epoch": 4.5503597122302155, + "step": 5060 + }, + { + "epoch": 4.5503597122302155, + "step": 5060, + "torque_loss": 0.1068754494190216 + }, + { + "epoch": 4.5503597122302155, + "force_loss": 0.00835444126278162, + "step": 5060 + }, + { + "epoch": 4.559352517985611, + "grad_norm": 1.0853147506713867, + "learning_rate": 9.967525658827597e-05, + "loss": 0.0328, + "step": 5070 + }, + { + "action_loss": 0.017858320847153664, + "epoch": 4.559352517985611, + "step": 5070 + }, + { + "epoch": 4.559352517985611, + "step": 5070, + "torque_loss": 0.10284539312124252 + }, + { + "epoch": 4.559352517985611, + "force_loss": 0.019653117284178734, + "step": 5070 + }, + { + "epoch": 4.568345323741007, + "grad_norm": 1.014072299003601, + "learning_rate": 9.967211331189042e-05, + "loss": 0.0349, + "step": 5080 + }, + { + "action_loss": 0.01476411521434784, + "epoch": 4.568345323741007, + "step": 5080 + }, + { + "epoch": 4.568345323741007, + "step": 5080, + "torque_loss": 0.12568867206573486 + }, + { + "epoch": 4.568345323741007, + "force_loss": 0.010045386850833893, + "step": 5080 + }, + { + "epoch": 4.577338129496403, + "grad_norm": 1.1342768669128418, + "learning_rate": 9.966895494642834e-05, + "loss": 0.0357, + "step": 5090 + }, + { + "action_loss": 0.015453745611011982, + "epoch": 4.577338129496403, + "step": 5090 + }, + { + "epoch": 4.577338129496403, + "step": 5090, + "torque_loss": 0.1701519638299942 + }, + { + "epoch": 4.577338129496403, + "force_loss": 0.009401858784258366, + "step": 5090 + }, + { + "epoch": 4.586330935251799, + "grad_norm": 0.6681179404258728, + "learning_rate": 9.96657814928492e-05, + "loss": 0.0402, + "step": 5100 + }, + { + "action_loss": 0.01565312221646309, + "epoch": 4.586330935251799, + "step": 5100 + }, + { + "epoch": 4.586330935251799, + "step": 5100, + "torque_loss": 0.13380496203899384 + }, + { + "epoch": 4.586330935251799, + "force_loss": 0.011605246923863888, + "step": 5100 + }, + { + "epoch": 4.595323741007194, + "grad_norm": 0.6765364408493042, + "learning_rate": 9.966259295211697e-05, + "loss": 0.0325, + "step": 5110 + }, + { + "action_loss": 0.01968207024037838, + "epoch": 4.595323741007194, + "step": 5110 + }, + { + "epoch": 4.595323741007194, + "step": 5110, + "torque_loss": 0.1369035392999649 + }, + { + "epoch": 4.595323741007194, + "force_loss": 0.014389949850738049, + "step": 5110 + }, + { + "epoch": 4.60431654676259, + "grad_norm": 0.7684283256530762, + "learning_rate": 9.965938932520028e-05, + "loss": 0.036, + "step": 5120 + }, + { + "action_loss": 0.023936502635478973, + "epoch": 4.60431654676259, + "step": 5120 + }, + { + "epoch": 4.60431654676259, + "step": 5120, + "torque_loss": 0.12168872356414795 + }, + { + "epoch": 4.60431654676259, + "force_loss": 0.01676768623292446, + "step": 5120 + }, + { + "epoch": 4.613309352517986, + "grad_norm": 0.764106273651123, + "learning_rate": 9.965617061307229e-05, + "loss": 0.0314, + "step": 5130 + }, + { + "action_loss": 0.02953227050602436, + "epoch": 4.613309352517986, + "step": 5130 + }, + { + "epoch": 4.613309352517986, + "step": 5130, + "torque_loss": 0.1810380071401596 + }, + { + "epoch": 4.613309352517986, + "force_loss": 0.024404026567935944, + "step": 5130 + }, + { + "epoch": 4.622302158273381, + "grad_norm": 0.874100387096405, + "learning_rate": 9.965293681671077e-05, + "loss": 0.0401, + "step": 5140 + }, + { + "action_loss": 0.01979588158428669, + "epoch": 4.622302158273381, + "step": 5140 + }, + { + "epoch": 4.622302158273381, + "step": 5140, + "torque_loss": 0.12462959438562393 + }, + { + "epoch": 4.622302158273381, + "force_loss": 0.016895165666937828, + "step": 5140 + }, + { + "epoch": 4.631294964028777, + "grad_norm": 0.6007933020591736, + "learning_rate": 9.964968793709804e-05, + "loss": 0.0313, + "step": 5150 + }, + { + "action_loss": 0.012876530177891254, + "epoch": 4.631294964028777, + "step": 5150 + }, + { + "epoch": 4.631294964028777, + "step": 5150, + "torque_loss": 0.12673906981945038 + }, + { + "epoch": 4.631294964028777, + "force_loss": 0.009997191838920116, + "step": 5150 + }, + { + "epoch": 4.640287769784173, + "grad_norm": 0.6168749332427979, + "learning_rate": 9.964642397522106e-05, + "loss": 0.0355, + "step": 5160 + }, + { + "action_loss": 0.015389413572847843, + "epoch": 4.640287769784173, + "step": 5160 + }, + { + "epoch": 4.640287769784173, + "step": 5160, + "torque_loss": 0.1390451341867447 + }, + { + "epoch": 4.640287769784173, + "force_loss": 0.01586478017270565, + "step": 5160 + }, + { + "epoch": 4.649280575539568, + "grad_norm": 0.7872642278671265, + "learning_rate": 9.96431449320713e-05, + "loss": 0.0325, + "step": 5170 + }, + { + "action_loss": 0.025892110541462898, + "epoch": 4.649280575539568, + "step": 5170 + }, + { + "epoch": 4.649280575539568, + "step": 5170, + "torque_loss": 0.1645011156797409 + }, + { + "epoch": 4.649280575539568, + "force_loss": 0.026243895292282104, + "step": 5170 + }, + { + "epoch": 4.658273381294964, + "grad_norm": 0.8102500438690186, + "learning_rate": 9.963985080864486e-05, + "loss": 0.0398, + "step": 5180 + }, + { + "action_loss": 0.014946692623198032, + "epoch": 4.658273381294964, + "step": 5180 + }, + { + "epoch": 4.658273381294964, + "step": 5180, + "torque_loss": 0.1599656194448471 + }, + { + "epoch": 4.658273381294964, + "force_loss": 0.018800029531121254, + "step": 5180 + }, + { + "epoch": 4.66726618705036, + "grad_norm": 0.8238599300384521, + "learning_rate": 9.96365416059424e-05, + "loss": 0.0333, + "step": 5190 + }, + { + "action_loss": 0.022762248292565346, + "epoch": 4.66726618705036, + "step": 5190 + }, + { + "epoch": 4.66726618705036, + "step": 5190, + "torque_loss": 0.13875947892665863 + }, + { + "epoch": 4.66726618705036, + "force_loss": 0.01594657450914383, + "step": 5190 + }, + { + "epoch": 4.676258992805756, + "grad_norm": 1.2515461444854736, + "learning_rate": 9.963321732496919e-05, + "loss": 0.0351, + "step": 5200 + }, + { + "action_loss": 0.02745429426431656, + "epoch": 4.676258992805756, + "step": 5200 + }, + { + "epoch": 4.676258992805756, + "step": 5200, + "torque_loss": 0.1591407209634781 + }, + { + "epoch": 4.676258992805756, + "force_loss": 0.016723915934562683, + "step": 5200 + }, + { + "epoch": 4.685251798561151, + "grad_norm": 0.5526142716407776, + "learning_rate": 9.962987796673506e-05, + "loss": 0.0363, + "step": 5210 + }, + { + "action_loss": 0.01568163000047207, + "epoch": 4.685251798561151, + "step": 5210 + }, + { + "epoch": 4.685251798561151, + "step": 5210, + "torque_loss": 0.13518263399600983 + }, + { + "epoch": 4.685251798561151, + "force_loss": 0.010564791969954967, + "step": 5210 + }, + { + "epoch": 4.694244604316546, + "grad_norm": 1.299788236618042, + "learning_rate": 9.962652353225438e-05, + "loss": 0.0301, + "step": 5220 + }, + { + "action_loss": 0.015987521037459373, + "epoch": 4.694244604316546, + "step": 5220 + }, + { + "epoch": 4.694244604316546, + "step": 5220, + "torque_loss": 0.12131834030151367 + }, + { + "epoch": 4.694244604316546, + "force_loss": 0.012146626599133015, + "step": 5220 + }, + { + "epoch": 4.703237410071942, + "grad_norm": 1.3127905130386353, + "learning_rate": 9.962315402254619e-05, + "loss": 0.0356, + "step": 5230 + }, + { + "action_loss": 0.01628502644598484, + "epoch": 4.703237410071942, + "step": 5230 + }, + { + "epoch": 4.703237410071942, + "step": 5230, + "torque_loss": 0.15971876680850983 + }, + { + "epoch": 4.703237410071942, + "force_loss": 0.00842433050274849, + "step": 5230 + }, + { + "epoch": 4.712230215827338, + "grad_norm": 0.564807653427124, + "learning_rate": 9.9619769438634e-05, + "loss": 0.0334, + "step": 5240 + }, + { + "action_loss": 0.023909149691462517, + "epoch": 4.712230215827338, + "step": 5240 + }, + { + "epoch": 4.712230215827338, + "step": 5240, + "torque_loss": 0.15270943939685822 + }, + { + "epoch": 4.712230215827338, + "force_loss": 0.014199353754520416, + "step": 5240 + }, + { + "epoch": 4.721223021582734, + "grad_norm": 0.8288257122039795, + "learning_rate": 9.9616369781546e-05, + "loss": 0.0296, + "step": 5250 + }, + { + "action_loss": 0.018591200932860374, + "epoch": 4.721223021582734, + "step": 5250 + }, + { + "epoch": 4.721223021582734, + "step": 5250, + "torque_loss": 0.15318669378757477 + }, + { + "epoch": 4.721223021582734, + "force_loss": 0.015463444404304028, + "step": 5250 + }, + { + "epoch": 4.73021582733813, + "grad_norm": 0.5895083546638489, + "learning_rate": 9.961295505231491e-05, + "loss": 0.0348, + "step": 5260 + }, + { + "action_loss": 0.023209886625409126, + "epoch": 4.73021582733813, + "step": 5260 + }, + { + "epoch": 4.73021582733813, + "step": 5260, + "torque_loss": 0.13728447258472443 + }, + { + "epoch": 4.73021582733813, + "force_loss": 0.019676925614476204, + "step": 5260 + }, + { + "epoch": 4.739208633093525, + "grad_norm": 1.345011591911316, + "learning_rate": 9.960952525197804e-05, + "loss": 0.0319, + "step": 5270 + }, + { + "action_loss": 0.005507905501872301, + "epoch": 4.739208633093525, + "step": 5270 + }, + { + "epoch": 4.739208633093525, + "step": 5270, + "torque_loss": 0.13759738206863403 + }, + { + "epoch": 4.739208633093525, + "force_loss": 0.006951266899704933, + "step": 5270 + }, + { + "epoch": 4.748201438848921, + "grad_norm": 0.7037013173103333, + "learning_rate": 9.960608038157724e-05, + "loss": 0.0262, + "step": 5280 + }, + { + "action_loss": 0.01759648509323597, + "epoch": 4.748201438848921, + "step": 5280 + }, + { + "epoch": 4.748201438848921, + "step": 5280, + "torque_loss": 0.12934188544750214 + }, + { + "epoch": 4.748201438848921, + "force_loss": 0.021389560773968697, + "step": 5280 + }, + { + "epoch": 4.7571942446043165, + "grad_norm": 1.2372910976409912, + "learning_rate": 9.960262044215901e-05, + "loss": 0.0297, + "step": 5290 + }, + { + "action_loss": 0.027033813297748566, + "epoch": 4.7571942446043165, + "step": 5290 + }, + { + "epoch": 4.7571942446043165, + "step": 5290, + "torque_loss": 0.22342686355113983 + }, + { + "epoch": 4.7571942446043165, + "force_loss": 0.03127235919237137, + "step": 5290 + }, + { + "epoch": 4.766187050359712, + "grad_norm": 0.9708763360977173, + "learning_rate": 9.959914543477435e-05, + "loss": 0.0408, + "step": 5300 + }, + { + "action_loss": 0.053166795521974564, + "epoch": 4.766187050359712, + "step": 5300 + }, + { + "epoch": 4.766187050359712, + "step": 5300, + "torque_loss": 0.18424074351787567 + }, + { + "epoch": 4.766187050359712, + "force_loss": 0.06984049826860428, + "step": 5300 + }, + { + "epoch": 4.775179856115108, + "grad_norm": 1.3754055500030518, + "learning_rate": 9.959565536047892e-05, + "loss": 0.0418, + "step": 5310 + }, + { + "action_loss": 0.00839131698012352, + "epoch": 4.775179856115108, + "step": 5310 + }, + { + "epoch": 4.775179856115108, + "step": 5310, + "torque_loss": 0.10514402389526367 + }, + { + "epoch": 4.775179856115108, + "force_loss": 0.0082511892542243, + "step": 5310 + }, + { + "epoch": 4.784172661870503, + "grad_norm": 0.5044088363647461, + "learning_rate": 9.959215022033288e-05, + "loss": 0.0321, + "step": 5320 + }, + { + "action_loss": 0.02304108999669552, + "epoch": 4.784172661870503, + "step": 5320 + }, + { + "epoch": 4.784172661870503, + "step": 5320, + "torque_loss": 0.18510663509368896 + }, + { + "epoch": 4.784172661870503, + "force_loss": 0.021340645849704742, + "step": 5320 + }, + { + "epoch": 4.793165467625899, + "grad_norm": 0.6199601292610168, + "learning_rate": 9.9588630015401e-05, + "loss": 0.0359, + "step": 5330 + }, + { + "action_loss": 0.016309387981891632, + "epoch": 4.793165467625899, + "step": 5330 + }, + { + "epoch": 4.793165467625899, + "step": 5330, + "torque_loss": 0.1842377781867981 + }, + { + "epoch": 4.793165467625899, + "force_loss": 0.017711469903588295, + "step": 5330 + }, + { + "epoch": 4.802158273381295, + "grad_norm": 0.6002806425094604, + "learning_rate": 9.958509474675264e-05, + "loss": 0.033, + "step": 5340 + }, + { + "action_loss": 0.013878691010177135, + "epoch": 4.802158273381295, + "step": 5340 + }, + { + "epoch": 4.802158273381295, + "step": 5340, + "torque_loss": 0.15046750009059906 + }, + { + "epoch": 4.802158273381295, + "force_loss": 0.011715096421539783, + "step": 5340 + }, + { + "epoch": 4.811151079136691, + "grad_norm": 0.9952367544174194, + "learning_rate": 9.958154441546171e-05, + "loss": 0.0397, + "step": 5350 + }, + { + "action_loss": 0.034337714314460754, + "epoch": 4.811151079136691, + "step": 5350 + }, + { + "epoch": 4.811151079136691, + "step": 5350, + "torque_loss": 0.161305770277977 + }, + { + "epoch": 4.811151079136691, + "force_loss": 0.027293890714645386, + "step": 5350 + }, + { + "epoch": 4.820143884892087, + "grad_norm": 1.044042706489563, + "learning_rate": 9.957797902260673e-05, + "loss": 0.0374, + "step": 5360 + }, + { + "action_loss": 0.011409183032810688, + "epoch": 4.820143884892087, + "step": 5360 + }, + { + "epoch": 4.820143884892087, + "step": 5360, + "torque_loss": 0.13266690075397491 + }, + { + "epoch": 4.820143884892087, + "force_loss": 0.007539886515587568, + "step": 5360 + }, + { + "epoch": 4.829136690647482, + "grad_norm": 0.7261578440666199, + "learning_rate": 9.957439856927073e-05, + "loss": 0.0314, + "step": 5370 + }, + { + "action_loss": 0.017129620537161827, + "epoch": 4.829136690647482, + "step": 5370 + }, + { + "epoch": 4.829136690647482, + "step": 5370, + "torque_loss": 0.11524128913879395 + }, + { + "epoch": 4.829136690647482, + "force_loss": 0.017683295533061028, + "step": 5370 + }, + { + "epoch": 4.838129496402877, + "grad_norm": 1.042865514755249, + "learning_rate": 9.957080305654139e-05, + "loss": 0.0392, + "step": 5380 + }, + { + "action_loss": 0.017216136679053307, + "epoch": 4.838129496402877, + "step": 5380 + }, + { + "epoch": 4.838129496402877, + "step": 5380, + "torque_loss": 0.16361086070537567 + }, + { + "epoch": 4.838129496402877, + "force_loss": 0.013957299292087555, + "step": 5380 + }, + { + "epoch": 4.847122302158273, + "grad_norm": 1.5993086099624634, + "learning_rate": 9.956719248551092e-05, + "loss": 0.0388, + "step": 5390 + }, + { + "action_loss": 0.014758334495127201, + "epoch": 4.847122302158273, + "step": 5390 + }, + { + "epoch": 4.847122302158273, + "step": 5390, + "torque_loss": 0.1057029739022255 + }, + { + "epoch": 4.847122302158273, + "force_loss": 0.01089871022850275, + "step": 5390 + }, + { + "epoch": 4.856115107913669, + "grad_norm": 0.75738924741745, + "learning_rate": 9.956356685727612e-05, + "loss": 0.0362, + "step": 5400 + }, + { + "action_loss": 0.010722232051193714, + "epoch": 4.856115107913669, + "step": 5400 + }, + { + "epoch": 4.856115107913669, + "step": 5400, + "torque_loss": 0.12964071333408356 + }, + { + "epoch": 4.856115107913669, + "force_loss": 0.009252565912902355, + "step": 5400 + }, + { + "epoch": 4.865107913669065, + "grad_norm": 0.45988819003105164, + "learning_rate": 9.955992617293836e-05, + "loss": 0.0329, + "step": 5410 + }, + { + "action_loss": 0.01255980134010315, + "epoch": 4.865107913669065, + "step": 5410 + }, + { + "epoch": 4.865107913669065, + "step": 5410, + "torque_loss": 0.17839623987674713 + }, + { + "epoch": 4.865107913669065, + "force_loss": 0.011090818792581558, + "step": 5410 + }, + { + "epoch": 4.874100719424461, + "grad_norm": 0.3930070996284485, + "learning_rate": 9.955627043360358e-05, + "loss": 0.0355, + "step": 5420 + }, + { + "action_loss": 0.01895631104707718, + "epoch": 4.874100719424461, + "step": 5420 + }, + { + "epoch": 4.874100719424461, + "step": 5420, + "torque_loss": 0.17519693076610565 + }, + { + "epoch": 4.874100719424461, + "force_loss": 0.017761051654815674, + "step": 5420 + }, + { + "epoch": 4.883093525179856, + "grad_norm": 1.7386761903762817, + "learning_rate": 9.955259964038231e-05, + "loss": 0.0335, + "step": 5430 + }, + { + "action_loss": 0.01452229917049408, + "epoch": 4.883093525179856, + "step": 5430 + }, + { + "epoch": 4.883093525179856, + "step": 5430, + "torque_loss": 0.15585090219974518 + }, + { + "epoch": 4.883093525179856, + "force_loss": 0.016538606956601143, + "step": 5430 + }, + { + "epoch": 4.892086330935252, + "grad_norm": 1.6594011783599854, + "learning_rate": 9.954891379438962e-05, + "loss": 0.0363, + "step": 5440 + }, + { + "action_loss": 0.018527643755078316, + "epoch": 4.892086330935252, + "step": 5440 + }, + { + "epoch": 4.892086330935252, + "step": 5440, + "torque_loss": 0.16896848380565643 + }, + { + "epoch": 4.892086330935252, + "force_loss": 0.022833295166492462, + "step": 5440 + }, + { + "epoch": 4.901079136690647, + "grad_norm": 1.4442071914672852, + "learning_rate": 9.954521289674519e-05, + "loss": 0.0374, + "step": 5450 + }, + { + "action_loss": 0.015118967741727829, + "epoch": 4.901079136690647, + "step": 5450 + }, + { + "epoch": 4.901079136690647, + "step": 5450, + "torque_loss": 0.11366186290979385 + }, + { + "epoch": 4.901079136690647, + "force_loss": 0.013646438717842102, + "step": 5450 + }, + { + "epoch": 4.910071942446043, + "grad_norm": 0.9077381491661072, + "learning_rate": 9.954149694857325e-05, + "loss": 0.0344, + "step": 5460 + }, + { + "action_loss": 0.014302697964012623, + "epoch": 4.910071942446043, + "step": 5460 + }, + { + "epoch": 4.910071942446043, + "step": 5460, + "torque_loss": 0.17495770752429962 + }, + { + "epoch": 4.910071942446043, + "force_loss": 0.018926559016108513, + "step": 5460 + }, + { + "epoch": 4.919064748201439, + "grad_norm": 0.5356278419494629, + "learning_rate": 9.953776595100258e-05, + "loss": 0.0386, + "step": 5470 + }, + { + "action_loss": 0.016110757365822792, + "epoch": 4.919064748201439, + "step": 5470 + }, + { + "epoch": 4.919064748201439, + "step": 5470, + "torque_loss": 0.17800450325012207 + }, + { + "epoch": 4.919064748201439, + "force_loss": 0.015125256963074207, + "step": 5470 + }, + { + "epoch": 4.928057553956835, + "grad_norm": 1.376873254776001, + "learning_rate": 9.95340199051666e-05, + "loss": 0.0282, + "step": 5480 + }, + { + "action_loss": 0.011282348074018955, + "epoch": 4.928057553956835, + "step": 5480 + }, + { + "epoch": 4.928057553956835, + "step": 5480, + "torque_loss": 0.14414973556995392 + }, + { + "epoch": 4.928057553956835, + "force_loss": 0.007896043360233307, + "step": 5480 + }, + { + "epoch": 4.93705035971223, + "grad_norm": 0.8240759968757629, + "learning_rate": 9.953025881220325e-05, + "loss": 0.0294, + "step": 5490 + }, + { + "action_loss": 0.01085557509213686, + "epoch": 4.93705035971223, + "step": 5490 + }, + { + "epoch": 4.93705035971223, + "step": 5490, + "torque_loss": 0.13415758311748505 + }, + { + "epoch": 4.93705035971223, + "force_loss": 0.009497441351413727, + "step": 5490 + }, + { + "epoch": 4.946043165467626, + "grad_norm": 0.7632565498352051, + "learning_rate": 9.952648267325504e-05, + "loss": 0.0299, + "step": 5500 + }, + { + "action_loss": 0.023374313488602638, + "epoch": 4.946043165467626, + "step": 5500 + }, + { + "epoch": 4.946043165467626, + "step": 5500, + "torque_loss": 0.14962723851203918 + }, + { + "epoch": 4.946043165467626, + "force_loss": 0.025252118706703186, + "step": 5500 + }, + { + "epoch": 4.955035971223022, + "grad_norm": 0.9009926319122314, + "learning_rate": 9.952269148946905e-05, + "loss": 0.0324, + "step": 5510 + }, + { + "action_loss": 0.022167472168803215, + "epoch": 4.955035971223022, + "step": 5510 + }, + { + "epoch": 4.955035971223022, + "step": 5510, + "torque_loss": 0.1405017226934433 + }, + { + "epoch": 4.955035971223022, + "force_loss": 0.03133165463805199, + "step": 5510 + }, + { + "epoch": 4.9640287769784175, + "grad_norm": 0.883463978767395, + "learning_rate": 9.951888526199697e-05, + "loss": 0.0337, + "step": 5520 + }, + { + "action_loss": 0.0214130487293005, + "epoch": 4.9640287769784175, + "step": 5520 + }, + { + "epoch": 4.9640287769784175, + "step": 5520, + "torque_loss": 0.14747439324855804 + }, + { + "epoch": 4.9640287769784175, + "force_loss": 0.020815743133425713, + "step": 5520 + }, + { + "epoch": 4.9730215827338125, + "grad_norm": 1.318161129951477, + "learning_rate": 9.951506399199501e-05, + "loss": 0.0347, + "step": 5530 + }, + { + "action_loss": 0.013590328395366669, + "epoch": 4.9730215827338125, + "step": 5530 + }, + { + "epoch": 4.9730215827338125, + "step": 5530, + "torque_loss": 0.12350980192422867 + }, + { + "epoch": 4.9730215827338125, + "force_loss": 0.009893982671201229, + "step": 5530 + }, + { + "epoch": 4.982014388489208, + "grad_norm": 1.0562297105789185, + "learning_rate": 9.951122768062399e-05, + "loss": 0.0323, + "step": 5540 + }, + { + "action_loss": 0.006967715919017792, + "epoch": 4.982014388489208, + "step": 5540 + }, + { + "epoch": 4.982014388489208, + "step": 5540, + "torque_loss": 0.12151825428009033 + }, + { + "epoch": 4.982014388489208, + "force_loss": 0.0068406532518565655, + "step": 5540 + }, + { + "epoch": 4.991007194244604, + "grad_norm": 0.664605438709259, + "learning_rate": 9.950737632904927e-05, + "loss": 0.03, + "step": 5550 + }, + { + "action_loss": 0.014268617145717144, + "epoch": 4.991007194244604, + "step": 5550 + }, + { + "epoch": 4.991007194244604, + "step": 5550, + "torque_loss": 0.12223458290100098 + }, + { + "epoch": 4.991007194244604, + "force_loss": 0.010322910733520985, + "step": 5550 + }, + { + "epoch": 5.0, + "grad_norm": 0.986358642578125, + "learning_rate": 9.950350993844077e-05, + "loss": 0.0333, + "step": 5560 + }, + { + "action_loss": 0.02257212996482849, + "epoch": 5.0, + "step": 5560 + }, + { + "epoch": 5.0, + "step": 5560, + "torque_loss": 0.15743350982666016 + }, + { + "epoch": 5.0, + "force_loss": 0.013492533005774021, + "step": 5560 + }, + { + "epoch": 5.008992805755396, + "grad_norm": 0.6122710704803467, + "learning_rate": 9.949962850997303e-05, + "loss": 0.0362, + "step": 5570 + }, + { + "action_loss": 0.017204903066158295, + "epoch": 5.008992805755396, + "step": 5570 + }, + { + "epoch": 5.008992805755396, + "step": 5570, + "torque_loss": 0.1751583367586136 + }, + { + "epoch": 5.008992805755396, + "force_loss": 0.010946311056613922, + "step": 5570 + }, + { + "epoch": 5.017985611510792, + "grad_norm": 1.5294653177261353, + "learning_rate": 9.949573204482512e-05, + "loss": 0.0296, + "step": 5580 + }, + { + "action_loss": 0.016943540424108505, + "epoch": 5.017985611510792, + "step": 5580 + }, + { + "epoch": 5.017985611510792, + "step": 5580, + "torque_loss": 0.11341521888971329 + }, + { + "epoch": 5.017985611510792, + "force_loss": 0.010504749603569508, + "step": 5580 + }, + { + "epoch": 5.026978417266187, + "grad_norm": 1.120571255683899, + "learning_rate": 9.949182054418064e-05, + "loss": 0.0327, + "step": 5590 + }, + { + "action_loss": 0.02788245677947998, + "epoch": 5.026978417266187, + "step": 5590 + }, + { + "epoch": 5.026978417266187, + "step": 5590, + "torque_loss": 0.23911577463150024 + }, + { + "epoch": 5.026978417266187, + "force_loss": 0.030254175886511803, + "step": 5590 + }, + { + "epoch": 5.0359712230215825, + "grad_norm": 1.5035184621810913, + "learning_rate": 9.948789400922787e-05, + "loss": 0.0367, + "step": 5600 + }, + { + "action_loss": 0.024786515161395073, + "epoch": 5.0359712230215825, + "step": 5600 + }, + { + "epoch": 5.0359712230215825, + "step": 5600, + "torque_loss": 0.1330796480178833 + }, + { + "epoch": 5.0359712230215825, + "force_loss": 0.010250705294311047, + "step": 5600 + }, + { + "epoch": 5.044964028776978, + "grad_norm": 1.7596006393432617, + "learning_rate": 9.948395244115953e-05, + "loss": 0.0327, + "step": 5610 + }, + { + "action_loss": 0.04590732976794243, + "epoch": 5.044964028776978, + "step": 5610 + }, + { + "epoch": 5.044964028776978, + "step": 5610, + "torque_loss": 0.1576809585094452 + }, + { + "epoch": 5.044964028776978, + "force_loss": 0.046087637543678284, + "step": 5610 + }, + { + "epoch": 5.053956834532374, + "grad_norm": 0.893988311290741, + "learning_rate": 9.9479995841173e-05, + "loss": 0.0335, + "step": 5620 + }, + { + "action_loss": 0.012712088413536549, + "epoch": 5.053956834532374, + "step": 5620 + }, + { + "epoch": 5.053956834532374, + "step": 5620, + "torque_loss": 0.12414965033531189 + }, + { + "epoch": 5.053956834532374, + "force_loss": 0.015016489662230015, + "step": 5620 + }, + { + "epoch": 5.06294964028777, + "grad_norm": 0.6276310086250305, + "learning_rate": 9.947602421047017e-05, + "loss": 0.0247, + "step": 5630 + }, + { + "action_loss": 0.027676789090037346, + "epoch": 5.06294964028777, + "step": 5630 + }, + { + "epoch": 5.06294964028777, + "step": 5630, + "torque_loss": 0.1542523056268692 + }, + { + "epoch": 5.06294964028777, + "force_loss": 0.019151249900460243, + "step": 5630 + }, + { + "epoch": 5.071942446043165, + "grad_norm": 1.2098196744918823, + "learning_rate": 9.947203755025753e-05, + "loss": 0.0347, + "step": 5640 + }, + { + "action_loss": 0.011504284106194973, + "epoch": 5.071942446043165, + "step": 5640 + }, + { + "epoch": 5.071942446043165, + "step": 5640, + "torque_loss": 0.11376959085464478 + }, + { + "epoch": 5.071942446043165, + "force_loss": 0.024473929777741432, + "step": 5640 + }, + { + "epoch": 5.080935251798561, + "grad_norm": 1.16839599609375, + "learning_rate": 9.946803586174611e-05, + "loss": 0.033, + "step": 5650 + }, + { + "action_loss": 0.021279791370034218, + "epoch": 5.080935251798561, + "step": 5650 + }, + { + "epoch": 5.080935251798561, + "step": 5650, + "torque_loss": 0.1337127685546875 + }, + { + "epoch": 5.080935251798561, + "force_loss": 0.01976182870566845, + "step": 5650 + }, + { + "epoch": 5.089928057553957, + "grad_norm": 0.5171191692352295, + "learning_rate": 9.946401914615151e-05, + "loss": 0.0341, + "step": 5660 + }, + { + "action_loss": 0.009707505814731121, + "epoch": 5.089928057553957, + "step": 5660 + }, + { + "epoch": 5.089928057553957, + "step": 5660, + "torque_loss": 0.0918097272515297 + }, + { + "epoch": 5.089928057553957, + "force_loss": 0.00940024945884943, + "step": 5660 + }, + { + "epoch": 5.098920863309353, + "grad_norm": 0.841602087020874, + "learning_rate": 9.945998740469394e-05, + "loss": 0.0259, + "step": 5670 + }, + { + "action_loss": 0.07884933799505234, + "epoch": 5.098920863309353, + "step": 5670 + }, + { + "epoch": 5.098920863309353, + "step": 5670, + "torque_loss": 0.20755648612976074 + }, + { + "epoch": 5.098920863309353, + "force_loss": 0.07677451521158218, + "step": 5670 + }, + { + "epoch": 5.107913669064748, + "grad_norm": 1.2909084558486938, + "learning_rate": 9.945594063859809e-05, + "loss": 0.041, + "step": 5680 + }, + { + "action_loss": 0.008488588966429234, + "epoch": 5.107913669064748, + "step": 5680 + }, + { + "epoch": 5.107913669064748, + "step": 5680, + "torque_loss": 0.14961610734462738 + }, + { + "epoch": 5.107913669064748, + "force_loss": 0.008059262298047543, + "step": 5680 + }, + { + "epoch": 5.116906474820144, + "grad_norm": 1.4823791980743408, + "learning_rate": 9.94518788490933e-05, + "loss": 0.0297, + "step": 5690 + }, + { + "action_loss": 0.030524766072630882, + "epoch": 5.116906474820144, + "step": 5690 + }, + { + "epoch": 5.116906474820144, + "step": 5690, + "torque_loss": 0.19970446825027466 + }, + { + "epoch": 5.116906474820144, + "force_loss": 0.022799404338002205, + "step": 5690 + }, + { + "epoch": 5.125899280575539, + "grad_norm": 0.34172824025154114, + "learning_rate": 9.944780203741341e-05, + "loss": 0.0379, + "step": 5700 + }, + { + "action_loss": 0.010089091025292873, + "epoch": 5.125899280575539, + "step": 5700 + }, + { + "epoch": 5.125899280575539, + "step": 5700, + "torque_loss": 0.09576954692602158 + }, + { + "epoch": 5.125899280575539, + "force_loss": 0.005920028779655695, + "step": 5700 + }, + { + "epoch": 5.134892086330935, + "grad_norm": 0.9330561757087708, + "learning_rate": 9.944371020479686e-05, + "loss": 0.0324, + "step": 5710 + }, + { + "action_loss": 0.0141999376937747, + "epoch": 5.134892086330935, + "step": 5710 + }, + { + "epoch": 5.134892086330935, + "step": 5710, + "torque_loss": 0.14227472245693207 + }, + { + "epoch": 5.134892086330935, + "force_loss": 0.00944321695715189, + "step": 5710 + }, + { + "epoch": 5.143884892086331, + "grad_norm": 0.49037691950798035, + "learning_rate": 9.943960335248662e-05, + "loss": 0.0298, + "step": 5720 + }, + { + "action_loss": 0.009946809150278568, + "epoch": 5.143884892086331, + "step": 5720 + }, + { + "epoch": 5.143884892086331, + "step": 5720, + "torque_loss": 0.11354728788137436 + }, + { + "epoch": 5.143884892086331, + "force_loss": 0.00917693879455328, + "step": 5720 + }, + { + "epoch": 5.152877697841727, + "grad_norm": 1.005693793296814, + "learning_rate": 9.943548148173027e-05, + "loss": 0.0339, + "step": 5730 + }, + { + "action_loss": 0.009529300034046173, + "epoch": 5.152877697841727, + "step": 5730 + }, + { + "epoch": 5.152877697841727, + "step": 5730, + "torque_loss": 0.10242322087287903 + }, + { + "epoch": 5.152877697841727, + "force_loss": 0.008441810496151447, + "step": 5730 + }, + { + "epoch": 5.161870503597123, + "grad_norm": 0.7240970730781555, + "learning_rate": 9.943134459377992e-05, + "loss": 0.0274, + "step": 5740 + }, + { + "action_loss": 0.009209486655890942, + "epoch": 5.161870503597123, + "step": 5740 + }, + { + "epoch": 5.161870503597123, + "step": 5740, + "torque_loss": 0.1518832892179489 + }, + { + "epoch": 5.161870503597123, + "force_loss": 0.008651758544147015, + "step": 5740 + }, + { + "epoch": 5.170863309352518, + "grad_norm": 0.6877978444099426, + "learning_rate": 9.942719268989222e-05, + "loss": 0.036, + "step": 5750 + }, + { + "action_loss": 0.008205019868910313, + "epoch": 5.170863309352518, + "step": 5750 + }, + { + "epoch": 5.170863309352518, + "step": 5750, + "torque_loss": 0.14600127935409546 + }, + { + "epoch": 5.170863309352518, + "force_loss": 0.008497755043208599, + "step": 5750 + }, + { + "epoch": 5.179856115107913, + "grad_norm": 0.7438530325889587, + "learning_rate": 9.942302577132844e-05, + "loss": 0.0308, + "step": 5760 + }, + { + "action_loss": 0.01791953109204769, + "epoch": 5.179856115107913, + "step": 5760 + }, + { + "epoch": 5.179856115107913, + "step": 5760, + "torque_loss": 0.12526437640190125 + }, + { + "epoch": 5.179856115107913, + "force_loss": 0.01129516214132309, + "step": 5760 + }, + { + "epoch": 5.188848920863309, + "grad_norm": 0.9081936478614807, + "learning_rate": 9.941884383935438e-05, + "loss": 0.0291, + "step": 5770 + }, + { + "action_loss": 0.03710727021098137, + "epoch": 5.188848920863309, + "step": 5770 + }, + { + "epoch": 5.188848920863309, + "step": 5770, + "torque_loss": 0.18137772381305695 + }, + { + "epoch": 5.188848920863309, + "force_loss": 0.041631344705820084, + "step": 5770 + }, + { + "epoch": 5.197841726618705, + "grad_norm": 0.7799079418182373, + "learning_rate": 9.941464689524039e-05, + "loss": 0.034, + "step": 5780 + }, + { + "action_loss": 0.008697784505784512, + "epoch": 5.197841726618705, + "step": 5780 + }, + { + "epoch": 5.197841726618705, + "step": 5780, + "torque_loss": 0.11888748407363892 + }, + { + "epoch": 5.197841726618705, + "force_loss": 0.007591801229864359, + "step": 5780 + }, + { + "epoch": 5.206834532374101, + "grad_norm": 1.5161101818084717, + "learning_rate": 9.941043494026139e-05, + "loss": 0.0344, + "step": 5790 + }, + { + "action_loss": 0.03356954827904701, + "epoch": 5.206834532374101, + "step": 5790 + }, + { + "epoch": 5.206834532374101, + "step": 5790, + "torque_loss": 0.1888788938522339 + }, + { + "epoch": 5.206834532374101, + "force_loss": 0.030938828364014626, + "step": 5790 + }, + { + "epoch": 5.215827338129497, + "grad_norm": 1.2998768091201782, + "learning_rate": 9.940620797569685e-05, + "loss": 0.0321, + "step": 5800 + }, + { + "action_loss": 0.02752203680574894, + "epoch": 5.215827338129497, + "step": 5800 + }, + { + "epoch": 5.215827338129497, + "step": 5800, + "torque_loss": 0.1494486778974533 + }, + { + "epoch": 5.215827338129497, + "force_loss": 0.020826246589422226, + "step": 5800 + }, + { + "epoch": 5.224820143884892, + "grad_norm": 0.6310189366340637, + "learning_rate": 9.940196600283082e-05, + "loss": 0.0421, + "step": 5810 + }, + { + "action_loss": 0.016576392576098442, + "epoch": 5.224820143884892, + "step": 5810 + }, + { + "epoch": 5.224820143884892, + "step": 5810, + "torque_loss": 0.1569250077009201 + }, + { + "epoch": 5.224820143884892, + "force_loss": 0.012351572513580322, + "step": 5810 + }, + { + "epoch": 5.233812949640288, + "grad_norm": 0.8556134700775146, + "learning_rate": 9.939770902295192e-05, + "loss": 0.0398, + "step": 5820 + }, + { + "action_loss": 0.010393361561000347, + "epoch": 5.233812949640288, + "step": 5820 + }, + { + "epoch": 5.233812949640288, + "step": 5820, + "torque_loss": 0.12201127409934998 + }, + { + "epoch": 5.233812949640288, + "force_loss": 0.011214190162718296, + "step": 5820 + }, + { + "epoch": 5.2428057553956835, + "grad_norm": 0.8266036510467529, + "learning_rate": 9.939343703735329e-05, + "loss": 0.0347, + "step": 5830 + }, + { + "action_loss": 0.02877046726644039, + "epoch": 5.2428057553956835, + "step": 5830 + }, + { + "epoch": 5.2428057553956835, + "step": 5830, + "torque_loss": 0.1741381287574768 + }, + { + "epoch": 5.2428057553956835, + "force_loss": 0.02859148569405079, + "step": 5830 + }, + { + "epoch": 5.251798561151079, + "grad_norm": 1.0230200290679932, + "learning_rate": 9.938915004733264e-05, + "loss": 0.0326, + "step": 5840 + }, + { + "action_loss": 0.01110775861889124, + "epoch": 5.251798561151079, + "step": 5840 + }, + { + "epoch": 5.251798561151079, + "step": 5840, + "torque_loss": 0.1227857694029808 + }, + { + "epoch": 5.251798561151079, + "force_loss": 0.008381695486605167, + "step": 5840 + }, + { + "epoch": 5.260791366906475, + "grad_norm": 0.6586215496063232, + "learning_rate": 9.938484805419224e-05, + "loss": 0.0321, + "step": 5850 + }, + { + "action_loss": 0.009457248263061047, + "epoch": 5.260791366906475, + "step": 5850 + }, + { + "epoch": 5.260791366906475, + "step": 5850, + "torque_loss": 0.1029616966843605 + }, + { + "epoch": 5.260791366906475, + "force_loss": 0.008344221860170364, + "step": 5850 + }, + { + "epoch": 5.26978417266187, + "grad_norm": 1.0250165462493896, + "learning_rate": 9.938053105923894e-05, + "loss": 0.0305, + "step": 5860 + }, + { + "action_loss": 0.01243405882269144, + "epoch": 5.26978417266187, + "step": 5860 + }, + { + "epoch": 5.26978417266187, + "step": 5860, + "torque_loss": 0.12254921346902847 + }, + { + "epoch": 5.26978417266187, + "force_loss": 0.007495278958231211, + "step": 5860 + }, + { + "epoch": 5.278776978417266, + "grad_norm": 0.8344842195510864, + "learning_rate": 9.937619906378413e-05, + "loss": 0.0304, + "step": 5870 + }, + { + "action_loss": 0.014139537699520588, + "epoch": 5.278776978417266, + "step": 5870 + }, + { + "epoch": 5.278776978417266, + "step": 5870, + "torque_loss": 0.12871983647346497 + }, + { + "epoch": 5.278776978417266, + "force_loss": 0.008537528105080128, + "step": 5870 + }, + { + "epoch": 5.287769784172662, + "grad_norm": 1.210252046585083, + "learning_rate": 9.937185206914374e-05, + "loss": 0.0337, + "step": 5880 + }, + { + "action_loss": 0.032883673906326294, + "epoch": 5.287769784172662, + "step": 5880 + }, + { + "epoch": 5.287769784172662, + "step": 5880, + "torque_loss": 0.1661003828048706 + }, + { + "epoch": 5.287769784172662, + "force_loss": 0.03350367024540901, + "step": 5880 + }, + { + "epoch": 5.296762589928058, + "grad_norm": 1.44407057762146, + "learning_rate": 9.936749007663829e-05, + "loss": 0.0304, + "step": 5890 + }, + { + "action_loss": 0.01643591932952404, + "epoch": 5.296762589928058, + "step": 5890 + }, + { + "epoch": 5.296762589928058, + "step": 5890, + "torque_loss": 0.14750412106513977 + }, + { + "epoch": 5.296762589928058, + "force_loss": 0.030642440542578697, + "step": 5890 + }, + { + "epoch": 5.305755395683454, + "grad_norm": 0.7410878539085388, + "learning_rate": 9.93631130875928e-05, + "loss": 0.033, + "step": 5900 + }, + { + "action_loss": 0.02585108019411564, + "epoch": 5.305755395683454, + "step": 5900 + }, + { + "epoch": 5.305755395683454, + "step": 5900, + "torque_loss": 0.15912772715091705 + }, + { + "epoch": 5.305755395683454, + "force_loss": 0.026567354798316956, + "step": 5900 + }, + { + "epoch": 5.3147482014388485, + "grad_norm": 1.1565377712249756, + "learning_rate": 9.935872110333692e-05, + "loss": 0.0373, + "step": 5910 + }, + { + "action_loss": 0.02192777208983898, + "epoch": 5.3147482014388485, + "step": 5910 + }, + { + "epoch": 5.3147482014388485, + "step": 5910, + "torque_loss": 0.1286812275648117 + }, + { + "epoch": 5.3147482014388485, + "force_loss": 0.019034305587410927, + "step": 5910 + }, + { + "epoch": 5.323741007194244, + "grad_norm": 2.197715997695923, + "learning_rate": 9.935431412520484e-05, + "loss": 0.0362, + "step": 5920 + }, + { + "action_loss": 0.010740249417722225, + "epoch": 5.323741007194244, + "step": 5920 + }, + { + "epoch": 5.323741007194244, + "step": 5920, + "torque_loss": 0.18354232609272003 + }, + { + "epoch": 5.323741007194244, + "force_loss": 0.011155351996421814, + "step": 5920 + }, + { + "epoch": 5.33273381294964, + "grad_norm": 0.7228177189826965, + "learning_rate": 9.934989215453523e-05, + "loss": 0.0343, + "step": 5930 + }, + { + "action_loss": 0.023066185414791107, + "epoch": 5.33273381294964, + "step": 5930 + }, + { + "epoch": 5.33273381294964, + "step": 5930, + "torque_loss": 0.19458584487438202 + }, + { + "epoch": 5.33273381294964, + "force_loss": 0.034828025847673416, + "step": 5930 + }, + { + "epoch": 5.341726618705036, + "grad_norm": 0.5792932510375977, + "learning_rate": 9.934545519267139e-05, + "loss": 0.037, + "step": 5940 + }, + { + "action_loss": 0.024221688508987427, + "epoch": 5.341726618705036, + "step": 5940 + }, + { + "epoch": 5.341726618705036, + "step": 5940, + "torque_loss": 0.19522254168987274 + }, + { + "epoch": 5.341726618705036, + "force_loss": 0.012741059064865112, + "step": 5940 + }, + { + "epoch": 5.350719424460432, + "grad_norm": 0.9161556363105774, + "learning_rate": 9.934100324096117e-05, + "loss": 0.0347, + "step": 5950 + }, + { + "action_loss": 0.01206832006573677, + "epoch": 5.350719424460432, + "step": 5950 + }, + { + "epoch": 5.350719424460432, + "step": 5950, + "torque_loss": 0.15647456049919128 + }, + { + "epoch": 5.350719424460432, + "force_loss": 0.009277389384806156, + "step": 5950 + }, + { + "epoch": 5.359712230215827, + "grad_norm": 0.8598365783691406, + "learning_rate": 9.933653630075692e-05, + "loss": 0.0308, + "step": 5960 + }, + { + "action_loss": 0.013124617747962475, + "epoch": 5.359712230215827, + "step": 5960 + }, + { + "epoch": 5.359712230215827, + "step": 5960, + "torque_loss": 0.14300768077373505 + }, + { + "epoch": 5.359712230215827, + "force_loss": 0.01134935487061739, + "step": 5960 + }, + { + "epoch": 5.368705035971223, + "grad_norm": 1.8468637466430664, + "learning_rate": 9.93320543734156e-05, + "loss": 0.03, + "step": 5970 + }, + { + "action_loss": 0.01102599035948515, + "epoch": 5.368705035971223, + "step": 5970 + }, + { + "epoch": 5.368705035971223, + "step": 5970, + "torque_loss": 0.1412300020456314 + }, + { + "epoch": 5.368705035971223, + "force_loss": 0.008286123163998127, + "step": 5970 + }, + { + "epoch": 5.377697841726619, + "grad_norm": 1.8177632093429565, + "learning_rate": 9.932755746029871e-05, + "loss": 0.0295, + "step": 5980 + }, + { + "action_loss": 0.013801603578031063, + "epoch": 5.377697841726619, + "step": 5980 + }, + { + "epoch": 5.377697841726619, + "step": 5980, + "torque_loss": 0.10726621001958847 + }, + { + "epoch": 5.377697841726619, + "force_loss": 0.012256343849003315, + "step": 5980 + }, + { + "epoch": 5.386690647482014, + "grad_norm": 1.0670894384384155, + "learning_rate": 9.932304556277228e-05, + "loss": 0.0343, + "step": 5990 + }, + { + "action_loss": 0.018760504201054573, + "epoch": 5.386690647482014, + "step": 5990 + }, + { + "epoch": 5.386690647482014, + "step": 5990, + "torque_loss": 0.20835180580615997 + }, + { + "epoch": 5.386690647482014, + "force_loss": 0.017465850338339806, + "step": 5990 + }, + { + "epoch": 5.39568345323741, + "grad_norm": 1.05755615234375, + "learning_rate": 9.93185186822069e-05, + "loss": 0.0377, + "step": 6000 + }, + { + "action_loss": 0.02312903106212616, + "epoch": 5.39568345323741, + "step": 6000 + }, + { + "epoch": 5.39568345323741, + "step": 6000, + "torque_loss": 0.13508160412311554 + }, + { + "epoch": 5.39568345323741, + "force_loss": 0.0179379191249609, + "step": 6000 + }, + { + "epoch": 5.404676258992806, + "grad_norm": 1.0471394062042236, + "learning_rate": 9.931397681997773e-05, + "loss": 0.0331, + "step": 6010 + }, + { + "action_loss": 0.0077772438526153564, + "epoch": 5.404676258992806, + "step": 6010 + }, + { + "epoch": 5.404676258992806, + "step": 6010, + "torque_loss": 0.12868870794773102 + }, + { + "epoch": 5.404676258992806, + "force_loss": 0.008187138475477695, + "step": 6010 + }, + { + "epoch": 5.413669064748201, + "grad_norm": 0.59952712059021, + "learning_rate": 9.930941997746446e-05, + "loss": 0.0294, + "step": 6020 + }, + { + "action_loss": 0.014202840626239777, + "epoch": 5.413669064748201, + "step": 6020 + }, + { + "epoch": 5.413669064748201, + "step": 6020, + "torque_loss": 0.13312187790870667 + }, + { + "epoch": 5.413669064748201, + "force_loss": 0.01386262010782957, + "step": 6020 + }, + { + "epoch": 5.422661870503597, + "grad_norm": 0.424985408782959, + "learning_rate": 9.930484815605134e-05, + "loss": 0.0329, + "step": 6030 + }, + { + "action_loss": 0.01719067431986332, + "epoch": 5.422661870503597, + "step": 6030 + }, + { + "epoch": 5.422661870503597, + "step": 6030, + "torque_loss": 0.1441829800605774 + }, + { + "epoch": 5.422661870503597, + "force_loss": 0.014642399735748768, + "step": 6030 + }, + { + "epoch": 5.431654676258993, + "grad_norm": 0.7542428374290466, + "learning_rate": 9.930026135712717e-05, + "loss": 0.0365, + "step": 6040 + }, + { + "action_loss": 0.00925524067133665, + "epoch": 5.431654676258993, + "step": 6040 + }, + { + "epoch": 5.431654676258993, + "step": 6040, + "torque_loss": 0.1308252066373825 + }, + { + "epoch": 5.431654676258993, + "force_loss": 0.007872766815125942, + "step": 6040 + }, + { + "epoch": 5.440647482014389, + "grad_norm": 0.7442678213119507, + "learning_rate": 9.92956595820853e-05, + "loss": 0.0347, + "step": 6050 + }, + { + "action_loss": 0.006951652467250824, + "epoch": 5.440647482014389, + "step": 6050 + }, + { + "epoch": 5.440647482014389, + "step": 6050, + "torque_loss": 0.121300108730793 + }, + { + "epoch": 5.440647482014389, + "force_loss": 0.007886544801294804, + "step": 6050 + }, + { + "epoch": 5.4496402877697845, + "grad_norm": 0.8208723068237305, + "learning_rate": 9.929104283232362e-05, + "loss": 0.0302, + "step": 6060 + }, + { + "action_loss": 0.024792775511741638, + "epoch": 5.4496402877697845, + "step": 6060 + }, + { + "epoch": 5.4496402877697845, + "step": 6060, + "torque_loss": 0.17975036799907684 + }, + { + "epoch": 5.4496402877697845, + "force_loss": 0.036326829344034195, + "step": 6060 + }, + { + "epoch": 5.4586330935251794, + "grad_norm": 0.7009875774383545, + "learning_rate": 9.92864111092446e-05, + "loss": 0.0302, + "step": 6070 + }, + { + "action_loss": 0.015496629290282726, + "epoch": 5.4586330935251794, + "step": 6070 + }, + { + "epoch": 5.4586330935251794, + "step": 6070, + "torque_loss": 0.12660561501979828 + }, + { + "epoch": 5.4586330935251794, + "force_loss": 0.022680183872580528, + "step": 6070 + }, + { + "epoch": 5.467625899280575, + "grad_norm": 1.0963332653045654, + "learning_rate": 9.92817644142552e-05, + "loss": 0.0288, + "step": 6080 + }, + { + "action_loss": 0.01309314277023077, + "epoch": 5.467625899280575, + "step": 6080 + }, + { + "epoch": 5.467625899280575, + "step": 6080, + "torque_loss": 0.12677361071109772 + }, + { + "epoch": 5.467625899280575, + "force_loss": 0.00979540217667818, + "step": 6080 + }, + { + "epoch": 5.476618705035971, + "grad_norm": 1.43858802318573, + "learning_rate": 9.927710274876698e-05, + "loss": 0.035, + "step": 6090 + }, + { + "action_loss": 0.014266262762248516, + "epoch": 5.476618705035971, + "step": 6090 + }, + { + "epoch": 5.476618705035971, + "step": 6090, + "torque_loss": 0.14075371623039246 + }, + { + "epoch": 5.476618705035971, + "force_loss": 0.009991207160055637, + "step": 6090 + }, + { + "epoch": 5.485611510791367, + "grad_norm": 0.41903799772262573, + "learning_rate": 9.927242611419603e-05, + "loss": 0.0334, + "step": 6100 + }, + { + "action_loss": 0.04521968960762024, + "epoch": 5.485611510791367, + "step": 6100 + }, + { + "epoch": 5.485611510791367, + "step": 6100, + "torque_loss": 0.15832291543483734 + }, + { + "epoch": 5.485611510791367, + "force_loss": 0.03506319597363472, + "step": 6100 + }, + { + "epoch": 5.494604316546763, + "grad_norm": 0.8055093884468079, + "learning_rate": 9.926773451196301e-05, + "loss": 0.0395, + "step": 6110 + }, + { + "action_loss": 0.009163173846900463, + "epoch": 5.494604316546763, + "step": 6110 + }, + { + "epoch": 5.494604316546763, + "step": 6110, + "torque_loss": 0.08509886264801025 + }, + { + "epoch": 5.494604316546763, + "force_loss": 0.007788896095007658, + "step": 6110 + }, + { + "epoch": 5.503597122302159, + "grad_norm": 0.9088740944862366, + "learning_rate": 9.926302794349306e-05, + "loss": 0.0306, + "step": 6120 + }, + { + "action_loss": 0.030656510964035988, + "epoch": 5.503597122302159, + "step": 6120 + }, + { + "epoch": 5.503597122302159, + "step": 6120, + "torque_loss": 0.23709328472614288 + }, + { + "epoch": 5.503597122302159, + "force_loss": 0.030489442870020866, + "step": 6120 + }, + { + "epoch": 5.512589928057554, + "grad_norm": 1.0816494226455688, + "learning_rate": 9.925830641021594e-05, + "loss": 0.0398, + "step": 6130 + }, + { + "action_loss": 0.02450937032699585, + "epoch": 5.512589928057554, + "step": 6130 + }, + { + "epoch": 5.512589928057554, + "step": 6130, + "torque_loss": 0.16555556654930115 + }, + { + "epoch": 5.512589928057554, + "force_loss": 0.022962206974625587, + "step": 6130 + }, + { + "epoch": 5.5215827338129495, + "grad_norm": 0.9822729825973511, + "learning_rate": 9.925356991356593e-05, + "loss": 0.0367, + "step": 6140 + }, + { + "action_loss": 0.033981140702962875, + "epoch": 5.5215827338129495, + "step": 6140 + }, + { + "epoch": 5.5215827338129495, + "step": 6140, + "torque_loss": 0.16091638803482056 + }, + { + "epoch": 5.5215827338129495, + "force_loss": 0.01706506870687008, + "step": 6140 + }, + { + "epoch": 5.530575539568345, + "grad_norm": 0.41172078251838684, + "learning_rate": 9.924881845498184e-05, + "loss": 0.0297, + "step": 6150 + }, + { + "action_loss": 0.022704074159264565, + "epoch": 5.530575539568345, + "step": 6150 + }, + { + "epoch": 5.530575539568345, + "step": 6150, + "torque_loss": 0.1596505045890808 + }, + { + "epoch": 5.530575539568345, + "force_loss": 0.01549468468874693, + "step": 6150 + }, + { + "epoch": 5.539568345323741, + "grad_norm": 0.7721667885780334, + "learning_rate": 9.924405203590705e-05, + "loss": 0.0349, + "step": 6160 + }, + { + "action_loss": 0.0241011381149292, + "epoch": 5.539568345323741, + "step": 6160 + }, + { + "epoch": 5.539568345323741, + "step": 6160, + "torque_loss": 0.17437483370304108 + }, + { + "epoch": 5.539568345323741, + "force_loss": 0.026363296434283257, + "step": 6160 + }, + { + "epoch": 5.548561151079137, + "grad_norm": 1.2850946187973022, + "learning_rate": 9.923927065778946e-05, + "loss": 0.0345, + "step": 6170 + }, + { + "action_loss": 0.01621459610760212, + "epoch": 5.548561151079137, + "step": 6170 + }, + { + "epoch": 5.548561151079137, + "step": 6170, + "torque_loss": 0.1293100118637085 + }, + { + "epoch": 5.548561151079137, + "force_loss": 0.012890475802123547, + "step": 6170 + }, + { + "epoch": 5.557553956834532, + "grad_norm": 1.1787601709365845, + "learning_rate": 9.923447432208154e-05, + "loss": 0.0363, + "step": 6180 + }, + { + "action_loss": 0.010358665138483047, + "epoch": 5.557553956834532, + "step": 6180 + }, + { + "epoch": 5.557553956834532, + "step": 6180, + "torque_loss": 0.10722432285547256 + }, + { + "epoch": 5.557553956834532, + "force_loss": 0.007596289739012718, + "step": 6180 + }, + { + "epoch": 5.566546762589928, + "grad_norm": 0.7916187644004822, + "learning_rate": 9.922966303024027e-05, + "loss": 0.0317, + "step": 6190 + }, + { + "action_loss": 0.016377853229641914, + "epoch": 5.566546762589928, + "step": 6190 + }, + { + "epoch": 5.566546762589928, + "step": 6190, + "torque_loss": 0.18232186138629913 + }, + { + "epoch": 5.566546762589928, + "force_loss": 0.010570134967565536, + "step": 6190 + }, + { + "epoch": 5.575539568345324, + "grad_norm": 0.779882550239563, + "learning_rate": 9.922483678372721e-05, + "loss": 0.0423, + "step": 6200 + }, + { + "action_loss": 0.029877014458179474, + "epoch": 5.575539568345324, + "step": 6200 + }, + { + "epoch": 5.575539568345324, + "step": 6200, + "torque_loss": 0.14230668544769287 + }, + { + "epoch": 5.575539568345324, + "force_loss": 0.021593831479549408, + "step": 6200 + }, + { + "epoch": 5.58453237410072, + "grad_norm": 1.3249753713607788, + "learning_rate": 9.921999558400845e-05, + "loss": 0.0344, + "step": 6210 + }, + { + "action_loss": 0.016748622059822083, + "epoch": 5.58453237410072, + "step": 6210 + }, + { + "epoch": 5.58453237410072, + "step": 6210, + "torque_loss": 0.1596631556749344 + }, + { + "epoch": 5.58453237410072, + "force_loss": 0.01481617521494627, + "step": 6210 + }, + { + "epoch": 5.593525179856115, + "grad_norm": 0.6485111713409424, + "learning_rate": 9.92151394325546e-05, + "loss": 0.0407, + "step": 6220 + }, + { + "action_loss": 0.026288457214832306, + "epoch": 5.593525179856115, + "step": 6220 + }, + { + "epoch": 5.593525179856115, + "step": 6220, + "torque_loss": 0.13771633803844452 + }, + { + "epoch": 5.593525179856115, + "force_loss": 0.01655431091785431, + "step": 6220 + }, + { + "epoch": 5.602517985611511, + "grad_norm": 1.0226564407348633, + "learning_rate": 9.921026833084084e-05, + "loss": 0.0341, + "step": 6230 + }, + { + "action_loss": 0.02121158502995968, + "epoch": 5.602517985611511, + "step": 6230 + }, + { + "epoch": 5.602517985611511, + "step": 6230, + "torque_loss": 0.17241235077381134 + }, + { + "epoch": 5.602517985611511, + "force_loss": 0.016864560544490814, + "step": 6230 + }, + { + "epoch": 5.611510791366906, + "grad_norm": 0.9332301020622253, + "learning_rate": 9.920538228034689e-05, + "loss": 0.0308, + "step": 6240 + }, + { + "action_loss": 0.024339988827705383, + "epoch": 5.611510791366906, + "step": 6240 + }, + { + "epoch": 5.611510791366906, + "step": 6240, + "torque_loss": 0.14449548721313477 + }, + { + "epoch": 5.611510791366906, + "force_loss": 0.0294595118612051, + "step": 6240 + }, + { + "epoch": 5.620503597122302, + "grad_norm": 0.9044941663742065, + "learning_rate": 9.920048128255699e-05, + "loss": 0.0348, + "step": 6250 + }, + { + "action_loss": 0.011431700550019741, + "epoch": 5.620503597122302, + "step": 6250 + }, + { + "epoch": 5.620503597122302, + "step": 6250, + "torque_loss": 0.13255928456783295 + }, + { + "epoch": 5.620503597122302, + "force_loss": 0.009113145992159843, + "step": 6250 + }, + { + "epoch": 5.629496402877698, + "grad_norm": 1.7834445238113403, + "learning_rate": 9.919556533895995e-05, + "loss": 0.0395, + "step": 6260 + }, + { + "action_loss": 0.028955094516277313, + "epoch": 5.629496402877698, + "step": 6260 + }, + { + "epoch": 5.629496402877698, + "step": 6260, + "torque_loss": 0.1273440271615982 + }, + { + "epoch": 5.629496402877698, + "force_loss": 0.014864925295114517, + "step": 6260 + }, + { + "epoch": 5.638489208633094, + "grad_norm": 1.134705662727356, + "learning_rate": 9.919063445104907e-05, + "loss": 0.0371, + "step": 6270 + }, + { + "action_loss": 0.01801454648375511, + "epoch": 5.638489208633094, + "step": 6270 + }, + { + "epoch": 5.638489208633094, + "step": 6270, + "torque_loss": 0.1646595448255539 + }, + { + "epoch": 5.638489208633094, + "force_loss": 0.013859125785529613, + "step": 6270 + }, + { + "epoch": 5.647482014388489, + "grad_norm": 0.9329164624214172, + "learning_rate": 9.918568862032227e-05, + "loss": 0.0327, + "step": 6280 + }, + { + "action_loss": 0.02373000979423523, + "epoch": 5.647482014388489, + "step": 6280 + }, + { + "epoch": 5.647482014388489, + "step": 6280, + "torque_loss": 0.1502731889486313 + }, + { + "epoch": 5.647482014388489, + "force_loss": 0.01534126978367567, + "step": 6280 + }, + { + "epoch": 5.656474820143885, + "grad_norm": 0.8661342263221741, + "learning_rate": 9.918072784828194e-05, + "loss": 0.032, + "step": 6290 + }, + { + "action_loss": 0.016404708847403526, + "epoch": 5.656474820143885, + "step": 6290 + }, + { + "epoch": 5.656474820143885, + "step": 6290, + "torque_loss": 0.14487525820732117 + }, + { + "epoch": 5.656474820143885, + "force_loss": 0.015142071060836315, + "step": 6290 + }, + { + "epoch": 5.66546762589928, + "grad_norm": 0.9574587941169739, + "learning_rate": 9.917575213643501e-05, + "loss": 0.0378, + "step": 6300 + }, + { + "action_loss": 0.015606609173119068, + "epoch": 5.66546762589928, + "step": 6300 + }, + { + "epoch": 5.66546762589928, + "step": 6300, + "torque_loss": 0.14903031289577484 + }, + { + "epoch": 5.66546762589928, + "force_loss": 0.01487062405794859, + "step": 6300 + }, + { + "epoch": 5.674460431654676, + "grad_norm": 1.2378849983215332, + "learning_rate": 9.917076148629302e-05, + "loss": 0.032, + "step": 6310 + }, + { + "action_loss": 0.020867319777607918, + "epoch": 5.674460431654676, + "step": 6310 + }, + { + "epoch": 5.674460431654676, + "step": 6310, + "torque_loss": 0.09977815300226212 + }, + { + "epoch": 5.674460431654676, + "force_loss": 0.010460575111210346, + "step": 6310 + }, + { + "epoch": 5.683453237410072, + "grad_norm": 1.5370500087738037, + "learning_rate": 9.916575589937196e-05, + "loss": 0.0364, + "step": 6320 + }, + { + "action_loss": 0.01641274057328701, + "epoch": 5.683453237410072, + "step": 6320 + }, + { + "epoch": 5.683453237410072, + "step": 6320, + "torque_loss": 0.1378653198480606 + }, + { + "epoch": 5.683453237410072, + "force_loss": 0.020352089777588844, + "step": 6320 + }, + { + "epoch": 5.692446043165468, + "grad_norm": 0.7210316061973572, + "learning_rate": 9.916073537719239e-05, + "loss": 0.0326, + "step": 6330 + }, + { + "action_loss": 0.01788381300866604, + "epoch": 5.692446043165468, + "step": 6330 + }, + { + "epoch": 5.692446043165468, + "step": 6330, + "torque_loss": 0.1495245099067688 + }, + { + "epoch": 5.692446043165468, + "force_loss": 0.008984549902379513, + "step": 6330 + }, + { + "epoch": 5.701438848920863, + "grad_norm": 0.8334928154945374, + "learning_rate": 9.915569992127944e-05, + "loss": 0.0361, + "step": 6340 + }, + { + "action_loss": 0.022939607501029968, + "epoch": 5.701438848920863, + "step": 6340 + }, + { + "epoch": 5.701438848920863, + "step": 6340, + "torque_loss": 0.15945382416248322 + }, + { + "epoch": 5.701438848920863, + "force_loss": 0.018436981365084648, + "step": 6340 + }, + { + "epoch": 5.710431654676259, + "grad_norm": 0.4778817892074585, + "learning_rate": 9.915064953316273e-05, + "loss": 0.0297, + "step": 6350 + }, + { + "action_loss": 0.010276437737047672, + "epoch": 5.710431654676259, + "step": 6350 + }, + { + "epoch": 5.710431654676259, + "step": 6350, + "torque_loss": 0.15242727100849152 + }, + { + "epoch": 5.710431654676259, + "force_loss": 0.007281759288161993, + "step": 6350 + }, + { + "epoch": 5.719424460431655, + "grad_norm": 0.6238363981246948, + "learning_rate": 9.914558421437645e-05, + "loss": 0.0305, + "step": 6360 + }, + { + "action_loss": 0.012519605457782745, + "epoch": 5.719424460431655, + "step": 6360 + }, + { + "epoch": 5.719424460431655, + "step": 6360, + "torque_loss": 0.10555735230445862 + }, + { + "epoch": 5.719424460431655, + "force_loss": 0.012490061111748219, + "step": 6360 + }, + { + "epoch": 5.7284172661870505, + "grad_norm": 0.6641746759414673, + "learning_rate": 9.914050396645929e-05, + "loss": 0.0335, + "step": 6370 + }, + { + "action_loss": 0.022814318537712097, + "epoch": 5.7284172661870505, + "step": 6370 + }, + { + "epoch": 5.7284172661870505, + "step": 6370, + "torque_loss": 0.10337921231985092 + }, + { + "epoch": 5.7284172661870505, + "force_loss": 0.013225001282989979, + "step": 6370 + }, + { + "epoch": 5.737410071942446, + "grad_norm": 0.5852411389350891, + "learning_rate": 9.913540879095452e-05, + "loss": 0.0382, + "step": 6380 + }, + { + "action_loss": 0.02187674678862095, + "epoch": 5.737410071942446, + "step": 6380 + }, + { + "epoch": 5.737410071942446, + "step": 6380, + "torque_loss": 0.18658983707427979 + }, + { + "epoch": 5.737410071942446, + "force_loss": 0.018079740926623344, + "step": 6380 + }, + { + "epoch": 5.746402877697841, + "grad_norm": 0.8768635988235474, + "learning_rate": 9.913029868940987e-05, + "loss": 0.038, + "step": 6390 + }, + { + "action_loss": 0.05202604830265045, + "epoch": 5.746402877697841, + "step": 6390 + }, + { + "epoch": 5.746402877697841, + "step": 6390, + "torque_loss": 0.1624857634305954 + }, + { + "epoch": 5.746402877697841, + "force_loss": 0.03932690620422363, + "step": 6390 + }, + { + "epoch": 5.755395683453237, + "grad_norm": 1.5830304622650146, + "learning_rate": 9.912517366337772e-05, + "loss": 0.0371, + "step": 6400 + }, + { + "action_loss": 0.013611477799713612, + "epoch": 5.755395683453237, + "step": 6400 + }, + { + "epoch": 5.755395683453237, + "step": 6400, + "torque_loss": 0.1407223343849182 + }, + { + "epoch": 5.755395683453237, + "force_loss": 0.010399263352155685, + "step": 6400 + }, + { + "epoch": 5.764388489208633, + "grad_norm": 0.5996264219284058, + "learning_rate": 9.912003371441487e-05, + "loss": 0.0356, + "step": 6410 + }, + { + "action_loss": 0.02901286445558071, + "epoch": 5.764388489208633, + "step": 6410 + }, + { + "epoch": 5.764388489208633, + "step": 6410, + "torque_loss": 0.1307028979063034 + }, + { + "epoch": 5.764388489208633, + "force_loss": 0.017980530858039856, + "step": 6410 + }, + { + "epoch": 5.773381294964029, + "grad_norm": 0.5171142816543579, + "learning_rate": 9.911487884408271e-05, + "loss": 0.0331, + "step": 6420 + }, + { + "action_loss": 0.009266541339457035, + "epoch": 5.773381294964029, + "step": 6420 + }, + { + "epoch": 5.773381294964029, + "step": 6420, + "torque_loss": 0.11346965283155441 + }, + { + "epoch": 5.773381294964029, + "force_loss": 0.005412036087363958, + "step": 6420 + }, + { + "epoch": 5.782374100719425, + "grad_norm": 1.4605542421340942, + "learning_rate": 9.910970905394719e-05, + "loss": 0.0347, + "step": 6430 + }, + { + "action_loss": 0.012880965135991573, + "epoch": 5.782374100719425, + "step": 6430 + }, + { + "epoch": 5.782374100719425, + "step": 6430, + "torque_loss": 0.12281922250986099 + }, + { + "epoch": 5.782374100719425, + "force_loss": 0.008332737721502781, + "step": 6430 + }, + { + "epoch": 5.7913669064748206, + "grad_norm": 0.7657610774040222, + "learning_rate": 9.91045243455787e-05, + "loss": 0.0284, + "step": 6440 + }, + { + "action_loss": 0.01047398429363966, + "epoch": 5.7913669064748206, + "step": 6440 + }, + { + "epoch": 5.7913669064748206, + "step": 6440, + "torque_loss": 0.10461390018463135 + }, + { + "epoch": 5.7913669064748206, + "force_loss": 0.008071732707321644, + "step": 6440 + }, + { + "epoch": 5.8003597122302155, + "grad_norm": 0.6476050615310669, + "learning_rate": 9.909932472055225e-05, + "loss": 0.0364, + "step": 6450 + }, + { + "action_loss": 0.0158433485776186, + "epoch": 5.8003597122302155, + "step": 6450 + }, + { + "epoch": 5.8003597122302155, + "step": 6450, + "torque_loss": 0.13918693363666534 + }, + { + "epoch": 5.8003597122302155, + "force_loss": 0.009686020202934742, + "step": 6450 + }, + { + "epoch": 5.809352517985611, + "grad_norm": 0.5028572082519531, + "learning_rate": 9.909411018044734e-05, + "loss": 0.0299, + "step": 6460 + }, + { + "action_loss": 0.01332776714116335, + "epoch": 5.809352517985611, + "step": 6460 + }, + { + "epoch": 5.809352517985611, + "step": 6460, + "torque_loss": 0.12181492894887924 + }, + { + "epoch": 5.809352517985611, + "force_loss": 0.010035021230578423, + "step": 6460 + }, + { + "epoch": 5.818345323741007, + "grad_norm": 0.8717068433761597, + "learning_rate": 9.908888072684802e-05, + "loss": 0.0329, + "step": 6470 + }, + { + "action_loss": 0.01626739092171192, + "epoch": 5.818345323741007, + "step": 6470 + }, + { + "epoch": 5.818345323741007, + "step": 6470, + "torque_loss": 0.12440016865730286 + }, + { + "epoch": 5.818345323741007, + "force_loss": 0.013880801387131214, + "step": 6470 + }, + { + "epoch": 5.827338129496403, + "grad_norm": 1.1392223834991455, + "learning_rate": 9.908363636134285e-05, + "loss": 0.0282, + "step": 6480 + }, + { + "action_loss": 0.013950206339359283, + "epoch": 5.827338129496403, + "step": 6480 + }, + { + "epoch": 5.827338129496403, + "step": 6480, + "torque_loss": 0.1354544758796692 + }, + { + "epoch": 5.827338129496403, + "force_loss": 0.010613635182380676, + "step": 6480 + }, + { + "epoch": 5.836330935251799, + "grad_norm": 0.6433037519454956, + "learning_rate": 9.907837708552493e-05, + "loss": 0.0276, + "step": 6490 + }, + { + "action_loss": 0.0168133731931448, + "epoch": 5.836330935251799, + "step": 6490 + }, + { + "epoch": 5.836330935251799, + "step": 6490, + "torque_loss": 0.1305723488330841 + }, + { + "epoch": 5.836330935251799, + "force_loss": 0.024245254695415497, + "step": 6490 + }, + { + "epoch": 5.845323741007194, + "grad_norm": 1.059878945350647, + "learning_rate": 9.90731029009919e-05, + "loss": 0.0291, + "step": 6500 + }, + { + "action_loss": 0.027319571003317833, + "epoch": 5.845323741007194, + "step": 6500 + }, + { + "epoch": 5.845323741007194, + "step": 6500, + "torque_loss": 0.16758990287780762 + }, + { + "epoch": 5.845323741007194, + "force_loss": 0.025982022285461426, + "step": 6500 + }, + { + "epoch": 5.85431654676259, + "grad_norm": 1.482749104499817, + "learning_rate": 9.906781380934589e-05, + "loss": 0.0439, + "step": 6510 + }, + { + "action_loss": 0.02713160216808319, + "epoch": 5.85431654676259, + "step": 6510 + }, + { + "epoch": 5.85431654676259, + "step": 6510, + "torque_loss": 0.16857409477233887 + }, + { + "epoch": 5.85431654676259, + "force_loss": 0.03157292306423187, + "step": 6510 + }, + { + "epoch": 5.863309352517986, + "grad_norm": 0.8787999749183655, + "learning_rate": 9.906250981219362e-05, + "loss": 0.0347, + "step": 6520 + }, + { + "action_loss": 0.012791815213859081, + "epoch": 5.863309352517986, + "step": 6520 + }, + { + "epoch": 5.863309352517986, + "step": 6520, + "torque_loss": 0.19160287082195282 + }, + { + "epoch": 5.863309352517986, + "force_loss": 0.008362742140889168, + "step": 6520 + }, + { + "epoch": 5.872302158273381, + "grad_norm": 0.752284586429596, + "learning_rate": 9.905719091114628e-05, + "loss": 0.0296, + "step": 6530 + }, + { + "action_loss": 0.012131291441619396, + "epoch": 5.872302158273381, + "step": 6530 + }, + { + "epoch": 5.872302158273381, + "step": 6530, + "torque_loss": 0.12434646487236023 + }, + { + "epoch": 5.872302158273381, + "force_loss": 0.010255965404212475, + "step": 6530 + }, + { + "epoch": 5.881294964028777, + "grad_norm": 1.9539376497268677, + "learning_rate": 9.905185710781964e-05, + "loss": 0.039, + "step": 6540 + }, + { + "action_loss": 0.013075563125312328, + "epoch": 5.881294964028777, + "step": 6540 + }, + { + "epoch": 5.881294964028777, + "step": 6540, + "torque_loss": 0.14066021144390106 + }, + { + "epoch": 5.881294964028777, + "force_loss": 0.009071187116205692, + "step": 6540 + }, + { + "epoch": 5.890287769784173, + "grad_norm": 0.39630407094955444, + "learning_rate": 9.904650840383392e-05, + "loss": 0.035, + "step": 6550 + }, + { + "action_loss": 0.012409411370754242, + "epoch": 5.890287769784173, + "step": 6550 + }, + { + "epoch": 5.890287769784173, + "step": 6550, + "torque_loss": 0.10571638494729996 + }, + { + "epoch": 5.890287769784173, + "force_loss": 0.012267733924090862, + "step": 6550 + }, + { + "epoch": 5.899280575539568, + "grad_norm": 1.1717088222503662, + "learning_rate": 9.904114480081397e-05, + "loss": 0.027, + "step": 6560 + }, + { + "action_loss": 0.02597968839108944, + "epoch": 5.899280575539568, + "step": 6560 + }, + { + "epoch": 5.899280575539568, + "step": 6560, + "torque_loss": 0.1780753880739212 + }, + { + "epoch": 5.899280575539568, + "force_loss": 0.02346406877040863, + "step": 6560 + }, + { + "epoch": 5.908273381294964, + "grad_norm": 0.9946366548538208, + "learning_rate": 9.903576630038906e-05, + "loss": 0.0375, + "step": 6570 + }, + { + "action_loss": 0.015300107188522816, + "epoch": 5.908273381294964, + "step": 6570 + }, + { + "epoch": 5.908273381294964, + "step": 6570, + "torque_loss": 0.16440166532993317 + }, + { + "epoch": 5.908273381294964, + "force_loss": 0.016858940944075584, + "step": 6570 + }, + { + "epoch": 5.91726618705036, + "grad_norm": 0.5869100689888, + "learning_rate": 9.903037290419309e-05, + "loss": 0.0309, + "step": 6580 + }, + { + "action_loss": 0.014476188458502293, + "epoch": 5.91726618705036, + "step": 6580 + }, + { + "epoch": 5.91726618705036, + "step": 6580, + "torque_loss": 0.127194344997406 + }, + { + "epoch": 5.91726618705036, + "force_loss": 0.013970345258712769, + "step": 6580 + }, + { + "epoch": 5.926258992805756, + "grad_norm": 0.7035734057426453, + "learning_rate": 9.902496461386439e-05, + "loss": 0.0305, + "step": 6590 + }, + { + "action_loss": 0.010824356228113174, + "epoch": 5.926258992805756, + "step": 6590 + }, + { + "epoch": 5.926258992805756, + "step": 6590, + "torque_loss": 0.1059899553656578 + }, + { + "epoch": 5.926258992805756, + "force_loss": 0.01181915681809187, + "step": 6590 + }, + { + "epoch": 5.935251798561151, + "grad_norm": 1.3031914234161377, + "learning_rate": 9.901954143104588e-05, + "loss": 0.0314, + "step": 6600 + }, + { + "action_loss": 0.010905037634074688, + "epoch": 5.935251798561151, + "step": 6600 + }, + { + "epoch": 5.935251798561151, + "step": 6600, + "torque_loss": 0.07843795418739319 + }, + { + "epoch": 5.935251798561151, + "force_loss": 0.006029366049915552, + "step": 6600 + }, + { + "epoch": 5.944244604316546, + "grad_norm": 0.8794500231742859, + "learning_rate": 9.901410335738496e-05, + "loss": 0.025, + "step": 6610 + }, + { + "action_loss": 0.022479886189103127, + "epoch": 5.944244604316546, + "step": 6610 + }, + { + "epoch": 5.944244604316546, + "step": 6610, + "torque_loss": 0.14966750144958496 + }, + { + "epoch": 5.944244604316546, + "force_loss": 0.013774256221950054, + "step": 6610 + }, + { + "epoch": 5.953237410071942, + "grad_norm": 0.49869632720947266, + "learning_rate": 9.900865039453358e-05, + "loss": 0.0342, + "step": 6620 + }, + { + "action_loss": 0.00962988194078207, + "epoch": 5.953237410071942, + "step": 6620 + }, + { + "epoch": 5.953237410071942, + "step": 6620, + "torque_loss": 0.12475377321243286 + }, + { + "epoch": 5.953237410071942, + "force_loss": 0.008503995835781097, + "step": 6620 + }, + { + "epoch": 5.962230215827338, + "grad_norm": 1.0734554529190063, + "learning_rate": 9.900318254414821e-05, + "loss": 0.0315, + "step": 6630 + }, + { + "action_loss": 0.020362840965390205, + "epoch": 5.962230215827338, + "step": 6630 + }, + { + "epoch": 5.962230215827338, + "step": 6630, + "torque_loss": 0.22567391395568848 + }, + { + "epoch": 5.962230215827338, + "force_loss": 0.010840315371751785, + "step": 6630 + }, + { + "epoch": 5.971223021582734, + "grad_norm": 0.7541504502296448, + "learning_rate": 9.899769980788985e-05, + "loss": 0.0287, + "step": 6640 + }, + { + "action_loss": 0.009102792479097843, + "epoch": 5.971223021582734, + "step": 6640 + }, + { + "epoch": 5.971223021582734, + "step": 6640, + "torque_loss": 0.11625024676322937 + }, + { + "epoch": 5.971223021582734, + "force_loss": 0.0074315182864665985, + "step": 6640 + }, + { + "epoch": 5.98021582733813, + "grad_norm": 1.1257209777832031, + "learning_rate": 9.899220218742398e-05, + "loss": 0.0264, + "step": 6650 + }, + { + "action_loss": 0.008441298268735409, + "epoch": 5.98021582733813, + "step": 6650 + }, + { + "epoch": 5.98021582733813, + "step": 6650, + "torque_loss": 0.12980546057224274 + }, + { + "epoch": 5.98021582733813, + "force_loss": 0.008751760236918926, + "step": 6650 + }, + { + "epoch": 5.989208633093525, + "grad_norm": 1.6277531385421753, + "learning_rate": 9.898668968442066e-05, + "loss": 0.0373, + "step": 6660 + }, + { + "action_loss": 0.02496408484876156, + "epoch": 5.989208633093525, + "step": 6660 + }, + { + "epoch": 5.989208633093525, + "step": 6660, + "torque_loss": 0.16968567669391632 + }, + { + "epoch": 5.989208633093525, + "force_loss": 0.019812721759080887, + "step": 6660 + }, + { + "epoch": 5.998201438848921, + "grad_norm": 0.550177812576294, + "learning_rate": 9.898116230055443e-05, + "loss": 0.0327, + "step": 6670 + }, + { + "action_loss": 0.01624131202697754, + "epoch": 5.998201438848921, + "step": 6670 + }, + { + "epoch": 5.998201438848921, + "step": 6670, + "torque_loss": 0.1611861139535904 + }, + { + "epoch": 5.998201438848921, + "force_loss": 0.009481154382228851, + "step": 6670 + }, + { + "epoch": 6.0071942446043165, + "grad_norm": 0.3939690589904785, + "learning_rate": 9.897562003750437e-05, + "loss": 0.0286, + "step": 6680 + }, + { + "action_loss": 0.022057220339775085, + "epoch": 6.0071942446043165, + "step": 6680 + }, + { + "epoch": 6.0071942446043165, + "step": 6680, + "torque_loss": 0.18737749755382538 + }, + { + "epoch": 6.0071942446043165, + "force_loss": 0.02907741628587246, + "step": 6680 + }, + { + "epoch": 6.016187050359712, + "grad_norm": 0.7050179243087769, + "learning_rate": 9.897006289695407e-05, + "loss": 0.0336, + "step": 6690 + }, + { + "action_loss": 0.021385440602898598, + "epoch": 6.016187050359712, + "step": 6690 + }, + { + "epoch": 6.016187050359712, + "step": 6690, + "torque_loss": 0.1306765228509903 + }, + { + "epoch": 6.016187050359712, + "force_loss": 0.020024040713906288, + "step": 6690 + }, + { + "epoch": 6.025179856115108, + "grad_norm": 1.4871512651443481, + "learning_rate": 9.896449088059164e-05, + "loss": 0.0368, + "step": 6700 + }, + { + "action_loss": 0.01957033760845661, + "epoch": 6.025179856115108, + "step": 6700 + }, + { + "epoch": 6.025179856115108, + "step": 6700, + "torque_loss": 0.1466013640165329 + }, + { + "epoch": 6.025179856115108, + "force_loss": 0.017637547105550766, + "step": 6700 + }, + { + "epoch": 6.034172661870503, + "grad_norm": 0.9981018304824829, + "learning_rate": 9.89589039901097e-05, + "loss": 0.0329, + "step": 6710 + }, + { + "action_loss": 0.008246971294283867, + "epoch": 6.034172661870503, + "step": 6710 + }, + { + "epoch": 6.034172661870503, + "step": 6710, + "torque_loss": 0.16157709062099457 + }, + { + "epoch": 6.034172661870503, + "force_loss": 0.007431477308273315, + "step": 6710 + }, + { + "epoch": 6.043165467625899, + "grad_norm": 0.9391647577285767, + "learning_rate": 9.895330222720542e-05, + "loss": 0.0301, + "step": 6720 + }, + { + "action_loss": 0.013659723103046417, + "epoch": 6.043165467625899, + "step": 6720 + }, + { + "epoch": 6.043165467625899, + "step": 6720, + "torque_loss": 0.14471757411956787 + }, + { + "epoch": 6.043165467625899, + "force_loss": 0.012654148042201996, + "step": 6720 + }, + { + "epoch": 6.052158273381295, + "grad_norm": 0.7203466892242432, + "learning_rate": 9.894768559358047e-05, + "loss": 0.0442, + "step": 6730 + }, + { + "action_loss": 0.016545280814170837, + "epoch": 6.052158273381295, + "step": 6730 + }, + { + "epoch": 6.052158273381295, + "step": 6730, + "torque_loss": 0.18547111749649048 + }, + { + "epoch": 6.052158273381295, + "force_loss": 0.01861295849084854, + "step": 6730 + }, + { + "epoch": 6.061151079136691, + "grad_norm": 1.0598502159118652, + "learning_rate": 9.894205409094101e-05, + "loss": 0.0381, + "step": 6740 + }, + { + "action_loss": 0.03386823832988739, + "epoch": 6.061151079136691, + "step": 6740 + }, + { + "epoch": 6.061151079136691, + "step": 6740, + "torque_loss": 0.23352237045764923 + }, + { + "epoch": 6.061151079136691, + "force_loss": 0.025537485256791115, + "step": 6740 + }, + { + "epoch": 6.070143884892087, + "grad_norm": 0.6828194856643677, + "learning_rate": 9.893640772099777e-05, + "loss": 0.0313, + "step": 6750 + }, + { + "action_loss": 0.018237030133605003, + "epoch": 6.070143884892087, + "step": 6750 + }, + { + "epoch": 6.070143884892087, + "step": 6750, + "torque_loss": 0.13684232532978058 + }, + { + "epoch": 6.070143884892087, + "force_loss": 0.015216675586998463, + "step": 6750 + }, + { + "epoch": 6.079136690647482, + "grad_norm": 1.2732737064361572, + "learning_rate": 9.893074648546595e-05, + "loss": 0.0381, + "step": 6760 + }, + { + "action_loss": 0.010769792832434177, + "epoch": 6.079136690647482, + "step": 6760 + }, + { + "epoch": 6.079136690647482, + "step": 6760, + "torque_loss": 0.10087388008832932 + }, + { + "epoch": 6.079136690647482, + "force_loss": 0.009020409546792507, + "step": 6760 + }, + { + "epoch": 6.088129496402877, + "grad_norm": 0.6518918871879578, + "learning_rate": 9.892507038606528e-05, + "loss": 0.0377, + "step": 6770 + }, + { + "action_loss": 0.010382195934653282, + "epoch": 6.088129496402877, + "step": 6770 + }, + { + "epoch": 6.088129496402877, + "step": 6770, + "torque_loss": 0.1361764818429947 + }, + { + "epoch": 6.088129496402877, + "force_loss": 0.011860187165439129, + "step": 6770 + }, + { + "epoch": 6.097122302158273, + "grad_norm": 0.6612163186073303, + "learning_rate": 9.891937942452003e-05, + "loss": 0.0325, + "step": 6780 + }, + { + "action_loss": 0.02234676480293274, + "epoch": 6.097122302158273, + "step": 6780 + }, + { + "epoch": 6.097122302158273, + "step": 6780, + "torque_loss": 0.14713360369205475 + }, + { + "epoch": 6.097122302158273, + "force_loss": 0.013437267392873764, + "step": 6780 + }, + { + "epoch": 6.106115107913669, + "grad_norm": 0.785275936126709, + "learning_rate": 9.891367360255895e-05, + "loss": 0.0328, + "step": 6790 + }, + { + "action_loss": 0.022016048431396484, + "epoch": 6.106115107913669, + "step": 6790 + }, + { + "epoch": 6.106115107913669, + "step": 6790, + "torque_loss": 0.19155645370483398 + }, + { + "epoch": 6.106115107913669, + "force_loss": 0.026529178023338318, + "step": 6790 + }, + { + "epoch": 6.115107913669065, + "grad_norm": 1.501050591468811, + "learning_rate": 9.890795292191532e-05, + "loss": 0.0402, + "step": 6800 + }, + { + "action_loss": 0.02979736216366291, + "epoch": 6.115107913669065, + "step": 6800 + }, + { + "epoch": 6.115107913669065, + "step": 6800, + "torque_loss": 0.13777391612529755 + }, + { + "epoch": 6.115107913669065, + "force_loss": 0.021183064207434654, + "step": 6800 + }, + { + "epoch": 6.124100719424461, + "grad_norm": 0.622193455696106, + "learning_rate": 9.890221738432694e-05, + "loss": 0.0386, + "step": 6810 + }, + { + "action_loss": 0.00862081628292799, + "epoch": 6.124100719424461, + "step": 6810 + }, + { + "epoch": 6.124100719424461, + "step": 6810, + "torque_loss": 0.13776084780693054 + }, + { + "epoch": 6.124100719424461, + "force_loss": 0.013538769446313381, + "step": 6810 + }, + { + "epoch": 6.133093525179856, + "grad_norm": 0.969052791595459, + "learning_rate": 9.88964669915361e-05, + "loss": 0.0291, + "step": 6820 + }, + { + "action_loss": 0.009601211175322533, + "epoch": 6.133093525179856, + "step": 6820 + }, + { + "epoch": 6.133093525179856, + "step": 6820, + "torque_loss": 0.10650753229856491 + }, + { + "epoch": 6.133093525179856, + "force_loss": 0.011245968751609325, + "step": 6820 + }, + { + "epoch": 6.142086330935252, + "grad_norm": 0.682878851890564, + "learning_rate": 9.889070174528963e-05, + "loss": 0.0383, + "step": 6830 + }, + { + "action_loss": 0.010681535117328167, + "epoch": 6.142086330935252, + "step": 6830 + }, + { + "epoch": 6.142086330935252, + "step": 6830, + "torque_loss": 0.09342696517705917 + }, + { + "epoch": 6.142086330935252, + "force_loss": 0.010287155397236347, + "step": 6830 + }, + { + "epoch": 6.151079136690647, + "grad_norm": 0.9495276212692261, + "learning_rate": 9.888492164733883e-05, + "loss": 0.0307, + "step": 6840 + }, + { + "action_loss": 0.016709303483366966, + "epoch": 6.151079136690647, + "step": 6840 + }, + { + "epoch": 6.151079136690647, + "step": 6840, + "torque_loss": 0.12592746317386627 + }, + { + "epoch": 6.151079136690647, + "force_loss": 0.01575835421681404, + "step": 6840 + }, + { + "epoch": 6.160071942446043, + "grad_norm": 0.3741442859172821, + "learning_rate": 9.88791266994396e-05, + "loss": 0.0336, + "step": 6850 + }, + { + "action_loss": 0.010481658391654491, + "epoch": 6.160071942446043, + "step": 6850 + }, + { + "epoch": 6.160071942446043, + "step": 6850, + "torque_loss": 0.12324514240026474 + }, + { + "epoch": 6.160071942446043, + "force_loss": 0.008142626844346523, + "step": 6850 + }, + { + "epoch": 6.169064748201439, + "grad_norm": 0.5587148666381836, + "learning_rate": 9.887331690335223e-05, + "loss": 0.0297, + "step": 6860 + }, + { + "action_loss": 0.018462836742401123, + "epoch": 6.169064748201439, + "step": 6860 + }, + { + "epoch": 6.169064748201439, + "step": 6860, + "torque_loss": 0.1041853204369545 + }, + { + "epoch": 6.169064748201439, + "force_loss": 0.015111044049263, + "step": 6860 + }, + { + "epoch": 6.178057553956835, + "grad_norm": 0.927023708820343, + "learning_rate": 9.886749226084163e-05, + "loss": 0.0284, + "step": 6870 + }, + { + "action_loss": 0.014140564016997814, + "epoch": 6.178057553956835, + "step": 6870 + }, + { + "epoch": 6.178057553956835, + "step": 6870, + "torque_loss": 0.11716964840888977 + }, + { + "epoch": 6.178057553956835, + "force_loss": 0.01707938127219677, + "step": 6870 + }, + { + "epoch": 6.18705035971223, + "grad_norm": 1.0379085540771484, + "learning_rate": 9.886165277367714e-05, + "loss": 0.0374, + "step": 6880 + }, + { + "action_loss": 0.01943344622850418, + "epoch": 6.18705035971223, + "step": 6880 + }, + { + "epoch": 6.18705035971223, + "step": 6880, + "torque_loss": 0.1765347123146057 + }, + { + "epoch": 6.18705035971223, + "force_loss": 0.02587093412876129, + "step": 6880 + }, + { + "epoch": 6.196043165467626, + "grad_norm": 1.0784411430358887, + "learning_rate": 9.885579844363265e-05, + "loss": 0.0289, + "step": 6890 + }, + { + "action_loss": 0.014207050204277039, + "epoch": 6.196043165467626, + "step": 6890 + }, + { + "epoch": 6.196043165467626, + "step": 6890, + "torque_loss": 0.13490696251392365 + }, + { + "epoch": 6.196043165467626, + "force_loss": 0.015821872279047966, + "step": 6890 + }, + { + "epoch": 6.205035971223022, + "grad_norm": 1.1022746562957764, + "learning_rate": 9.884992927248656e-05, + "loss": 0.0299, + "step": 6900 + }, + { + "action_loss": 0.006934223230928183, + "epoch": 6.205035971223022, + "step": 6900 + }, + { + "epoch": 6.205035971223022, + "step": 6900, + "torque_loss": 0.1258738487958908 + }, + { + "epoch": 6.205035971223022, + "force_loss": 0.00647265138104558, + "step": 6900 + }, + { + "epoch": 6.2140287769784175, + "grad_norm": 0.9378265738487244, + "learning_rate": 9.884404526202178e-05, + "loss": 0.0356, + "step": 6910 + }, + { + "action_loss": 0.028104035183787346, + "epoch": 6.2140287769784175, + "step": 6910 + }, + { + "epoch": 6.2140287769784175, + "step": 6910, + "torque_loss": 0.15238653123378754 + }, + { + "epoch": 6.2140287769784175, + "force_loss": 0.033355068415403366, + "step": 6910 + }, + { + "epoch": 6.223021582733813, + "grad_norm": 1.419150948524475, + "learning_rate": 9.883814641402568e-05, + "loss": 0.0334, + "step": 6920 + }, + { + "action_loss": 0.024264290928840637, + "epoch": 6.223021582733813, + "step": 6920 + }, + { + "epoch": 6.223021582733813, + "step": 6920, + "torque_loss": 0.10440417379140854 + }, + { + "epoch": 6.223021582733813, + "force_loss": 0.019781582057476044, + "step": 6920 + }, + { + "epoch": 6.232014388489208, + "grad_norm": 1.142441987991333, + "learning_rate": 9.88322327302902e-05, + "loss": 0.0276, + "step": 6930 + }, + { + "action_loss": 0.014163498766720295, + "epoch": 6.232014388489208, + "step": 6930 + }, + { + "epoch": 6.232014388489208, + "step": 6930, + "torque_loss": 0.12220502644777298 + }, + { + "epoch": 6.232014388489208, + "force_loss": 0.0158188845962286, + "step": 6930 + }, + { + "epoch": 6.241007194244604, + "grad_norm": 0.591874361038208, + "learning_rate": 9.882630421261176e-05, + "loss": 0.0269, + "step": 6940 + }, + { + "action_loss": 0.008671222254633904, + "epoch": 6.241007194244604, + "step": 6940 + }, + { + "epoch": 6.241007194244604, + "step": 6940, + "torque_loss": 0.1492723971605301 + }, + { + "epoch": 6.241007194244604, + "force_loss": 0.007767186965793371, + "step": 6940 + }, + { + "epoch": 6.25, + "grad_norm": 0.8983883857727051, + "learning_rate": 9.88203608627913e-05, + "loss": 0.031, + "step": 6950 + }, + { + "action_loss": 0.026276089251041412, + "epoch": 6.25, + "step": 6950 + }, + { + "epoch": 6.25, + "step": 6950, + "torque_loss": 0.15878672897815704 + }, + { + "epoch": 6.25, + "force_loss": 0.022327935323119164, + "step": 6950 + }, + { + "epoch": 6.258992805755396, + "grad_norm": 1.1845142841339111, + "learning_rate": 9.881440268263422e-05, + "loss": 0.0329, + "step": 6960 + }, + { + "action_loss": 0.012182827107608318, + "epoch": 6.258992805755396, + "step": 6960 + }, + { + "epoch": 6.258992805755396, + "step": 6960, + "torque_loss": 0.11265624314546585 + }, + { + "epoch": 6.258992805755396, + "force_loss": 0.017472369596362114, + "step": 6960 + }, + { + "epoch": 6.267985611510792, + "grad_norm": 0.7261620759963989, + "learning_rate": 9.880842967395048e-05, + "loss": 0.0314, + "step": 6970 + }, + { + "action_loss": 0.016557425260543823, + "epoch": 6.267985611510792, + "step": 6970 + }, + { + "epoch": 6.267985611510792, + "step": 6970, + "torque_loss": 0.1438518911600113 + }, + { + "epoch": 6.267985611510792, + "force_loss": 0.010495592840015888, + "step": 6970 + }, + { + "epoch": 6.276978417266187, + "grad_norm": 1.1216435432434082, + "learning_rate": 9.880244183855452e-05, + "loss": 0.0338, + "step": 6980 + }, + { + "action_loss": 0.009683732874691486, + "epoch": 6.276978417266187, + "step": 6980 + }, + { + "epoch": 6.276978417266187, + "step": 6980, + "torque_loss": 0.14600324630737305 + }, + { + "epoch": 6.276978417266187, + "force_loss": 0.009540971368551254, + "step": 6980 + }, + { + "epoch": 6.2859712230215825, + "grad_norm": 0.6765707731246948, + "learning_rate": 9.879643917826527e-05, + "loss": 0.0425, + "step": 6990 + }, + { + "action_loss": 0.023353347554802895, + "epoch": 6.2859712230215825, + "step": 6990 + }, + { + "epoch": 6.2859712230215825, + "step": 6990, + "torque_loss": 0.14869339764118195 + }, + { + "epoch": 6.2859712230215825, + "force_loss": 0.01961287297308445, + "step": 6990 + }, + { + "epoch": 6.294964028776978, + "grad_norm": 1.4713687896728516, + "learning_rate": 9.87904216949062e-05, + "loss": 0.032, + "step": 7000 + }, + { + "action_loss": 0.011836816556751728, + "epoch": 6.294964028776978, + "step": 7000 + }, + { + "epoch": 6.294964028776978, + "step": 7000, + "torque_loss": 0.14368630945682526 + }, + { + "epoch": 6.294964028776978, + "force_loss": 0.008611331693828106, + "step": 7000 + }, + { + "epoch": 6.303956834532374, + "grad_norm": 1.2804861068725586, + "learning_rate": 9.878438939030526e-05, + "loss": 0.0347, + "step": 7010 + }, + { + "action_loss": 0.015133117325603962, + "epoch": 6.303956834532374, + "step": 7010 + }, + { + "epoch": 6.303956834532374, + "step": 7010, + "torque_loss": 0.27548733353614807 + }, + { + "epoch": 6.303956834532374, + "force_loss": 0.01170522253960371, + "step": 7010 + }, + { + "epoch": 6.31294964028777, + "grad_norm": 0.39458712935447693, + "learning_rate": 9.877834226629489e-05, + "loss": 0.0314, + "step": 7020 + }, + { + "action_loss": 0.006892053876072168, + "epoch": 6.31294964028777, + "step": 7020 + }, + { + "epoch": 6.31294964028777, + "step": 7020, + "torque_loss": 0.14708781242370605 + }, + { + "epoch": 6.31294964028777, + "force_loss": 0.0062276930548250675, + "step": 7020 + }, + { + "epoch": 6.321942446043165, + "grad_norm": 1.543850064277649, + "learning_rate": 9.877228032471206e-05, + "loss": 0.0303, + "step": 7030 + }, + { + "action_loss": 0.009968861006200314, + "epoch": 6.321942446043165, + "step": 7030 + }, + { + "epoch": 6.321942446043165, + "step": 7030, + "torque_loss": 0.1613566130399704 + }, + { + "epoch": 6.321942446043165, + "force_loss": 0.009990171529352665, + "step": 7030 + }, + { + "epoch": 6.330935251798561, + "grad_norm": 0.5653095841407776, + "learning_rate": 9.876620356739823e-05, + "loss": 0.0294, + "step": 7040 + }, + { + "action_loss": 0.013471047393977642, + "epoch": 6.330935251798561, + "step": 7040 + }, + { + "epoch": 6.330935251798561, + "step": 7040, + "torque_loss": 0.15058602392673492 + }, + { + "epoch": 6.330935251798561, + "force_loss": 0.013712618499994278, + "step": 7040 + }, + { + "epoch": 6.339928057553957, + "grad_norm": 0.4438684284687042, + "learning_rate": 9.876011199619935e-05, + "loss": 0.0289, + "step": 7050 + }, + { + "action_loss": 0.0074244216084480286, + "epoch": 6.339928057553957, + "step": 7050 + }, + { + "epoch": 6.339928057553957, + "step": 7050, + "torque_loss": 0.14606742560863495 + }, + { + "epoch": 6.339928057553957, + "force_loss": 0.008681533858180046, + "step": 7050 + }, + { + "epoch": 6.348920863309353, + "grad_norm": 0.8378824591636658, + "learning_rate": 9.875400561296589e-05, + "loss": 0.032, + "step": 7060 + }, + { + "action_loss": 0.021747564896941185, + "epoch": 6.348920863309353, + "step": 7060 + }, + { + "epoch": 6.348920863309353, + "step": 7060, + "torque_loss": 0.11303558200597763 + }, + { + "epoch": 6.348920863309353, + "force_loss": 0.010669874958693981, + "step": 7060 + }, + { + "epoch": 6.357913669064748, + "grad_norm": 1.1590083837509155, + "learning_rate": 9.874788441955278e-05, + "loss": 0.0364, + "step": 7070 + }, + { + "action_loss": 0.008278983645141125, + "epoch": 6.357913669064748, + "step": 7070 + }, + { + "epoch": 6.357913669064748, + "step": 7070, + "torque_loss": 0.09738612174987793 + }, + { + "epoch": 6.357913669064748, + "force_loss": 0.009098508395254612, + "step": 7070 + }, + { + "epoch": 6.366906474820144, + "grad_norm": 1.0467586517333984, + "learning_rate": 9.874174841781951e-05, + "loss": 0.038, + "step": 7080 + }, + { + "action_loss": 0.013057406060397625, + "epoch": 6.366906474820144, + "step": 7080 + }, + { + "epoch": 6.366906474820144, + "step": 7080, + "torque_loss": 0.1664576530456543 + }, + { + "epoch": 6.366906474820144, + "force_loss": 0.017414381727576256, + "step": 7080 + }, + { + "epoch": 6.375899280575539, + "grad_norm": 0.56081622838974, + "learning_rate": 9.873559760963003e-05, + "loss": 0.0313, + "step": 7090 + }, + { + "action_loss": 0.020285315811634064, + "epoch": 6.375899280575539, + "step": 7090 + }, + { + "epoch": 6.375899280575539, + "step": 7090, + "torque_loss": 0.1311166137456894 + }, + { + "epoch": 6.375899280575539, + "force_loss": 0.025589032098650932, + "step": 7090 + }, + { + "epoch": 6.384892086330935, + "grad_norm": 1.2313724756240845, + "learning_rate": 9.872943199685278e-05, + "loss": 0.0348, + "step": 7100 + }, + { + "action_loss": 0.01696467585861683, + "epoch": 6.384892086330935, + "step": 7100 + }, + { + "epoch": 6.384892086330935, + "step": 7100, + "torque_loss": 0.1781919002532959 + }, + { + "epoch": 6.384892086330935, + "force_loss": 0.011121484450995922, + "step": 7100 + }, + { + "epoch": 6.393884892086331, + "grad_norm": 0.9991695284843445, + "learning_rate": 9.872325158136071e-05, + "loss": 0.0334, + "step": 7110 + }, + { + "action_loss": 0.021009324118494987, + "epoch": 6.393884892086331, + "step": 7110 + }, + { + "epoch": 6.393884892086331, + "step": 7110, + "torque_loss": 0.17891235649585724 + }, + { + "epoch": 6.393884892086331, + "force_loss": 0.02270572818815708, + "step": 7110 + }, + { + "epoch": 6.402877697841727, + "grad_norm": 0.693954348564148, + "learning_rate": 9.871705636503128e-05, + "loss": 0.0334, + "step": 7120 + }, + { + "action_loss": 0.020892560482025146, + "epoch": 6.402877697841727, + "step": 7120 + }, + { + "epoch": 6.402877697841727, + "step": 7120, + "torque_loss": 0.13139615952968597 + }, + { + "epoch": 6.402877697841727, + "force_loss": 0.02533758245408535, + "step": 7120 + }, + { + "epoch": 6.411870503597123, + "grad_norm": 0.8198838233947754, + "learning_rate": 9.871084634974641e-05, + "loss": 0.0312, + "step": 7130 + }, + { + "action_loss": 0.030722124502062798, + "epoch": 6.411870503597123, + "step": 7130 + }, + { + "epoch": 6.411870503597123, + "step": 7130, + "torque_loss": 0.19522593915462494 + }, + { + "epoch": 6.411870503597123, + "force_loss": 0.019080134108662605, + "step": 7130 + }, + { + "epoch": 6.420863309352518, + "grad_norm": 0.9934543371200562, + "learning_rate": 9.870462153739257e-05, + "loss": 0.0381, + "step": 7140 + }, + { + "action_loss": 0.012233416549861431, + "epoch": 6.420863309352518, + "step": 7140 + }, + { + "epoch": 6.420863309352518, + "step": 7140, + "torque_loss": 0.13385216891765594 + }, + { + "epoch": 6.420863309352518, + "force_loss": 0.012103955261409283, + "step": 7140 + }, + { + "epoch": 6.429856115107913, + "grad_norm": 1.9219950437545776, + "learning_rate": 9.869838192986067e-05, + "loss": 0.0317, + "step": 7150 + }, + { + "action_loss": 0.010699831880629063, + "epoch": 6.429856115107913, + "step": 7150 + }, + { + "epoch": 6.429856115107913, + "step": 7150, + "torque_loss": 0.1361990123987198 + }, + { + "epoch": 6.429856115107913, + "force_loss": 0.011185181327164173, + "step": 7150 + }, + { + "epoch": 6.438848920863309, + "grad_norm": 0.7379173636436462, + "learning_rate": 9.869212752904616e-05, + "loss": 0.0404, + "step": 7160 + }, + { + "action_loss": 0.012237799353897572, + "epoch": 6.438848920863309, + "step": 7160 + }, + { + "epoch": 6.438848920863309, + "step": 7160, + "torque_loss": 0.16572776436805725 + }, + { + "epoch": 6.438848920863309, + "force_loss": 0.008981684222817421, + "step": 7160 + }, + { + "epoch": 6.447841726618705, + "grad_norm": 0.5905617475509644, + "learning_rate": 9.868585833684894e-05, + "loss": 0.0298, + "step": 7170 + }, + { + "action_loss": 0.01441280823200941, + "epoch": 6.447841726618705, + "step": 7170 + }, + { + "epoch": 6.447841726618705, + "step": 7170, + "torque_loss": 0.13176214694976807 + }, + { + "epoch": 6.447841726618705, + "force_loss": 0.014526325277984142, + "step": 7170 + }, + { + "epoch": 6.456834532374101, + "grad_norm": 0.7369416952133179, + "learning_rate": 9.867957435517342e-05, + "loss": 0.0331, + "step": 7180 + }, + { + "action_loss": 0.021134106442332268, + "epoch": 6.456834532374101, + "step": 7180 + }, + { + "epoch": 6.456834532374101, + "step": 7180, + "torque_loss": 0.18894974887371063 + }, + { + "epoch": 6.456834532374101, + "force_loss": 0.016715548932552338, + "step": 7180 + }, + { + "epoch": 6.465827338129497, + "grad_norm": 0.9571877717971802, + "learning_rate": 9.867327558592854e-05, + "loss": 0.0317, + "step": 7190 + }, + { + "action_loss": 0.012671258300542831, + "epoch": 6.465827338129497, + "step": 7190 + }, + { + "epoch": 6.465827338129497, + "step": 7190, + "torque_loss": 0.17883343994617462 + }, + { + "epoch": 6.465827338129497, + "force_loss": 0.00944325141608715, + "step": 7190 + }, + { + "epoch": 6.474820143884892, + "grad_norm": 1.0005165338516235, + "learning_rate": 9.866696203102766e-05, + "loss": 0.0391, + "step": 7200 + }, + { + "action_loss": 0.013150811195373535, + "epoch": 6.474820143884892, + "step": 7200 + }, + { + "epoch": 6.474820143884892, + "step": 7200, + "torque_loss": 0.11808394640684128 + }, + { + "epoch": 6.474820143884892, + "force_loss": 0.024321196600794792, + "step": 7200 + }, + { + "epoch": 6.483812949640288, + "grad_norm": 0.5357975363731384, + "learning_rate": 9.86606336923887e-05, + "loss": 0.0298, + "step": 7210 + }, + { + "action_loss": 0.00911055225878954, + "epoch": 6.483812949640288, + "step": 7210 + }, + { + "epoch": 6.483812949640288, + "step": 7210, + "torque_loss": 0.12453941255807877 + }, + { + "epoch": 6.483812949640288, + "force_loss": 0.006998790893703699, + "step": 7210 + }, + { + "epoch": 6.4928057553956835, + "grad_norm": 1.3047868013381958, + "learning_rate": 9.865429057193403e-05, + "loss": 0.0322, + "step": 7220 + }, + { + "action_loss": 0.021523533388972282, + "epoch": 6.4928057553956835, + "step": 7220 + }, + { + "epoch": 6.4928057553956835, + "step": 7220, + "torque_loss": 0.1434011161327362 + }, + { + "epoch": 6.4928057553956835, + "force_loss": 0.01496762316673994, + "step": 7220 + }, + { + "epoch": 6.501798561151079, + "grad_norm": 0.6869087815284729, + "learning_rate": 9.864793267159053e-05, + "loss": 0.0298, + "step": 7230 + }, + { + "action_loss": 0.012286696583032608, + "epoch": 6.501798561151079, + "step": 7230 + }, + { + "epoch": 6.501798561151079, + "step": 7230, + "torque_loss": 0.1482219696044922 + }, + { + "epoch": 6.501798561151079, + "force_loss": 0.009055552072823048, + "step": 7230 + }, + { + "epoch": 6.510791366906475, + "grad_norm": 0.9738610982894897, + "learning_rate": 9.864155999328957e-05, + "loss": 0.0307, + "step": 7240 + }, + { + "action_loss": 0.014220502227544785, + "epoch": 6.510791366906475, + "step": 7240 + }, + { + "epoch": 6.510791366906475, + "step": 7240, + "torque_loss": 0.12077126652002335 + }, + { + "epoch": 6.510791366906475, + "force_loss": 0.013996350578963757, + "step": 7240 + }, + { + "epoch": 6.51978417266187, + "grad_norm": 1.5982275009155273, + "learning_rate": 9.8635172538967e-05, + "loss": 0.036, + "step": 7250 + }, + { + "action_loss": 0.01579565368592739, + "epoch": 6.51978417266187, + "step": 7250 + }, + { + "epoch": 6.51978417266187, + "step": 7250, + "torque_loss": 0.12311286479234695 + }, + { + "epoch": 6.51978417266187, + "force_loss": 0.025801658630371094, + "step": 7250 + }, + { + "epoch": 6.528776978417266, + "grad_norm": 0.9958117008209229, + "learning_rate": 9.862877031056312e-05, + "loss": 0.0285, + "step": 7260 + }, + { + "action_loss": 0.009172475896775723, + "epoch": 6.528776978417266, + "step": 7260 + }, + { + "epoch": 6.528776978417266, + "step": 7260, + "torque_loss": 0.11217430979013443 + }, + { + "epoch": 6.528776978417266, + "force_loss": 0.007703617215156555, + "step": 7260 + }, + { + "epoch": 6.537769784172662, + "grad_norm": 0.6896548271179199, + "learning_rate": 9.862235331002279e-05, + "loss": 0.038, + "step": 7270 + }, + { + "action_loss": 0.004068915732204914, + "epoch": 6.537769784172662, + "step": 7270 + }, + { + "epoch": 6.537769784172662, + "step": 7270, + "torque_loss": 0.13171564042568207 + }, + { + "epoch": 6.537769784172662, + "force_loss": 0.008077028207480907, + "step": 7270 + }, + { + "epoch": 6.546762589928058, + "grad_norm": 0.6242184042930603, + "learning_rate": 9.861592153929533e-05, + "loss": 0.0284, + "step": 7280 + }, + { + "action_loss": 0.008289997465908527, + "epoch": 6.546762589928058, + "step": 7280 + }, + { + "epoch": 6.546762589928058, + "step": 7280, + "torque_loss": 0.11463453620672226 + }, + { + "epoch": 6.546762589928058, + "force_loss": 0.006958022713661194, + "step": 7280 + }, + { + "epoch": 6.555755395683454, + "grad_norm": 1.4783086776733398, + "learning_rate": 9.860947500033455e-05, + "loss": 0.0281, + "step": 7290 + }, + { + "action_loss": 0.05710189417004585, + "epoch": 6.555755395683454, + "step": 7290 + }, + { + "epoch": 6.555755395683454, + "step": 7290, + "torque_loss": 0.19084270298480988 + }, + { + "epoch": 6.555755395683454, + "force_loss": 0.05748428776860237, + "step": 7290 + }, + { + "epoch": 6.564748201438849, + "grad_norm": 0.4605726897716522, + "learning_rate": 9.86030136950987e-05, + "loss": 0.0379, + "step": 7300 + }, + { + "action_loss": 0.008308137767016888, + "epoch": 6.564748201438849, + "step": 7300 + }, + { + "epoch": 6.564748201438849, + "step": 7300, + "torque_loss": 0.09644180536270142 + }, + { + "epoch": 6.564748201438849, + "force_loss": 0.007345800753682852, + "step": 7300 + }, + { + "epoch": 6.573741007194244, + "grad_norm": 0.7289091944694519, + "learning_rate": 9.85965376255506e-05, + "loss": 0.0291, + "step": 7310 + }, + { + "action_loss": 0.010317725129425526, + "epoch": 6.573741007194244, + "step": 7310 + }, + { + "epoch": 6.573741007194244, + "step": 7310, + "torque_loss": 0.09413930028676987 + }, + { + "epoch": 6.573741007194244, + "force_loss": 0.009280539117753506, + "step": 7310 + }, + { + "epoch": 6.58273381294964, + "grad_norm": 1.0729559659957886, + "learning_rate": 9.859004679365747e-05, + "loss": 0.0273, + "step": 7320 + }, + { + "action_loss": 0.009764806367456913, + "epoch": 6.58273381294964, + "step": 7320 + }, + { + "epoch": 6.58273381294964, + "step": 7320, + "torque_loss": 0.10927480459213257 + }, + { + "epoch": 6.58273381294964, + "force_loss": 0.009876654483377934, + "step": 7320 + }, + { + "epoch": 6.591726618705036, + "grad_norm": 0.67609041929245, + "learning_rate": 9.858354120139108e-05, + "loss": 0.0291, + "step": 7330 + }, + { + "action_loss": 0.026859885081648827, + "epoch": 6.591726618705036, + "step": 7330 + }, + { + "epoch": 6.591726618705036, + "step": 7330, + "torque_loss": 0.22407126426696777 + }, + { + "epoch": 6.591726618705036, + "force_loss": 0.0318852998316288, + "step": 7330 + }, + { + "epoch": 6.600719424460432, + "grad_norm": 0.969810426235199, + "learning_rate": 9.857702085072764e-05, + "loss": 0.0318, + "step": 7340 + }, + { + "action_loss": 0.011868906207382679, + "epoch": 6.600719424460432, + "step": 7340 + }, + { + "epoch": 6.600719424460432, + "step": 7340, + "torque_loss": 0.11096364259719849 + }, + { + "epoch": 6.600719424460432, + "force_loss": 0.008112683892250061, + "step": 7340 + }, + { + "epoch": 6.609712230215827, + "grad_norm": 0.7616317272186279, + "learning_rate": 9.857048574364787e-05, + "loss": 0.034, + "step": 7350 + }, + { + "action_loss": 0.012131169438362122, + "epoch": 6.609712230215827, + "step": 7350 + }, + { + "epoch": 6.609712230215827, + "step": 7350, + "torque_loss": 0.14511050283908844 + }, + { + "epoch": 6.609712230215827, + "force_loss": 0.010916545987129211, + "step": 7350 + }, + { + "epoch": 6.618705035971223, + "grad_norm": 0.965458333492279, + "learning_rate": 9.856393588213698e-05, + "loss": 0.0345, + "step": 7360 + }, + { + "action_loss": 0.01754717156291008, + "epoch": 6.618705035971223, + "step": 7360 + }, + { + "epoch": 6.618705035971223, + "step": 7360, + "torque_loss": 0.13961432874202728 + }, + { + "epoch": 6.618705035971223, + "force_loss": 0.025284044444561005, + "step": 7360 + }, + { + "epoch": 6.627697841726619, + "grad_norm": 0.7501308917999268, + "learning_rate": 9.855737126818458e-05, + "loss": 0.0343, + "step": 7370 + }, + { + "action_loss": 0.008759301155805588, + "epoch": 6.627697841726619, + "step": 7370 + }, + { + "epoch": 6.627697841726619, + "step": 7370, + "torque_loss": 0.11119731515645981 + }, + { + "epoch": 6.627697841726619, + "force_loss": 0.008812778629362583, + "step": 7370 + }, + { + "epoch": 6.636690647482014, + "grad_norm": 0.5349105596542358, + "learning_rate": 9.855079190378491e-05, + "loss": 0.0356, + "step": 7380 + }, + { + "action_loss": 0.01553251687437296, + "epoch": 6.636690647482014, + "step": 7380 + }, + { + "epoch": 6.636690647482014, + "step": 7380, + "torque_loss": 0.18644464015960693 + }, + { + "epoch": 6.636690647482014, + "force_loss": 0.011027391068637371, + "step": 7380 + }, + { + "epoch": 6.64568345323741, + "grad_norm": 0.7108652591705322, + "learning_rate": 9.854419779093655e-05, + "loss": 0.0304, + "step": 7390 + }, + { + "action_loss": 0.015734700486063957, + "epoch": 6.64568345323741, + "step": 7390 + }, + { + "epoch": 6.64568345323741, + "step": 7390, + "torque_loss": 0.1646876484155655 + }, + { + "epoch": 6.64568345323741, + "force_loss": 0.009140515699982643, + "step": 7390 + }, + { + "epoch": 6.654676258992806, + "grad_norm": 0.7373231649398804, + "learning_rate": 9.853758893164264e-05, + "loss": 0.0306, + "step": 7400 + }, + { + "action_loss": 0.01355788391083479, + "epoch": 6.654676258992806, + "step": 7400 + }, + { + "epoch": 6.654676258992806, + "step": 7400, + "torque_loss": 0.136037677526474 + }, + { + "epoch": 6.654676258992806, + "force_loss": 0.008646302856504917, + "step": 7400 + }, + { + "epoch": 6.663669064748201, + "grad_norm": 0.5388456583023071, + "learning_rate": 9.853096532791078e-05, + "loss": 0.0296, + "step": 7410 + }, + { + "action_loss": 0.017288384959101677, + "epoch": 6.663669064748201, + "step": 7410 + }, + { + "epoch": 6.663669064748201, + "step": 7410, + "torque_loss": 0.13616986572742462 + }, + { + "epoch": 6.663669064748201, + "force_loss": 0.012311995029449463, + "step": 7410 + }, + { + "epoch": 6.672661870503597, + "grad_norm": 0.5630616545677185, + "learning_rate": 9.852432698175304e-05, + "loss": 0.0414, + "step": 7420 + }, + { + "action_loss": 0.00943202804774046, + "epoch": 6.672661870503597, + "step": 7420 + }, + { + "epoch": 6.672661870503597, + "step": 7420, + "torque_loss": 0.11193060874938965 + }, + { + "epoch": 6.672661870503597, + "force_loss": 0.013097594492137432, + "step": 7420 + }, + { + "epoch": 6.681654676258993, + "grad_norm": 1.0656129121780396, + "learning_rate": 9.851767389518597e-05, + "loss": 0.0397, + "step": 7430 + }, + { + "action_loss": 0.014476067386567593, + "epoch": 6.681654676258993, + "step": 7430 + }, + { + "epoch": 6.681654676258993, + "step": 7430, + "torque_loss": 0.15549862384796143 + }, + { + "epoch": 6.681654676258993, + "force_loss": 0.009197737090289593, + "step": 7430 + }, + { + "epoch": 6.690647482014389, + "grad_norm": 1.3346757888793945, + "learning_rate": 9.85110060702306e-05, + "loss": 0.0308, + "step": 7440 + }, + { + "action_loss": 0.012891768477857113, + "epoch": 6.690647482014389, + "step": 7440 + }, + { + "epoch": 6.690647482014389, + "step": 7440, + "torque_loss": 0.15279309451580048 + }, + { + "epoch": 6.690647482014389, + "force_loss": 0.0094189727678895, + "step": 7440 + }, + { + "epoch": 6.6996402877697845, + "grad_norm": 0.5730975866317749, + "learning_rate": 9.850432350891245e-05, + "loss": 0.0311, + "step": 7450 + }, + { + "action_loss": 0.017355715855956078, + "epoch": 6.6996402877697845, + "step": 7450 + }, + { + "epoch": 6.6996402877697845, + "step": 7450, + "torque_loss": 0.14554846286773682 + }, + { + "epoch": 6.6996402877697845, + "force_loss": 0.011299587786197662, + "step": 7450 + }, + { + "epoch": 6.7086330935251794, + "grad_norm": 1.064045786857605, + "learning_rate": 9.84976262132615e-05, + "loss": 0.0308, + "step": 7460 + }, + { + "action_loss": 0.0111715542152524, + "epoch": 6.7086330935251794, + "step": 7460 + }, + { + "epoch": 6.7086330935251794, + "step": 7460, + "torque_loss": 0.15227502584457397 + }, + { + "epoch": 6.7086330935251794, + "force_loss": 0.00802071113139391, + "step": 7460 + }, + { + "epoch": 6.717625899280575, + "grad_norm": 1.085392713546753, + "learning_rate": 9.849091418531222e-05, + "loss": 0.0344, + "step": 7470 + }, + { + "action_loss": 0.014177029021084309, + "epoch": 6.717625899280575, + "step": 7470 + }, + { + "epoch": 6.717625899280575, + "step": 7470, + "torque_loss": 0.1624159812927246 + }, + { + "epoch": 6.717625899280575, + "force_loss": 0.012642182409763336, + "step": 7470 + }, + { + "epoch": 6.726618705035971, + "grad_norm": 0.8970483541488647, + "learning_rate": 9.848418742710353e-05, + "loss": 0.0391, + "step": 7480 + }, + { + "action_loss": 0.007151718717068434, + "epoch": 6.726618705035971, + "step": 7480 + }, + { + "epoch": 6.726618705035971, + "step": 7480, + "torque_loss": 0.1363411396741867 + }, + { + "epoch": 6.726618705035971, + "force_loss": 0.004806325305253267, + "step": 7480 + }, + { + "epoch": 6.735611510791367, + "grad_norm": 1.2199770212173462, + "learning_rate": 9.847744594067885e-05, + "loss": 0.0341, + "step": 7490 + }, + { + "action_loss": 0.010929140262305737, + "epoch": 6.735611510791367, + "step": 7490 + }, + { + "epoch": 6.735611510791367, + "step": 7490, + "torque_loss": 0.11901833862066269 + }, + { + "epoch": 6.735611510791367, + "force_loss": 0.00839619804173708, + "step": 7490 + }, + { + "epoch": 6.744604316546763, + "grad_norm": 0.8313911557197571, + "learning_rate": 9.847068972808607e-05, + "loss": 0.033, + "step": 7500 + }, + { + "action_loss": 0.006041519343852997, + "epoch": 6.744604316546763, + "step": 7500 + }, + { + "epoch": 6.744604316546763, + "step": 7500, + "torque_loss": 0.11097083240747452 + }, + { + "epoch": 6.744604316546763, + "force_loss": 0.006382436957210302, + "step": 7500 + }, + { + "epoch": 6.753597122302159, + "grad_norm": 0.8322669267654419, + "learning_rate": 9.846391879137756e-05, + "loss": 0.0287, + "step": 7510 + }, + { + "action_loss": 0.011735928244888783, + "epoch": 6.753597122302159, + "step": 7510 + }, + { + "epoch": 6.753597122302159, + "step": 7510, + "torque_loss": 0.13664768636226654 + }, + { + "epoch": 6.753597122302159, + "force_loss": 0.014030583202838898, + "step": 7510 + }, + { + "epoch": 6.762589928057554, + "grad_norm": 0.850685715675354, + "learning_rate": 9.845713313261012e-05, + "loss": 0.0372, + "step": 7520 + }, + { + "action_loss": 0.005964975338429213, + "epoch": 6.762589928057554, + "step": 7520 + }, + { + "epoch": 6.762589928057554, + "step": 7520, + "torque_loss": 0.09368737787008286 + }, + { + "epoch": 6.762589928057554, + "force_loss": 0.007000280078500509, + "step": 7520 + }, + { + "epoch": 6.7715827338129495, + "grad_norm": 0.7753496170043945, + "learning_rate": 9.845033275384505e-05, + "loss": 0.0394, + "step": 7530 + }, + { + "action_loss": 0.00790408719331026, + "epoch": 6.7715827338129495, + "step": 7530 + }, + { + "epoch": 6.7715827338129495, + "step": 7530, + "torque_loss": 0.10145465284585953 + }, + { + "epoch": 6.7715827338129495, + "force_loss": 0.012381188571453094, + "step": 7530 + }, + { + "epoch": 6.780575539568345, + "grad_norm": 0.6639843583106995, + "learning_rate": 9.844351765714818e-05, + "loss": 0.0322, + "step": 7540 + }, + { + "action_loss": 0.02301245927810669, + "epoch": 6.780575539568345, + "step": 7540 + }, + { + "epoch": 6.780575539568345, + "step": 7540, + "torque_loss": 0.19160182774066925 + }, + { + "epoch": 6.780575539568345, + "force_loss": 0.015690037980675697, + "step": 7540 + }, + { + "epoch": 6.789568345323741, + "grad_norm": 0.9610219597816467, + "learning_rate": 9.843668784458971e-05, + "loss": 0.0351, + "step": 7550 + }, + { + "action_loss": 0.01086804736405611, + "epoch": 6.789568345323741, + "step": 7550 + }, + { + "epoch": 6.789568345323741, + "step": 7550, + "torque_loss": 0.13474760949611664 + }, + { + "epoch": 6.789568345323741, + "force_loss": 0.012556198053061962, + "step": 7550 + }, + { + "epoch": 6.798561151079137, + "grad_norm": 0.4313907027244568, + "learning_rate": 9.842984331824437e-05, + "loss": 0.0287, + "step": 7560 + }, + { + "action_loss": 0.017279664054512978, + "epoch": 6.798561151079137, + "step": 7560 + }, + { + "epoch": 6.798561151079137, + "step": 7560, + "torque_loss": 0.17883305251598358 + }, + { + "epoch": 6.798561151079137, + "force_loss": 0.01957053877413273, + "step": 7560 + }, + { + "epoch": 6.807553956834532, + "grad_norm": 0.7270711064338684, + "learning_rate": 9.842298408019133e-05, + "loss": 0.0241, + "step": 7570 + }, + { + "action_loss": 0.00688102887943387, + "epoch": 6.807553956834532, + "step": 7570 + }, + { + "epoch": 6.807553956834532, + "step": 7570, + "torque_loss": 0.15468500554561615 + }, + { + "epoch": 6.807553956834532, + "force_loss": 0.00769207626581192, + "step": 7570 + }, + { + "epoch": 6.816546762589928, + "grad_norm": 0.6725876331329346, + "learning_rate": 9.841611013251429e-05, + "loss": 0.0327, + "step": 7580 + }, + { + "action_loss": 0.013837548904120922, + "epoch": 6.816546762589928, + "step": 7580 + }, + { + "epoch": 6.816546762589928, + "step": 7580, + "torque_loss": 0.15249620378017426 + }, + { + "epoch": 6.816546762589928, + "force_loss": 0.010722585953772068, + "step": 7580 + }, + { + "epoch": 6.825539568345324, + "grad_norm": 1.0520435571670532, + "learning_rate": 9.840922147730133e-05, + "loss": 0.034, + "step": 7590 + }, + { + "action_loss": 0.008364955894649029, + "epoch": 6.825539568345324, + "step": 7590 + }, + { + "epoch": 6.825539568345324, + "step": 7590, + "torque_loss": 0.16778381168842316 + }, + { + "epoch": 6.825539568345324, + "force_loss": 0.00917450338602066, + "step": 7590 + }, + { + "epoch": 6.83453237410072, + "grad_norm": 1.3950741291046143, + "learning_rate": 9.840231811664506e-05, + "loss": 0.0352, + "step": 7600 + }, + { + "action_loss": 0.02204935997724533, + "epoch": 6.83453237410072, + "step": 7600 + }, + { + "epoch": 6.83453237410072, + "step": 7600, + "torque_loss": 0.14785687625408173 + }, + { + "epoch": 6.83453237410072, + "force_loss": 0.02144394814968109, + "step": 7600 + }, + { + "epoch": 6.843525179856115, + "grad_norm": 1.2162723541259766, + "learning_rate": 9.839540005264252e-05, + "loss": 0.0319, + "step": 7610 + }, + { + "action_loss": 0.015045590698719025, + "epoch": 6.843525179856115, + "step": 7610 + }, + { + "epoch": 6.843525179856115, + "step": 7610, + "torque_loss": 0.13497065007686615 + }, + { + "epoch": 6.843525179856115, + "force_loss": 0.010974928736686707, + "step": 7610 + }, + { + "epoch": 6.852517985611511, + "grad_norm": 0.5862897038459778, + "learning_rate": 9.838846728739527e-05, + "loss": 0.0255, + "step": 7620 + }, + { + "action_loss": 0.015867173671722412, + "epoch": 6.852517985611511, + "step": 7620 + }, + { + "epoch": 6.852517985611511, + "step": 7620, + "torque_loss": 0.15003100037574768 + }, + { + "epoch": 6.852517985611511, + "force_loss": 0.01031761709600687, + "step": 7620 + }, + { + "epoch": 6.861510791366906, + "grad_norm": 1.037456750869751, + "learning_rate": 9.838151982300927e-05, + "loss": 0.0394, + "step": 7630 + }, + { + "action_loss": 0.012813209556043148, + "epoch": 6.861510791366906, + "step": 7630 + }, + { + "epoch": 6.861510791366906, + "step": 7630, + "torque_loss": 0.1465476006269455 + }, + { + "epoch": 6.861510791366906, + "force_loss": 0.012145946733653545, + "step": 7630 + }, + { + "epoch": 6.870503597122302, + "grad_norm": 0.7836095094680786, + "learning_rate": 9.8374557661595e-05, + "loss": 0.0297, + "step": 7640 + }, + { + "action_loss": 0.028261477127671242, + "epoch": 6.870503597122302, + "step": 7640 + }, + { + "epoch": 6.870503597122302, + "step": 7640, + "torque_loss": 0.1583840399980545 + }, + { + "epoch": 6.870503597122302, + "force_loss": 0.018456261605024338, + "step": 7640 + }, + { + "epoch": 6.879496402877698, + "grad_norm": 0.702251672744751, + "learning_rate": 9.836758080526735e-05, + "loss": 0.0341, + "step": 7650 + }, + { + "action_loss": 0.01756826601922512, + "epoch": 6.879496402877698, + "step": 7650 + }, + { + "epoch": 6.879496402877698, + "step": 7650, + "torque_loss": 0.16502438485622406 + }, + { + "epoch": 6.879496402877698, + "force_loss": 0.013490281067788601, + "step": 7650 + }, + { + "epoch": 6.888489208633094, + "grad_norm": 0.8463592529296875, + "learning_rate": 9.836058925614575e-05, + "loss": 0.0313, + "step": 7660 + }, + { + "action_loss": 0.0048490907065570354, + "epoch": 6.888489208633094, + "step": 7660 + }, + { + "epoch": 6.888489208633094, + "step": 7660, + "torque_loss": 0.16393551230430603 + }, + { + "epoch": 6.888489208633094, + "force_loss": 0.006976804230362177, + "step": 7660 + }, + { + "epoch": 6.897482014388489, + "grad_norm": 0.7803255319595337, + "learning_rate": 9.8353583016354e-05, + "loss": 0.0287, + "step": 7670 + }, + { + "action_loss": 0.012085658498108387, + "epoch": 6.897482014388489, + "step": 7670 + }, + { + "epoch": 6.897482014388489, + "step": 7670, + "torque_loss": 0.17637775838375092 + }, + { + "epoch": 6.897482014388489, + "force_loss": 0.008509885519742966, + "step": 7670 + }, + { + "epoch": 6.906474820143885, + "grad_norm": 1.180240511894226, + "learning_rate": 9.834656208802044e-05, + "loss": 0.0323, + "step": 7680 + }, + { + "action_loss": 0.018915629014372826, + "epoch": 6.906474820143885, + "step": 7680 + }, + { + "epoch": 6.906474820143885, + "step": 7680, + "torque_loss": 0.21235208213329315 + }, + { + "epoch": 6.906474820143885, + "force_loss": 0.01931210793554783, + "step": 7680 + }, + { + "epoch": 6.91546762589928, + "grad_norm": 0.3223646283149719, + "learning_rate": 9.833952647327784e-05, + "loss": 0.0301, + "step": 7690 + }, + { + "action_loss": 0.009821752086281776, + "epoch": 6.91546762589928, + "step": 7690 + }, + { + "epoch": 6.91546762589928, + "step": 7690, + "torque_loss": 0.1348685473203659 + }, + { + "epoch": 6.91546762589928, + "force_loss": 0.004952156450599432, + "step": 7690 + }, + { + "epoch": 6.924460431654676, + "grad_norm": 0.7646491527557373, + "learning_rate": 9.833247617426342e-05, + "loss": 0.0284, + "step": 7700 + }, + { + "action_loss": 0.013955810107290745, + "epoch": 6.924460431654676, + "step": 7700 + }, + { + "epoch": 6.924460431654676, + "step": 7700, + "torque_loss": 0.17015063762664795 + }, + { + "epoch": 6.924460431654676, + "force_loss": 0.009839924052357674, + "step": 7700 + }, + { + "epoch": 6.933453237410072, + "grad_norm": 0.6083731055259705, + "learning_rate": 9.832541119311889e-05, + "loss": 0.0328, + "step": 7710 + }, + { + "action_loss": 0.024154866114258766, + "epoch": 6.933453237410072, + "step": 7710 + }, + { + "epoch": 6.933453237410072, + "step": 7710, + "torque_loss": 0.16915397346019745 + }, + { + "epoch": 6.933453237410072, + "force_loss": 0.019542505964636803, + "step": 7710 + }, + { + "epoch": 6.942446043165468, + "grad_norm": 1.2142714262008667, + "learning_rate": 9.83183315319904e-05, + "loss": 0.0402, + "step": 7720 + }, + { + "action_loss": 0.028392454609274864, + "epoch": 6.942446043165468, + "step": 7720 + }, + { + "epoch": 6.942446043165468, + "step": 7720, + "torque_loss": 0.19171525537967682 + }, + { + "epoch": 6.942446043165468, + "force_loss": 0.016506226733326912, + "step": 7720 + }, + { + "epoch": 6.951438848920863, + "grad_norm": 0.8586347699165344, + "learning_rate": 9.831123719302855e-05, + "loss": 0.0309, + "step": 7730 + }, + { + "action_loss": 0.022412672638893127, + "epoch": 6.951438848920863, + "step": 7730 + }, + { + "epoch": 6.951438848920863, + "step": 7730, + "torque_loss": 0.10771119594573975 + }, + { + "epoch": 6.951438848920863, + "force_loss": 0.026249969378113747, + "step": 7730 + }, + { + "epoch": 6.960431654676259, + "grad_norm": 1.0157043933868408, + "learning_rate": 9.830412817838842e-05, + "loss": 0.0306, + "step": 7740 + }, + { + "action_loss": 0.020980997011065483, + "epoch": 6.960431654676259, + "step": 7740 + }, + { + "epoch": 6.960431654676259, + "step": 7740, + "torque_loss": 0.169539213180542 + }, + { + "epoch": 6.960431654676259, + "force_loss": 0.020628642290830612, + "step": 7740 + }, + { + "epoch": 6.969424460431655, + "grad_norm": 0.5320214629173279, + "learning_rate": 9.829700449022956e-05, + "loss": 0.0347, + "step": 7750 + }, + { + "action_loss": 0.012410610914230347, + "epoch": 6.969424460431655, + "step": 7750 + }, + { + "epoch": 6.969424460431655, + "step": 7750, + "torque_loss": 0.15810233354568481 + }, + { + "epoch": 6.969424460431655, + "force_loss": 0.012388341128826141, + "step": 7750 + }, + { + "epoch": 6.9784172661870505, + "grad_norm": 1.3180557489395142, + "learning_rate": 9.828986613071593e-05, + "loss": 0.0281, + "step": 7760 + }, + { + "action_loss": 0.007189827039837837, + "epoch": 6.9784172661870505, + "step": 7760 + }, + { + "epoch": 6.9784172661870505, + "step": 7760, + "torque_loss": 0.09706615656614304 + }, + { + "epoch": 6.9784172661870505, + "force_loss": 0.007400956004858017, + "step": 7760 + }, + { + "epoch": 6.987410071942446, + "grad_norm": 0.7778557538986206, + "learning_rate": 9.828271310201601e-05, + "loss": 0.0277, + "step": 7770 + }, + { + "action_loss": 0.013597776181995869, + "epoch": 6.987410071942446, + "step": 7770 + }, + { + "epoch": 6.987410071942446, + "step": 7770, + "torque_loss": 0.1417521834373474 + }, + { + "epoch": 6.987410071942446, + "force_loss": 0.019632231444120407, + "step": 7770 + }, + { + "epoch": 6.996402877697841, + "grad_norm": 0.7626013159751892, + "learning_rate": 9.827554540630268e-05, + "loss": 0.0305, + "step": 7780 + }, + { + "action_loss": 0.014794292859733105, + "epoch": 6.996402877697841, + "step": 7780 + }, + { + "epoch": 6.996402877697841, + "step": 7780, + "torque_loss": 0.20905987918376923 + }, + { + "epoch": 6.996402877697841, + "force_loss": 0.009513375349342823, + "step": 7780 + }, + { + "epoch": 7.005395683453237, + "grad_norm": 0.3939371407032013, + "learning_rate": 9.826836304575329e-05, + "loss": 0.0303, + "step": 7790 + }, + { + "action_loss": 0.011105640791356564, + "epoch": 7.005395683453237, + "step": 7790 + }, + { + "epoch": 7.005395683453237, + "step": 7790, + "torque_loss": 0.1300303339958191 + }, + { + "epoch": 7.005395683453237, + "force_loss": 0.011433079838752747, + "step": 7790 + }, + { + "epoch": 7.014388489208633, + "grad_norm": 0.980540931224823, + "learning_rate": 9.826116602254966e-05, + "loss": 0.0315, + "step": 7800 + }, + { + "action_loss": 0.018765976652503014, + "epoch": 7.014388489208633, + "step": 7800 + }, + { + "epoch": 7.014388489208633, + "step": 7800, + "torque_loss": 0.1850420981645584 + }, + { + "epoch": 7.014388489208633, + "force_loss": 0.01585307903587818, + "step": 7800 + }, + { + "epoch": 7.023381294964029, + "grad_norm": 0.5051628947257996, + "learning_rate": 9.825395433887805e-05, + "loss": 0.0332, + "step": 7810 + }, + { + "action_loss": 0.029291892424225807, + "epoch": 7.023381294964029, + "step": 7810 + }, + { + "epoch": 7.023381294964029, + "step": 7810, + "torque_loss": 0.15354830026626587 + }, + { + "epoch": 7.023381294964029, + "force_loss": 0.033693213015794754, + "step": 7810 + }, + { + "epoch": 7.032374100719425, + "grad_norm": 1.2920396327972412, + "learning_rate": 9.824672799692917e-05, + "loss": 0.0326, + "step": 7820 + }, + { + "action_loss": 0.016909083351492882, + "epoch": 7.032374100719425, + "step": 7820 + }, + { + "epoch": 7.032374100719425, + "step": 7820, + "torque_loss": 0.1852744072675705 + }, + { + "epoch": 7.032374100719425, + "force_loss": 0.014743109233677387, + "step": 7820 + }, + { + "epoch": 7.0413669064748206, + "grad_norm": 1.7694145441055298, + "learning_rate": 9.823948699889823e-05, + "loss": 0.0334, + "step": 7830 + }, + { + "action_loss": 0.009295588359236717, + "epoch": 7.0413669064748206, + "step": 7830 + }, + { + "epoch": 7.0413669064748206, + "step": 7830, + "torque_loss": 0.11250940710306168 + }, + { + "epoch": 7.0413669064748206, + "force_loss": 0.00965843815356493, + "step": 7830 + }, + { + "epoch": 7.0503597122302155, + "grad_norm": 1.013838768005371, + "learning_rate": 9.823223134698483e-05, + "loss": 0.0299, + "step": 7840 + }, + { + "action_loss": 0.021403102204203606, + "epoch": 7.0503597122302155, + "step": 7840 + }, + { + "epoch": 7.0503597122302155, + "step": 7840, + "torque_loss": 0.1695012003183365 + }, + { + "epoch": 7.0503597122302155, + "force_loss": 0.015083979815244675, + "step": 7840 + }, + { + "epoch": 7.059352517985611, + "grad_norm": 0.733481764793396, + "learning_rate": 9.822496104339303e-05, + "loss": 0.0319, + "step": 7850 + }, + { + "action_loss": 0.008760634809732437, + "epoch": 7.059352517985611, + "step": 7850 + }, + { + "epoch": 7.059352517985611, + "step": 7850, + "torque_loss": 0.15025722980499268 + }, + { + "epoch": 7.059352517985611, + "force_loss": 0.007303763180971146, + "step": 7850 + }, + { + "epoch": 7.068345323741007, + "grad_norm": 0.5384030342102051, + "learning_rate": 9.821767609033138e-05, + "loss": 0.0306, + "step": 7860 + }, + { + "action_loss": 0.014348868280649185, + "epoch": 7.068345323741007, + "step": 7860 + }, + { + "epoch": 7.068345323741007, + "step": 7860, + "torque_loss": 0.14780882000923157 + }, + { + "epoch": 7.068345323741007, + "force_loss": 0.024701185524463654, + "step": 7860 + }, + { + "epoch": 7.077338129496403, + "grad_norm": 0.6079313158988953, + "learning_rate": 9.821037649001284e-05, + "loss": 0.0353, + "step": 7870 + }, + { + "action_loss": 0.011603743769228458, + "epoch": 7.077338129496403, + "step": 7870 + }, + { + "epoch": 7.077338129496403, + "step": 7870, + "torque_loss": 0.14331673085689545 + }, + { + "epoch": 7.077338129496403, + "force_loss": 0.012257256545126438, + "step": 7870 + }, + { + "epoch": 7.086330935251799, + "grad_norm": 0.8938829302787781, + "learning_rate": 9.820306224465486e-05, + "loss": 0.0299, + "step": 7880 + }, + { + "action_loss": 0.009041390381753445, + "epoch": 7.086330935251799, + "step": 7880 + }, + { + "epoch": 7.086330935251799, + "step": 7880, + "torque_loss": 0.14269673824310303 + }, + { + "epoch": 7.086330935251799, + "force_loss": 0.007723438087850809, + "step": 7880 + }, + { + "epoch": 7.095323741007194, + "grad_norm": 0.6975097060203552, + "learning_rate": 9.819573335647928e-05, + "loss": 0.034, + "step": 7890 + }, + { + "action_loss": 0.007311273366212845, + "epoch": 7.095323741007194, + "step": 7890 + }, + { + "epoch": 7.095323741007194, + "step": 7890, + "torque_loss": 0.11497911065816879 + }, + { + "epoch": 7.095323741007194, + "force_loss": 0.005881762597709894, + "step": 7890 + }, + { + "epoch": 7.10431654676259, + "grad_norm": 0.9963082075119019, + "learning_rate": 9.818838982771246e-05, + "loss": 0.0361, + "step": 7900 + }, + { + "action_loss": 0.013384412042796612, + "epoch": 7.10431654676259, + "step": 7900 + }, + { + "epoch": 7.10431654676259, + "step": 7900, + "torque_loss": 0.1763625591993332 + }, + { + "epoch": 7.10431654676259, + "force_loss": 0.01059798151254654, + "step": 7900 + }, + { + "epoch": 7.113309352517986, + "grad_norm": 1.1353193521499634, + "learning_rate": 9.818103166058514e-05, + "loss": 0.0316, + "step": 7910 + }, + { + "action_loss": 0.011034096591174603, + "epoch": 7.113309352517986, + "step": 7910 + }, + { + "epoch": 7.113309352517986, + "step": 7910, + "torque_loss": 0.17189419269561768 + }, + { + "epoch": 7.113309352517986, + "force_loss": 0.011203192174434662, + "step": 7910 + }, + { + "epoch": 7.122302158273381, + "grad_norm": 1.559002161026001, + "learning_rate": 9.817365885733254e-05, + "loss": 0.0315, + "step": 7920 + }, + { + "action_loss": 0.014221259392797947, + "epoch": 7.122302158273381, + "step": 7920 + }, + { + "epoch": 7.122302158273381, + "step": 7920, + "torque_loss": 0.12348828464746475 + }, + { + "epoch": 7.122302158273381, + "force_loss": 0.010941113345324993, + "step": 7920 + }, + { + "epoch": 7.131294964028777, + "grad_norm": 0.6356121897697449, + "learning_rate": 9.816627142019434e-05, + "loss": 0.0299, + "step": 7930 + }, + { + "action_loss": 0.019868925213813782, + "epoch": 7.131294964028777, + "step": 7930 + }, + { + "epoch": 7.131294964028777, + "step": 7930, + "torque_loss": 0.15779805183410645 + }, + { + "epoch": 7.131294964028777, + "force_loss": 0.01079738512635231, + "step": 7930 + }, + { + "epoch": 7.140287769784172, + "grad_norm": 1.2879135608673096, + "learning_rate": 9.815886935141463e-05, + "loss": 0.0366, + "step": 7940 + }, + { + "action_loss": 0.015258598141372204, + "epoch": 7.140287769784172, + "step": 7940 + }, + { + "epoch": 7.140287769784172, + "step": 7940, + "torque_loss": 0.11968880891799927 + }, + { + "epoch": 7.140287769784172, + "force_loss": 0.011036291718482971, + "step": 7940 + }, + { + "epoch": 7.149280575539568, + "grad_norm": 0.8261431455612183, + "learning_rate": 9.8151452653242e-05, + "loss": 0.0288, + "step": 7950 + }, + { + "action_loss": 0.015346703119575977, + "epoch": 7.149280575539568, + "step": 7950 + }, + { + "epoch": 7.149280575539568, + "step": 7950, + "torque_loss": 0.14154738187789917 + }, + { + "epoch": 7.149280575539568, + "force_loss": 0.02692827582359314, + "step": 7950 + }, + { + "epoch": 7.158273381294964, + "grad_norm": 0.9473282098770142, + "learning_rate": 9.814402132792939e-05, + "loss": 0.0299, + "step": 7960 + }, + { + "action_loss": 0.026272349059581757, + "epoch": 7.158273381294964, + "step": 7960 + }, + { + "epoch": 7.158273381294964, + "step": 7960, + "torque_loss": 0.1546822041273117 + }, + { + "epoch": 7.158273381294964, + "force_loss": 0.024519843980669975, + "step": 7960 + }, + { + "epoch": 7.16726618705036, + "grad_norm": 0.6530436873435974, + "learning_rate": 9.813657537773428e-05, + "loss": 0.0319, + "step": 7970 + }, + { + "action_loss": 0.01660039834678173, + "epoch": 7.16726618705036, + "step": 7970 + }, + { + "epoch": 7.16726618705036, + "step": 7970, + "torque_loss": 0.11716824769973755 + }, + { + "epoch": 7.16726618705036, + "force_loss": 0.009484336711466312, + "step": 7970 + }, + { + "epoch": 7.176258992805756, + "grad_norm": 0.6228723526000977, + "learning_rate": 9.812911480491854e-05, + "loss": 0.0284, + "step": 7980 + }, + { + "action_loss": 0.015811538323760033, + "epoch": 7.176258992805756, + "step": 7980 + }, + { + "epoch": 7.176258992805756, + "step": 7980, + "torque_loss": 0.14678019285202026 + }, + { + "epoch": 7.176258992805756, + "force_loss": 0.019527392461895943, + "step": 7980 + }, + { + "epoch": 7.1852517985611515, + "grad_norm": 0.6273057460784912, + "learning_rate": 9.81216396117485e-05, + "loss": 0.0301, + "step": 7990 + }, + { + "action_loss": 0.014880210161209106, + "epoch": 7.1852517985611515, + "step": 7990 + }, + { + "epoch": 7.1852517985611515, + "step": 7990, + "torque_loss": 0.1347213238477707 + }, + { + "epoch": 7.1852517985611515, + "force_loss": 0.01680292934179306, + "step": 7990 + }, + { + "epoch": 7.194244604316546, + "grad_norm": 0.8474001884460449, + "learning_rate": 9.811414980049491e-05, + "loss": 0.032, + "step": 8000 + }, + { + "action_loss": 0.02514682523906231, + "epoch": 7.194244604316546, + "step": 8000 + }, + { + "epoch": 7.194244604316546, + "step": 8000, + "torque_loss": 0.1738385111093521 + }, + { + "epoch": 7.194244604316546, + "force_loss": 0.016921082511544228, + "step": 8000 + }, + { + "epoch": 7.203237410071942, + "grad_norm": 0.5634039640426636, + "learning_rate": 9.810664537343301e-05, + "loss": 0.0296, + "step": 8010 + }, + { + "action_loss": 0.023893803358078003, + "epoch": 7.203237410071942, + "step": 8010 + }, + { + "epoch": 7.203237410071942, + "step": 8010, + "torque_loss": 0.14144329726696014 + }, + { + "epoch": 7.203237410071942, + "force_loss": 0.017246244475245476, + "step": 8010 + }, + { + "epoch": 7.212230215827338, + "grad_norm": 0.8652377128601074, + "learning_rate": 9.809912633284243e-05, + "loss": 0.0307, + "step": 8020 + }, + { + "action_loss": 0.01660771109163761, + "epoch": 7.212230215827338, + "step": 8020 + }, + { + "epoch": 7.212230215827338, + "step": 8020, + "torque_loss": 0.15797439217567444 + }, + { + "epoch": 7.212230215827338, + "force_loss": 0.012745119631290436, + "step": 8020 + }, + { + "epoch": 7.221223021582734, + "grad_norm": 0.8667154312133789, + "learning_rate": 9.809159268100725e-05, + "loss": 0.0296, + "step": 8030 + }, + { + "action_loss": 0.00831642746925354, + "epoch": 7.221223021582734, + "step": 8030 + }, + { + "epoch": 7.221223021582734, + "step": 8030, + "torque_loss": 0.14936727285385132 + }, + { + "epoch": 7.221223021582734, + "force_loss": 0.006427200045436621, + "step": 8030 + }, + { + "epoch": 7.23021582733813, + "grad_norm": 0.843334436416626, + "learning_rate": 9.808404442021599e-05, + "loss": 0.031, + "step": 8040 + }, + { + "action_loss": 0.008103086613118649, + "epoch": 7.23021582733813, + "step": 8040 + }, + { + "epoch": 7.23021582733813, + "step": 8040, + "torque_loss": 0.14466805756092072 + }, + { + "epoch": 7.23021582733813, + "force_loss": 0.009107830002903938, + "step": 8040 + }, + { + "epoch": 7.239208633093525, + "grad_norm": 1.0166850090026855, + "learning_rate": 9.807648155276163e-05, + "loss": 0.0313, + "step": 8050 + }, + { + "action_loss": 0.016420679166913033, + "epoch": 7.239208633093525, + "step": 8050 + }, + { + "epoch": 7.239208633093525, + "step": 8050, + "torque_loss": 0.13282187283039093 + }, + { + "epoch": 7.239208633093525, + "force_loss": 0.014203312806785107, + "step": 8050 + }, + { + "epoch": 7.248201438848921, + "grad_norm": 0.8945342898368835, + "learning_rate": 9.806890408094156e-05, + "loss": 0.0325, + "step": 8060 + }, + { + "action_loss": 0.021993523463606834, + "epoch": 7.248201438848921, + "step": 8060 + }, + { + "epoch": 7.248201438848921, + "step": 8060, + "torque_loss": 0.11349403858184814 + }, + { + "epoch": 7.248201438848921, + "force_loss": 0.03570099547505379, + "step": 8060 + }, + { + "epoch": 7.2571942446043165, + "grad_norm": 1.963100552558899, + "learning_rate": 9.806131200705761e-05, + "loss": 0.0326, + "step": 8070 + }, + { + "action_loss": 0.01009267196059227, + "epoch": 7.2571942446043165, + "step": 8070 + }, + { + "epoch": 7.2571942446043165, + "step": 8070, + "torque_loss": 0.13126297295093536 + }, + { + "epoch": 7.2571942446043165, + "force_loss": 0.015860306099057198, + "step": 8070 + }, + { + "epoch": 7.266187050359712, + "grad_norm": 0.19442281126976013, + "learning_rate": 9.805370533341605e-05, + "loss": 0.0334, + "step": 8080 + }, + { + "action_loss": 0.01956632174551487, + "epoch": 7.266187050359712, + "step": 8080 + }, + { + "epoch": 7.266187050359712, + "step": 8080, + "torque_loss": 0.1147812083363533 + }, + { + "epoch": 7.266187050359712, + "force_loss": 0.012806649319827557, + "step": 8080 + }, + { + "epoch": 7.275179856115108, + "grad_norm": 0.6823049187660217, + "learning_rate": 9.804608406232762e-05, + "loss": 0.0272, + "step": 8090 + }, + { + "action_loss": 0.007072980049997568, + "epoch": 7.275179856115108, + "step": 8090 + }, + { + "epoch": 7.275179856115108, + "step": 8090, + "torque_loss": 0.13415533304214478 + }, + { + "epoch": 7.275179856115108, + "force_loss": 0.007197019178420305, + "step": 8090 + }, + { + "epoch": 7.284172661870503, + "grad_norm": 0.589218020439148, + "learning_rate": 9.803844819610741e-05, + "loss": 0.0316, + "step": 8100 + }, + { + "action_loss": 0.015059317462146282, + "epoch": 7.284172661870503, + "step": 8100 + }, + { + "epoch": 7.284172661870503, + "step": 8100, + "torque_loss": 0.15744559466838837 + }, + { + "epoch": 7.284172661870503, + "force_loss": 0.015000968240201473, + "step": 8100 + }, + { + "epoch": 7.293165467625899, + "grad_norm": 0.48272275924682617, + "learning_rate": 9.803079773707504e-05, + "loss": 0.0284, + "step": 8110 + }, + { + "action_loss": 0.015221621841192245, + "epoch": 7.293165467625899, + "step": 8110 + }, + { + "epoch": 7.293165467625899, + "step": 8110, + "torque_loss": 0.18278925120830536 + }, + { + "epoch": 7.293165467625899, + "force_loss": 0.014225400984287262, + "step": 8110 + }, + { + "epoch": 7.302158273381295, + "grad_norm": 0.7312065362930298, + "learning_rate": 9.802313268755447e-05, + "loss": 0.0371, + "step": 8120 + }, + { + "action_loss": 0.027010740712285042, + "epoch": 7.302158273381295, + "step": 8120 + }, + { + "epoch": 7.302158273381295, + "step": 8120, + "torque_loss": 0.147797092795372 + }, + { + "epoch": 7.302158273381295, + "force_loss": 0.02799549140036106, + "step": 8120 + }, + { + "epoch": 7.311151079136691, + "grad_norm": 1.2887564897537231, + "learning_rate": 9.801545304987419e-05, + "loss": 0.0329, + "step": 8130 + }, + { + "action_loss": 0.015190199948847294, + "epoch": 7.311151079136691, + "step": 8130 + }, + { + "epoch": 7.311151079136691, + "step": 8130, + "torque_loss": 0.14864178001880646 + }, + { + "epoch": 7.311151079136691, + "force_loss": 0.01111955102533102, + "step": 8130 + }, + { + "epoch": 7.320143884892087, + "grad_norm": 0.3698447048664093, + "learning_rate": 9.800775882636704e-05, + "loss": 0.032, + "step": 8140 + }, + { + "action_loss": 0.02153765596449375, + "epoch": 7.320143884892087, + "step": 8140 + }, + { + "epoch": 7.320143884892087, + "step": 8140, + "torque_loss": 0.1726057082414627 + }, + { + "epoch": 7.320143884892087, + "force_loss": 0.031489450484514236, + "step": 8140 + }, + { + "epoch": 7.329136690647482, + "grad_norm": 0.7618156671524048, + "learning_rate": 9.800005001937034e-05, + "loss": 0.0267, + "step": 8150 + }, + { + "action_loss": 0.014157422818243504, + "epoch": 7.329136690647482, + "step": 8150 + }, + { + "epoch": 7.329136690647482, + "step": 8150, + "torque_loss": 0.13258889317512512 + }, + { + "epoch": 7.329136690647482, + "force_loss": 0.012225021608173847, + "step": 8150 + }, + { + "epoch": 7.338129496402877, + "grad_norm": 1.3190714120864868, + "learning_rate": 9.79923266312258e-05, + "loss": 0.0321, + "step": 8160 + }, + { + "action_loss": 0.01977492682635784, + "epoch": 7.338129496402877, + "step": 8160 + }, + { + "epoch": 7.338129496402877, + "step": 8160, + "torque_loss": 0.13922341167926788 + }, + { + "epoch": 7.338129496402877, + "force_loss": 0.01874031312763691, + "step": 8160 + }, + { + "epoch": 7.347122302158273, + "grad_norm": 0.620725691318512, + "learning_rate": 9.79845886642796e-05, + "loss": 0.029, + "step": 8170 + }, + { + "action_loss": 0.008612917736172676, + "epoch": 7.347122302158273, + "step": 8170 + }, + { + "epoch": 7.347122302158273, + "step": 8170, + "torque_loss": 0.12805069983005524 + }, + { + "epoch": 7.347122302158273, + "force_loss": 0.014394022524356842, + "step": 8170 + }, + { + "epoch": 7.356115107913669, + "grad_norm": 0.6814894080162048, + "learning_rate": 9.797683612088233e-05, + "loss": 0.0277, + "step": 8180 + }, + { + "action_loss": 0.020145997405052185, + "epoch": 7.356115107913669, + "step": 8180 + }, + { + "epoch": 7.356115107913669, + "step": 8180, + "torque_loss": 0.11983004957437515 + }, + { + "epoch": 7.356115107913669, + "force_loss": 0.020338408648967743, + "step": 8180 + }, + { + "epoch": 7.365107913669065, + "grad_norm": 0.8076351284980774, + "learning_rate": 9.796906900338898e-05, + "loss": 0.0321, + "step": 8190 + }, + { + "action_loss": 0.012436367571353912, + "epoch": 7.365107913669065, + "step": 8190 + }, + { + "epoch": 7.365107913669065, + "step": 8190, + "torque_loss": 0.15969730913639069 + }, + { + "epoch": 7.365107913669065, + "force_loss": 0.013429229147732258, + "step": 8190 + }, + { + "epoch": 7.374100719424461, + "grad_norm": 0.5265650749206543, + "learning_rate": 9.796128731415903e-05, + "loss": 0.032, + "step": 8200 + }, + { + "action_loss": 0.01803666166961193, + "epoch": 7.374100719424461, + "step": 8200 + }, + { + "epoch": 7.374100719424461, + "step": 8200, + "torque_loss": 0.15755203366279602 + }, + { + "epoch": 7.374100719424461, + "force_loss": 0.013762987218797207, + "step": 8200 + }, + { + "epoch": 7.383093525179856, + "grad_norm": 0.48601090908050537, + "learning_rate": 9.795349105555634e-05, + "loss": 0.0321, + "step": 8210 + }, + { + "action_loss": 0.012979594059288502, + "epoch": 7.383093525179856, + "step": 8210 + }, + { + "epoch": 7.383093525179856, + "step": 8210, + "torque_loss": 0.13602197170257568 + }, + { + "epoch": 7.383093525179856, + "force_loss": 0.016358325257897377, + "step": 8210 + }, + { + "epoch": 7.392086330935252, + "grad_norm": 0.28090977668762207, + "learning_rate": 9.794568022994922e-05, + "loss": 0.0309, + "step": 8220 + }, + { + "action_loss": 0.01126746367663145, + "epoch": 7.392086330935252, + "step": 8220 + }, + { + "epoch": 7.392086330935252, + "step": 8220, + "torque_loss": 0.1472492814064026 + }, + { + "epoch": 7.392086330935252, + "force_loss": 0.010812432505190372, + "step": 8220 + }, + { + "epoch": 7.401079136690647, + "grad_norm": 0.6091715693473816, + "learning_rate": 9.793785483971034e-05, + "loss": 0.0279, + "step": 8230 + }, + { + "action_loss": 0.009267676621675491, + "epoch": 7.401079136690647, + "step": 8230 + }, + { + "epoch": 7.401079136690647, + "step": 8230, + "torque_loss": 0.1545286327600479 + }, + { + "epoch": 7.401079136690647, + "force_loss": 0.007919063791632652, + "step": 8230 + }, + { + "epoch": 7.410071942446043, + "grad_norm": 1.1030200719833374, + "learning_rate": 9.793001488721691e-05, + "loss": 0.0326, + "step": 8240 + }, + { + "action_loss": 0.010712782852351665, + "epoch": 7.410071942446043, + "step": 8240 + }, + { + "epoch": 7.410071942446043, + "step": 8240, + "torque_loss": 0.10139413923025131 + }, + { + "epoch": 7.410071942446043, + "force_loss": 0.00794156827032566, + "step": 8240 + }, + { + "epoch": 7.419064748201439, + "grad_norm": 1.2643754482269287, + "learning_rate": 9.792216037485047e-05, + "loss": 0.0314, + "step": 8250 + }, + { + "action_loss": 0.013765938580036163, + "epoch": 7.419064748201439, + "step": 8250 + }, + { + "epoch": 7.419064748201439, + "step": 8250, + "torque_loss": 0.1910661906003952 + }, + { + "epoch": 7.419064748201439, + "force_loss": 0.013252117671072483, + "step": 8250 + }, + { + "epoch": 7.428057553956835, + "grad_norm": 0.582712709903717, + "learning_rate": 9.791429130499704e-05, + "loss": 0.0308, + "step": 8260 + }, + { + "action_loss": 0.018466627225279808, + "epoch": 7.428057553956835, + "step": 8260 + }, + { + "epoch": 7.428057553956835, + "step": 8260, + "torque_loss": 0.13622981309890747 + }, + { + "epoch": 7.428057553956835, + "force_loss": 0.016750799492001534, + "step": 8260 + }, + { + "epoch": 7.43705035971223, + "grad_norm": 0.5868211984634399, + "learning_rate": 9.790640768004698e-05, + "loss": 0.03, + "step": 8270 + }, + { + "action_loss": 0.01863769069314003, + "epoch": 7.43705035971223, + "step": 8270 + }, + { + "epoch": 7.43705035971223, + "step": 8270, + "torque_loss": 0.13692380487918854 + }, + { + "epoch": 7.43705035971223, + "force_loss": 0.021651098504662514, + "step": 8270 + }, + { + "epoch": 7.446043165467626, + "grad_norm": 1.2289541959762573, + "learning_rate": 9.789850950239518e-05, + "loss": 0.0351, + "step": 8280 + }, + { + "action_loss": 0.013784590177237988, + "epoch": 7.446043165467626, + "step": 8280 + }, + { + "epoch": 7.446043165467626, + "step": 8280, + "torque_loss": 0.1744844913482666 + }, + { + "epoch": 7.446043165467626, + "force_loss": 0.010908793658018112, + "step": 8280 + }, + { + "epoch": 7.455035971223022, + "grad_norm": 0.647811770439148, + "learning_rate": 9.789059677444089e-05, + "loss": 0.0295, + "step": 8290 + }, + { + "action_loss": 0.008474532514810562, + "epoch": 7.455035971223022, + "step": 8290 + }, + { + "epoch": 7.455035971223022, + "step": 8290, + "torque_loss": 0.1466941386461258 + }, + { + "epoch": 7.455035971223022, + "force_loss": 0.007238794583827257, + "step": 8290 + }, + { + "epoch": 7.4640287769784175, + "grad_norm": 0.5557348728179932, + "learning_rate": 9.788266949858776e-05, + "loss": 0.0249, + "step": 8300 + }, + { + "action_loss": 0.012704807333648205, + "epoch": 7.4640287769784175, + "step": 8300 + }, + { + "epoch": 7.4640287769784175, + "step": 8300, + "torque_loss": 0.13206680119037628 + }, + { + "epoch": 7.4640287769784175, + "force_loss": 0.010610085912048817, + "step": 8300 + }, + { + "epoch": 7.473021582733813, + "grad_norm": 0.5853516459465027, + "learning_rate": 9.787472767724392e-05, + "loss": 0.0245, + "step": 8310 + }, + { + "action_loss": 0.007080281153321266, + "epoch": 7.473021582733813, + "step": 8310 + }, + { + "epoch": 7.473021582733813, + "step": 8310, + "torque_loss": 0.11170990020036697 + }, + { + "epoch": 7.473021582733813, + "force_loss": 0.0078382333740592, + "step": 8310 + }, + { + "epoch": 7.482014388489208, + "grad_norm": 0.8179770708084106, + "learning_rate": 9.786677131282185e-05, + "loss": 0.0311, + "step": 8320 + }, + { + "action_loss": 0.007529696449637413, + "epoch": 7.482014388489208, + "step": 8320 + }, + { + "epoch": 7.482014388489208, + "step": 8320, + "torque_loss": 0.09825699776411057 + }, + { + "epoch": 7.482014388489208, + "force_loss": 0.009542413055896759, + "step": 8320 + }, + { + "epoch": 7.491007194244604, + "grad_norm": 0.38167914748191833, + "learning_rate": 9.785880040773853e-05, + "loss": 0.0278, + "step": 8330 + }, + { + "action_loss": 0.01627209782600403, + "epoch": 7.491007194244604, + "step": 8330 + }, + { + "epoch": 7.491007194244604, + "step": 8330, + "torque_loss": 0.1791231781244278 + }, + { + "epoch": 7.491007194244604, + "force_loss": 0.00902497861534357, + "step": 8330 + }, + { + "epoch": 7.5, + "grad_norm": 1.403141736984253, + "learning_rate": 9.785081496441527e-05, + "loss": 0.0368, + "step": 8340 + }, + { + "action_loss": 0.012243962846696377, + "epoch": 7.5, + "step": 8340 + }, + { + "epoch": 7.5, + "step": 8340, + "torque_loss": 0.12263405323028564 + }, + { + "epoch": 7.5, + "force_loss": 0.010465558618307114, + "step": 8340 + }, + { + "epoch": 7.508992805755396, + "grad_norm": 1.248809814453125, + "learning_rate": 9.784281498527785e-05, + "loss": 0.0315, + "step": 8350 + }, + { + "action_loss": 0.013629280030727386, + "epoch": 7.508992805755396, + "step": 8350 + }, + { + "epoch": 7.508992805755396, + "step": 8350, + "torque_loss": 0.13109223544597626 + }, + { + "epoch": 7.508992805755396, + "force_loss": 0.015668490901589394, + "step": 8350 + }, + { + "epoch": 7.517985611510792, + "grad_norm": 1.1658705472946167, + "learning_rate": 9.783480047275646e-05, + "loss": 0.0376, + "step": 8360 + }, + { + "action_loss": 0.01723673939704895, + "epoch": 7.517985611510792, + "step": 8360 + }, + { + "epoch": 7.517985611510792, + "step": 8360, + "torque_loss": 0.11456664651632309 + }, + { + "epoch": 7.517985611510792, + "force_loss": 0.01445191353559494, + "step": 8360 + }, + { + "epoch": 7.5269784172661875, + "grad_norm": 0.44663509726524353, + "learning_rate": 9.78267714292857e-05, + "loss": 0.0358, + "step": 8370 + }, + { + "action_loss": 0.006761031690984964, + "epoch": 7.5269784172661875, + "step": 8370 + }, + { + "epoch": 7.5269784172661875, + "step": 8370, + "torque_loss": 0.11758812516927719 + }, + { + "epoch": 7.5269784172661875, + "force_loss": 0.007492410484701395, + "step": 8370 + }, + { + "epoch": 7.5359712230215825, + "grad_norm": 0.5402593612670898, + "learning_rate": 9.781872785730454e-05, + "loss": 0.031, + "step": 8380 + }, + { + "action_loss": 0.010334520600736141, + "epoch": 7.5359712230215825, + "step": 8380 + }, + { + "epoch": 7.5359712230215825, + "step": 8380, + "torque_loss": 0.1553741842508316 + }, + { + "epoch": 7.5359712230215825, + "force_loss": 0.009717588312923908, + "step": 8380 + }, + { + "epoch": 7.544964028776978, + "grad_norm": 0.9734342098236084, + "learning_rate": 9.781066975925646e-05, + "loss": 0.0277, + "step": 8390 + }, + { + "action_loss": 0.018119921907782555, + "epoch": 7.544964028776978, + "step": 8390 + }, + { + "epoch": 7.544964028776978, + "step": 8390, + "torque_loss": 0.11200815439224243 + }, + { + "epoch": 7.544964028776978, + "force_loss": 0.01347032655030489, + "step": 8390 + }, + { + "epoch": 7.553956834532374, + "grad_norm": 0.5535122156143188, + "learning_rate": 9.780259713758928e-05, + "loss": 0.0293, + "step": 8400 + }, + { + "action_loss": 0.007479672785848379, + "epoch": 7.553956834532374, + "step": 8400 + }, + { + "epoch": 7.553956834532374, + "step": 8400, + "torque_loss": 0.12148300558328629 + }, + { + "epoch": 7.553956834532374, + "force_loss": 0.01105521246790886, + "step": 8400 + }, + { + "epoch": 7.56294964028777, + "grad_norm": 0.7123938798904419, + "learning_rate": 9.779450999475524e-05, + "loss": 0.0259, + "step": 8410 + }, + { + "action_loss": 0.010749242268502712, + "epoch": 7.56294964028777, + "step": 8410 + }, + { + "epoch": 7.56294964028777, + "step": 8410, + "torque_loss": 0.1403711587190628 + }, + { + "epoch": 7.56294964028777, + "force_loss": 0.021566079929471016, + "step": 8410 + }, + { + "epoch": 7.571942446043165, + "grad_norm": 0.8686343431472778, + "learning_rate": 9.7786408333211e-05, + "loss": 0.0289, + "step": 8420 + }, + { + "action_loss": 0.00947518553584814, + "epoch": 7.571942446043165, + "step": 8420 + }, + { + "epoch": 7.571942446043165, + "step": 8420, + "torque_loss": 0.16997653245925903 + }, + { + "epoch": 7.571942446043165, + "force_loss": 0.007192822638899088, + "step": 8420 + }, + { + "epoch": 7.580935251798561, + "grad_norm": 0.4283559322357178, + "learning_rate": 9.777829215541764e-05, + "loss": 0.0308, + "step": 8430 + }, + { + "action_loss": 0.011530887335538864, + "epoch": 7.580935251798561, + "step": 8430 + }, + { + "epoch": 7.580935251798561, + "step": 8430, + "torque_loss": 0.1305180937051773 + }, + { + "epoch": 7.580935251798561, + "force_loss": 0.010568656958639622, + "step": 8430 + }, + { + "epoch": 7.589928057553957, + "grad_norm": 0.48228919506073, + "learning_rate": 9.777016146384064e-05, + "loss": 0.0284, + "step": 8440 + }, + { + "action_loss": 0.027326839044690132, + "epoch": 7.589928057553957, + "step": 8440 + }, + { + "epoch": 7.589928057553957, + "step": 8440, + "torque_loss": 0.14576278626918793 + }, + { + "epoch": 7.589928057553957, + "force_loss": 0.024153592064976692, + "step": 8440 + }, + { + "epoch": 7.598920863309353, + "grad_norm": 0.927008330821991, + "learning_rate": 9.776201626094988e-05, + "loss": 0.0259, + "step": 8450 + }, + { + "action_loss": 0.01435435563325882, + "epoch": 7.598920863309353, + "step": 8450 + }, + { + "epoch": 7.598920863309353, + "step": 8450, + "torque_loss": 0.1355314701795578 + }, + { + "epoch": 7.598920863309353, + "force_loss": 0.007660705130547285, + "step": 8450 + }, + { + "epoch": 7.607913669064748, + "grad_norm": 0.43980029225349426, + "learning_rate": 9.775385654921965e-05, + "loss": 0.0268, + "step": 8460 + }, + { + "action_loss": 0.013210107572376728, + "epoch": 7.607913669064748, + "step": 8460 + }, + { + "epoch": 7.607913669064748, + "step": 8460, + "torque_loss": 0.16153599321842194 + }, + { + "epoch": 7.607913669064748, + "force_loss": 0.012342006899416447, + "step": 8460 + }, + { + "epoch": 7.616906474820144, + "grad_norm": 0.793596625328064, + "learning_rate": 9.774568233112868e-05, + "loss": 0.032, + "step": 8470 + }, + { + "action_loss": 0.009867441840469837, + "epoch": 7.616906474820144, + "step": 8470 + }, + { + "epoch": 7.616906474820144, + "step": 8470, + "torque_loss": 0.0989953801035881 + }, + { + "epoch": 7.616906474820144, + "force_loss": 0.007182652596384287, + "step": 8470 + }, + { + "epoch": 7.625899280575539, + "grad_norm": 0.7396289706230164, + "learning_rate": 9.773749360916007e-05, + "loss": 0.0321, + "step": 8480 + }, + { + "action_loss": 0.017461545765399933, + "epoch": 7.625899280575539, + "step": 8480 + }, + { + "epoch": 7.625899280575539, + "step": 8480, + "torque_loss": 0.12636858224868774 + }, + { + "epoch": 7.625899280575539, + "force_loss": 0.01598258875310421, + "step": 8480 + }, + { + "epoch": 7.634892086330935, + "grad_norm": 1.1434593200683594, + "learning_rate": 9.772929038580134e-05, + "loss": 0.0349, + "step": 8490 + }, + { + "action_loss": 0.012149865739047527, + "epoch": 7.634892086330935, + "step": 8490 + }, + { + "epoch": 7.634892086330935, + "step": 8490, + "torque_loss": 0.1339341253042221 + }, + { + "epoch": 7.634892086330935, + "force_loss": 0.01241689920425415, + "step": 8490 + }, + { + "epoch": 7.643884892086331, + "grad_norm": 1.122774362564087, + "learning_rate": 9.772107266354439e-05, + "loss": 0.0356, + "step": 8500 + }, + { + "action_loss": 0.0162548515945673, + "epoch": 7.643884892086331, + "step": 8500 + }, + { + "epoch": 7.643884892086331, + "step": 8500, + "torque_loss": 0.19368226826190948 + }, + { + "epoch": 7.643884892086331, + "force_loss": 0.010798695497214794, + "step": 8500 + }, + { + "epoch": 7.652877697841727, + "grad_norm": 0.6731473803520203, + "learning_rate": 9.77128404448856e-05, + "loss": 0.0378, + "step": 8510 + }, + { + "action_loss": 0.008567575365304947, + "epoch": 7.652877697841727, + "step": 8510 + }, + { + "epoch": 7.652877697841727, + "step": 8510, + "torque_loss": 0.14658944308757782 + }, + { + "epoch": 7.652877697841727, + "force_loss": 0.006835754960775375, + "step": 8510 + }, + { + "epoch": 7.661870503597123, + "grad_norm": 1.0794564485549927, + "learning_rate": 9.770459373232565e-05, + "loss": 0.0268, + "step": 8520 + }, + { + "action_loss": 0.004705252591520548, + "epoch": 7.661870503597123, + "step": 8520 + }, + { + "epoch": 7.661870503597123, + "step": 8520, + "torque_loss": 0.09792991727590561 + }, + { + "epoch": 7.661870503597123, + "force_loss": 0.006157430354505777, + "step": 8520 + }, + { + "epoch": 7.670863309352518, + "grad_norm": 0.55097895860672, + "learning_rate": 9.769633252836969e-05, + "loss": 0.0309, + "step": 8530 + }, + { + "action_loss": 0.010069550015032291, + "epoch": 7.670863309352518, + "step": 8530 + }, + { + "epoch": 7.670863309352518, + "step": 8530, + "torque_loss": 0.09578531980514526 + }, + { + "epoch": 7.670863309352518, + "force_loss": 0.007114263717085123, + "step": 8530 + }, + { + "epoch": 7.679856115107913, + "grad_norm": 1.0670244693756104, + "learning_rate": 9.768805683552724e-05, + "loss": 0.026, + "step": 8540 + }, + { + "action_loss": 0.01338367909193039, + "epoch": 7.679856115107913, + "step": 8540 + }, + { + "epoch": 7.679856115107913, + "step": 8540, + "torque_loss": 0.10631659626960754 + }, + { + "epoch": 7.679856115107913, + "force_loss": 0.00999636109918356, + "step": 8540 + }, + { + "epoch": 7.688848920863309, + "grad_norm": 0.9078983664512634, + "learning_rate": 9.767976665631228e-05, + "loss": 0.0281, + "step": 8550 + }, + { + "action_loss": 0.007202318403869867, + "epoch": 7.688848920863309, + "step": 8550 + }, + { + "epoch": 7.688848920863309, + "step": 8550, + "torque_loss": 0.12337324768304825 + }, + { + "epoch": 7.688848920863309, + "force_loss": 0.006715590599924326, + "step": 8550 + }, + { + "epoch": 7.697841726618705, + "grad_norm": 0.40653520822525024, + "learning_rate": 9.767146199324311e-05, + "loss": 0.0279, + "step": 8560 + }, + { + "action_loss": 0.01734321378171444, + "epoch": 7.697841726618705, + "step": 8560 + }, + { + "epoch": 7.697841726618705, + "step": 8560, + "torque_loss": 0.17517967522144318 + }, + { + "epoch": 7.697841726618705, + "force_loss": 0.021837225183844566, + "step": 8560 + }, + { + "epoch": 7.706834532374101, + "grad_norm": 0.5068568587303162, + "learning_rate": 9.766314284884249e-05, + "loss": 0.0259, + "step": 8570 + }, + { + "action_loss": 0.016721414402127266, + "epoch": 7.706834532374101, + "step": 8570 + }, + { + "epoch": 7.706834532374101, + "step": 8570, + "torque_loss": 0.13541068136692047 + }, + { + "epoch": 7.706834532374101, + "force_loss": 0.020000582560896873, + "step": 8570 + }, + { + "epoch": 7.715827338129497, + "grad_norm": 1.5598217248916626, + "learning_rate": 9.765480922563752e-05, + "loss": 0.0301, + "step": 8580 + }, + { + "action_loss": 0.020986055955290794, + "epoch": 7.715827338129497, + "step": 8580 + }, + { + "epoch": 7.715827338129497, + "step": 8580, + "torque_loss": 0.1576753705739975 + }, + { + "epoch": 7.715827338129497, + "force_loss": 0.015498235821723938, + "step": 8580 + }, + { + "epoch": 7.724820143884892, + "grad_norm": 0.7705401182174683, + "learning_rate": 9.764646112615978e-05, + "loss": 0.0301, + "step": 8590 + }, + { + "action_loss": 0.016865838319063187, + "epoch": 7.724820143884892, + "step": 8590 + }, + { + "epoch": 7.724820143884892, + "step": 8590, + "torque_loss": 0.17905347049236298 + }, + { + "epoch": 7.724820143884892, + "force_loss": 0.01965884305536747, + "step": 8590 + }, + { + "epoch": 7.733812949640288, + "grad_norm": 0.9407777786254883, + "learning_rate": 9.763809855294517e-05, + "loss": 0.0305, + "step": 8600 + }, + { + "action_loss": 0.020679781213402748, + "epoch": 7.733812949640288, + "step": 8600 + }, + { + "epoch": 7.733812949640288, + "step": 8600, + "torque_loss": 0.1808556765317917 + }, + { + "epoch": 7.733812949640288, + "force_loss": 0.03475014492869377, + "step": 8600 + }, + { + "epoch": 7.7428057553956835, + "grad_norm": 0.7985976338386536, + "learning_rate": 9.762972150853404e-05, + "loss": 0.0286, + "step": 8610 + }, + { + "action_loss": 0.008926980197429657, + "epoch": 7.7428057553956835, + "step": 8610 + }, + { + "epoch": 7.7428057553956835, + "step": 8610, + "torque_loss": 0.16814644634723663 + }, + { + "epoch": 7.7428057553956835, + "force_loss": 0.010465036146342754, + "step": 8610 + }, + { + "epoch": 7.751798561151079, + "grad_norm": 1.1951462030410767, + "learning_rate": 9.762132999547111e-05, + "loss": 0.0278, + "step": 8620 + }, + { + "action_loss": 0.06406699866056442, + "epoch": 7.751798561151079, + "step": 8620 + }, + { + "epoch": 7.751798561151079, + "step": 8620, + "torque_loss": 0.17853160202503204 + }, + { + "epoch": 7.751798561151079, + "force_loss": 0.06099233031272888, + "step": 8620 + }, + { + "epoch": 7.760791366906475, + "grad_norm": 1.1469022035598755, + "learning_rate": 9.761292401630549e-05, + "loss": 0.0335, + "step": 8630 + }, + { + "action_loss": 0.011628423817455769, + "epoch": 7.760791366906475, + "step": 8630 + }, + { + "epoch": 7.760791366906475, + "step": 8630, + "torque_loss": 0.14463847875595093 + }, + { + "epoch": 7.760791366906475, + "force_loss": 0.018112724646925926, + "step": 8630 + }, + { + "epoch": 7.76978417266187, + "grad_norm": 0.6961926221847534, + "learning_rate": 9.76045035735907e-05, + "loss": 0.028, + "step": 8640 + }, + { + "action_loss": 0.008572163991630077, + "epoch": 7.76978417266187, + "step": 8640 + }, + { + "epoch": 7.76978417266187, + "step": 8640, + "torque_loss": 0.18002033233642578 + }, + { + "epoch": 7.76978417266187, + "force_loss": 0.008644177578389645, + "step": 8640 + }, + { + "epoch": 7.778776978417266, + "grad_norm": 1.1041719913482666, + "learning_rate": 9.759606866988464e-05, + "loss": 0.0292, + "step": 8650 + }, + { + "action_loss": 0.016385432332754135, + "epoch": 7.778776978417266, + "step": 8650 + }, + { + "epoch": 7.778776978417266, + "step": 8650, + "torque_loss": 0.1338357925415039 + }, + { + "epoch": 7.778776978417266, + "force_loss": 0.014097783714532852, + "step": 8650 + }, + { + "epoch": 7.787769784172662, + "grad_norm": 1.7157633304595947, + "learning_rate": 9.758761930774963e-05, + "loss": 0.0254, + "step": 8660 + }, + { + "action_loss": 0.017985621467232704, + "epoch": 7.787769784172662, + "step": 8660 + }, + { + "epoch": 7.787769784172662, + "step": 8660, + "torque_loss": 0.1349223554134369 + }, + { + "epoch": 7.787769784172662, + "force_loss": 0.014778709970414639, + "step": 8660 + }, + { + "epoch": 7.796762589928058, + "grad_norm": 0.7497832179069519, + "learning_rate": 9.757915548975235e-05, + "loss": 0.0271, + "step": 8670 + }, + { + "action_loss": 0.019775187596678734, + "epoch": 7.796762589928058, + "step": 8670 + }, + { + "epoch": 7.796762589928058, + "step": 8670, + "torque_loss": 0.13471777737140656 + }, + { + "epoch": 7.796762589928058, + "force_loss": 0.026757625862956047, + "step": 8670 + }, + { + "epoch": 7.805755395683454, + "grad_norm": 1.151245355606079, + "learning_rate": 9.757067721846389e-05, + "loss": 0.0295, + "step": 8680 + }, + { + "action_loss": 0.01734592393040657, + "epoch": 7.805755395683454, + "step": 8680 + }, + { + "epoch": 7.805755395683454, + "step": 8680, + "torque_loss": 0.10999622195959091 + }, + { + "epoch": 7.805755395683454, + "force_loss": 0.01157617848366499, + "step": 8680 + }, + { + "epoch": 7.814748201438849, + "grad_norm": 0.6144757270812988, + "learning_rate": 9.756218449645971e-05, + "loss": 0.0297, + "step": 8690 + }, + { + "action_loss": 0.007410380989313126, + "epoch": 7.814748201438849, + "step": 8690 + }, + { + "epoch": 7.814748201438849, + "step": 8690, + "torque_loss": 0.17453980445861816 + }, + { + "epoch": 7.814748201438849, + "force_loss": 0.006194221321493387, + "step": 8690 + }, + { + "epoch": 7.823741007194244, + "grad_norm": 1.1518076658248901, + "learning_rate": 9.75536773263197e-05, + "loss": 0.0312, + "step": 8700 + }, + { + "action_loss": 0.013584806583821774, + "epoch": 7.823741007194244, + "step": 8700 + }, + { + "epoch": 7.823741007194244, + "step": 8700, + "torque_loss": 0.15442724525928497 + }, + { + "epoch": 7.823741007194244, + "force_loss": 0.019365733489394188, + "step": 8700 + }, + { + "epoch": 7.83273381294964, + "grad_norm": 1.2157481908798218, + "learning_rate": 9.75451557106281e-05, + "loss": 0.0273, + "step": 8710 + }, + { + "action_loss": 0.013075500726699829, + "epoch": 7.83273381294964, + "step": 8710 + }, + { + "epoch": 7.83273381294964, + "step": 8710, + "torque_loss": 0.1235220655798912 + }, + { + "epoch": 7.83273381294964, + "force_loss": 0.008428572677075863, + "step": 8710 + }, + { + "epoch": 7.841726618705036, + "grad_norm": 0.925574779510498, + "learning_rate": 9.753661965197354e-05, + "loss": 0.0304, + "step": 8720 + }, + { + "action_loss": 0.011055998504161835, + "epoch": 7.841726618705036, + "step": 8720 + }, + { + "epoch": 7.841726618705036, + "step": 8720, + "torque_loss": 0.16982437670230865 + }, + { + "epoch": 7.841726618705036, + "force_loss": 0.009769403375685215, + "step": 8720 + }, + { + "epoch": 7.850719424460432, + "grad_norm": 0.841577410697937, + "learning_rate": 9.752806915294908e-05, + "loss": 0.0304, + "step": 8730 + }, + { + "action_loss": 0.017190979793667793, + "epoch": 7.850719424460432, + "step": 8730 + }, + { + "epoch": 7.850719424460432, + "step": 8730, + "torque_loss": 0.15277527272701263 + }, + { + "epoch": 7.850719424460432, + "force_loss": 0.012404471635818481, + "step": 8730 + }, + { + "epoch": 7.859712230215827, + "grad_norm": 0.5871753096580505, + "learning_rate": 9.75195042161521e-05, + "loss": 0.0278, + "step": 8740 + }, + { + "action_loss": 0.009524501860141754, + "epoch": 7.859712230215827, + "step": 8740 + }, + { + "epoch": 7.859712230215827, + "step": 8740, + "torque_loss": 0.11257639527320862 + }, + { + "epoch": 7.859712230215827, + "force_loss": 0.007627982646226883, + "step": 8740 + }, + { + "epoch": 7.868705035971223, + "grad_norm": 0.8407716751098633, + "learning_rate": 9.751092484418442e-05, + "loss": 0.0253, + "step": 8750 + }, + { + "action_loss": 0.007104267831891775, + "epoch": 7.868705035971223, + "step": 8750 + }, + { + "epoch": 7.868705035971223, + "step": 8750, + "torque_loss": 0.17534182965755463 + }, + { + "epoch": 7.868705035971223, + "force_loss": 0.007171860430389643, + "step": 8750 + }, + { + "epoch": 7.877697841726619, + "grad_norm": 0.7447162866592407, + "learning_rate": 9.750233103965224e-05, + "loss": 0.0305, + "step": 8760 + }, + { + "action_loss": 0.026125816628336906, + "epoch": 7.877697841726619, + "step": 8760 + }, + { + "epoch": 7.877697841726619, + "step": 8760, + "torque_loss": 0.18450210988521576 + }, + { + "epoch": 7.877697841726619, + "force_loss": 0.022638479247689247, + "step": 8760 + }, + { + "epoch": 7.886690647482014, + "grad_norm": 0.7517133951187134, + "learning_rate": 9.749372280516611e-05, + "loss": 0.0309, + "step": 8770 + }, + { + "action_loss": 0.009107548743486404, + "epoch": 7.886690647482014, + "step": 8770 + }, + { + "epoch": 7.886690647482014, + "step": 8770, + "torque_loss": 0.1936049610376358 + }, + { + "epoch": 7.886690647482014, + "force_loss": 0.0101165184751153, + "step": 8770 + }, + { + "epoch": 7.89568345323741, + "grad_norm": 0.4790474474430084, + "learning_rate": 9.748510014334097e-05, + "loss": 0.035, + "step": 8780 + }, + { + "action_loss": 0.024564029648900032, + "epoch": 7.89568345323741, + "step": 8780 + }, + { + "epoch": 7.89568345323741, + "step": 8780, + "torque_loss": 0.169158473610878 + }, + { + "epoch": 7.89568345323741, + "force_loss": 0.02265465259552002, + "step": 8780 + }, + { + "epoch": 7.904676258992806, + "grad_norm": 0.5702713131904602, + "learning_rate": 9.747646305679621e-05, + "loss": 0.0331, + "step": 8790 + }, + { + "action_loss": 0.019576692953705788, + "epoch": 7.904676258992806, + "step": 8790 + }, + { + "epoch": 7.904676258992806, + "step": 8790, + "torque_loss": 0.15589185059070587 + }, + { + "epoch": 7.904676258992806, + "force_loss": 0.020213015377521515, + "step": 8790 + }, + { + "epoch": 7.913669064748201, + "grad_norm": 0.47509273886680603, + "learning_rate": 9.74678115481555e-05, + "loss": 0.0362, + "step": 8800 + }, + { + "action_loss": 0.020954512059688568, + "epoch": 7.913669064748201, + "step": 8800 + }, + { + "epoch": 7.913669064748201, + "step": 8800, + "torque_loss": 0.12004091590642929 + }, + { + "epoch": 7.913669064748201, + "force_loss": 0.02293638326227665, + "step": 8800 + }, + { + "epoch": 7.922661870503597, + "grad_norm": 0.9124072194099426, + "learning_rate": 9.745914562004696e-05, + "loss": 0.0324, + "step": 8810 + }, + { + "action_loss": 0.007807473186403513, + "epoch": 7.922661870503597, + "step": 8810 + }, + { + "epoch": 7.922661870503597, + "step": 8810, + "torque_loss": 0.16129188239574432 + }, + { + "epoch": 7.922661870503597, + "force_loss": 0.006094722542911768, + "step": 8810 + }, + { + "epoch": 7.931654676258993, + "grad_norm": 0.8787750601768494, + "learning_rate": 9.745046527510307e-05, + "loss": 0.0297, + "step": 8820 + }, + { + "action_loss": 0.014297188259661198, + "epoch": 7.931654676258993, + "step": 8820 + }, + { + "epoch": 7.931654676258993, + "step": 8820, + "torque_loss": 0.14133186638355255 + }, + { + "epoch": 7.931654676258993, + "force_loss": 0.015875620767474174, + "step": 8820 + }, + { + "epoch": 7.940647482014389, + "grad_norm": 0.8169597387313843, + "learning_rate": 9.744177051596068e-05, + "loss": 0.03, + "step": 8830 + }, + { + "action_loss": 0.009781970642507076, + "epoch": 7.940647482014389, + "step": 8830 + }, + { + "epoch": 7.940647482014389, + "step": 8830, + "torque_loss": 0.13550764322280884 + }, + { + "epoch": 7.940647482014389, + "force_loss": 0.007129937410354614, + "step": 8830 + }, + { + "epoch": 7.9496402877697845, + "grad_norm": 1.2415217161178589, + "learning_rate": 9.743306134526105e-05, + "loss": 0.0304, + "step": 8840 + }, + { + "action_loss": 0.028044523671269417, + "epoch": 7.9496402877697845, + "step": 8840 + }, + { + "epoch": 7.9496402877697845, + "step": 8840, + "torque_loss": 0.1481942981481552 + }, + { + "epoch": 7.9496402877697845, + "force_loss": 0.027820894494652748, + "step": 8840 + }, + { + "epoch": 7.9586330935251794, + "grad_norm": 0.8753522038459778, + "learning_rate": 9.742433776564977e-05, + "loss": 0.0301, + "step": 8850 + }, + { + "action_loss": 0.0076384288258850574, + "epoch": 7.9586330935251794, + "step": 8850 + }, + { + "epoch": 7.9586330935251794, + "step": 8850, + "torque_loss": 0.1496245265007019 + }, + { + "epoch": 7.9586330935251794, + "force_loss": 0.007920184172689915, + "step": 8850 + }, + { + "epoch": 7.967625899280575, + "grad_norm": 0.4720075726509094, + "learning_rate": 9.741559977977683e-05, + "loss": 0.0283, + "step": 8860 + }, + { + "action_loss": 0.012563151307404041, + "epoch": 7.967625899280575, + "step": 8860 + }, + { + "epoch": 7.967625899280575, + "step": 8860, + "torque_loss": 0.13388486206531525 + }, + { + "epoch": 7.967625899280575, + "force_loss": 0.011008444242179394, + "step": 8860 + }, + { + "epoch": 7.976618705035971, + "grad_norm": 0.6866222620010376, + "learning_rate": 9.740684739029661e-05, + "loss": 0.0332, + "step": 8870 + }, + { + "action_loss": 0.014407560229301453, + "epoch": 7.976618705035971, + "step": 8870 + }, + { + "epoch": 7.976618705035971, + "step": 8870, + "torque_loss": 0.17427630722522736 + }, + { + "epoch": 7.976618705035971, + "force_loss": 0.01010412722826004, + "step": 8870 + }, + { + "epoch": 7.985611510791367, + "grad_norm": 0.9170600175857544, + "learning_rate": 9.739808059986789e-05, + "loss": 0.0387, + "step": 8880 + }, + { + "action_loss": 0.009781739674508572, + "epoch": 7.985611510791367, + "step": 8880 + }, + { + "epoch": 7.985611510791367, + "step": 8880, + "torque_loss": 0.13599783182144165 + }, + { + "epoch": 7.985611510791367, + "force_loss": 0.009926320053637028, + "step": 8880 + }, + { + "epoch": 7.994604316546763, + "grad_norm": 0.8177639245986938, + "learning_rate": 9.738929941115373e-05, + "loss": 0.0318, + "step": 8890 + }, + { + "action_loss": 0.004782909527420998, + "epoch": 7.994604316546763, + "step": 8890 + }, + { + "epoch": 7.994604316546763, + "step": 8890, + "torque_loss": 0.12804840505123138 + }, + { + "epoch": 7.994604316546763, + "force_loss": 0.006489449646323919, + "step": 8890 + }, + { + "epoch": 8.003597122302159, + "grad_norm": 0.4400154948234558, + "learning_rate": 9.738050382682167e-05, + "loss": 0.0225, + "step": 8900 + }, + { + "action_loss": 0.007002175319939852, + "epoch": 8.003597122302159, + "step": 8900 + }, + { + "epoch": 8.003597122302159, + "step": 8900, + "torque_loss": 0.10859952121973038 + }, + { + "epoch": 8.003597122302159, + "force_loss": 0.007067775819450617, + "step": 8900 + }, + { + "epoch": 8.012589928057555, + "grad_norm": 1.134573221206665, + "learning_rate": 9.737169384954355e-05, + "loss": 0.0346, + "step": 8910 + }, + { + "action_loss": 0.009668984450399876, + "epoch": 8.012589928057555, + "step": 8910 + }, + { + "epoch": 8.012589928057555, + "step": 8910, + "torque_loss": 0.13476596772670746 + }, + { + "epoch": 8.012589928057555, + "force_loss": 0.011064928956329823, + "step": 8910 + }, + { + "epoch": 8.02158273381295, + "grad_norm": 1.1572602987289429, + "learning_rate": 9.736286948199562e-05, + "loss": 0.0292, + "step": 8920 + }, + { + "action_loss": 0.01806609332561493, + "epoch": 8.02158273381295, + "step": 8920 + }, + { + "epoch": 8.02158273381295, + "step": 8920, + "torque_loss": 0.1390966773033142 + }, + { + "epoch": 8.02158273381295, + "force_loss": 0.009013565257191658, + "step": 8920 + }, + { + "epoch": 8.030575539568344, + "grad_norm": 1.6132428646087646, + "learning_rate": 9.735403072685848e-05, + "loss": 0.0304, + "step": 8930 + }, + { + "action_loss": 0.012617386877536774, + "epoch": 8.030575539568344, + "step": 8930 + }, + { + "epoch": 8.030575539568344, + "step": 8930, + "torque_loss": 0.125461146235466 + }, + { + "epoch": 8.030575539568344, + "force_loss": 0.010436777025461197, + "step": 8930 + }, + { + "epoch": 8.03956834532374, + "grad_norm": 0.6063098907470703, + "learning_rate": 9.734517758681712e-05, + "loss": 0.0295, + "step": 8940 + }, + { + "action_loss": 0.012668609619140625, + "epoch": 8.03956834532374, + "step": 8940 + }, + { + "epoch": 8.03956834532374, + "step": 8940, + "torque_loss": 0.11099114269018173 + }, + { + "epoch": 8.03956834532374, + "force_loss": 0.015590664930641651, + "step": 8940 + }, + { + "epoch": 8.048561151079136, + "grad_norm": 1.1803076267242432, + "learning_rate": 9.733631006456088e-05, + "loss": 0.0288, + "step": 8950 + }, + { + "action_loss": 0.016963742673397064, + "epoch": 8.048561151079136, + "step": 8950 + }, + { + "epoch": 8.048561151079136, + "step": 8950, + "torque_loss": 0.17014066874980927 + }, + { + "epoch": 8.048561151079136, + "force_loss": 0.020709695294499397, + "step": 8950 + }, + { + "epoch": 8.057553956834532, + "grad_norm": 0.5683021545410156, + "learning_rate": 9.732742816278348e-05, + "loss": 0.0321, + "step": 8960 + }, + { + "action_loss": 0.01965462975203991, + "epoch": 8.057553956834532, + "step": 8960 + }, + { + "epoch": 8.057553956834532, + "step": 8960, + "torque_loss": 0.14173145592212677 + }, + { + "epoch": 8.057553956834532, + "force_loss": 0.026922591030597687, + "step": 8960 + }, + { + "epoch": 8.066546762589928, + "grad_norm": 0.870956540107727, + "learning_rate": 9.731853188418302e-05, + "loss": 0.0278, + "step": 8970 + }, + { + "action_loss": 0.04899698495864868, + "epoch": 8.066546762589928, + "step": 8970 + }, + { + "epoch": 8.066546762589928, + "step": 8970, + "torque_loss": 0.18799400329589844 + }, + { + "epoch": 8.066546762589928, + "force_loss": 0.05203847959637642, + "step": 8970 + }, + { + "epoch": 8.075539568345324, + "grad_norm": 0.8082453012466431, + "learning_rate": 9.730962123146194e-05, + "loss": 0.0345, + "step": 8980 + }, + { + "action_loss": 0.008148455992341042, + "epoch": 8.075539568345324, + "step": 8980 + }, + { + "epoch": 8.075539568345324, + "step": 8980, + "torque_loss": 0.13181421160697937 + }, + { + "epoch": 8.075539568345324, + "force_loss": 0.010034661740064621, + "step": 8980 + }, + { + "epoch": 8.08453237410072, + "grad_norm": 0.6588174700737, + "learning_rate": 9.730069620732709e-05, + "loss": 0.0326, + "step": 8990 + }, + { + "action_loss": 0.01672670803964138, + "epoch": 8.08453237410072, + "step": 8990 + }, + { + "epoch": 8.08453237410072, + "step": 8990, + "torque_loss": 0.12110227346420288 + }, + { + "epoch": 8.08453237410072, + "force_loss": 0.014961506240069866, + "step": 8990 + }, + { + "epoch": 8.093525179856115, + "grad_norm": 0.8046431541442871, + "learning_rate": 9.72917568144896e-05, + "loss": 0.0301, + "step": 9000 + }, + { + "action_loss": 0.016531439498066902, + "epoch": 8.093525179856115, + "step": 9000 + }, + { + "epoch": 8.093525179856115, + "step": 9000, + "torque_loss": 0.16104300320148468 + }, + { + "epoch": 8.093525179856115, + "force_loss": 0.011993743479251862, + "step": 9000 + }, + { + "epoch": 8.102517985611511, + "grad_norm": 1.2161035537719727, + "learning_rate": 9.728280305566509e-05, + "loss": 0.0358, + "step": 9010 + }, + { + "action_loss": 0.02981303632259369, + "epoch": 8.102517985611511, + "step": 9010 + }, + { + "epoch": 8.102517985611511, + "step": 9010, + "torque_loss": 0.1625359207391739 + }, + { + "epoch": 8.102517985611511, + "force_loss": 0.03526288643479347, + "step": 9010 + }, + { + "epoch": 8.111510791366907, + "grad_norm": 0.5446035861968994, + "learning_rate": 9.727383493357343e-05, + "loss": 0.0346, + "step": 9020 + }, + { + "action_loss": 0.018178652971982956, + "epoch": 8.111510791366907, + "step": 9020 + }, + { + "epoch": 8.111510791366907, + "step": 9020, + "torque_loss": 0.1328810602426529 + }, + { + "epoch": 8.111510791366907, + "force_loss": 0.016255365684628487, + "step": 9020 + }, + { + "epoch": 8.120503597122303, + "grad_norm": 0.7841731309890747, + "learning_rate": 9.726485245093891e-05, + "loss": 0.0343, + "step": 9030 + }, + { + "action_loss": 0.01261367741972208, + "epoch": 8.120503597122303, + "step": 9030 + }, + { + "epoch": 8.120503597122303, + "step": 9030, + "torque_loss": 0.1043577790260315 + }, + { + "epoch": 8.120503597122303, + "force_loss": 0.009312078356742859, + "step": 9030 + }, + { + "epoch": 8.129496402877697, + "grad_norm": 1.0097514390945435, + "learning_rate": 9.725585561049018e-05, + "loss": 0.0296, + "step": 9040 + }, + { + "action_loss": 0.032424528151750565, + "epoch": 8.129496402877697, + "step": 9040 + }, + { + "epoch": 8.129496402877697, + "step": 9040, + "torque_loss": 0.1894434541463852 + }, + { + "epoch": 8.129496402877697, + "force_loss": 0.03689010068774223, + "step": 9040 + }, + { + "epoch": 8.138489208633093, + "grad_norm": 0.6864043474197388, + "learning_rate": 9.724684441496022e-05, + "loss": 0.0326, + "step": 9050 + }, + { + "action_loss": 0.012702490203082561, + "epoch": 8.138489208633093, + "step": 9050 + }, + { + "epoch": 8.138489208633093, + "step": 9050, + "torque_loss": 0.08355443924665451 + }, + { + "epoch": 8.138489208633093, + "force_loss": 0.008524672128260136, + "step": 9050 + }, + { + "epoch": 8.147482014388489, + "grad_norm": 1.1375627517700195, + "learning_rate": 9.72378188670864e-05, + "loss": 0.0281, + "step": 9060 + }, + { + "action_loss": 0.007547351997345686, + "epoch": 8.147482014388489, + "step": 9060 + }, + { + "epoch": 8.147482014388489, + "step": 9060, + "torque_loss": 0.11102074384689331 + }, + { + "epoch": 8.147482014388489, + "force_loss": 0.006551103666424751, + "step": 9060 + }, + { + "epoch": 8.156474820143885, + "grad_norm": 0.4873935580253601, + "learning_rate": 9.722877896961047e-05, + "loss": 0.0254, + "step": 9070 + }, + { + "action_loss": 0.011491470970213413, + "epoch": 8.156474820143885, + "step": 9070 + }, + { + "epoch": 8.156474820143885, + "step": 9070, + "torque_loss": 0.1838628202676773 + }, + { + "epoch": 8.156474820143885, + "force_loss": 0.00972656812518835, + "step": 9070 + }, + { + "epoch": 8.16546762589928, + "grad_norm": 0.47592806816101074, + "learning_rate": 9.721972472527848e-05, + "loss": 0.0289, + "step": 9080 + }, + { + "action_loss": 0.010076670907437801, + "epoch": 8.16546762589928, + "step": 9080 + }, + { + "epoch": 8.16546762589928, + "step": 9080, + "torque_loss": 0.0921001061797142 + }, + { + "epoch": 8.16546762589928, + "force_loss": 0.01084989309310913, + "step": 9080 + }, + { + "epoch": 8.174460431654676, + "grad_norm": 0.6903480887413025, + "learning_rate": 9.721065613684089e-05, + "loss": 0.026, + "step": 9090 + }, + { + "action_loss": 0.009710323996841908, + "epoch": 8.174460431654676, + "step": 9090 + }, + { + "epoch": 8.174460431654676, + "step": 9090, + "torque_loss": 0.09566020965576172 + }, + { + "epoch": 8.174460431654676, + "force_loss": 0.013454924337565899, + "step": 9090 + }, + { + "epoch": 8.183453237410072, + "grad_norm": 0.47146815061569214, + "learning_rate": 9.72015732070525e-05, + "loss": 0.0252, + "step": 9100 + }, + { + "action_loss": 0.025996504351496696, + "epoch": 8.183453237410072, + "step": 9100 + }, + { + "epoch": 8.183453237410072, + "step": 9100, + "torque_loss": 0.1910381317138672 + }, + { + "epoch": 8.183453237410072, + "force_loss": 0.02081509865820408, + "step": 9100 + }, + { + "epoch": 8.192446043165468, + "grad_norm": 1.7140965461730957, + "learning_rate": 9.719247593867244e-05, + "loss": 0.0268, + "step": 9110 + }, + { + "action_loss": 0.018666816875338554, + "epoch": 8.192446043165468, + "step": 9110 + }, + { + "epoch": 8.192446043165468, + "step": 9110, + "torque_loss": 0.1385652869939804 + }, + { + "epoch": 8.192446043165468, + "force_loss": 0.016636190935969353, + "step": 9110 + }, + { + "epoch": 8.201438848920864, + "grad_norm": 0.647611141204834, + "learning_rate": 9.718336433446423e-05, + "loss": 0.032, + "step": 9120 + }, + { + "action_loss": 0.011723405681550503, + "epoch": 8.201438848920864, + "step": 9120 + }, + { + "epoch": 8.201438848920864, + "step": 9120, + "torque_loss": 0.15605856478214264 + }, + { + "epoch": 8.201438848920864, + "force_loss": 0.01640763320028782, + "step": 9120 + }, + { + "epoch": 8.21043165467626, + "grad_norm": 0.9738593101501465, + "learning_rate": 9.717423839719574e-05, + "loss": 0.028, + "step": 9130 + }, + { + "action_loss": 0.016471194103360176, + "epoch": 8.21043165467626, + "step": 9130 + }, + { + "epoch": 8.21043165467626, + "step": 9130, + "torque_loss": 0.16527016460895538 + }, + { + "epoch": 8.21043165467626, + "force_loss": 0.019555814564228058, + "step": 9130 + }, + { + "epoch": 8.219424460431656, + "grad_norm": 0.6147652864456177, + "learning_rate": 9.71650981296392e-05, + "loss": 0.0283, + "step": 9140 + }, + { + "action_loss": 0.017719784751534462, + "epoch": 8.219424460431656, + "step": 9140 + }, + { + "epoch": 8.219424460431656, + "step": 9140, + "torque_loss": 0.12981177866458893 + }, + { + "epoch": 8.219424460431656, + "force_loss": 0.017089011147618294, + "step": 9140 + }, + { + "epoch": 8.22841726618705, + "grad_norm": 0.4535752534866333, + "learning_rate": 9.715594353457118e-05, + "loss": 0.0266, + "step": 9150 + }, + { + "action_loss": 0.00867116916924715, + "epoch": 8.22841726618705, + "step": 9150 + }, + { + "epoch": 8.22841726618705, + "step": 9150, + "torque_loss": 0.09822756052017212 + }, + { + "epoch": 8.22841726618705, + "force_loss": 0.01133808959275484, + "step": 9150 + }, + { + "epoch": 8.237410071942445, + "grad_norm": 0.813336968421936, + "learning_rate": 9.714677461477257e-05, + "loss": 0.0299, + "step": 9160 + }, + { + "action_loss": 0.015352415852248669, + "epoch": 8.237410071942445, + "step": 9160 + }, + { + "epoch": 8.237410071942445, + "step": 9160, + "torque_loss": 0.14493830502033234 + }, + { + "epoch": 8.237410071942445, + "force_loss": 0.007876926101744175, + "step": 9160 + }, + { + "epoch": 8.246402877697841, + "grad_norm": 0.5332842469215393, + "learning_rate": 9.713759137302869e-05, + "loss": 0.0279, + "step": 9170 + }, + { + "action_loss": 0.020986506715416908, + "epoch": 8.246402877697841, + "step": 9170 + }, + { + "epoch": 8.246402877697841, + "step": 9170, + "torque_loss": 0.13961315155029297 + }, + { + "epoch": 8.246402877697841, + "force_loss": 0.016814392060041428, + "step": 9170 + }, + { + "epoch": 8.255395683453237, + "grad_norm": 1.2883217334747314, + "learning_rate": 9.712839381212914e-05, + "loss": 0.0294, + "step": 9180 + }, + { + "action_loss": 0.012940999120473862, + "epoch": 8.255395683453237, + "step": 9180 + }, + { + "epoch": 8.255395683453237, + "step": 9180, + "torque_loss": 0.1373448222875595 + }, + { + "epoch": 8.255395683453237, + "force_loss": 0.008718389086425304, + "step": 9180 + }, + { + "epoch": 8.264388489208633, + "grad_norm": 0.7358983159065247, + "learning_rate": 9.71191819348679e-05, + "loss": 0.0285, + "step": 9190 + }, + { + "action_loss": 0.00819359626621008, + "epoch": 8.264388489208633, + "step": 9190 + }, + { + "epoch": 8.264388489208633, + "step": 9190, + "torque_loss": 0.0880383625626564 + }, + { + "epoch": 8.264388489208633, + "force_loss": 0.006998899858444929, + "step": 9190 + }, + { + "epoch": 8.273381294964029, + "grad_norm": 0.4908333718776703, + "learning_rate": 9.710995574404331e-05, + "loss": 0.0295, + "step": 9200 + }, + { + "action_loss": 0.009041265584528446, + "epoch": 8.273381294964029, + "step": 9200 + }, + { + "epoch": 8.273381294964029, + "step": 9200, + "torque_loss": 0.12641453742980957 + }, + { + "epoch": 8.273381294964029, + "force_loss": 0.008404488675296307, + "step": 9200 + }, + { + "epoch": 8.282374100719425, + "grad_norm": 1.0569324493408203, + "learning_rate": 9.710071524245802e-05, + "loss": 0.0344, + "step": 9210 + }, + { + "action_loss": 0.023268500342965126, + "epoch": 8.282374100719425, + "step": 9210 + }, + { + "epoch": 8.282374100719425, + "step": 9210, + "torque_loss": 0.14671923220157623 + }, + { + "epoch": 8.282374100719425, + "force_loss": 0.02295270748436451, + "step": 9210 + }, + { + "epoch": 8.29136690647482, + "grad_norm": 0.43037769198417664, + "learning_rate": 9.709146043291906e-05, + "loss": 0.0258, + "step": 9220 + }, + { + "action_loss": 0.011890962719917297, + "epoch": 8.29136690647482, + "step": 9220 + }, + { + "epoch": 8.29136690647482, + "step": 9220, + "torque_loss": 0.1297449916601181 + }, + { + "epoch": 8.29136690647482, + "force_loss": 0.008125021122395992, + "step": 9220 + }, + { + "epoch": 8.300359712230216, + "grad_norm": 1.2475656270980835, + "learning_rate": 9.70821913182378e-05, + "loss": 0.0323, + "step": 9230 + }, + { + "action_loss": 0.01498037576675415, + "epoch": 8.300359712230216, + "step": 9230 + }, + { + "epoch": 8.300359712230216, + "step": 9230, + "torque_loss": 0.16476303339004517 + }, + { + "epoch": 8.300359712230216, + "force_loss": 0.01710757426917553, + "step": 9230 + }, + { + "epoch": 8.309352517985612, + "grad_norm": 0.6913616061210632, + "learning_rate": 9.707290790122995e-05, + "loss": 0.0324, + "step": 9240 + }, + { + "action_loss": 0.008063911460340023, + "epoch": 8.309352517985612, + "step": 9240 + }, + { + "epoch": 8.309352517985612, + "step": 9240, + "torque_loss": 0.09638575464487076 + }, + { + "epoch": 8.309352517985612, + "force_loss": 0.005585936829447746, + "step": 9240 + }, + { + "epoch": 8.318345323741006, + "grad_norm": 0.6526975035667419, + "learning_rate": 9.706361018471557e-05, + "loss": 0.0313, + "step": 9250 + }, + { + "action_loss": 0.022348126396536827, + "epoch": 8.318345323741006, + "step": 9250 + }, + { + "epoch": 8.318345323741006, + "step": 9250, + "torque_loss": 0.13248980045318604 + }, + { + "epoch": 8.318345323741006, + "force_loss": 0.02697359211742878, + "step": 9250 + }, + { + "epoch": 8.327338129496402, + "grad_norm": 0.947041928768158, + "learning_rate": 9.705429817151906e-05, + "loss": 0.0307, + "step": 9260 + }, + { + "action_loss": 0.01929389126598835, + "epoch": 8.327338129496402, + "step": 9260 + }, + { + "epoch": 8.327338129496402, + "step": 9260, + "torque_loss": 0.13584868609905243 + }, + { + "epoch": 8.327338129496402, + "force_loss": 0.028136268258094788, + "step": 9260 + }, + { + "epoch": 8.336330935251798, + "grad_norm": 0.9014036059379578, + "learning_rate": 9.704497186446917e-05, + "loss": 0.0276, + "step": 9270 + }, + { + "action_loss": 0.01147465780377388, + "epoch": 8.336330935251798, + "step": 9270 + }, + { + "epoch": 8.336330935251798, + "step": 9270, + "torque_loss": 0.12540923058986664 + }, + { + "epoch": 8.336330935251798, + "force_loss": 0.016301970928907394, + "step": 9270 + }, + { + "epoch": 8.345323741007194, + "grad_norm": 0.28941667079925537, + "learning_rate": 9.703563126639896e-05, + "loss": 0.033, + "step": 9280 + }, + { + "action_loss": 0.010872160084545612, + "epoch": 8.345323741007194, + "step": 9280 + }, + { + "epoch": 8.345323741007194, + "step": 9280, + "torque_loss": 0.13422954082489014 + }, + { + "epoch": 8.345323741007194, + "force_loss": 0.015045229345560074, + "step": 9280 + }, + { + "epoch": 8.35431654676259, + "grad_norm": 0.4963284730911255, + "learning_rate": 9.70262763801459e-05, + "loss": 0.0287, + "step": 9290 + }, + { + "action_loss": 0.020897654816508293, + "epoch": 8.35431654676259, + "step": 9290 + }, + { + "epoch": 8.35431654676259, + "step": 9290, + "torque_loss": 0.12592101097106934 + }, + { + "epoch": 8.35431654676259, + "force_loss": 0.014290876686573029, + "step": 9290 + }, + { + "epoch": 8.363309352517986, + "grad_norm": 0.9091506004333496, + "learning_rate": 9.701690720855171e-05, + "loss": 0.0335, + "step": 9300 + }, + { + "action_loss": 0.004191944841295481, + "epoch": 8.363309352517986, + "step": 9300 + }, + { + "epoch": 8.363309352517986, + "step": 9300, + "torque_loss": 0.06838779896497726 + }, + { + "epoch": 8.363309352517986, + "force_loss": 0.00599181791767478, + "step": 9300 + }, + { + "epoch": 8.372302158273381, + "grad_norm": 0.46863675117492676, + "learning_rate": 9.700752375446253e-05, + "loss": 0.0299, + "step": 9310 + }, + { + "action_loss": 0.0628635510802269, + "epoch": 8.372302158273381, + "step": 9310 + }, + { + "epoch": 8.372302158273381, + "step": 9310, + "torque_loss": 0.16405658423900604 + }, + { + "epoch": 8.372302158273381, + "force_loss": 0.05716918036341667, + "step": 9310 + }, + { + "epoch": 8.381294964028777, + "grad_norm": 1.1366662979125977, + "learning_rate": 9.69981260207288e-05, + "loss": 0.0414, + "step": 9320 + }, + { + "action_loss": 0.017923591658473015, + "epoch": 8.381294964028777, + "step": 9320 + }, + { + "epoch": 8.381294964028777, + "step": 9320, + "torque_loss": 0.22768175601959229 + }, + { + "epoch": 8.381294964028777, + "force_loss": 0.01648772321641445, + "step": 9320 + }, + { + "epoch": 8.390287769784173, + "grad_norm": 0.7111708521842957, + "learning_rate": 9.698871401020529e-05, + "loss": 0.0335, + "step": 9330 + }, + { + "action_loss": 0.007409380748867989, + "epoch": 8.390287769784173, + "step": 9330 + }, + { + "epoch": 8.390287769784173, + "step": 9330, + "torque_loss": 0.1321495622396469 + }, + { + "epoch": 8.390287769784173, + "force_loss": 0.007271577138453722, + "step": 9330 + }, + { + "epoch": 8.399280575539569, + "grad_norm": 0.9520056247711182, + "learning_rate": 9.697928772575112e-05, + "loss": 0.0309, + "step": 9340 + }, + { + "action_loss": 0.015844479203224182, + "epoch": 8.399280575539569, + "step": 9340 + }, + { + "epoch": 8.399280575539569, + "step": 9340, + "torque_loss": 0.15661227703094482 + }, + { + "epoch": 8.399280575539569, + "force_loss": 0.017089197412133217, + "step": 9340 + }, + { + "epoch": 8.408273381294965, + "grad_norm": 0.6098338961601257, + "learning_rate": 9.696984717022976e-05, + "loss": 0.0255, + "step": 9350 + }, + { + "action_loss": 0.005137607455253601, + "epoch": 8.408273381294965, + "step": 9350 + }, + { + "epoch": 8.408273381294965, + "step": 9350, + "torque_loss": 0.115314781665802 + }, + { + "epoch": 8.408273381294965, + "force_loss": 0.005152374505996704, + "step": 9350 + }, + { + "epoch": 8.417266187050359, + "grad_norm": 0.7887623906135559, + "learning_rate": 9.6960392346509e-05, + "loss": 0.026, + "step": 9360 + }, + { + "action_loss": 0.00956383440643549, + "epoch": 8.417266187050359, + "step": 9360 + }, + { + "epoch": 8.417266187050359, + "step": 9360, + "torque_loss": 0.11685600876808167 + }, + { + "epoch": 8.417266187050359, + "force_loss": 0.00842523854225874, + "step": 9360 + }, + { + "epoch": 8.426258992805755, + "grad_norm": 0.38678234815597534, + "learning_rate": 9.695092325746097e-05, + "loss": 0.0329, + "step": 9370 + }, + { + "action_loss": 0.005833383649587631, + "epoch": 8.426258992805755, + "step": 9370 + }, + { + "epoch": 8.426258992805755, + "step": 9370, + "torque_loss": 0.11251125484704971 + }, + { + "epoch": 8.426258992805755, + "force_loss": 0.006181512027978897, + "step": 9370 + }, + { + "epoch": 8.43525179856115, + "grad_norm": 0.8431825637817383, + "learning_rate": 9.694143990596211e-05, + "loss": 0.0259, + "step": 9380 + }, + { + "action_loss": 0.010571137070655823, + "epoch": 8.43525179856115, + "step": 9380 + }, + { + "epoch": 8.43525179856115, + "step": 9380, + "torque_loss": 0.1533479243516922 + }, + { + "epoch": 8.43525179856115, + "force_loss": 0.012814153917133808, + "step": 9380 + }, + { + "epoch": 8.444244604316546, + "grad_norm": 0.7134740948677063, + "learning_rate": 9.693194229489325e-05, + "loss": 0.0425, + "step": 9390 + }, + { + "action_loss": 0.011143607087433338, + "epoch": 8.444244604316546, + "step": 9390 + }, + { + "epoch": 8.444244604316546, + "step": 9390, + "torque_loss": 0.1590392142534256 + }, + { + "epoch": 8.444244604316546, + "force_loss": 0.017856024205684662, + "step": 9390 + }, + { + "epoch": 8.453237410071942, + "grad_norm": 1.0953168869018555, + "learning_rate": 9.692243042713944e-05, + "loss": 0.0289, + "step": 9400 + }, + { + "action_loss": 0.014227758161723614, + "epoch": 8.453237410071942, + "step": 9400 + }, + { + "epoch": 8.453237410071942, + "step": 9400, + "torque_loss": 0.12394676357507706 + }, + { + "epoch": 8.453237410071942, + "force_loss": 0.014927543699741364, + "step": 9400 + }, + { + "epoch": 8.462230215827338, + "grad_norm": 1.3834631443023682, + "learning_rate": 9.691290430559022e-05, + "loss": 0.028, + "step": 9410 + }, + { + "action_loss": 0.012397505342960358, + "epoch": 8.462230215827338, + "step": 9410 + }, + { + "epoch": 8.462230215827338, + "step": 9410, + "torque_loss": 0.18198783695697784 + }, + { + "epoch": 8.462230215827338, + "force_loss": 0.007384064141660929, + "step": 9410 + }, + { + "epoch": 8.471223021582734, + "grad_norm": 0.7003529667854309, + "learning_rate": 9.690336393313932e-05, + "loss": 0.0275, + "step": 9420 + }, + { + "action_loss": 0.009323895908892155, + "epoch": 8.471223021582734, + "step": 9420 + }, + { + "epoch": 8.471223021582734, + "step": 9420, + "torque_loss": 0.12896889448165894 + }, + { + "epoch": 8.471223021582734, + "force_loss": 0.0062034521251916885, + "step": 9420 + }, + { + "epoch": 8.48021582733813, + "grad_norm": 0.6547080874443054, + "learning_rate": 9.689380931268487e-05, + "loss": 0.031, + "step": 9430 + }, + { + "action_loss": 0.01772930659353733, + "epoch": 8.48021582733813, + "step": 9430 + }, + { + "epoch": 8.48021582733813, + "step": 9430, + "torque_loss": 0.1869361847639084 + }, + { + "epoch": 8.48021582733813, + "force_loss": 0.01642507128417492, + "step": 9430 + }, + { + "epoch": 8.489208633093526, + "grad_norm": 0.8937145471572876, + "learning_rate": 9.688424044712932e-05, + "loss": 0.0316, + "step": 9440 + }, + { + "action_loss": 0.009877237491309643, + "epoch": 8.489208633093526, + "step": 9440 + }, + { + "epoch": 8.489208633093526, + "step": 9440, + "torque_loss": 0.157605841755867 + }, + { + "epoch": 8.489208633093526, + "force_loss": 0.011405526660382748, + "step": 9440 + }, + { + "epoch": 8.498201438848922, + "grad_norm": 0.6158521771430969, + "learning_rate": 9.687465733937942e-05, + "loss": 0.0358, + "step": 9450 + }, + { + "action_loss": 0.0104767931625247, + "epoch": 8.498201438848922, + "step": 9450 + }, + { + "epoch": 8.498201438848922, + "step": 9450, + "torque_loss": 0.10644660145044327 + }, + { + "epoch": 8.498201438848922, + "force_loss": 0.009099212475121021, + "step": 9450 + }, + { + "epoch": 8.507194244604317, + "grad_norm": 1.7294045686721802, + "learning_rate": 9.686505999234627e-05, + "loss": 0.028, + "step": 9460 + }, + { + "action_loss": 0.007561964448541403, + "epoch": 8.507194244604317, + "step": 9460 + }, + { + "epoch": 8.507194244604317, + "step": 9460, + "torque_loss": 0.17132334411144257 + }, + { + "epoch": 8.507194244604317, + "force_loss": 0.009764394722878933, + "step": 9460 + }, + { + "epoch": 8.516187050359711, + "grad_norm": 0.8151748180389404, + "learning_rate": 9.685544840894529e-05, + "loss": 0.0272, + "step": 9470 + }, + { + "action_loss": 0.007332219276577234, + "epoch": 8.516187050359711, + "step": 9470 + }, + { + "epoch": 8.516187050359711, + "step": 9470, + "torque_loss": 0.14351345598697662 + }, + { + "epoch": 8.516187050359711, + "force_loss": 0.012972989119589329, + "step": 9470 + }, + { + "epoch": 8.525179856115107, + "grad_norm": 0.6972848773002625, + "learning_rate": 9.684582259209624e-05, + "loss": 0.0271, + "step": 9480 + }, + { + "action_loss": 0.009656401351094246, + "epoch": 8.525179856115107, + "step": 9480 + }, + { + "epoch": 8.525179856115107, + "step": 9480, + "torque_loss": 0.15328960120677948 + }, + { + "epoch": 8.525179856115107, + "force_loss": 0.012785851024091244, + "step": 9480 + }, + { + "epoch": 8.534172661870503, + "grad_norm": 0.5664857029914856, + "learning_rate": 9.683618254472317e-05, + "loss": 0.0277, + "step": 9490 + }, + { + "action_loss": 0.007961034774780273, + "epoch": 8.534172661870503, + "step": 9490 + }, + { + "epoch": 8.534172661870503, + "step": 9490, + "torque_loss": 0.15656043589115143 + }, + { + "epoch": 8.534172661870503, + "force_loss": 0.007319868076592684, + "step": 9490 + }, + { + "epoch": 8.543165467625899, + "grad_norm": 0.8491553068161011, + "learning_rate": 9.682652826975449e-05, + "loss": 0.0291, + "step": 9500 + }, + { + "action_loss": 0.019876837730407715, + "epoch": 8.543165467625899, + "step": 9500 + }, + { + "epoch": 8.543165467625899, + "step": 9500, + "torque_loss": 0.11075549572706223 + }, + { + "epoch": 8.543165467625899, + "force_loss": 0.02449529431760311, + "step": 9500 + }, + { + "epoch": 8.552158273381295, + "grad_norm": 0.7463903427124023, + "learning_rate": 9.681685977012291e-05, + "loss": 0.027, + "step": 9510 + }, + { + "action_loss": 0.008659898303449154, + "epoch": 8.552158273381295, + "step": 9510 + }, + { + "epoch": 8.552158273381295, + "step": 9510, + "torque_loss": 0.10944860428571701 + }, + { + "epoch": 8.552158273381295, + "force_loss": 0.008656457997858524, + "step": 9510 + }, + { + "epoch": 8.56115107913669, + "grad_norm": 0.7965874671936035, + "learning_rate": 9.680717704876546e-05, + "loss": 0.0302, + "step": 9520 + }, + { + "action_loss": 0.009310754016041756, + "epoch": 8.56115107913669, + "step": 9520 + }, + { + "epoch": 8.56115107913669, + "step": 9520, + "torque_loss": 0.1385524719953537 + }, + { + "epoch": 8.56115107913669, + "force_loss": 0.008410467766225338, + "step": 9520 + }, + { + "epoch": 8.570143884892087, + "grad_norm": 0.6976439952850342, + "learning_rate": 9.679748010862349e-05, + "loss": 0.0294, + "step": 9530 + }, + { + "action_loss": 0.013744421303272247, + "epoch": 8.570143884892087, + "step": 9530 + }, + { + "epoch": 8.570143884892087, + "step": 9530, + "torque_loss": 0.11806178838014603 + }, + { + "epoch": 8.570143884892087, + "force_loss": 0.01753784529864788, + "step": 9530 + }, + { + "epoch": 8.579136690647482, + "grad_norm": 0.6330084800720215, + "learning_rate": 9.678776895264267e-05, + "loss": 0.0325, + "step": 9540 + }, + { + "action_loss": 0.020705049857497215, + "epoch": 8.579136690647482, + "step": 9540 + }, + { + "epoch": 8.579136690647482, + "step": 9540, + "torque_loss": 0.16956502199172974 + }, + { + "epoch": 8.579136690647482, + "force_loss": 0.021446580067276955, + "step": 9540 + }, + { + "epoch": 8.588129496402878, + "grad_norm": 0.3010088801383972, + "learning_rate": 9.6778043583773e-05, + "loss": 0.0341, + "step": 9550 + }, + { + "action_loss": 0.011592283844947815, + "epoch": 8.588129496402878, + "step": 9550 + }, + { + "epoch": 8.588129496402878, + "step": 9550, + "torque_loss": 0.17431075870990753 + }, + { + "epoch": 8.588129496402878, + "force_loss": 0.019685033708810806, + "step": 9550 + }, + { + "epoch": 8.597122302158274, + "grad_norm": 0.3165673613548279, + "learning_rate": 9.67683040049688e-05, + "loss": 0.0259, + "step": 9560 + }, + { + "action_loss": 0.02787325717508793, + "epoch": 8.597122302158274, + "step": 9560 + }, + { + "epoch": 8.597122302158274, + "step": 9560, + "torque_loss": 0.16775758564472198 + }, + { + "epoch": 8.597122302158274, + "force_loss": 0.025492852553725243, + "step": 9560 + }, + { + "epoch": 8.60611510791367, + "grad_norm": 0.6373989582061768, + "learning_rate": 9.675855021918869e-05, + "loss": 0.031, + "step": 9570 + }, + { + "action_loss": 0.005025099031627178, + "epoch": 8.60611510791367, + "step": 9570 + }, + { + "epoch": 8.60611510791367, + "step": 9570, + "torque_loss": 0.14153970777988434 + }, + { + "epoch": 8.60611510791367, + "force_loss": 0.006119674537330866, + "step": 9570 + }, + { + "epoch": 8.615107913669064, + "grad_norm": 0.4211874008178711, + "learning_rate": 9.674878222939561e-05, + "loss": 0.0258, + "step": 9580 + }, + { + "action_loss": 0.0036755630280822515, + "epoch": 8.615107913669064, + "step": 9580 + }, + { + "epoch": 8.615107913669064, + "step": 9580, + "torque_loss": 0.07633873075246811 + }, + { + "epoch": 8.615107913669064, + "force_loss": 0.004712820518761873, + "step": 9580 + }, + { + "epoch": 8.62410071942446, + "grad_norm": 0.45486578345298767, + "learning_rate": 9.673900003855681e-05, + "loss": 0.0281, + "step": 9590 + }, + { + "action_loss": 0.016435489058494568, + "epoch": 8.62410071942446, + "step": 9590 + }, + { + "epoch": 8.62410071942446, + "step": 9590, + "torque_loss": 0.16094237565994263 + }, + { + "epoch": 8.62410071942446, + "force_loss": 0.016656536608934402, + "step": 9590 + }, + { + "epoch": 8.633093525179856, + "grad_norm": 0.5245792865753174, + "learning_rate": 9.672920364964389e-05, + "loss": 0.0359, + "step": 9600 + }, + { + "action_loss": 0.021780768409371376, + "epoch": 8.633093525179856, + "step": 9600 + }, + { + "epoch": 8.633093525179856, + "step": 9600, + "torque_loss": 0.18047575652599335 + }, + { + "epoch": 8.633093525179856, + "force_loss": 0.022455209866166115, + "step": 9600 + }, + { + "epoch": 8.642086330935252, + "grad_norm": 0.4266988933086395, + "learning_rate": 9.671939306563269e-05, + "loss": 0.025, + "step": 9610 + }, + { + "action_loss": 0.018251975998282433, + "epoch": 8.642086330935252, + "step": 9610 + }, + { + "epoch": 8.642086330935252, + "step": 9610, + "torque_loss": 0.1401875764131546 + }, + { + "epoch": 8.642086330935252, + "force_loss": 0.027092456817626953, + "step": 9610 + }, + { + "epoch": 8.651079136690647, + "grad_norm": 1.2824878692626953, + "learning_rate": 9.670956828950345e-05, + "loss": 0.0318, + "step": 9620 + }, + { + "action_loss": 0.011505433358252048, + "epoch": 8.651079136690647, + "step": 9620 + }, + { + "epoch": 8.651079136690647, + "step": 9620, + "torque_loss": 0.12270506471395493 + }, + { + "epoch": 8.651079136690647, + "force_loss": 0.013477717526257038, + "step": 9620 + }, + { + "epoch": 8.660071942446043, + "grad_norm": 0.8039194345474243, + "learning_rate": 9.669972932424065e-05, + "loss": 0.0299, + "step": 9630 + }, + { + "action_loss": 0.008318931795656681, + "epoch": 8.660071942446043, + "step": 9630 + }, + { + "epoch": 8.660071942446043, + "step": 9630, + "torque_loss": 0.13181714713573456 + }, + { + "epoch": 8.660071942446043, + "force_loss": 0.006616103928536177, + "step": 9630 + }, + { + "epoch": 8.66906474820144, + "grad_norm": 0.9702808856964111, + "learning_rate": 9.668987617283312e-05, + "loss": 0.0277, + "step": 9640 + }, + { + "action_loss": 0.009567559696733952, + "epoch": 8.66906474820144, + "step": 9640 + }, + { + "epoch": 8.66906474820144, + "step": 9640, + "torque_loss": 0.11925510317087173 + }, + { + "epoch": 8.66906474820144, + "force_loss": 0.006967818830162287, + "step": 9640 + }, + { + "epoch": 8.678057553956835, + "grad_norm": 0.4645603895187378, + "learning_rate": 9.668000883827397e-05, + "loss": 0.0305, + "step": 9650 + }, + { + "action_loss": 0.007450137287378311, + "epoch": 8.678057553956835, + "step": 9650 + }, + { + "epoch": 8.678057553956835, + "step": 9650, + "torque_loss": 0.12383129447698593 + }, + { + "epoch": 8.678057553956835, + "force_loss": 0.006323601584881544, + "step": 9650 + }, + { + "epoch": 8.68705035971223, + "grad_norm": 0.6332986354827881, + "learning_rate": 9.667012732356067e-05, + "loss": 0.0316, + "step": 9660 + }, + { + "action_loss": 0.0139608820900321, + "epoch": 8.68705035971223, + "step": 9660 + }, + { + "epoch": 8.68705035971223, + "step": 9660, + "torque_loss": 0.1393394023180008 + }, + { + "epoch": 8.68705035971223, + "force_loss": 0.014705373905599117, + "step": 9660 + }, + { + "epoch": 8.696043165467627, + "grad_norm": 1.1068260669708252, + "learning_rate": 9.666023163169493e-05, + "loss": 0.0479, + "step": 9670 + }, + { + "action_loss": 0.009434792213141918, + "epoch": 8.696043165467627, + "step": 9670 + }, + { + "epoch": 8.696043165467627, + "step": 9670, + "torque_loss": 0.17337889969348907 + }, + { + "epoch": 8.696043165467627, + "force_loss": 0.006190603133291006, + "step": 9670 + }, + { + "epoch": 8.70503597122302, + "grad_norm": 0.6589455604553223, + "learning_rate": 9.665032176568281e-05, + "loss": 0.0304, + "step": 9680 + }, + { + "action_loss": 0.01564081571996212, + "epoch": 8.70503597122302, + "step": 9680 + }, + { + "epoch": 8.70503597122302, + "step": 9680, + "torque_loss": 0.15311110019683838 + }, + { + "epoch": 8.70503597122302, + "force_loss": 0.009150044992566109, + "step": 9680 + }, + { + "epoch": 8.714028776978417, + "grad_norm": 1.3331222534179688, + "learning_rate": 9.664039772853469e-05, + "loss": 0.0324, + "step": 9690 + }, + { + "action_loss": 0.007561507169157267, + "epoch": 8.714028776978417, + "step": 9690 + }, + { + "epoch": 8.714028776978417, + "step": 9690, + "torque_loss": 0.05610576644539833 + }, + { + "epoch": 8.714028776978417, + "force_loss": 0.007908865809440613, + "step": 9690 + }, + { + "epoch": 8.723021582733812, + "grad_norm": 0.37813442945480347, + "learning_rate": 9.663045952326518e-05, + "loss": 0.027, + "step": 9700 + }, + { + "action_loss": 0.006428262684494257, + "epoch": 8.723021582733812, + "step": 9700 + }, + { + "epoch": 8.723021582733812, + "step": 9700, + "torque_loss": 0.1249067410826683 + }, + { + "epoch": 8.723021582733812, + "force_loss": 0.00746154272928834, + "step": 9700 + }, + { + "epoch": 8.732014388489208, + "grad_norm": 0.691757082939148, + "learning_rate": 9.662050715289328e-05, + "loss": 0.0266, + "step": 9710 + }, + { + "action_loss": 0.018051935359835625, + "epoch": 8.732014388489208, + "step": 9710 + }, + { + "epoch": 8.732014388489208, + "step": 9710, + "torque_loss": 0.17711806297302246 + }, + { + "epoch": 8.732014388489208, + "force_loss": 0.010310347191989422, + "step": 9710 + }, + { + "epoch": 8.741007194244604, + "grad_norm": 0.643276572227478, + "learning_rate": 9.661054062044226e-05, + "loss": 0.0345, + "step": 9720 + }, + { + "action_loss": 0.021279161795973778, + "epoch": 8.741007194244604, + "step": 9720 + }, + { + "epoch": 8.741007194244604, + "step": 9720, + "torque_loss": 0.14824149012565613 + }, + { + "epoch": 8.741007194244604, + "force_loss": 0.014712401665747166, + "step": 9720 + }, + { + "epoch": 8.75, + "grad_norm": 0.48943644762039185, + "learning_rate": 9.660055992893968e-05, + "loss": 0.0282, + "step": 9730 + }, + { + "action_loss": 0.019215544685721397, + "epoch": 8.75, + "step": 9730 + }, + { + "epoch": 8.75, + "step": 9730, + "torque_loss": 0.16288113594055176 + }, + { + "epoch": 8.75, + "force_loss": 0.01412635762244463, + "step": 9730 + }, + { + "epoch": 8.758992805755396, + "grad_norm": 0.46070775389671326, + "learning_rate": 9.659056508141739e-05, + "loss": 0.0286, + "step": 9740 + }, + { + "action_loss": 0.014236348681151867, + "epoch": 8.758992805755396, + "step": 9740 + }, + { + "epoch": 8.758992805755396, + "step": 9740, + "torque_loss": 0.12780122458934784 + }, + { + "epoch": 8.758992805755396, + "force_loss": 0.01821557618677616, + "step": 9740 + }, + { + "epoch": 8.767985611510792, + "grad_norm": 0.5840524435043335, + "learning_rate": 9.658055608091161e-05, + "loss": 0.0346, + "step": 9750 + }, + { + "action_loss": 0.011465773917734623, + "epoch": 8.767985611510792, + "step": 9750 + }, + { + "epoch": 8.767985611510792, + "step": 9750, + "torque_loss": 0.17164932191371918 + }, + { + "epoch": 8.767985611510792, + "force_loss": 0.008555716834962368, + "step": 9750 + }, + { + "epoch": 8.776978417266188, + "grad_norm": 1.5898200273513794, + "learning_rate": 9.657053293046276e-05, + "loss": 0.0311, + "step": 9760 + }, + { + "action_loss": 0.010616987943649292, + "epoch": 8.776978417266188, + "step": 9760 + }, + { + "epoch": 8.776978417266188, + "step": 9760, + "torque_loss": 0.12834854423999786 + }, + { + "epoch": 8.776978417266188, + "force_loss": 0.012776236049830914, + "step": 9760 + }, + { + "epoch": 8.785971223021583, + "grad_norm": 1.011033296585083, + "learning_rate": 9.656049563311564e-05, + "loss": 0.0272, + "step": 9770 + }, + { + "action_loss": 0.015393629670143127, + "epoch": 8.785971223021583, + "step": 9770 + }, + { + "epoch": 8.785971223021583, + "step": 9770, + "torque_loss": 0.14998044073581696 + }, + { + "epoch": 8.785971223021583, + "force_loss": 0.00780859449878335, + "step": 9770 + }, + { + "epoch": 8.79496402877698, + "grad_norm": 0.6138122081756592, + "learning_rate": 9.655044419191929e-05, + "loss": 0.0332, + "step": 9780 + }, + { + "action_loss": 0.009867786429822445, + "epoch": 8.79496402877698, + "step": 9780 + }, + { + "epoch": 8.79496402877698, + "step": 9780, + "torque_loss": 0.20817220211029053 + }, + { + "epoch": 8.79496402877698, + "force_loss": 0.005335627589374781, + "step": 9780 + }, + { + "epoch": 8.803956834532373, + "grad_norm": 0.5172603726387024, + "learning_rate": 9.654037860992711e-05, + "loss": 0.0313, + "step": 9790 + }, + { + "action_loss": 0.019808964803814888, + "epoch": 8.803956834532373, + "step": 9790 + }, + { + "epoch": 8.803956834532373, + "step": 9790, + "torque_loss": 0.1766410619020462 + }, + { + "epoch": 8.803956834532373, + "force_loss": 0.023585960268974304, + "step": 9790 + }, + { + "epoch": 8.81294964028777, + "grad_norm": 0.7335936427116394, + "learning_rate": 9.653029889019672e-05, + "loss": 0.031, + "step": 9800 + }, + { + "action_loss": 0.016281189396977425, + "epoch": 8.81294964028777, + "step": 9800 + }, + { + "epoch": 8.81294964028777, + "step": 9800, + "torque_loss": 0.14487840235233307 + }, + { + "epoch": 8.81294964028777, + "force_loss": 0.010766711086034775, + "step": 9800 + }, + { + "epoch": 8.821942446043165, + "grad_norm": 0.46491530537605286, + "learning_rate": 9.65202050357901e-05, + "loss": 0.0318, + "step": 9810 + }, + { + "action_loss": 0.042640089988708496, + "epoch": 8.821942446043165, + "step": 9810 + }, + { + "epoch": 8.821942446043165, + "step": 9810, + "torque_loss": 0.18616162240505219 + }, + { + "epoch": 8.821942446043165, + "force_loss": 0.03156326711177826, + "step": 9810 + }, + { + "epoch": 8.83093525179856, + "grad_norm": 0.29078832268714905, + "learning_rate": 9.651009704977347e-05, + "loss": 0.0277, + "step": 9820 + }, + { + "action_loss": 0.006332410965114832, + "epoch": 8.83093525179856, + "step": 9820 + }, + { + "epoch": 8.83093525179856, + "step": 9820, + "torque_loss": 0.1467493623495102 + }, + { + "epoch": 8.83093525179856, + "force_loss": 0.006051637232303619, + "step": 9820 + }, + { + "epoch": 8.839928057553957, + "grad_norm": 0.662349283695221, + "learning_rate": 9.649997493521738e-05, + "loss": 0.0307, + "step": 9830 + }, + { + "action_loss": 0.008931110613048077, + "epoch": 8.839928057553957, + "step": 9830 + }, + { + "epoch": 8.839928057553957, + "step": 9830, + "torque_loss": 0.13099968433380127 + }, + { + "epoch": 8.839928057553957, + "force_loss": 0.006029070820659399, + "step": 9830 + }, + { + "epoch": 8.848920863309353, + "grad_norm": 0.374646931886673, + "learning_rate": 9.64898386951967e-05, + "loss": 0.0282, + "step": 9840 + }, + { + "action_loss": 0.038531046360731125, + "epoch": 8.848920863309353, + "step": 9840 + }, + { + "epoch": 8.848920863309353, + "step": 9840, + "torque_loss": 0.2690763771533966 + }, + { + "epoch": 8.848920863309353, + "force_loss": 0.039697516709566116, + "step": 9840 + }, + { + "epoch": 8.857913669064748, + "grad_norm": 0.4819861054420471, + "learning_rate": 9.647968833279049e-05, + "loss": 0.032, + "step": 9850 + }, + { + "action_loss": 0.015623677521944046, + "epoch": 8.857913669064748, + "step": 9850 + }, + { + "epoch": 8.857913669064748, + "step": 9850, + "torque_loss": 0.19835130870342255 + }, + { + "epoch": 8.857913669064748, + "force_loss": 0.019565140828490257, + "step": 9850 + }, + { + "epoch": 8.866906474820144, + "grad_norm": 0.5853265523910522, + "learning_rate": 9.646952385108218e-05, + "loss": 0.0308, + "step": 9860 + }, + { + "action_loss": 0.011000948958098888, + "epoch": 8.866906474820144, + "step": 9860 + }, + { + "epoch": 8.866906474820144, + "step": 9860, + "torque_loss": 0.13960133492946625 + }, + { + "epoch": 8.866906474820144, + "force_loss": 0.0076547362841665745, + "step": 9860 + }, + { + "epoch": 8.87589928057554, + "grad_norm": 0.5242618322372437, + "learning_rate": 9.645934525315951e-05, + "loss": 0.026, + "step": 9870 + }, + { + "action_loss": 0.0023689023219048977, + "epoch": 8.87589928057554, + "step": 9870 + }, + { + "epoch": 8.87589928057554, + "step": 9870, + "torque_loss": 0.09525662660598755 + }, + { + "epoch": 8.87589928057554, + "force_loss": 0.004539574030786753, + "step": 9870 + }, + { + "epoch": 8.884892086330936, + "grad_norm": 0.7364248633384705, + "learning_rate": 9.644915254211442e-05, + "loss": 0.029, + "step": 9880 + }, + { + "action_loss": 0.007957479916512966, + "epoch": 8.884892086330936, + "step": 9880 + }, + { + "epoch": 8.884892086330936, + "step": 9880, + "torque_loss": 0.12385199218988419 + }, + { + "epoch": 8.884892086330936, + "force_loss": 0.010562228970229626, + "step": 9880 + }, + { + "epoch": 8.89388489208633, + "grad_norm": 0.6464004516601562, + "learning_rate": 9.643894572104321e-05, + "loss": 0.0281, + "step": 9890 + }, + { + "action_loss": 0.027421554550528526, + "epoch": 8.89388489208633, + "step": 9890 + }, + { + "epoch": 8.89388489208633, + "step": 9890, + "torque_loss": 0.19964134693145752 + }, + { + "epoch": 8.89388489208633, + "force_loss": 0.03211475536227226, + "step": 9890 + }, + { + "epoch": 8.902877697841726, + "grad_norm": 0.5398285984992981, + "learning_rate": 9.642872479304644e-05, + "loss": 0.033, + "step": 9900 + }, + { + "action_loss": 0.010402509942650795, + "epoch": 8.902877697841726, + "step": 9900 + }, + { + "epoch": 8.902877697841726, + "step": 9900, + "torque_loss": 0.16058401763439178 + }, + { + "epoch": 8.902877697841726, + "force_loss": 0.011009358800947666, + "step": 9900 + }, + { + "epoch": 8.911870503597122, + "grad_norm": 0.5709487795829773, + "learning_rate": 9.641848976122895e-05, + "loss": 0.0286, + "step": 9910 + }, + { + "action_loss": 0.011459200643002987, + "epoch": 8.911870503597122, + "step": 9910 + }, + { + "epoch": 8.911870503597122, + "step": 9910, + "torque_loss": 0.105069600045681 + }, + { + "epoch": 8.911870503597122, + "force_loss": 0.014452147297561169, + "step": 9910 + }, + { + "epoch": 8.920863309352518, + "grad_norm": 0.5692060589790344, + "learning_rate": 9.64082406286999e-05, + "loss": 0.0241, + "step": 9920 + }, + { + "action_loss": 0.01126785296946764, + "epoch": 8.920863309352518, + "step": 9920 + }, + { + "epoch": 8.920863309352518, + "step": 9920, + "torque_loss": 0.12175858020782471 + }, + { + "epoch": 8.920863309352518, + "force_loss": 0.010023976676166058, + "step": 9920 + }, + { + "epoch": 8.929856115107913, + "grad_norm": 0.9473484754562378, + "learning_rate": 9.639797739857269e-05, + "loss": 0.038, + "step": 9930 + }, + { + "action_loss": 0.006853196769952774, + "epoch": 8.929856115107913, + "step": 9930 + }, + { + "epoch": 8.929856115107913, + "step": 9930, + "torque_loss": 0.181155726313591 + }, + { + "epoch": 8.929856115107913, + "force_loss": 0.0068139140494167805, + "step": 9930 + }, + { + "epoch": 8.93884892086331, + "grad_norm": 0.3575853109359741, + "learning_rate": 9.638770007396498e-05, + "loss": 0.03, + "step": 9940 + }, + { + "action_loss": 0.00487502058967948, + "epoch": 8.93884892086331, + "step": 9940 + }, + { + "epoch": 8.93884892086331, + "step": 9940, + "torque_loss": 0.10531579703092575 + }, + { + "epoch": 8.93884892086331, + "force_loss": 0.006260173860937357, + "step": 9940 + }, + { + "epoch": 8.947841726618705, + "grad_norm": 0.5004148483276367, + "learning_rate": 9.63774086579988e-05, + "loss": 0.0399, + "step": 9950 + }, + { + "action_loss": 0.01306405384093523, + "epoch": 8.947841726618705, + "step": 9950 + }, + { + "epoch": 8.947841726618705, + "step": 9950, + "torque_loss": 0.1490817666053772 + }, + { + "epoch": 8.947841726618705, + "force_loss": 0.013406097888946533, + "step": 9950 + }, + { + "epoch": 8.956834532374101, + "grad_norm": 0.8100035786628723, + "learning_rate": 9.63671031538004e-05, + "loss": 0.0271, + "step": 9960 + }, + { + "action_loss": 0.007933643646538258, + "epoch": 8.956834532374101, + "step": 9960 + }, + { + "epoch": 8.956834532374101, + "step": 9960, + "torque_loss": 0.0904865637421608 + }, + { + "epoch": 8.956834532374101, + "force_loss": 0.0069241710007190704, + "step": 9960 + }, + { + "epoch": 8.965827338129497, + "grad_norm": 0.8812164664268494, + "learning_rate": 9.635678356450031e-05, + "loss": 0.029, + "step": 9970 + }, + { + "action_loss": 0.005592092871665955, + "epoch": 8.965827338129497, + "step": 9970 + }, + { + "epoch": 8.965827338129497, + "step": 9970, + "torque_loss": 0.11497309058904648 + }, + { + "epoch": 8.965827338129497, + "force_loss": 0.005749887321144342, + "step": 9970 + }, + { + "epoch": 8.974820143884893, + "grad_norm": 0.6939945816993713, + "learning_rate": 9.634644989323336e-05, + "loss": 0.0286, + "step": 9980 + }, + { + "action_loss": 0.02402697503566742, + "epoch": 8.974820143884893, + "step": 9980 + }, + { + "epoch": 8.974820143884893, + "step": 9980, + "torque_loss": 0.14686010777950287 + }, + { + "epoch": 8.974820143884893, + "force_loss": 0.020023677498102188, + "step": 9980 + }, + { + "epoch": 8.983812949640289, + "grad_norm": 0.6381686329841614, + "learning_rate": 9.633610214313861e-05, + "loss": 0.0303, + "step": 9990 + }, + { + "action_loss": 0.010235019028186798, + "epoch": 8.983812949640289, + "step": 9990 + }, + { + "epoch": 8.983812949640289, + "step": 9990, + "torque_loss": 0.1290939599275589 + }, + { + "epoch": 8.983812949640289, + "force_loss": 0.012070546858012676, + "step": 9990 + }, + { + "epoch": 8.992805755395683, + "grad_norm": 0.6441584825515747, + "learning_rate": 9.632574031735951e-05, + "loss": 0.0283, + "step": 10000 + }, + { + "action_loss": 0.01705154776573181, + "epoch": 8.992805755395683, + "step": 10000 + }, + { + "epoch": 8.992805755395683, + "step": 10000, + "torque_loss": 0.13199247419834137 + }, + { + "epoch": 8.992805755395683, + "force_loss": 0.012645289301872253, + "step": 10000 + }, + { + "epoch": 9.001798561151078, + "grad_norm": 0.5573319792747498, + "learning_rate": 9.631536441904364e-05, + "loss": 0.0271, + "step": 10010 + }, + { + "action_loss": 0.011835657991468906, + "epoch": 9.001798561151078, + "step": 10010 + }, + { + "epoch": 9.001798561151078, + "step": 10010, + "torque_loss": 0.12104404717683792 + }, + { + "epoch": 9.001798561151078, + "force_loss": 0.01840282790362835, + "step": 10010 + }, + { + "epoch": 9.010791366906474, + "grad_norm": 0.3133363723754883, + "learning_rate": 9.630497445134293e-05, + "loss": 0.0266, + "step": 10020 + }, + { + "action_loss": 0.01563299633562565, + "epoch": 9.010791366906474, + "step": 10020 + }, + { + "epoch": 9.010791366906474, + "step": 10020, + "torque_loss": 0.19062598049640656 + }, + { + "epoch": 9.010791366906474, + "force_loss": 0.018884213641285896, + "step": 10020 + }, + { + "epoch": 9.01978417266187, + "grad_norm": 0.4759218394756317, + "learning_rate": 9.62945704174136e-05, + "loss": 0.0289, + "step": 10030 + }, + { + "action_loss": 0.02218557894229889, + "epoch": 9.01978417266187, + "step": 10030 + }, + { + "epoch": 9.01978417266187, + "step": 10030, + "torque_loss": 0.11240116506814957 + }, + { + "epoch": 9.01978417266187, + "force_loss": 0.030858725309371948, + "step": 10030 + }, + { + "epoch": 9.028776978417266, + "grad_norm": 0.8062295317649841, + "learning_rate": 9.628415232041612e-05, + "loss": 0.0331, + "step": 10040 + }, + { + "action_loss": 0.02885535918176174, + "epoch": 9.028776978417266, + "step": 10040 + }, + { + "epoch": 9.028776978417266, + "step": 10040, + "torque_loss": 0.142742320895195 + }, + { + "epoch": 9.028776978417266, + "force_loss": 0.030764369294047356, + "step": 10040 + }, + { + "epoch": 9.037769784172662, + "grad_norm": 0.9538134932518005, + "learning_rate": 9.627372016351524e-05, + "loss": 0.0289, + "step": 10050 + }, + { + "action_loss": 0.014418564736843109, + "epoch": 9.037769784172662, + "step": 10050 + }, + { + "epoch": 9.037769784172662, + "step": 10050, + "torque_loss": 0.22218851745128632 + }, + { + "epoch": 9.037769784172662, + "force_loss": 0.01950434036552906, + "step": 10050 + }, + { + "epoch": 9.046762589928058, + "grad_norm": 0.8337439298629761, + "learning_rate": 9.626327394987995e-05, + "loss": 0.0309, + "step": 10060 + }, + { + "action_loss": 0.009677976369857788, + "epoch": 9.046762589928058, + "step": 10060 + }, + { + "epoch": 9.046762589928058, + "step": 10060, + "torque_loss": 0.15954314172267914 + }, + { + "epoch": 9.046762589928058, + "force_loss": 0.014350146986544132, + "step": 10060 + }, + { + "epoch": 9.055755395683454, + "grad_norm": 0.8795813322067261, + "learning_rate": 9.625281368268355e-05, + "loss": 0.0307, + "step": 10070 + }, + { + "action_loss": 0.007890405133366585, + "epoch": 9.055755395683454, + "step": 10070 + }, + { + "epoch": 9.055755395683454, + "step": 10070, + "torque_loss": 0.11823488026857376 + }, + { + "epoch": 9.055755395683454, + "force_loss": 0.004497329238802195, + "step": 10070 + }, + { + "epoch": 9.06474820143885, + "grad_norm": 0.4431263208389282, + "learning_rate": 9.624233936510357e-05, + "loss": 0.0268, + "step": 10080 + }, + { + "action_loss": 0.008813533931970596, + "epoch": 9.06474820143885, + "step": 10080 + }, + { + "epoch": 9.06474820143885, + "step": 10080, + "torque_loss": 0.1318175047636032 + }, + { + "epoch": 9.06474820143885, + "force_loss": 0.014058738946914673, + "step": 10080 + }, + { + "epoch": 9.073741007194245, + "grad_norm": 0.8673973083496094, + "learning_rate": 9.623185100032187e-05, + "loss": 0.031, + "step": 10090 + }, + { + "action_loss": 0.009516742080450058, + "epoch": 9.073741007194245, + "step": 10090 + }, + { + "epoch": 9.073741007194245, + "step": 10090, + "torque_loss": 0.1367594599723816 + }, + { + "epoch": 9.073741007194245, + "force_loss": 0.00948475394397974, + "step": 10090 + }, + { + "epoch": 9.082733812949641, + "grad_norm": 0.4420112669467926, + "learning_rate": 9.62213485915245e-05, + "loss": 0.0282, + "step": 10100 + }, + { + "action_loss": 0.012250508181750774, + "epoch": 9.082733812949641, + "step": 10100 + }, + { + "epoch": 9.082733812949641, + "step": 10100, + "torque_loss": 0.17002497613430023 + }, + { + "epoch": 9.082733812949641, + "force_loss": 0.008248533122241497, + "step": 10100 + }, + { + "epoch": 9.091726618705035, + "grad_norm": 0.3544306755065918, + "learning_rate": 9.621083214190186e-05, + "loss": 0.0263, + "step": 10110 + }, + { + "action_loss": 0.010369070805609226, + "epoch": 9.091726618705035, + "step": 10110 + }, + { + "epoch": 9.091726618705035, + "step": 10110, + "torque_loss": 0.11875941604375839 + }, + { + "epoch": 9.091726618705035, + "force_loss": 0.012134049087762833, + "step": 10110 + }, + { + "epoch": 9.100719424460431, + "grad_norm": 0.8716443777084351, + "learning_rate": 9.62003016546485e-05, + "loss": 0.0312, + "step": 10120 + }, + { + "action_loss": 0.017326300963759422, + "epoch": 9.100719424460431, + "step": 10120 + }, + { + "epoch": 9.100719424460431, + "step": 10120, + "torque_loss": 0.15255622565746307 + }, + { + "epoch": 9.100719424460431, + "force_loss": 0.018519021570682526, + "step": 10120 + }, + { + "epoch": 9.109712230215827, + "grad_norm": 0.32582125067710876, + "learning_rate": 9.618975713296339e-05, + "loss": 0.0247, + "step": 10130 + }, + { + "action_loss": 0.00469488138332963, + "epoch": 9.109712230215827, + "step": 10130 + }, + { + "epoch": 9.109712230215827, + "step": 10130, + "torque_loss": 0.15045224130153656 + }, + { + "epoch": 9.109712230215827, + "force_loss": 0.007093103602528572, + "step": 10130 + }, + { + "epoch": 9.118705035971223, + "grad_norm": 0.35480907559394836, + "learning_rate": 9.61791985800496e-05, + "loss": 0.0229, + "step": 10140 + }, + { + "action_loss": 0.022707441821694374, + "epoch": 9.118705035971223, + "step": 10140 + }, + { + "epoch": 9.118705035971223, + "step": 10140, + "torque_loss": 0.14760971069335938 + }, + { + "epoch": 9.118705035971223, + "force_loss": 0.022919008508324623, + "step": 10140 + }, + { + "epoch": 9.127697841726619, + "grad_norm": 0.7083514332771301, + "learning_rate": 9.616862599911458e-05, + "loss": 0.0303, + "step": 10150 + }, + { + "action_loss": 0.01765744388103485, + "epoch": 9.127697841726619, + "step": 10150 + }, + { + "epoch": 9.127697841726619, + "step": 10150, + "torque_loss": 0.1288621872663498 + }, + { + "epoch": 9.127697841726619, + "force_loss": 0.028317635878920555, + "step": 10150 + }, + { + "epoch": 9.136690647482014, + "grad_norm": 0.42028236389160156, + "learning_rate": 9.615803939337e-05, + "loss": 0.0287, + "step": 10160 + }, + { + "action_loss": 0.014068935066461563, + "epoch": 9.136690647482014, + "step": 10160 + }, + { + "epoch": 9.136690647482014, + "step": 10160, + "torque_loss": 0.12550655007362366 + }, + { + "epoch": 9.136690647482014, + "force_loss": 0.01162963267415762, + "step": 10160 + }, + { + "epoch": 9.14568345323741, + "grad_norm": 0.4233534038066864, + "learning_rate": 9.614743876603178e-05, + "loss": 0.0274, + "step": 10170 + }, + { + "action_loss": 0.017068881541490555, + "epoch": 9.14568345323741, + "step": 10170 + }, + { + "epoch": 9.14568345323741, + "step": 10170, + "torque_loss": 0.13255678117275238 + }, + { + "epoch": 9.14568345323741, + "force_loss": 0.013134836219251156, + "step": 10170 + }, + { + "epoch": 9.154676258992806, + "grad_norm": 0.6075559258460999, + "learning_rate": 9.613682412032013e-05, + "loss": 0.0286, + "step": 10180 + }, + { + "action_loss": 0.02802620828151703, + "epoch": 9.154676258992806, + "step": 10180 + }, + { + "epoch": 9.154676258992806, + "step": 10180, + "torque_loss": 0.15462185442447662 + }, + { + "epoch": 9.154676258992806, + "force_loss": 0.023218432441353798, + "step": 10180 + }, + { + "epoch": 9.163669064748202, + "grad_norm": 0.5244428515434265, + "learning_rate": 9.612619545945947e-05, + "loss": 0.0364, + "step": 10190 + }, + { + "action_loss": 0.005900113377720118, + "epoch": 9.163669064748202, + "step": 10190 + }, + { + "epoch": 9.163669064748202, + "step": 10190, + "torque_loss": 0.13469235599040985 + }, + { + "epoch": 9.163669064748202, + "force_loss": 0.010282638482749462, + "step": 10190 + }, + { + "epoch": 9.172661870503598, + "grad_norm": 0.6628869771957397, + "learning_rate": 9.611555278667852e-05, + "loss": 0.0266, + "step": 10200 + }, + { + "action_loss": 0.012148316018283367, + "epoch": 9.172661870503598, + "step": 10200 + }, + { + "epoch": 9.172661870503598, + "step": 10200, + "torque_loss": 0.15297381579875946 + }, + { + "epoch": 9.172661870503598, + "force_loss": 0.009634559042751789, + "step": 10200 + }, + { + "epoch": 9.181654676258994, + "grad_norm": 0.4624648690223694, + "learning_rate": 9.610489610521024e-05, + "loss": 0.0275, + "step": 10210 + }, + { + "action_loss": 0.008905833587050438, + "epoch": 9.181654676258994, + "step": 10210 + }, + { + "epoch": 9.181654676258994, + "step": 10210, + "torque_loss": 0.08853333443403244 + }, + { + "epoch": 9.181654676258994, + "force_loss": 0.010210342705249786, + "step": 10210 + }, + { + "epoch": 9.190647482014388, + "grad_norm": 0.6700626611709595, + "learning_rate": 9.609422541829187e-05, + "loss": 0.0255, + "step": 10220 + }, + { + "action_loss": 0.02126232720911503, + "epoch": 9.190647482014388, + "step": 10220 + }, + { + "epoch": 9.190647482014388, + "step": 10220, + "torque_loss": 0.1380685716867447 + }, + { + "epoch": 9.190647482014388, + "force_loss": 0.012280315160751343, + "step": 10220 + }, + { + "epoch": 9.199640287769784, + "grad_norm": 0.40555429458618164, + "learning_rate": 9.608354072916486e-05, + "loss": 0.0356, + "step": 10230 + }, + { + "action_loss": 0.006029499229043722, + "epoch": 9.199640287769784, + "step": 10230 + }, + { + "epoch": 9.199640287769784, + "step": 10230, + "torque_loss": 0.1045355424284935 + }, + { + "epoch": 9.199640287769784, + "force_loss": 0.007806442677974701, + "step": 10230 + }, + { + "epoch": 9.20863309352518, + "grad_norm": 0.9280624389648438, + "learning_rate": 9.607284204107493e-05, + "loss": 0.0314, + "step": 10240 + }, + { + "action_loss": 0.009026926942169666, + "epoch": 9.20863309352518, + "step": 10240 + }, + { + "epoch": 9.20863309352518, + "step": 10240, + "torque_loss": 0.16856233775615692 + }, + { + "epoch": 9.20863309352518, + "force_loss": 0.006835187319666147, + "step": 10240 + }, + { + "epoch": 9.217625899280575, + "grad_norm": 1.458818793296814, + "learning_rate": 9.606212935727208e-05, + "loss": 0.0257, + "step": 10250 + }, + { + "action_loss": 0.01133593637496233, + "epoch": 9.217625899280575, + "step": 10250 + }, + { + "epoch": 9.217625899280575, + "step": 10250, + "torque_loss": 0.1393679827451706 + }, + { + "epoch": 9.217625899280575, + "force_loss": 0.008330075070261955, + "step": 10250 + }, + { + "epoch": 9.226618705035971, + "grad_norm": 0.6630749702453613, + "learning_rate": 9.605140268101052e-05, + "loss": 0.0278, + "step": 10260 + }, + { + "action_loss": 0.00489208335056901, + "epoch": 9.226618705035971, + "step": 10260 + }, + { + "epoch": 9.226618705035971, + "step": 10260, + "torque_loss": 0.14676211774349213 + }, + { + "epoch": 9.226618705035971, + "force_loss": 0.006073640193790197, + "step": 10260 + }, + { + "epoch": 9.235611510791367, + "grad_norm": 0.6277463436126709, + "learning_rate": 9.604066201554875e-05, + "loss": 0.0254, + "step": 10270 + }, + { + "action_loss": 0.01280087698251009, + "epoch": 9.235611510791367, + "step": 10270 + }, + { + "epoch": 9.235611510791367, + "step": 10270, + "torque_loss": 0.13024820387363434 + }, + { + "epoch": 9.235611510791367, + "force_loss": 0.02367275021970272, + "step": 10270 + }, + { + "epoch": 9.244604316546763, + "grad_norm": 0.7234767079353333, + "learning_rate": 9.60299073641495e-05, + "loss": 0.029, + "step": 10280 + }, + { + "action_loss": 0.008430641144514084, + "epoch": 9.244604316546763, + "step": 10280 + }, + { + "epoch": 9.244604316546763, + "step": 10280, + "torque_loss": 0.1173025369644165 + }, + { + "epoch": 9.244604316546763, + "force_loss": 0.008532408624887466, + "step": 10280 + }, + { + "epoch": 9.253597122302159, + "grad_norm": 0.6215731501579285, + "learning_rate": 9.601913873007974e-05, + "loss": 0.0238, + "step": 10290 + }, + { + "action_loss": 0.012716807425022125, + "epoch": 9.253597122302159, + "step": 10290 + }, + { + "epoch": 9.253597122302159, + "step": 10290, + "torque_loss": 0.14327767491340637 + }, + { + "epoch": 9.253597122302159, + "force_loss": 0.021583078429102898, + "step": 10290 + }, + { + "epoch": 9.262589928057555, + "grad_norm": 0.6028773784637451, + "learning_rate": 9.60083561166107e-05, + "loss": 0.0317, + "step": 10300 + }, + { + "action_loss": 0.006474910769611597, + "epoch": 9.262589928057555, + "step": 10300 + }, + { + "epoch": 9.262589928057555, + "step": 10300, + "torque_loss": 0.11099831014871597 + }, + { + "epoch": 9.262589928057555, + "force_loss": 0.006636692676693201, + "step": 10300 + }, + { + "epoch": 9.27158273381295, + "grad_norm": 0.6199492812156677, + "learning_rate": 9.599755952701783e-05, + "loss": 0.0296, + "step": 10310 + }, + { + "action_loss": 0.014989577233791351, + "epoch": 9.27158273381295, + "step": 10310 + }, + { + "epoch": 9.27158273381295, + "step": 10310, + "torque_loss": 0.14685826003551483 + }, + { + "epoch": 9.27158273381295, + "force_loss": 0.010977485217154026, + "step": 10310 + }, + { + "epoch": 9.280575539568344, + "grad_norm": 0.4850398600101471, + "learning_rate": 9.598674896458089e-05, + "loss": 0.0312, + "step": 10320 + }, + { + "action_loss": 0.004792255815118551, + "epoch": 9.280575539568344, + "step": 10320 + }, + { + "epoch": 9.280575539568344, + "step": 10320, + "torque_loss": 0.09150233119726181 + }, + { + "epoch": 9.280575539568344, + "force_loss": 0.004847896751016378, + "step": 10320 + }, + { + "epoch": 9.28956834532374, + "grad_norm": 0.5110653042793274, + "learning_rate": 9.597592443258383e-05, + "loss": 0.0221, + "step": 10330 + }, + { + "action_loss": 0.00591149041429162, + "epoch": 9.28956834532374, + "step": 10330 + }, + { + "epoch": 9.28956834532374, + "step": 10330, + "torque_loss": 0.1200617179274559 + }, + { + "epoch": 9.28956834532374, + "force_loss": 0.005874169524759054, + "step": 10330 + }, + { + "epoch": 9.298561151079136, + "grad_norm": 0.6226730942726135, + "learning_rate": 9.596508593431483e-05, + "loss": 0.0232, + "step": 10340 + }, + { + "action_loss": 0.006625715643167496, + "epoch": 9.298561151079136, + "step": 10340 + }, + { + "epoch": 9.298561151079136, + "step": 10340, + "torque_loss": 0.09729776531457901 + }, + { + "epoch": 9.298561151079136, + "force_loss": 0.0058612339198589325, + "step": 10340 + }, + { + "epoch": 9.307553956834532, + "grad_norm": 0.5901241302490234, + "learning_rate": 9.59542334730664e-05, + "loss": 0.026, + "step": 10350 + }, + { + "action_loss": 0.01095897238701582, + "epoch": 9.307553956834532, + "step": 10350 + }, + { + "epoch": 9.307553956834532, + "step": 10350, + "torque_loss": 0.12940442562103271 + }, + { + "epoch": 9.307553956834532, + "force_loss": 0.009427826851606369, + "step": 10350 + }, + { + "epoch": 9.316546762589928, + "grad_norm": 0.5962520837783813, + "learning_rate": 9.594336705213516e-05, + "loss": 0.0283, + "step": 10360 + }, + { + "action_loss": 0.00791415199637413, + "epoch": 9.316546762589928, + "step": 10360 + }, + { + "epoch": 9.316546762589928, + "step": 10360, + "torque_loss": 0.12326723337173462 + }, + { + "epoch": 9.316546762589928, + "force_loss": 0.008934843353927135, + "step": 10360 + }, + { + "epoch": 9.325539568345324, + "grad_norm": 0.35113057494163513, + "learning_rate": 9.593248667482208e-05, + "loss": 0.0322, + "step": 10370 + }, + { + "action_loss": 0.03247269615530968, + "epoch": 9.325539568345324, + "step": 10370 + }, + { + "epoch": 9.325539568345324, + "step": 10370, + "torque_loss": 0.1403774619102478 + }, + { + "epoch": 9.325539568345324, + "force_loss": 0.04250963404774666, + "step": 10370 + }, + { + "epoch": 9.33453237410072, + "grad_norm": 0.6529799103736877, + "learning_rate": 9.592159234443233e-05, + "loss": 0.0339, + "step": 10380 + }, + { + "action_loss": 0.020657196640968323, + "epoch": 9.33453237410072, + "step": 10380 + }, + { + "epoch": 9.33453237410072, + "step": 10380, + "torque_loss": 0.15035028755664825 + }, + { + "epoch": 9.33453237410072, + "force_loss": 0.01743476837873459, + "step": 10380 + }, + { + "epoch": 9.343525179856115, + "grad_norm": 0.8577680587768555, + "learning_rate": 9.59106840642753e-05, + "loss": 0.0351, + "step": 10390 + }, + { + "action_loss": 0.016335487365722656, + "epoch": 9.343525179856115, + "step": 10390 + }, + { + "epoch": 9.343525179856115, + "step": 10390, + "torque_loss": 0.11230379343032837 + }, + { + "epoch": 9.343525179856115, + "force_loss": 0.024720648303627968, + "step": 10390 + }, + { + "epoch": 9.352517985611511, + "grad_norm": 0.7196080088615417, + "learning_rate": 9.589976183766467e-05, + "loss": 0.0299, + "step": 10400 + }, + { + "action_loss": 0.007247576955705881, + "epoch": 9.352517985611511, + "step": 10400 + }, + { + "epoch": 9.352517985611511, + "step": 10400, + "torque_loss": 0.09510833024978638 + }, + { + "epoch": 9.352517985611511, + "force_loss": 0.009220664389431477, + "step": 10400 + }, + { + "epoch": 9.361510791366907, + "grad_norm": 1.1985657215118408, + "learning_rate": 9.58888256679183e-05, + "loss": 0.0308, + "step": 10410 + }, + { + "action_loss": 0.008461021818220615, + "epoch": 9.361510791366907, + "step": 10410 + }, + { + "epoch": 9.361510791366907, + "step": 10410, + "torque_loss": 0.1299380362033844 + }, + { + "epoch": 9.361510791366907, + "force_loss": 0.007819849997758865, + "step": 10410 + }, + { + "epoch": 9.370503597122303, + "grad_norm": 1.1465376615524292, + "learning_rate": 9.587787555835832e-05, + "loss": 0.0279, + "step": 10420 + }, + { + "action_loss": 0.022557318210601807, + "epoch": 9.370503597122303, + "step": 10420 + }, + { + "epoch": 9.370503597122303, + "step": 10420, + "torque_loss": 0.16325058043003082 + }, + { + "epoch": 9.370503597122303, + "force_loss": 0.024700278416275978, + "step": 10420 + }, + { + "epoch": 9.379496402877697, + "grad_norm": 0.41409018635749817, + "learning_rate": 9.586691151231107e-05, + "loss": 0.0297, + "step": 10430 + }, + { + "action_loss": 0.007909419946372509, + "epoch": 9.379496402877697, + "step": 10430 + }, + { + "epoch": 9.379496402877697, + "step": 10430, + "torque_loss": 0.108908511698246 + }, + { + "epoch": 9.379496402877697, + "force_loss": 0.007891024462878704, + "step": 10430 + }, + { + "epoch": 9.388489208633093, + "grad_norm": 0.6971279978752136, + "learning_rate": 9.585593353310715e-05, + "loss": 0.0307, + "step": 10440 + }, + { + "action_loss": 0.02104235254228115, + "epoch": 9.388489208633093, + "step": 10440 + }, + { + "epoch": 9.388489208633093, + "step": 10440, + "torque_loss": 0.2005579024553299 + }, + { + "epoch": 9.388489208633093, + "force_loss": 0.018684981390833855, + "step": 10440 + }, + { + "epoch": 9.397482014388489, + "grad_norm": 0.49678513407707214, + "learning_rate": 9.58449416240814e-05, + "loss": 0.031, + "step": 10450 + }, + { + "action_loss": 0.006697792559862137, + "epoch": 9.397482014388489, + "step": 10450 + }, + { + "epoch": 9.397482014388489, + "step": 10450, + "torque_loss": 0.13596634566783905 + }, + { + "epoch": 9.397482014388489, + "force_loss": 0.00531019875779748, + "step": 10450 + }, + { + "epoch": 9.406474820143885, + "grad_norm": 0.3666587173938751, + "learning_rate": 9.583393578857283e-05, + "loss": 0.0275, + "step": 10460 + }, + { + "action_loss": 0.011480015702545643, + "epoch": 9.406474820143885, + "step": 10460 + }, + { + "epoch": 9.406474820143885, + "step": 10460, + "torque_loss": 0.11620926111936569 + }, + { + "epoch": 9.406474820143885, + "force_loss": 0.010091166943311691, + "step": 10460 + }, + { + "epoch": 9.41546762589928, + "grad_norm": 0.3673293888568878, + "learning_rate": 9.582291602992474e-05, + "loss": 0.0267, + "step": 10470 + }, + { + "action_loss": 0.013791979290544987, + "epoch": 9.41546762589928, + "step": 10470 + }, + { + "epoch": 9.41546762589928, + "step": 10470, + "torque_loss": 0.12247496098279953 + }, + { + "epoch": 9.41546762589928, + "force_loss": 0.013123749755322933, + "step": 10470 + }, + { + "epoch": 9.424460431654676, + "grad_norm": 0.4128532111644745, + "learning_rate": 9.581188235148466e-05, + "loss": 0.0287, + "step": 10480 + }, + { + "action_loss": 0.004348634276539087, + "epoch": 9.424460431654676, + "step": 10480 + }, + { + "epoch": 9.424460431654676, + "step": 10480, + "torque_loss": 0.13451822102069855 + }, + { + "epoch": 9.424460431654676, + "force_loss": 0.005837304051965475, + "step": 10480 + }, + { + "epoch": 9.433453237410072, + "grad_norm": 0.8465983271598816, + "learning_rate": 9.58008347566043e-05, + "loss": 0.0257, + "step": 10490 + }, + { + "action_loss": 0.01786666549742222, + "epoch": 9.433453237410072, + "step": 10490 + }, + { + "epoch": 9.433453237410072, + "step": 10490, + "torque_loss": 0.19296510517597198 + }, + { + "epoch": 9.433453237410072, + "force_loss": 0.016864048317074776, + "step": 10490 + }, + { + "epoch": 9.442446043165468, + "grad_norm": 0.47716739773750305, + "learning_rate": 9.578977324863965e-05, + "loss": 0.0277, + "step": 10500 + }, + { + "action_loss": 0.009205177426338196, + "epoch": 9.442446043165468, + "step": 10500 + }, + { + "epoch": 9.442446043165468, + "step": 10500, + "torque_loss": 0.17087721824645996 + }, + { + "epoch": 9.442446043165468, + "force_loss": 0.00732346111908555, + "step": 10500 + }, + { + "epoch": 9.451438848920864, + "grad_norm": 0.7659825086593628, + "learning_rate": 9.577869783095089e-05, + "loss": 0.0292, + "step": 10510 + }, + { + "action_loss": 0.016239508986473083, + "epoch": 9.451438848920864, + "step": 10510 + }, + { + "epoch": 9.451438848920864, + "step": 10510, + "torque_loss": 0.15334972739219666 + }, + { + "epoch": 9.451438848920864, + "force_loss": 0.008361724205315113, + "step": 10510 + }, + { + "epoch": 9.46043165467626, + "grad_norm": 0.8403934836387634, + "learning_rate": 9.576760850690245e-05, + "loss": 0.0302, + "step": 10520 + }, + { + "action_loss": 0.014726934023201466, + "epoch": 9.46043165467626, + "step": 10520 + }, + { + "epoch": 9.46043165467626, + "step": 10520, + "torque_loss": 0.19925636053085327 + }, + { + "epoch": 9.46043165467626, + "force_loss": 0.01486267801374197, + "step": 10520 + }, + { + "epoch": 9.469424460431656, + "grad_norm": 0.8984691500663757, + "learning_rate": 9.575650527986298e-05, + "loss": 0.0371, + "step": 10530 + }, + { + "action_loss": 0.006004458758980036, + "epoch": 9.469424460431656, + "step": 10530 + }, + { + "epoch": 9.469424460431656, + "step": 10530, + "torque_loss": 0.11854443699121475 + }, + { + "epoch": 9.469424460431656, + "force_loss": 0.010365949012339115, + "step": 10530 + }, + { + "epoch": 9.47841726618705, + "grad_norm": 0.6599291563034058, + "learning_rate": 9.574538815320531e-05, + "loss": 0.0247, + "step": 10540 + }, + { + "action_loss": 0.01251822430640459, + "epoch": 9.47841726618705, + "step": 10540 + }, + { + "epoch": 9.47841726618705, + "step": 10540, + "torque_loss": 0.15485882759094238 + }, + { + "epoch": 9.47841726618705, + "force_loss": 0.013469722121953964, + "step": 10540 + }, + { + "epoch": 9.487410071942445, + "grad_norm": 0.6501151323318481, + "learning_rate": 9.573425713030656e-05, + "loss": 0.0323, + "step": 10550 + }, + { + "action_loss": 0.006832594517618418, + "epoch": 9.487410071942445, + "step": 10550 + }, + { + "epoch": 9.487410071942445, + "step": 10550, + "torque_loss": 0.15817953646183014 + }, + { + "epoch": 9.487410071942445, + "force_loss": 0.0060196369886398315, + "step": 10550 + }, + { + "epoch": 9.496402877697841, + "grad_norm": 0.7456974983215332, + "learning_rate": 9.572311221454806e-05, + "loss": 0.0257, + "step": 10560 + }, + { + "action_loss": 0.024950919672846794, + "epoch": 9.496402877697841, + "step": 10560 + }, + { + "epoch": 9.496402877697841, + "step": 10560, + "torque_loss": 0.13057518005371094 + }, + { + "epoch": 9.496402877697841, + "force_loss": 0.019463906064629555, + "step": 10560 + }, + { + "epoch": 9.505395683453237, + "grad_norm": 0.9070531725883484, + "learning_rate": 9.57119534093153e-05, + "loss": 0.0315, + "step": 10570 + }, + { + "action_loss": 0.010373858734965324, + "epoch": 9.505395683453237, + "step": 10570 + }, + { + "epoch": 9.505395683453237, + "step": 10570, + "torque_loss": 0.14706474542617798 + }, + { + "epoch": 9.505395683453237, + "force_loss": 0.00816503819078207, + "step": 10570 + }, + { + "epoch": 9.514388489208633, + "grad_norm": 1.062094807624817, + "learning_rate": 9.570078071799806e-05, + "loss": 0.0296, + "step": 10580 + }, + { + "action_loss": 0.009817644022405148, + "epoch": 9.514388489208633, + "step": 10580 + }, + { + "epoch": 9.514388489208633, + "step": 10580, + "torque_loss": 0.12185729295015335 + }, + { + "epoch": 9.514388489208633, + "force_loss": 0.012036922387778759, + "step": 10580 + }, + { + "epoch": 9.523381294964029, + "grad_norm": 0.8121934533119202, + "learning_rate": 9.568959414399028e-05, + "loss": 0.0274, + "step": 10590 + }, + { + "action_loss": 0.025352761149406433, + "epoch": 9.523381294964029, + "step": 10590 + }, + { + "epoch": 9.523381294964029, + "step": 10590, + "torque_loss": 0.11938933283090591 + }, + { + "epoch": 9.523381294964029, + "force_loss": 0.025092795491218567, + "step": 10590 + }, + { + "epoch": 9.532374100719425, + "grad_norm": 0.4860723614692688, + "learning_rate": 9.567839369069018e-05, + "loss": 0.0289, + "step": 10600 + }, + { + "action_loss": 0.009876593947410583, + "epoch": 9.532374100719425, + "step": 10600 + }, + { + "epoch": 9.532374100719425, + "step": 10600, + "torque_loss": 0.14806832373142242 + }, + { + "epoch": 9.532374100719425, + "force_loss": 0.009916700422763824, + "step": 10600 + }, + { + "epoch": 9.54136690647482, + "grad_norm": 0.6804236769676208, + "learning_rate": 9.566717936150013e-05, + "loss": 0.029, + "step": 10610 + }, + { + "action_loss": 0.021389300003647804, + "epoch": 9.54136690647482, + "step": 10610 + }, + { + "epoch": 9.54136690647482, + "step": 10610, + "torque_loss": 0.17321930825710297 + }, + { + "epoch": 9.54136690647482, + "force_loss": 0.01682678610086441, + "step": 10610 + }, + { + "epoch": 9.550359712230216, + "grad_norm": 0.6666664481163025, + "learning_rate": 9.565595115982678e-05, + "loss": 0.0266, + "step": 10620 + }, + { + "action_loss": 0.009704948402941227, + "epoch": 9.550359712230216, + "step": 10620 + }, + { + "epoch": 9.550359712230216, + "step": 10620, + "torque_loss": 0.14480507373809814 + }, + { + "epoch": 9.550359712230216, + "force_loss": 0.016170455142855644, + "step": 10620 + }, + { + "epoch": 9.559352517985612, + "grad_norm": 0.4129558801651001, + "learning_rate": 9.564470908908094e-05, + "loss": 0.0303, + "step": 10630 + }, + { + "action_loss": 0.008663482964038849, + "epoch": 9.559352517985612, + "step": 10630 + }, + { + "epoch": 9.559352517985612, + "step": 10630, + "torque_loss": 0.10041690617799759 + }, + { + "epoch": 9.559352517985612, + "force_loss": 0.00800513755530119, + "step": 10630 + }, + { + "epoch": 9.568345323741006, + "grad_norm": 0.7748432755470276, + "learning_rate": 9.563345315267764e-05, + "loss": 0.0261, + "step": 10640 + }, + { + "action_loss": 0.02055913209915161, + "epoch": 9.568345323741006, + "step": 10640 + }, + { + "epoch": 9.568345323741006, + "step": 10640, + "torque_loss": 0.17869479954242706 + }, + { + "epoch": 9.568345323741006, + "force_loss": 0.015143473632633686, + "step": 10640 + }, + { + "epoch": 9.577338129496402, + "grad_norm": 0.44985678791999817, + "learning_rate": 9.562218335403616e-05, + "loss": 0.0283, + "step": 10650 + }, + { + "action_loss": 0.008624009788036346, + "epoch": 9.577338129496402, + "step": 10650 + }, + { + "epoch": 9.577338129496402, + "step": 10650, + "torque_loss": 0.17126911878585815 + }, + { + "epoch": 9.577338129496402, + "force_loss": 0.009126664139330387, + "step": 10650 + }, + { + "epoch": 9.586330935251798, + "grad_norm": 0.37473663687705994, + "learning_rate": 9.561089969657999e-05, + "loss": 0.0251, + "step": 10660 + }, + { + "action_loss": 0.02132773958146572, + "epoch": 9.586330935251798, + "step": 10660 + }, + { + "epoch": 9.586330935251798, + "step": 10660, + "torque_loss": 0.15396931767463684 + }, + { + "epoch": 9.586330935251798, + "force_loss": 0.02179611660540104, + "step": 10660 + }, + { + "epoch": 9.595323741007194, + "grad_norm": 0.4981321394443512, + "learning_rate": 9.559960218373673e-05, + "loss": 0.0288, + "step": 10670 + }, + { + "action_loss": 0.01068919152021408, + "epoch": 9.595323741007194, + "step": 10670 + }, + { + "epoch": 9.595323741007194, + "step": 10670, + "torque_loss": 0.11746841669082642 + }, + { + "epoch": 9.595323741007194, + "force_loss": 0.012537163682281971, + "step": 10670 + }, + { + "epoch": 9.60431654676259, + "grad_norm": 0.5899496674537659, + "learning_rate": 9.558829081893836e-05, + "loss": 0.0281, + "step": 10680 + }, + { + "action_loss": 0.014468804001808167, + "epoch": 9.60431654676259, + "step": 10680 + }, + { + "epoch": 9.60431654676259, + "step": 10680, + "torque_loss": 0.11580521613359451 + }, + { + "epoch": 9.60431654676259, + "force_loss": 0.010967371053993702, + "step": 10680 + }, + { + "epoch": 9.613309352517986, + "grad_norm": 0.6741436719894409, + "learning_rate": 9.55769656056209e-05, + "loss": 0.0267, + "step": 10690 + }, + { + "action_loss": 0.007974970154464245, + "epoch": 9.613309352517986, + "step": 10690 + }, + { + "epoch": 9.613309352517986, + "step": 10690, + "torque_loss": 0.11001706123352051 + }, + { + "epoch": 9.613309352517986, + "force_loss": 0.01084264274686575, + "step": 10690 + }, + { + "epoch": 9.622302158273381, + "grad_norm": 1.2521754503250122, + "learning_rate": 9.556562654722469e-05, + "loss": 0.0294, + "step": 10700 + }, + { + "action_loss": 0.010585453361272812, + "epoch": 9.622302158273381, + "step": 10700 + }, + { + "epoch": 9.622302158273381, + "step": 10700, + "torque_loss": 0.13352619111537933 + }, + { + "epoch": 9.622302158273381, + "force_loss": 0.009462296031415462, + "step": 10700 + }, + { + "epoch": 9.631294964028777, + "grad_norm": 0.8372606039047241, + "learning_rate": 9.555427364719422e-05, + "loss": 0.0312, + "step": 10710 + }, + { + "action_loss": 0.014742672443389893, + "epoch": 9.631294964028777, + "step": 10710 + }, + { + "epoch": 9.631294964028777, + "step": 10710, + "torque_loss": 0.11293081194162369 + }, + { + "epoch": 9.631294964028777, + "force_loss": 0.023100676015019417, + "step": 10710 + }, + { + "epoch": 9.640287769784173, + "grad_norm": 0.46177878975868225, + "learning_rate": 9.55429069089782e-05, + "loss": 0.0274, + "step": 10720 + }, + { + "action_loss": 0.019772058352828026, + "epoch": 9.640287769784173, + "step": 10720 + }, + { + "epoch": 9.640287769784173, + "step": 10720, + "torque_loss": 0.2091190367937088 + }, + { + "epoch": 9.640287769784173, + "force_loss": 0.019305391237139702, + "step": 10720 + }, + { + "epoch": 9.649280575539569, + "grad_norm": 0.7177428603172302, + "learning_rate": 9.553152633602956e-05, + "loss": 0.0328, + "step": 10730 + }, + { + "action_loss": 0.01486204657703638, + "epoch": 9.649280575539569, + "step": 10730 + }, + { + "epoch": 9.649280575539569, + "step": 10730, + "torque_loss": 0.14825069904327393 + }, + { + "epoch": 9.649280575539569, + "force_loss": 0.020056938752532005, + "step": 10730 + }, + { + "epoch": 9.658273381294965, + "grad_norm": 0.3259560465812683, + "learning_rate": 9.552013193180543e-05, + "loss": 0.0333, + "step": 10740 + }, + { + "action_loss": 0.008836471475660801, + "epoch": 9.658273381294965, + "step": 10740 + }, + { + "epoch": 9.658273381294965, + "step": 10740, + "torque_loss": 0.15030623972415924 + }, + { + "epoch": 9.658273381294965, + "force_loss": 0.009539115242660046, + "step": 10740 + }, + { + "epoch": 9.667266187050359, + "grad_norm": 0.9642545580863953, + "learning_rate": 9.550872369976707e-05, + "loss": 0.0286, + "step": 10750 + }, + { + "action_loss": 0.0097518190741539, + "epoch": 9.667266187050359, + "step": 10750 + }, + { + "epoch": 9.667266187050359, + "step": 10750, + "torque_loss": 0.14309723675251007 + }, + { + "epoch": 9.667266187050359, + "force_loss": 0.00840467307716608, + "step": 10750 + }, + { + "epoch": 9.676258992805755, + "grad_norm": 0.4511452317237854, + "learning_rate": 9.549730164338007e-05, + "loss": 0.0278, + "step": 10760 + }, + { + "action_loss": 0.01700589247047901, + "epoch": 9.676258992805755, + "step": 10760 + }, + { + "epoch": 9.676258992805755, + "step": 10760, + "torque_loss": 0.145081028342247 + }, + { + "epoch": 9.676258992805755, + "force_loss": 0.008743572980165482, + "step": 10760 + }, + { + "epoch": 9.68525179856115, + "grad_norm": 0.4731258451938629, + "learning_rate": 9.548586576611408e-05, + "loss": 0.028, + "step": 10770 + }, + { + "action_loss": 0.009498675353825092, + "epoch": 9.68525179856115, + "step": 10770 + }, + { + "epoch": 9.68525179856115, + "step": 10770, + "torque_loss": 0.11881691217422485 + }, + { + "epoch": 9.68525179856115, + "force_loss": 0.007521938998252153, + "step": 10770 + }, + { + "epoch": 9.694244604316546, + "grad_norm": 0.4556579887866974, + "learning_rate": 9.54744160714431e-05, + "loss": 0.026, + "step": 10780 + }, + { + "action_loss": 0.009431825019419193, + "epoch": 9.694244604316546, + "step": 10780 + }, + { + "epoch": 9.694244604316546, + "step": 10780, + "torque_loss": 0.1173371970653534 + }, + { + "epoch": 9.694244604316546, + "force_loss": 0.007316796109080315, + "step": 10780 + }, + { + "epoch": 9.703237410071942, + "grad_norm": 0.390954852104187, + "learning_rate": 9.546295256284516e-05, + "loss": 0.0265, + "step": 10790 + }, + { + "action_loss": 0.027017468586564064, + "epoch": 9.703237410071942, + "step": 10790 + }, + { + "epoch": 9.703237410071942, + "step": 10790, + "torque_loss": 0.16717219352722168 + }, + { + "epoch": 9.703237410071942, + "force_loss": 0.030752798542380333, + "step": 10790 + }, + { + "epoch": 9.712230215827338, + "grad_norm": 0.4961920976638794, + "learning_rate": 9.545147524380265e-05, + "loss": 0.0301, + "step": 10800 + }, + { + "action_loss": 0.015549294650554657, + "epoch": 9.712230215827338, + "step": 10800 + }, + { + "epoch": 9.712230215827338, + "step": 10800, + "torque_loss": 0.1894809752702713 + }, + { + "epoch": 9.712230215827338, + "force_loss": 0.022457608953118324, + "step": 10800 + }, + { + "epoch": 9.721223021582734, + "grad_norm": 0.6873412728309631, + "learning_rate": 9.543998411780201e-05, + "loss": 0.0324, + "step": 10810 + }, + { + "action_loss": 0.009921756573021412, + "epoch": 9.721223021582734, + "step": 10810 + }, + { + "epoch": 9.721223021582734, + "step": 10810, + "torque_loss": 0.148454949259758 + }, + { + "epoch": 9.721223021582734, + "force_loss": 0.008466281928122044, + "step": 10810 + }, + { + "epoch": 9.73021582733813, + "grad_norm": 0.6092079877853394, + "learning_rate": 9.542847918833397e-05, + "loss": 0.0273, + "step": 10820 + }, + { + "action_loss": 0.010918482206761837, + "epoch": 9.73021582733813, + "step": 10820 + }, + { + "epoch": 9.73021582733813, + "step": 10820, + "torque_loss": 0.1380562037229538 + }, + { + "epoch": 9.73021582733813, + "force_loss": 0.014418157748878002, + "step": 10820 + }, + { + "epoch": 9.739208633093526, + "grad_norm": 0.5506334900856018, + "learning_rate": 9.541696045889343e-05, + "loss": 0.028, + "step": 10830 + }, + { + "action_loss": 0.009138555265963078, + "epoch": 9.739208633093526, + "step": 10830 + }, + { + "epoch": 9.739208633093526, + "step": 10830, + "torque_loss": 0.12567301094532013 + }, + { + "epoch": 9.739208633093526, + "force_loss": 0.010566767305135727, + "step": 10830 + }, + { + "epoch": 9.748201438848922, + "grad_norm": 0.43438073992729187, + "learning_rate": 9.540542793297947e-05, + "loss": 0.0262, + "step": 10840 + }, + { + "action_loss": 0.011103163473308086, + "epoch": 9.748201438848922, + "step": 10840 + }, + { + "epoch": 9.748201438848922, + "step": 10840, + "torque_loss": 0.18787236511707306 + }, + { + "epoch": 9.748201438848922, + "force_loss": 0.009045661427080631, + "step": 10840 + }, + { + "epoch": 9.757194244604317, + "grad_norm": 0.7502126097679138, + "learning_rate": 9.539388161409537e-05, + "loss": 0.0283, + "step": 10850 + }, + { + "action_loss": 0.010768038220703602, + "epoch": 9.757194244604317, + "step": 10850 + }, + { + "epoch": 9.757194244604317, + "step": 10850, + "torque_loss": 0.1307252198457718 + }, + { + "epoch": 9.757194244604317, + "force_loss": 0.017645349726080894, + "step": 10850 + }, + { + "epoch": 9.766187050359711, + "grad_norm": 0.4489460289478302, + "learning_rate": 9.538232150574857e-05, + "loss": 0.0301, + "step": 10860 + }, + { + "action_loss": 0.008940274827182293, + "epoch": 9.766187050359711, + "step": 10860 + }, + { + "epoch": 9.766187050359711, + "step": 10860, + "torque_loss": 0.14543353021144867 + }, + { + "epoch": 9.766187050359711, + "force_loss": 0.009979513473808765, + "step": 10860 + }, + { + "epoch": 9.775179856115107, + "grad_norm": 0.8527157306671143, + "learning_rate": 9.537074761145076e-05, + "loss": 0.0252, + "step": 10870 + }, + { + "action_loss": 0.012462868355214596, + "epoch": 9.775179856115107, + "step": 10870 + }, + { + "epoch": 9.775179856115107, + "step": 10870, + "torque_loss": 0.1327194720506668 + }, + { + "epoch": 9.775179856115107, + "force_loss": 0.01534614060074091, + "step": 10870 + }, + { + "epoch": 9.784172661870503, + "grad_norm": 0.8630487322807312, + "learning_rate": 9.535915993471778e-05, + "loss": 0.0299, + "step": 10880 + }, + { + "action_loss": 0.01268719881772995, + "epoch": 9.784172661870503, + "step": 10880 + }, + { + "epoch": 9.784172661870503, + "step": 10880, + "torque_loss": 0.15173323452472687 + }, + { + "epoch": 9.784172661870503, + "force_loss": 0.015962107107043266, + "step": 10880 + }, + { + "epoch": 9.793165467625899, + "grad_norm": 0.5018957257270813, + "learning_rate": 9.534755847906964e-05, + "loss": 0.0373, + "step": 10890 + }, + { + "action_loss": 0.008178669027984142, + "epoch": 9.793165467625899, + "step": 10890 + }, + { + "epoch": 9.793165467625899, + "step": 10890, + "torque_loss": 0.1684926152229309 + }, + { + "epoch": 9.793165467625899, + "force_loss": 0.006757232826203108, + "step": 10890 + }, + { + "epoch": 9.802158273381295, + "grad_norm": 0.7773537039756775, + "learning_rate": 9.533594324803057e-05, + "loss": 0.0298, + "step": 10900 + }, + { + "action_loss": 0.012452635914087296, + "epoch": 9.802158273381295, + "step": 10900 + }, + { + "epoch": 9.802158273381295, + "step": 10900, + "torque_loss": 0.1251821219921112 + }, + { + "epoch": 9.802158273381295, + "force_loss": 0.010154018178582191, + "step": 10900 + }, + { + "epoch": 9.81115107913669, + "grad_norm": 0.5600789189338684, + "learning_rate": 9.532431424512895e-05, + "loss": 0.0243, + "step": 10910 + }, + { + "action_loss": 0.013393896631896496, + "epoch": 9.81115107913669, + "step": 10910 + }, + { + "epoch": 9.81115107913669, + "step": 10910, + "torque_loss": 0.15502311289310455 + }, + { + "epoch": 9.81115107913669, + "force_loss": 0.013520360924303532, + "step": 10910 + }, + { + "epoch": 9.820143884892087, + "grad_norm": 0.6210010647773743, + "learning_rate": 9.531267147389741e-05, + "loss": 0.0311, + "step": 10920 + }, + { + "action_loss": 0.017992960289120674, + "epoch": 9.820143884892087, + "step": 10920 + }, + { + "epoch": 9.820143884892087, + "step": 10920, + "torque_loss": 0.15930990874767303 + }, + { + "epoch": 9.820143884892087, + "force_loss": 0.012523858807981014, + "step": 10920 + }, + { + "epoch": 9.829136690647482, + "grad_norm": 0.6890802383422852, + "learning_rate": 9.530101493787266e-05, + "loss": 0.0301, + "step": 10930 + }, + { + "action_loss": 0.009423593990504742, + "epoch": 9.829136690647482, + "step": 10930 + }, + { + "epoch": 9.829136690647482, + "step": 10930, + "torque_loss": 0.11356351524591446 + }, + { + "epoch": 9.829136690647482, + "force_loss": 0.017987238243222237, + "step": 10930 + }, + { + "epoch": 9.838129496402878, + "grad_norm": 0.6505790948867798, + "learning_rate": 9.528934464059571e-05, + "loss": 0.025, + "step": 10940 + }, + { + "action_loss": 0.01121435035020113, + "epoch": 9.838129496402878, + "step": 10940 + }, + { + "epoch": 9.838129496402878, + "step": 10940, + "torque_loss": 0.1927977055311203 + }, + { + "epoch": 9.838129496402878, + "force_loss": 0.011948294937610626, + "step": 10940 + }, + { + "epoch": 9.847122302158274, + "grad_norm": 0.5397326350212097, + "learning_rate": 9.527766058561163e-05, + "loss": 0.0243, + "step": 10950 + }, + { + "action_loss": 0.014351271092891693, + "epoch": 9.847122302158274, + "step": 10950 + }, + { + "epoch": 9.847122302158274, + "step": 10950, + "torque_loss": 0.1684246063232422 + }, + { + "epoch": 9.847122302158274, + "force_loss": 0.009647504426538944, + "step": 10950 + }, + { + "epoch": 9.85611510791367, + "grad_norm": 0.9588680267333984, + "learning_rate": 9.526596277646976e-05, + "loss": 0.0278, + "step": 10960 + }, + { + "action_loss": 0.017118675634264946, + "epoch": 9.85611510791367, + "step": 10960 + }, + { + "epoch": 9.85611510791367, + "step": 10960, + "torque_loss": 0.13536207377910614 + }, + { + "epoch": 9.85611510791367, + "force_loss": 0.019327238202095032, + "step": 10960 + }, + { + "epoch": 9.865107913669064, + "grad_norm": 0.6891125440597534, + "learning_rate": 9.525425121672358e-05, + "loss": 0.0263, + "step": 10970 + }, + { + "action_loss": 0.0067167277447879314, + "epoch": 9.865107913669064, + "step": 10970 + }, + { + "epoch": 9.865107913669064, + "step": 10970, + "torque_loss": 0.16867922246456146 + }, + { + "epoch": 9.865107913669064, + "force_loss": 0.004338176921010017, + "step": 10970 + }, + { + "epoch": 9.87410071942446, + "grad_norm": 0.9081276059150696, + "learning_rate": 9.524252590993074e-05, + "loss": 0.0249, + "step": 10980 + }, + { + "action_loss": 0.008715505711734295, + "epoch": 9.87410071942446, + "step": 10980 + }, + { + "epoch": 9.87410071942446, + "step": 10980, + "torque_loss": 0.129295215010643 + }, + { + "epoch": 9.87410071942446, + "force_loss": 0.013092712499201298, + "step": 10980 + }, + { + "epoch": 9.883093525179856, + "grad_norm": 0.43884941935539246, + "learning_rate": 9.523078685965309e-05, + "loss": 0.0248, + "step": 10990 + }, + { + "action_loss": 0.00759974867105484, + "epoch": 9.883093525179856, + "step": 10990 + }, + { + "epoch": 9.883093525179856, + "step": 10990, + "torque_loss": 0.16198456287384033 + }, + { + "epoch": 9.883093525179856, + "force_loss": 0.006631703581660986, + "step": 10990 + }, + { + "epoch": 9.892086330935252, + "grad_norm": 0.5851729512214661, + "learning_rate": 9.521903406945664e-05, + "loss": 0.0267, + "step": 11000 + }, + { + "action_loss": 0.009170866571366787, + "epoch": 9.892086330935252, + "step": 11000 + }, + { + "epoch": 9.892086330935252, + "step": 11000, + "torque_loss": 0.08510661125183105 + }, + { + "epoch": 9.892086330935252, + "force_loss": 0.018143830820918083, + "step": 11000 + }, + { + "epoch": 9.901079136690647, + "grad_norm": 0.704824686050415, + "learning_rate": 9.520726754291158e-05, + "loss": 0.027, + "step": 11010 + }, + { + "action_loss": 0.017982706427574158, + "epoch": 9.901079136690647, + "step": 11010 + }, + { + "epoch": 9.901079136690647, + "step": 11010, + "torque_loss": 0.10604137182235718 + }, + { + "epoch": 9.901079136690647, + "force_loss": 0.019240552559494972, + "step": 11010 + }, + { + "epoch": 9.910071942446043, + "grad_norm": 0.6339232325553894, + "learning_rate": 9.519548728359227e-05, + "loss": 0.0279, + "step": 11020 + }, + { + "action_loss": 0.010949399322271347, + "epoch": 9.910071942446043, + "step": 11020 + }, + { + "epoch": 9.910071942446043, + "step": 11020, + "torque_loss": 0.14291070401668549 + }, + { + "epoch": 9.910071942446043, + "force_loss": 0.010003216564655304, + "step": 11020 + }, + { + "epoch": 9.91906474820144, + "grad_norm": 0.7300173044204712, + "learning_rate": 9.518369329507726e-05, + "loss": 0.0238, + "step": 11030 + }, + { + "action_loss": 0.011186365969479084, + "epoch": 9.91906474820144, + "step": 11030 + }, + { + "epoch": 9.91906474820144, + "step": 11030, + "torque_loss": 0.13138091564178467 + }, + { + "epoch": 9.91906474820144, + "force_loss": 0.01066580694168806, + "step": 11030 + }, + { + "epoch": 9.928057553956835, + "grad_norm": 0.7806556820869446, + "learning_rate": 9.51718855809492e-05, + "loss": 0.0249, + "step": 11040 + }, + { + "action_loss": 0.009963017888367176, + "epoch": 9.928057553956835, + "step": 11040 + }, + { + "epoch": 9.928057553956835, + "step": 11040, + "torque_loss": 0.16646207869052887 + }, + { + "epoch": 9.928057553956835, + "force_loss": 0.010010754689574242, + "step": 11040 + }, + { + "epoch": 9.93705035971223, + "grad_norm": 0.4272620975971222, + "learning_rate": 9.516006414479502e-05, + "loss": 0.0304, + "step": 11050 + }, + { + "action_loss": 0.010648422874510288, + "epoch": 9.93705035971223, + "step": 11050 + }, + { + "epoch": 9.93705035971223, + "step": 11050, + "torque_loss": 0.1669343262910843 + }, + { + "epoch": 9.93705035971223, + "force_loss": 0.011715386062860489, + "step": 11050 + }, + { + "epoch": 9.946043165467627, + "grad_norm": 0.7803710699081421, + "learning_rate": 9.514822899020572e-05, + "loss": 0.0385, + "step": 11060 + }, + { + "action_loss": 0.01156608760356903, + "epoch": 9.946043165467627, + "step": 11060 + }, + { + "epoch": 9.946043165467627, + "step": 11060, + "torque_loss": 0.11944753676652908 + }, + { + "epoch": 9.946043165467627, + "force_loss": 0.010257956571877003, + "step": 11060 + }, + { + "epoch": 9.95503597122302, + "grad_norm": 0.46194639801979065, + "learning_rate": 9.513638012077654e-05, + "loss": 0.0296, + "step": 11070 + }, + { + "action_loss": 0.01348278671503067, + "epoch": 9.95503597122302, + "step": 11070 + }, + { + "epoch": 9.95503597122302, + "step": 11070, + "torque_loss": 0.15218187868595123 + }, + { + "epoch": 9.95503597122302, + "force_loss": 0.010267366655170918, + "step": 11070 + }, + { + "epoch": 9.964028776978417, + "grad_norm": 0.436085045337677, + "learning_rate": 9.512451754010683e-05, + "loss": 0.0261, + "step": 11080 + }, + { + "action_loss": 0.012708738446235657, + "epoch": 9.964028776978417, + "step": 11080 + }, + { + "epoch": 9.964028776978417, + "step": 11080, + "torque_loss": 0.1555136889219284 + }, + { + "epoch": 9.964028776978417, + "force_loss": 0.009203293360769749, + "step": 11080 + }, + { + "epoch": 9.973021582733812, + "grad_norm": 0.6861566305160522, + "learning_rate": 9.511264125180013e-05, + "loss": 0.0249, + "step": 11090 + }, + { + "action_loss": 0.01185446698218584, + "epoch": 9.973021582733812, + "step": 11090 + }, + { + "epoch": 9.973021582733812, + "step": 11090, + "torque_loss": 0.16799135506153107 + }, + { + "epoch": 9.973021582733812, + "force_loss": 0.01190869975835085, + "step": 11090 + }, + { + "epoch": 9.982014388489208, + "grad_norm": 0.5024900436401367, + "learning_rate": 9.510075125946414e-05, + "loss": 0.0268, + "step": 11100 + }, + { + "action_loss": 0.015929432585835457, + "epoch": 9.982014388489208, + "step": 11100 + }, + { + "epoch": 9.982014388489208, + "step": 11100, + "torque_loss": 0.1766018122434616 + }, + { + "epoch": 9.982014388489208, + "force_loss": 0.013793278485536575, + "step": 11100 + }, + { + "epoch": 9.991007194244604, + "grad_norm": 0.5480630397796631, + "learning_rate": 9.508884756671075e-05, + "loss": 0.0291, + "step": 11110 + }, + { + "action_loss": 0.008085153996944427, + "epoch": 9.991007194244604, + "step": 11110 + }, + { + "epoch": 9.991007194244604, + "step": 11110, + "torque_loss": 0.15915803611278534 + }, + { + "epoch": 9.991007194244604, + "force_loss": 0.006082264240831137, + "step": 11110 + }, + { + "epoch": 10.0, + "grad_norm": 0.7992289066314697, + "learning_rate": 9.507693017715596e-05, + "loss": 0.0286, + "step": 11120 + }, + { + "action_loss": 0.004015494138002396, + "epoch": 10.0, + "step": 11120 + }, + { + "epoch": 10.0, + "step": 11120, + "torque_loss": 0.08135860413312912 + }, + { + "epoch": 10.0, + "force_loss": 0.004399992059916258, + "step": 11120 + }, + { + "epoch": 10.008992805755396, + "grad_norm": 0.3683903217315674, + "learning_rate": 9.506499909441997e-05, + "loss": 0.0299, + "step": 11130 + }, + { + "action_loss": 0.00492004444822669, + "epoch": 10.008992805755396, + "step": 11130 + }, + { + "epoch": 10.008992805755396, + "step": 11130, + "torque_loss": 0.11255601048469543 + }, + { + "epoch": 10.008992805755396, + "force_loss": 0.004956409800797701, + "step": 11130 + }, + { + "epoch": 10.017985611510792, + "grad_norm": 0.7232905030250549, + "learning_rate": 9.505305432212713e-05, + "loss": 0.0248, + "step": 11140 + }, + { + "action_loss": 0.011769063770771027, + "epoch": 10.017985611510792, + "step": 11140 + }, + { + "epoch": 10.017985611510792, + "step": 11140, + "torque_loss": 0.11822396516799927 + }, + { + "epoch": 10.017985611510792, + "force_loss": 0.019234588369727135, + "step": 11140 + }, + { + "epoch": 10.026978417266188, + "grad_norm": 0.5520509481430054, + "learning_rate": 9.504109586390595e-05, + "loss": 0.026, + "step": 11150 + }, + { + "action_loss": 0.0076273479498922825, + "epoch": 10.026978417266188, + "step": 11150 + }, + { + "epoch": 10.026978417266188, + "step": 11150, + "torque_loss": 0.12028899788856506 + }, + { + "epoch": 10.026978417266188, + "force_loss": 0.0052971444092690945, + "step": 11150 + }, + { + "epoch": 10.035971223021583, + "grad_norm": 0.39104026556015015, + "learning_rate": 9.502912372338908e-05, + "loss": 0.0301, + "step": 11160 + }, + { + "action_loss": 0.007534921169281006, + "epoch": 10.035971223021583, + "step": 11160 + }, + { + "epoch": 10.035971223021583, + "step": 11160, + "torque_loss": 0.14494585990905762 + }, + { + "epoch": 10.035971223021583, + "force_loss": 0.008034944534301758, + "step": 11160 + }, + { + "epoch": 10.04496402877698, + "grad_norm": 0.744063675403595, + "learning_rate": 9.501713790421335e-05, + "loss": 0.0294, + "step": 11170 + }, + { + "action_loss": 0.006292261183261871, + "epoch": 10.04496402877698, + "step": 11170 + }, + { + "epoch": 10.04496402877698, + "step": 11170, + "torque_loss": 0.14006108045578003 + }, + { + "epoch": 10.04496402877698, + "force_loss": 0.005716968793421984, + "step": 11170 + }, + { + "epoch": 10.053956834532373, + "grad_norm": 1.0360183715820312, + "learning_rate": 9.500513841001974e-05, + "loss": 0.0277, + "step": 11180 + }, + { + "action_loss": 0.019556446000933647, + "epoch": 10.053956834532373, + "step": 11180 + }, + { + "epoch": 10.053956834532373, + "step": 11180, + "torque_loss": 0.17862176895141602 + }, + { + "epoch": 10.053956834532373, + "force_loss": 0.016817452386021614, + "step": 11180 + }, + { + "epoch": 10.06294964028777, + "grad_norm": 0.5944479703903198, + "learning_rate": 9.499312524445336e-05, + "loss": 0.0323, + "step": 11190 + }, + { + "action_loss": 0.005233692470937967, + "epoch": 10.06294964028777, + "step": 11190 + }, + { + "epoch": 10.06294964028777, + "step": 11190, + "torque_loss": 0.11514333635568619 + }, + { + "epoch": 10.06294964028777, + "force_loss": 0.005665199365466833, + "step": 11190 + }, + { + "epoch": 10.071942446043165, + "grad_norm": 0.32586511969566345, + "learning_rate": 9.498109841116351e-05, + "loss": 0.028, + "step": 11200 + }, + { + "action_loss": 0.016690149903297424, + "epoch": 10.071942446043165, + "step": 11200 + }, + { + "epoch": 10.071942446043165, + "step": 11200, + "torque_loss": 0.15541528165340424 + }, + { + "epoch": 10.071942446043165, + "force_loss": 0.017409134656190872, + "step": 11200 + }, + { + "epoch": 10.08093525179856, + "grad_norm": 0.5725913047790527, + "learning_rate": 9.496905791380363e-05, + "loss": 0.0366, + "step": 11210 + }, + { + "action_loss": 0.014832686632871628, + "epoch": 10.08093525179856, + "step": 11210 + }, + { + "epoch": 10.08093525179856, + "step": 11210, + "torque_loss": 0.13739167153835297 + }, + { + "epoch": 10.08093525179856, + "force_loss": 0.01938687637448311, + "step": 11210 + }, + { + "epoch": 10.089928057553957, + "grad_norm": 0.4827929437160492, + "learning_rate": 9.495700375603129e-05, + "loss": 0.0276, + "step": 11220 + }, + { + "action_loss": 0.014541879296302795, + "epoch": 10.089928057553957, + "step": 11220 + }, + { + "epoch": 10.089928057553957, + "step": 11220, + "torque_loss": 0.17996855080127716 + }, + { + "epoch": 10.089928057553957, + "force_loss": 0.014629076234996319, + "step": 11220 + }, + { + "epoch": 10.098920863309353, + "grad_norm": 0.6409385800361633, + "learning_rate": 9.494493594150822e-05, + "loss": 0.0308, + "step": 11230 + }, + { + "action_loss": 0.008698993362486362, + "epoch": 10.098920863309353, + "step": 11230 + }, + { + "epoch": 10.098920863309353, + "step": 11230, + "torque_loss": 0.09485579282045364 + }, + { + "epoch": 10.098920863309353, + "force_loss": 0.007293974980711937, + "step": 11230 + }, + { + "epoch": 10.107913669064748, + "grad_norm": 0.9839959740638733, + "learning_rate": 9.493285447390032e-05, + "loss": 0.0312, + "step": 11240 + }, + { + "action_loss": 0.013759724795818329, + "epoch": 10.107913669064748, + "step": 11240 + }, + { + "epoch": 10.107913669064748, + "step": 11240, + "torque_loss": 0.14273864030838013 + }, + { + "epoch": 10.107913669064748, + "force_loss": 0.01785869710147381, + "step": 11240 + }, + { + "epoch": 10.116906474820144, + "grad_norm": 0.3355298936367035, + "learning_rate": 9.492075935687761e-05, + "loss": 0.0274, + "step": 11250 + }, + { + "action_loss": 0.01849684678018093, + "epoch": 10.116906474820144, + "step": 11250 + }, + { + "epoch": 10.116906474820144, + "step": 11250, + "torque_loss": 0.10407856851816177 + }, + { + "epoch": 10.116906474820144, + "force_loss": 0.007260418031364679, + "step": 11250 + }, + { + "epoch": 10.12589928057554, + "grad_norm": 0.34183377027511597, + "learning_rate": 9.490865059411427e-05, + "loss": 0.0261, + "step": 11260 + }, + { + "action_loss": 0.06025155261158943, + "epoch": 10.12589928057554, + "step": 11260 + }, + { + "epoch": 10.12589928057554, + "step": 11260, + "torque_loss": 0.17157061398029327 + }, + { + "epoch": 10.12589928057554, + "force_loss": 0.06326596438884735, + "step": 11260 + }, + { + "epoch": 10.134892086330936, + "grad_norm": 0.33712413907051086, + "learning_rate": 9.489652818928863e-05, + "loss": 0.0277, + "step": 11270 + }, + { + "action_loss": 0.02074318192899227, + "epoch": 10.134892086330936, + "step": 11270 + }, + { + "epoch": 10.134892086330936, + "step": 11270, + "torque_loss": 0.1118684709072113 + }, + { + "epoch": 10.134892086330936, + "force_loss": 0.02923084795475006, + "step": 11270 + }, + { + "epoch": 10.14388489208633, + "grad_norm": 0.5187804102897644, + "learning_rate": 9.488439214608315e-05, + "loss": 0.0394, + "step": 11280 + }, + { + "action_loss": 0.015663141384720802, + "epoch": 10.14388489208633, + "step": 11280 + }, + { + "epoch": 10.14388489208633, + "step": 11280, + "torque_loss": 0.18274737894535065 + }, + { + "epoch": 10.14388489208633, + "force_loss": 0.014046695083379745, + "step": 11280 + }, + { + "epoch": 10.152877697841726, + "grad_norm": 0.49942636489868164, + "learning_rate": 9.487224246818444e-05, + "loss": 0.0254, + "step": 11290 + }, + { + "action_loss": 0.014512017369270325, + "epoch": 10.152877697841726, + "step": 11290 + }, + { + "epoch": 10.152877697841726, + "step": 11290, + "torque_loss": 0.16143356263637543 + }, + { + "epoch": 10.152877697841726, + "force_loss": 0.012113831005990505, + "step": 11290 + }, + { + "epoch": 10.161870503597122, + "grad_norm": 0.33942297101020813, + "learning_rate": 9.486007915928325e-05, + "loss": 0.0263, + "step": 11300 + }, + { + "action_loss": 0.008723310194909573, + "epoch": 10.161870503597122, + "step": 11300 + }, + { + "epoch": 10.161870503597122, + "step": 11300, + "torque_loss": 0.18283824622631073 + }, + { + "epoch": 10.161870503597122, + "force_loss": 0.006449391599744558, + "step": 11300 + }, + { + "epoch": 10.170863309352518, + "grad_norm": 0.6692873239517212, + "learning_rate": 9.484790222307448e-05, + "loss": 0.0277, + "step": 11310 + }, + { + "action_loss": 0.003828558139503002, + "epoch": 10.170863309352518, + "step": 11310 + }, + { + "epoch": 10.170863309352518, + "step": 11310, + "torque_loss": 0.1257026344537735 + }, + { + "epoch": 10.170863309352518, + "force_loss": 0.005450219381600618, + "step": 11310 + }, + { + "epoch": 10.179856115107913, + "grad_norm": 0.7476741075515747, + "learning_rate": 9.483571166325716e-05, + "loss": 0.0313, + "step": 11320 + }, + { + "action_loss": 0.010637554340064526, + "epoch": 10.179856115107913, + "step": 11320 + }, + { + "epoch": 10.179856115107913, + "step": 11320, + "torque_loss": 0.13662751019001007 + }, + { + "epoch": 10.179856115107913, + "force_loss": 0.010956441052258015, + "step": 11320 + }, + { + "epoch": 10.18884892086331, + "grad_norm": 0.7356482744216919, + "learning_rate": 9.482350748353444e-05, + "loss": 0.0281, + "step": 11330 + }, + { + "action_loss": 0.006456850096583366, + "epoch": 10.18884892086331, + "step": 11330 + }, + { + "epoch": 10.18884892086331, + "step": 11330, + "torque_loss": 0.11050192266702652 + }, + { + "epoch": 10.18884892086331, + "force_loss": 0.007361396681517363, + "step": 11330 + }, + { + "epoch": 10.197841726618705, + "grad_norm": 0.5127983689308167, + "learning_rate": 9.481128968761363e-05, + "loss": 0.0264, + "step": 11340 + }, + { + "action_loss": 0.010660975240170956, + "epoch": 10.197841726618705, + "step": 11340 + }, + { + "epoch": 10.197841726618705, + "step": 11340, + "torque_loss": 0.12635789811611176 + }, + { + "epoch": 10.197841726618705, + "force_loss": 0.0071951462887227535, + "step": 11340 + }, + { + "epoch": 10.206834532374101, + "grad_norm": 0.5391913056373596, + "learning_rate": 9.479905827920621e-05, + "loss": 0.0263, + "step": 11350 + }, + { + "action_loss": 0.016435103490948677, + "epoch": 10.206834532374101, + "step": 11350 + }, + { + "epoch": 10.206834532374101, + "step": 11350, + "torque_loss": 0.13317665457725525 + }, + { + "epoch": 10.206834532374101, + "force_loss": 0.01528336200863123, + "step": 11350 + }, + { + "epoch": 10.215827338129497, + "grad_norm": 0.8430057764053345, + "learning_rate": 9.478681326202773e-05, + "loss": 0.0296, + "step": 11360 + }, + { + "action_loss": 0.012869472615420818, + "epoch": 10.215827338129497, + "step": 11360 + }, + { + "epoch": 10.215827338129497, + "step": 11360, + "torque_loss": 0.15476185083389282 + }, + { + "epoch": 10.215827338129497, + "force_loss": 0.013207174837589264, + "step": 11360 + }, + { + "epoch": 10.224820143884893, + "grad_norm": 0.5041043162345886, + "learning_rate": 9.477455463979791e-05, + "loss": 0.0268, + "step": 11370 + }, + { + "action_loss": 0.007422102149575949, + "epoch": 10.224820143884893, + "step": 11370 + }, + { + "epoch": 10.224820143884893, + "step": 11370, + "torque_loss": 0.11074385792016983 + }, + { + "epoch": 10.224820143884893, + "force_loss": 0.004675613716244698, + "step": 11370 + }, + { + "epoch": 10.233812949640289, + "grad_norm": 0.9896944761276245, + "learning_rate": 9.476228241624059e-05, + "loss": 0.0357, + "step": 11380 + }, + { + "action_loss": 0.008223764598369598, + "epoch": 10.233812949640289, + "step": 11380 + }, + { + "epoch": 10.233812949640289, + "step": 11380, + "torque_loss": 0.12861405313014984 + }, + { + "epoch": 10.233812949640289, + "force_loss": 0.006207530852407217, + "step": 11380 + }, + { + "epoch": 10.242805755395683, + "grad_norm": 0.5693405866622925, + "learning_rate": 9.474999659508374e-05, + "loss": 0.0273, + "step": 11390 + }, + { + "action_loss": 0.005569235887378454, + "epoch": 10.242805755395683, + "step": 11390 + }, + { + "epoch": 10.242805755395683, + "step": 11390, + "torque_loss": 0.09525828808546066 + }, + { + "epoch": 10.242805755395683, + "force_loss": 0.0063684540800750256, + "step": 11390 + }, + { + "epoch": 10.251798561151078, + "grad_norm": 0.4121360182762146, + "learning_rate": 9.47376971800595e-05, + "loss": 0.0237, + "step": 11400 + }, + { + "action_loss": 0.006347741931676865, + "epoch": 10.251798561151078, + "step": 11400 + }, + { + "epoch": 10.251798561151078, + "step": 11400, + "torque_loss": 0.09575831890106201 + }, + { + "epoch": 10.251798561151078, + "force_loss": 0.004907703958451748, + "step": 11400 + }, + { + "epoch": 10.260791366906474, + "grad_norm": 0.5295513272285461, + "learning_rate": 9.472538417490409e-05, + "loss": 0.0323, + "step": 11410 + }, + { + "action_loss": 0.007348763290792704, + "epoch": 10.260791366906474, + "step": 11410 + }, + { + "epoch": 10.260791366906474, + "step": 11410, + "torque_loss": 0.10169955343008041 + }, + { + "epoch": 10.260791366906474, + "force_loss": 0.0060578309930861, + "step": 11410 + }, + { + "epoch": 10.26978417266187, + "grad_norm": 0.3044617772102356, + "learning_rate": 9.471305758335784e-05, + "loss": 0.0286, + "step": 11420 + }, + { + "action_loss": 0.005669146776199341, + "epoch": 10.26978417266187, + "step": 11420 + }, + { + "epoch": 10.26978417266187, + "step": 11420, + "torque_loss": 0.12416287511587143 + }, + { + "epoch": 10.26978417266187, + "force_loss": 0.00506193283945322, + "step": 11420 + }, + { + "epoch": 10.278776978417266, + "grad_norm": 0.4337250888347626, + "learning_rate": 9.47007174091653e-05, + "loss": 0.0345, + "step": 11430 + }, + { + "action_loss": 0.01646108366549015, + "epoch": 10.278776978417266, + "step": 11430 + }, + { + "epoch": 10.278776978417266, + "step": 11430, + "torque_loss": 0.1496351808309555 + }, + { + "epoch": 10.278776978417266, + "force_loss": 0.008551005274057388, + "step": 11430 + }, + { + "epoch": 10.287769784172662, + "grad_norm": 0.39356017112731934, + "learning_rate": 9.468836365607507e-05, + "loss": 0.032, + "step": 11440 + }, + { + "action_loss": 0.021052265539765358, + "epoch": 10.287769784172662, + "step": 11440 + }, + { + "epoch": 10.287769784172662, + "step": 11440, + "torque_loss": 0.22915327548980713 + }, + { + "epoch": 10.287769784172662, + "force_loss": 0.01646186225116253, + "step": 11440 + }, + { + "epoch": 10.296762589928058, + "grad_norm": 1.0541752576828003, + "learning_rate": 9.467599632783988e-05, + "loss": 0.0338, + "step": 11450 + }, + { + "action_loss": 0.009766120463609695, + "epoch": 10.296762589928058, + "step": 11450 + }, + { + "epoch": 10.296762589928058, + "step": 11450, + "torque_loss": 0.15463463962078094 + }, + { + "epoch": 10.296762589928058, + "force_loss": 0.016685975715517998, + "step": 11450 + }, + { + "epoch": 10.305755395683454, + "grad_norm": 0.7311800718307495, + "learning_rate": 9.466361542821662e-05, + "loss": 0.0293, + "step": 11460 + }, + { + "action_loss": 0.012333345599472523, + "epoch": 10.305755395683454, + "step": 11460 + }, + { + "epoch": 10.305755395683454, + "step": 11460, + "torque_loss": 0.1635270118713379 + }, + { + "epoch": 10.305755395683454, + "force_loss": 0.010200919583439827, + "step": 11460 + }, + { + "epoch": 10.31474820143885, + "grad_norm": 0.33149927854537964, + "learning_rate": 9.465122096096625e-05, + "loss": 0.0248, + "step": 11470 + }, + { + "action_loss": 0.012985029257833958, + "epoch": 10.31474820143885, + "step": 11470 + }, + { + "epoch": 10.31474820143885, + "step": 11470, + "torque_loss": 0.1617441475391388 + }, + { + "epoch": 10.31474820143885, + "force_loss": 0.01682000607252121, + "step": 11470 + }, + { + "epoch": 10.323741007194245, + "grad_norm": 0.7311398386955261, + "learning_rate": 9.463881292985391e-05, + "loss": 0.0304, + "step": 11480 + }, + { + "action_loss": 0.011703367345035076, + "epoch": 10.323741007194245, + "step": 11480 + }, + { + "epoch": 10.323741007194245, + "step": 11480, + "torque_loss": 0.1737755686044693 + }, + { + "epoch": 10.323741007194245, + "force_loss": 0.015321140177547932, + "step": 11480 + }, + { + "epoch": 10.332733812949641, + "grad_norm": 0.6987825632095337, + "learning_rate": 9.462639133864881e-05, + "loss": 0.026, + "step": 11490 + }, + { + "action_loss": 0.008947235532104969, + "epoch": 10.332733812949641, + "step": 11490 + }, + { + "epoch": 10.332733812949641, + "step": 11490, + "torque_loss": 0.18349778652191162 + }, + { + "epoch": 10.332733812949641, + "force_loss": 0.011550438590347767, + "step": 11490 + }, + { + "epoch": 10.341726618705035, + "grad_norm": 1.3858733177185059, + "learning_rate": 9.461395619112432e-05, + "loss": 0.0272, + "step": 11500 + }, + { + "action_loss": 0.008816898800432682, + "epoch": 10.341726618705035, + "step": 11500 + }, + { + "epoch": 10.341726618705035, + "step": 11500, + "torque_loss": 0.14297789335250854 + }, + { + "epoch": 10.341726618705035, + "force_loss": 0.007423428352922201, + "step": 11500 + }, + { + "epoch": 10.350719424460431, + "grad_norm": 0.8188197016716003, + "learning_rate": 9.460150749105791e-05, + "loss": 0.0233, + "step": 11510 + }, + { + "action_loss": 0.008732032030820847, + "epoch": 10.350719424460431, + "step": 11510 + }, + { + "epoch": 10.350719424460431, + "step": 11510, + "torque_loss": 0.17078770697116852 + }, + { + "epoch": 10.350719424460431, + "force_loss": 0.011270567774772644, + "step": 11510 + }, + { + "epoch": 10.359712230215827, + "grad_norm": 0.7546162605285645, + "learning_rate": 9.458904524223116e-05, + "loss": 0.0298, + "step": 11520 + }, + { + "action_loss": 0.021800050511956215, + "epoch": 10.359712230215827, + "step": 11520 + }, + { + "epoch": 10.359712230215827, + "step": 11520, + "torque_loss": 0.1783391833305359 + }, + { + "epoch": 10.359712230215827, + "force_loss": 0.012747381813824177, + "step": 11520 + }, + { + "epoch": 10.368705035971223, + "grad_norm": 1.0759632587432861, + "learning_rate": 9.457656944842976e-05, + "loss": 0.0319, + "step": 11530 + }, + { + "action_loss": 0.007475728634744883, + "epoch": 10.368705035971223, + "step": 11530 + }, + { + "epoch": 10.368705035971223, + "step": 11530, + "torque_loss": 0.16289037466049194 + }, + { + "epoch": 10.368705035971223, + "force_loss": 0.006704117637127638, + "step": 11530 + }, + { + "epoch": 10.377697841726619, + "grad_norm": 0.9225375056266785, + "learning_rate": 9.456408011344353e-05, + "loss": 0.0248, + "step": 11540 + }, + { + "action_loss": 0.012882369570434093, + "epoch": 10.377697841726619, + "step": 11540 + }, + { + "epoch": 10.377697841726619, + "step": 11540, + "torque_loss": 0.1400008648633957 + }, + { + "epoch": 10.377697841726619, + "force_loss": 0.00630096485838294, + "step": 11540 + }, + { + "epoch": 10.386690647482014, + "grad_norm": 0.4783977270126343, + "learning_rate": 9.455157724106643e-05, + "loss": 0.03, + "step": 11550 + }, + { + "action_loss": 0.0238427072763443, + "epoch": 10.386690647482014, + "step": 11550 + }, + { + "epoch": 10.386690647482014, + "step": 11550, + "torque_loss": 0.22121857106685638 + }, + { + "epoch": 10.386690647482014, + "force_loss": 0.034579694271087646, + "step": 11550 + }, + { + "epoch": 10.39568345323741, + "grad_norm": 0.8244315981864929, + "learning_rate": 9.453906083509647e-05, + "loss": 0.0318, + "step": 11560 + }, + { + "action_loss": 0.014974999241530895, + "epoch": 10.39568345323741, + "step": 11560 + }, + { + "epoch": 10.39568345323741, + "step": 11560, + "torque_loss": 0.11194352060556412 + }, + { + "epoch": 10.39568345323741, + "force_loss": 0.018318871036171913, + "step": 11560 + }, + { + "epoch": 10.404676258992806, + "grad_norm": 0.7440147995948792, + "learning_rate": 9.45265308993358e-05, + "loss": 0.0239, + "step": 11570 + }, + { + "action_loss": 0.037725578993558884, + "epoch": 10.404676258992806, + "step": 11570 + }, + { + "epoch": 10.404676258992806, + "step": 11570, + "torque_loss": 0.2239779233932495 + }, + { + "epoch": 10.404676258992806, + "force_loss": 0.04689591005444527, + "step": 11570 + }, + { + "epoch": 10.413669064748202, + "grad_norm": 0.34233060479164124, + "learning_rate": 9.451398743759071e-05, + "loss": 0.0294, + "step": 11580 + }, + { + "action_loss": 0.017021184787154198, + "epoch": 10.413669064748202, + "step": 11580 + }, + { + "epoch": 10.413669064748202, + "step": 11580, + "torque_loss": 0.13548994064331055 + }, + { + "epoch": 10.413669064748202, + "force_loss": 0.015162634663283825, + "step": 11580 + }, + { + "epoch": 10.422661870503598, + "grad_norm": 0.6761921644210815, + "learning_rate": 9.450143045367156e-05, + "loss": 0.0272, + "step": 11590 + }, + { + "action_loss": 0.011015105061233044, + "epoch": 10.422661870503598, + "step": 11590 + }, + { + "epoch": 10.422661870503598, + "step": 11590, + "torque_loss": 0.13699547946453094 + }, + { + "epoch": 10.422661870503598, + "force_loss": 0.010299679823219776, + "step": 11590 + }, + { + "epoch": 10.431654676258994, + "grad_norm": 0.6008148789405823, + "learning_rate": 9.448885995139283e-05, + "loss": 0.0234, + "step": 11600 + }, + { + "action_loss": 0.011528166942298412, + "epoch": 10.431654676258994, + "step": 11600 + }, + { + "epoch": 10.431654676258994, + "step": 11600, + "torque_loss": 0.11631127446889877 + }, + { + "epoch": 10.431654676258994, + "force_loss": 0.009262864477932453, + "step": 11600 + }, + { + "epoch": 10.440647482014388, + "grad_norm": 0.6367351412773132, + "learning_rate": 9.44762759345731e-05, + "loss": 0.0239, + "step": 11610 + }, + { + "action_loss": 0.007582036778330803, + "epoch": 10.440647482014388, + "step": 11610 + }, + { + "epoch": 10.440647482014388, + "step": 11610, + "torque_loss": 0.19286580383777618 + }, + { + "epoch": 10.440647482014388, + "force_loss": 0.0071846297942101955, + "step": 11610 + }, + { + "epoch": 10.449640287769784, + "grad_norm": 0.6339514255523682, + "learning_rate": 9.446367840703509e-05, + "loss": 0.026, + "step": 11620 + }, + { + "action_loss": 0.012275408953428268, + "epoch": 10.449640287769784, + "step": 11620 + }, + { + "epoch": 10.449640287769784, + "step": 11620, + "torque_loss": 0.13773991167545319 + }, + { + "epoch": 10.449640287769784, + "force_loss": 0.018510090187191963, + "step": 11620 + }, + { + "epoch": 10.45863309352518, + "grad_norm": 0.47833752632141113, + "learning_rate": 9.445106737260556e-05, + "loss": 0.022, + "step": 11630 + }, + { + "action_loss": 0.005452519282698631, + "epoch": 10.45863309352518, + "step": 11630 + }, + { + "epoch": 10.45863309352518, + "step": 11630, + "torque_loss": 0.14784525334835052 + }, + { + "epoch": 10.45863309352518, + "force_loss": 0.0059990473091602325, + "step": 11630 + }, + { + "epoch": 10.467625899280575, + "grad_norm": 0.6986682415008545, + "learning_rate": 9.443844283511543e-05, + "loss": 0.0283, + "step": 11640 + }, + { + "action_loss": 0.011630523018538952, + "epoch": 10.467625899280575, + "step": 11640 + }, + { + "epoch": 10.467625899280575, + "step": 11640, + "torque_loss": 0.1290273517370224 + }, + { + "epoch": 10.467625899280575, + "force_loss": 0.01168547198176384, + "step": 11640 + }, + { + "epoch": 10.476618705035971, + "grad_norm": 0.45972588658332825, + "learning_rate": 9.442580479839968e-05, + "loss": 0.0338, + "step": 11650 + }, + { + "action_loss": 0.015398656018078327, + "epoch": 10.476618705035971, + "step": 11650 + }, + { + "epoch": 10.476618705035971, + "step": 11650, + "torque_loss": 0.14129537343978882 + }, + { + "epoch": 10.476618705035971, + "force_loss": 0.017711279913783073, + "step": 11650 + }, + { + "epoch": 10.485611510791367, + "grad_norm": 0.7560446262359619, + "learning_rate": 9.441315326629745e-05, + "loss": 0.0328, + "step": 11660 + }, + { + "action_loss": 0.014941698871552944, + "epoch": 10.485611510791367, + "step": 11660 + }, + { + "epoch": 10.485611510791367, + "step": 11660, + "torque_loss": 0.12922406196594238 + }, + { + "epoch": 10.485611510791367, + "force_loss": 0.009879964403808117, + "step": 11660 + }, + { + "epoch": 10.494604316546763, + "grad_norm": 0.6328633427619934, + "learning_rate": 9.44004882426519e-05, + "loss": 0.0308, + "step": 11670 + }, + { + "action_loss": 0.009849830530583858, + "epoch": 10.494604316546763, + "step": 11670 + }, + { + "epoch": 10.494604316546763, + "step": 11670, + "torque_loss": 0.1196376383304596 + }, + { + "epoch": 10.494604316546763, + "force_loss": 0.00854521244764328, + "step": 11670 + }, + { + "epoch": 10.503597122302159, + "grad_norm": 0.8618049025535583, + "learning_rate": 9.438780973131037e-05, + "loss": 0.0296, + "step": 11680 + }, + { + "action_loss": 0.007375011686235666, + "epoch": 10.503597122302159, + "step": 11680 + }, + { + "epoch": 10.503597122302159, + "step": 11680, + "torque_loss": 0.1726502627134323 + }, + { + "epoch": 10.503597122302159, + "force_loss": 0.0059968214482069016, + "step": 11680 + }, + { + "epoch": 10.512589928057555, + "grad_norm": 0.8481230139732361, + "learning_rate": 9.437511773612423e-05, + "loss": 0.0311, + "step": 11690 + }, + { + "action_loss": 0.01055148709565401, + "epoch": 10.512589928057555, + "step": 11690 + }, + { + "epoch": 10.512589928057555, + "step": 11690, + "torque_loss": 0.10800374299287796 + }, + { + "epoch": 10.512589928057555, + "force_loss": 0.008011865429580212, + "step": 11690 + }, + { + "epoch": 10.52158273381295, + "grad_norm": 0.3983135521411896, + "learning_rate": 9.436241226094896e-05, + "loss": 0.0351, + "step": 11700 + }, + { + "action_loss": 0.044229984283447266, + "epoch": 10.52158273381295, + "step": 11700 + }, + { + "epoch": 10.52158273381295, + "step": 11700, + "torque_loss": 0.15636643767356873 + }, + { + "epoch": 10.52158273381295, + "force_loss": 0.0358545146882534, + "step": 11700 + }, + { + "epoch": 10.530575539568346, + "grad_norm": 0.5260031819343567, + "learning_rate": 9.434969330964418e-05, + "loss": 0.0282, + "step": 11710 + }, + { + "action_loss": 0.00560585455968976, + "epoch": 10.530575539568346, + "step": 11710 + }, + { + "epoch": 10.530575539568346, + "step": 11710, + "torque_loss": 0.15837110579013824 + }, + { + "epoch": 10.530575539568346, + "force_loss": 0.006068322341889143, + "step": 11710 + }, + { + "epoch": 10.53956834532374, + "grad_norm": 0.7153356671333313, + "learning_rate": 9.433696088607356e-05, + "loss": 0.0267, + "step": 11720 + }, + { + "action_loss": 0.011927996762096882, + "epoch": 10.53956834532374, + "step": 11720 + }, + { + "epoch": 10.53956834532374, + "step": 11720, + "torque_loss": 0.12231427431106567 + }, + { + "epoch": 10.53956834532374, + "force_loss": 0.02651011385023594, + "step": 11720 + }, + { + "epoch": 10.548561151079136, + "grad_norm": 0.5481546521186829, + "learning_rate": 9.432421499410486e-05, + "loss": 0.0256, + "step": 11730 + }, + { + "action_loss": 0.012892067432403564, + "epoch": 10.548561151079136, + "step": 11730 + }, + { + "epoch": 10.548561151079136, + "step": 11730, + "torque_loss": 0.15337607264518738 + }, + { + "epoch": 10.548561151079136, + "force_loss": 0.010688972659409046, + "step": 11730 + }, + { + "epoch": 10.557553956834532, + "grad_norm": 0.506474494934082, + "learning_rate": 9.431145563760998e-05, + "loss": 0.0275, + "step": 11740 + }, + { + "action_loss": 0.019991571083664894, + "epoch": 10.557553956834532, + "step": 11740 + }, + { + "epoch": 10.557553956834532, + "step": 11740, + "torque_loss": 0.1517646312713623 + }, + { + "epoch": 10.557553956834532, + "force_loss": 0.02033424936234951, + "step": 11740 + }, + { + "epoch": 10.566546762589928, + "grad_norm": 1.1932791471481323, + "learning_rate": 9.429868282046484e-05, + "loss": 0.034, + "step": 11750 + }, + { + "action_loss": 0.011276725679636002, + "epoch": 10.566546762589928, + "step": 11750 + }, + { + "epoch": 10.566546762589928, + "step": 11750, + "torque_loss": 0.14569266140460968 + }, + { + "epoch": 10.566546762589928, + "force_loss": 0.014366704039275646, + "step": 11750 + }, + { + "epoch": 10.575539568345324, + "grad_norm": 0.9943850636482239, + "learning_rate": 9.428589654654951e-05, + "loss": 0.0367, + "step": 11760 + }, + { + "action_loss": 0.011266603134572506, + "epoch": 10.575539568345324, + "step": 11760 + }, + { + "epoch": 10.575539568345324, + "step": 11760, + "torque_loss": 0.14086222648620605 + }, + { + "epoch": 10.575539568345324, + "force_loss": 0.009158913046121597, + "step": 11760 + }, + { + "epoch": 10.58453237410072, + "grad_norm": 0.6900865435600281, + "learning_rate": 9.42730968197481e-05, + "loss": 0.0272, + "step": 11770 + }, + { + "action_loss": 0.008175398223102093, + "epoch": 10.58453237410072, + "step": 11770 + }, + { + "epoch": 10.58453237410072, + "step": 11770, + "torque_loss": 0.16871309280395508 + }, + { + "epoch": 10.58453237410072, + "force_loss": 0.0069722323678433895, + "step": 11770 + }, + { + "epoch": 10.593525179856115, + "grad_norm": 0.4927932620048523, + "learning_rate": 9.426028364394883e-05, + "loss": 0.0271, + "step": 11780 + }, + { + "action_loss": 0.011634415946900845, + "epoch": 10.593525179856115, + "step": 11780 + }, + { + "epoch": 10.593525179856115, + "step": 11780, + "torque_loss": 0.10223275423049927 + }, + { + "epoch": 10.593525179856115, + "force_loss": 0.00927719846367836, + "step": 11780 + }, + { + "epoch": 10.602517985611511, + "grad_norm": 1.3977556228637695, + "learning_rate": 9.424745702304402e-05, + "loss": 0.0255, + "step": 11790 + }, + { + "action_loss": 0.009241175837814808, + "epoch": 10.602517985611511, + "step": 11790 + }, + { + "epoch": 10.602517985611511, + "step": 11790, + "torque_loss": 0.15933172404766083 + }, + { + "epoch": 10.602517985611511, + "force_loss": 0.010606445372104645, + "step": 11790 + }, + { + "epoch": 10.611510791366907, + "grad_norm": 0.8013253808021545, + "learning_rate": 9.423461696093006e-05, + "loss": 0.0316, + "step": 11800 + }, + { + "action_loss": 0.029430806636810303, + "epoch": 10.611510791366907, + "step": 11800 + }, + { + "epoch": 10.611510791366907, + "step": 11800, + "torque_loss": 0.24555903673171997 + }, + { + "epoch": 10.611510791366907, + "force_loss": 0.047616854310035706, + "step": 11800 + }, + { + "epoch": 10.620503597122303, + "grad_norm": 0.31562918424606323, + "learning_rate": 9.422176346150741e-05, + "loss": 0.0279, + "step": 11810 + }, + { + "action_loss": 0.00797791127115488, + "epoch": 10.620503597122303, + "step": 11810 + }, + { + "epoch": 10.620503597122303, + "step": 11810, + "torque_loss": 0.10208842158317566 + }, + { + "epoch": 10.620503597122303, + "force_loss": 0.010428149253129959, + "step": 11810 + }, + { + "epoch": 10.629496402877697, + "grad_norm": 0.9264644980430603, + "learning_rate": 9.420889652868063e-05, + "loss": 0.027, + "step": 11820 + }, + { + "action_loss": 0.005660125520080328, + "epoch": 10.629496402877697, + "step": 11820 + }, + { + "epoch": 10.629496402877697, + "step": 11820, + "torque_loss": 0.11972040683031082 + }, + { + "epoch": 10.629496402877697, + "force_loss": 0.011650193482637405, + "step": 11820 + }, + { + "epoch": 10.638489208633093, + "grad_norm": 0.3744155168533325, + "learning_rate": 9.419601616635836e-05, + "loss": 0.0269, + "step": 11830 + }, + { + "action_loss": 0.011947602033615112, + "epoch": 10.638489208633093, + "step": 11830 + }, + { + "epoch": 10.638489208633093, + "step": 11830, + "torque_loss": 0.14063523709774017 + }, + { + "epoch": 10.638489208633093, + "force_loss": 0.009198009967803955, + "step": 11830 + }, + { + "epoch": 10.647482014388489, + "grad_norm": 0.372519314289093, + "learning_rate": 9.418312237845331e-05, + "loss": 0.0302, + "step": 11840 + }, + { + "action_loss": 0.008128146640956402, + "epoch": 10.647482014388489, + "step": 11840 + }, + { + "epoch": 10.647482014388489, + "step": 11840, + "torque_loss": 0.1194186806678772 + }, + { + "epoch": 10.647482014388489, + "force_loss": 0.005439929664134979, + "step": 11840 + }, + { + "epoch": 10.656474820143885, + "grad_norm": 2.5389938354492188, + "learning_rate": 9.417021516888225e-05, + "loss": 0.0287, + "step": 11850 + }, + { + "action_loss": 0.010768379084765911, + "epoch": 10.656474820143885, + "step": 11850 + }, + { + "epoch": 10.656474820143885, + "step": 11850, + "torque_loss": 0.12309720367193222 + }, + { + "epoch": 10.656474820143885, + "force_loss": 0.01216029655188322, + "step": 11850 + }, + { + "epoch": 10.66546762589928, + "grad_norm": 0.6142351031303406, + "learning_rate": 9.415729454156608e-05, + "loss": 0.026, + "step": 11860 + }, + { + "action_loss": 0.024880558252334595, + "epoch": 10.66546762589928, + "step": 11860 + }, + { + "epoch": 10.66546762589928, + "step": 11860, + "torque_loss": 0.12127278000116348 + }, + { + "epoch": 10.66546762589928, + "force_loss": 0.02288181148469448, + "step": 11860 + }, + { + "epoch": 10.674460431654676, + "grad_norm": 0.22927068173885345, + "learning_rate": 9.414436050042973e-05, + "loss": 0.0285, + "step": 11870 + }, + { + "action_loss": 0.01672782376408577, + "epoch": 10.674460431654676, + "step": 11870 + }, + { + "epoch": 10.674460431654676, + "step": 11870, + "torque_loss": 0.10165462642908096 + }, + { + "epoch": 10.674460431654676, + "force_loss": 0.015788892284035683, + "step": 11870 + }, + { + "epoch": 10.683453237410072, + "grad_norm": 0.5687105655670166, + "learning_rate": 9.413141304940223e-05, + "loss": 0.0273, + "step": 11880 + }, + { + "action_loss": 0.011985464952886105, + "epoch": 10.683453237410072, + "step": 11880 + }, + { + "epoch": 10.683453237410072, + "step": 11880, + "torque_loss": 0.1488141566514969 + }, + { + "epoch": 10.683453237410072, + "force_loss": 0.016240663826465607, + "step": 11880 + }, + { + "epoch": 10.692446043165468, + "grad_norm": 0.7064986824989319, + "learning_rate": 9.411845219241666e-05, + "loss": 0.0263, + "step": 11890 + }, + { + "action_loss": 0.0039049230981618166, + "epoch": 10.692446043165468, + "step": 11890 + }, + { + "epoch": 10.692446043165468, + "step": 11890, + "torque_loss": 0.11934468895196915 + }, + { + "epoch": 10.692446043165468, + "force_loss": 0.004777942318469286, + "step": 11890 + }, + { + "epoch": 10.701438848920864, + "grad_norm": 0.3298688232898712, + "learning_rate": 9.410547793341021e-05, + "loss": 0.0271, + "step": 11900 + }, + { + "action_loss": 0.009596536867320538, + "epoch": 10.701438848920864, + "step": 11900 + }, + { + "epoch": 10.701438848920864, + "step": 11900, + "torque_loss": 0.12201857566833496 + }, + { + "epoch": 10.701438848920864, + "force_loss": 0.007582666352391243, + "step": 11900 + }, + { + "epoch": 10.71043165467626, + "grad_norm": 0.5734339952468872, + "learning_rate": 9.409249027632408e-05, + "loss": 0.0252, + "step": 11910 + }, + { + "action_loss": 0.004827613011002541, + "epoch": 10.71043165467626, + "step": 11910 + }, + { + "epoch": 10.71043165467626, + "step": 11910, + "torque_loss": 0.14751964807510376 + }, + { + "epoch": 10.71043165467626, + "force_loss": 0.005613662302494049, + "step": 11910 + }, + { + "epoch": 10.719424460431654, + "grad_norm": 0.3818264603614807, + "learning_rate": 9.407948922510362e-05, + "loss": 0.0255, + "step": 11920 + }, + { + "action_loss": 0.009109928272664547, + "epoch": 10.719424460431654, + "step": 11920 + }, + { + "epoch": 10.719424460431654, + "step": 11920, + "torque_loss": 0.08783774822950363 + }, + { + "epoch": 10.719424460431654, + "force_loss": 0.008770224638283253, + "step": 11920 + }, + { + "epoch": 10.72841726618705, + "grad_norm": 0.5351333618164062, + "learning_rate": 9.406647478369817e-05, + "loss": 0.0235, + "step": 11930 + }, + { + "action_loss": 0.017036335542798042, + "epoch": 10.72841726618705, + "step": 11930 + }, + { + "epoch": 10.72841726618705, + "step": 11930, + "torque_loss": 0.16139338910579681 + }, + { + "epoch": 10.72841726618705, + "force_loss": 0.02104955166578293, + "step": 11930 + }, + { + "epoch": 10.737410071942445, + "grad_norm": 0.46916478872299194, + "learning_rate": 9.405344695606118e-05, + "loss": 0.0263, + "step": 11940 + }, + { + "action_loss": 0.004225591663271189, + "epoch": 10.737410071942445, + "step": 11940 + }, + { + "epoch": 10.737410071942445, + "step": 11940, + "torque_loss": 0.12437965720891953 + }, + { + "epoch": 10.737410071942445, + "force_loss": 0.0045722294598817825, + "step": 11940 + }, + { + "epoch": 10.746402877697841, + "grad_norm": 0.7353121638298035, + "learning_rate": 9.404040574615018e-05, + "loss": 0.0257, + "step": 11950 + }, + { + "action_loss": 0.02080698497593403, + "epoch": 10.746402877697841, + "step": 11950 + }, + { + "epoch": 10.746402877697841, + "step": 11950, + "torque_loss": 0.16435514390468597 + }, + { + "epoch": 10.746402877697841, + "force_loss": 0.017621351405978203, + "step": 11950 + }, + { + "epoch": 10.755395683453237, + "grad_norm": 0.5872335433959961, + "learning_rate": 9.402735115792674e-05, + "loss": 0.0281, + "step": 11960 + }, + { + "action_loss": 0.004947586450725794, + "epoch": 10.755395683453237, + "step": 11960 + }, + { + "epoch": 10.755395683453237, + "step": 11960, + "torque_loss": 0.14728674292564392 + }, + { + "epoch": 10.755395683453237, + "force_loss": 0.004926704335957766, + "step": 11960 + }, + { + "epoch": 10.764388489208633, + "grad_norm": 0.4793684184551239, + "learning_rate": 9.401428319535649e-05, + "loss": 0.0232, + "step": 11970 + }, + { + "action_loss": 0.008999993093311787, + "epoch": 10.764388489208633, + "step": 11970 + }, + { + "epoch": 10.764388489208633, + "step": 11970, + "torque_loss": 0.1286764293909073 + }, + { + "epoch": 10.764388489208633, + "force_loss": 0.008132883347570896, + "step": 11970 + }, + { + "epoch": 10.773381294964029, + "grad_norm": 0.4197932779788971, + "learning_rate": 9.400120186240912e-05, + "loss": 0.027, + "step": 11980 + }, + { + "action_loss": 0.011099553667008877, + "epoch": 10.773381294964029, + "step": 11980 + }, + { + "epoch": 10.773381294964029, + "step": 11980, + "torque_loss": 0.11338604241609573 + }, + { + "epoch": 10.773381294964029, + "force_loss": 0.010286360047757626, + "step": 11980 + }, + { + "epoch": 10.782374100719425, + "grad_norm": 0.6464086771011353, + "learning_rate": 9.398810716305844e-05, + "loss": 0.026, + "step": 11990 + }, + { + "action_loss": 0.019942304119467735, + "epoch": 10.782374100719425, + "step": 11990 + }, + { + "epoch": 10.782374100719425, + "step": 11990, + "torque_loss": 0.1684349775314331 + }, + { + "epoch": 10.782374100719425, + "force_loss": 0.013247677125036716, + "step": 11990 + }, + { + "epoch": 10.79136690647482, + "grad_norm": 0.6473360657691956, + "learning_rate": 9.397499910128222e-05, + "loss": 0.0272, + "step": 12000 + }, + { + "action_loss": 0.006471951026469469, + "epoch": 10.79136690647482, + "step": 12000 + }, + { + "epoch": 10.79136690647482, + "step": 12000, + "torque_loss": 0.1268175095319748 + }, + { + "epoch": 10.79136690647482, + "force_loss": 0.005824244115501642, + "step": 12000 + }, + { + "epoch": 10.800359712230216, + "grad_norm": 0.26278069615364075, + "learning_rate": 9.396187768106237e-05, + "loss": 0.0232, + "step": 12010 + }, + { + "action_loss": 0.010234486311674118, + "epoch": 10.800359712230216, + "step": 12010 + }, + { + "epoch": 10.800359712230216, + "step": 12010, + "torque_loss": 0.13185055553913116 + }, + { + "epoch": 10.800359712230216, + "force_loss": 0.006136443465948105, + "step": 12010 + }, + { + "epoch": 10.809352517985612, + "grad_norm": 0.7460739612579346, + "learning_rate": 9.394874290638482e-05, + "loss": 0.0268, + "step": 12020 + }, + { + "action_loss": 0.016185598447918892, + "epoch": 10.809352517985612, + "step": 12020 + }, + { + "epoch": 10.809352517985612, + "step": 12020, + "torque_loss": 0.1253592073917389 + }, + { + "epoch": 10.809352517985612, + "force_loss": 0.012232418172061443, + "step": 12020 + }, + { + "epoch": 10.818345323741006, + "grad_norm": 0.5842387080192566, + "learning_rate": 9.393559478123959e-05, + "loss": 0.0291, + "step": 12030 + }, + { + "action_loss": 0.015043514780700207, + "epoch": 10.818345323741006, + "step": 12030 + }, + { + "epoch": 10.818345323741006, + "step": 12030, + "torque_loss": 0.16573457419872284 + }, + { + "epoch": 10.818345323741006, + "force_loss": 0.020701823756098747, + "step": 12030 + }, + { + "epoch": 10.827338129496402, + "grad_norm": 0.764706552028656, + "learning_rate": 9.39224333096207e-05, + "loss": 0.0262, + "step": 12040 + }, + { + "action_loss": 0.006273603066802025, + "epoch": 10.827338129496402, + "step": 12040 + }, + { + "epoch": 10.827338129496402, + "step": 12040, + "torque_loss": 0.09680929780006409 + }, + { + "epoch": 10.827338129496402, + "force_loss": 0.007812932133674622, + "step": 12040 + }, + { + "epoch": 10.836330935251798, + "grad_norm": 0.5986528992652893, + "learning_rate": 9.390925849552629e-05, + "loss": 0.0275, + "step": 12050 + }, + { + "action_loss": 0.010162926279008389, + "epoch": 10.836330935251798, + "step": 12050 + }, + { + "epoch": 10.836330935251798, + "step": 12050, + "torque_loss": 0.1518034189939499 + }, + { + "epoch": 10.836330935251798, + "force_loss": 0.0305942390114069, + "step": 12050 + }, + { + "epoch": 10.845323741007194, + "grad_norm": 0.5597752928733826, + "learning_rate": 9.389607034295849e-05, + "loss": 0.0242, + "step": 12060 + }, + { + "action_loss": 0.014853022992610931, + "epoch": 10.845323741007194, + "step": 12060 + }, + { + "epoch": 10.845323741007194, + "step": 12060, + "torque_loss": 0.14084966480731964 + }, + { + "epoch": 10.845323741007194, + "force_loss": 0.013705846853554249, + "step": 12060 + }, + { + "epoch": 10.85431654676259, + "grad_norm": 0.44602155685424805, + "learning_rate": 9.388286885592355e-05, + "loss": 0.0326, + "step": 12070 + }, + { + "action_loss": 0.007571047637611628, + "epoch": 10.85431654676259, + "step": 12070 + }, + { + "epoch": 10.85431654676259, + "step": 12070, + "torque_loss": 0.1311572641134262 + }, + { + "epoch": 10.85431654676259, + "force_loss": 0.015139713883399963, + "step": 12070 + }, + { + "epoch": 10.863309352517986, + "grad_norm": 0.3522129952907562, + "learning_rate": 9.386965403843168e-05, + "loss": 0.0255, + "step": 12080 + }, + { + "action_loss": 0.009334375150501728, + "epoch": 10.863309352517986, + "step": 12080 + }, + { + "epoch": 10.863309352517986, + "step": 12080, + "torque_loss": 0.1605527400970459 + }, + { + "epoch": 10.863309352517986, + "force_loss": 0.009085362777113914, + "step": 12080 + }, + { + "epoch": 10.872302158273381, + "grad_norm": 0.5320757031440735, + "learning_rate": 9.385642589449726e-05, + "loss": 0.0258, + "step": 12090 + }, + { + "action_loss": 0.0033140864688903093, + "epoch": 10.872302158273381, + "step": 12090 + }, + { + "epoch": 10.872302158273381, + "step": 12090, + "torque_loss": 0.10213208198547363 + }, + { + "epoch": 10.872302158273381, + "force_loss": 0.0058080703020095825, + "step": 12090 + }, + { + "epoch": 10.881294964028777, + "grad_norm": 0.45063185691833496, + "learning_rate": 9.38431844281386e-05, + "loss": 0.0227, + "step": 12100 + }, + { + "action_loss": 0.00553587032482028, + "epoch": 10.881294964028777, + "step": 12100 + }, + { + "epoch": 10.881294964028777, + "step": 12100, + "torque_loss": 0.1320485919713974 + }, + { + "epoch": 10.881294964028777, + "force_loss": 0.00692811468616128, + "step": 12100 + }, + { + "epoch": 10.890287769784173, + "grad_norm": 0.2876790761947632, + "learning_rate": 9.38299296433781e-05, + "loss": 0.0254, + "step": 12110 + }, + { + "action_loss": 0.01288487296551466, + "epoch": 10.890287769784173, + "step": 12110 + }, + { + "epoch": 10.890287769784173, + "step": 12110, + "torque_loss": 0.12228699773550034 + }, + { + "epoch": 10.890287769784173, + "force_loss": 0.009581823833286762, + "step": 12110 + }, + { + "epoch": 10.899280575539569, + "grad_norm": 0.6605455875396729, + "learning_rate": 9.381666154424226e-05, + "loss": 0.0266, + "step": 12120 + }, + { + "action_loss": 0.008676379919052124, + "epoch": 10.899280575539569, + "step": 12120 + }, + { + "epoch": 10.899280575539569, + "step": 12120, + "torque_loss": 0.11593180149793625 + }, + { + "epoch": 10.899280575539569, + "force_loss": 0.006400665733963251, + "step": 12120 + }, + { + "epoch": 10.908273381294965, + "grad_norm": 0.9221895337104797, + "learning_rate": 9.380338013476157e-05, + "loss": 0.024, + "step": 12130 + }, + { + "action_loss": 0.014356992207467556, + "epoch": 10.908273381294965, + "step": 12130 + }, + { + "epoch": 10.908273381294965, + "step": 12130, + "torque_loss": 0.14662493765354156 + }, + { + "epoch": 10.908273381294965, + "force_loss": 0.018285570666193962, + "step": 12130 + }, + { + "epoch": 10.917266187050359, + "grad_norm": 0.9484205842018127, + "learning_rate": 9.379008541897054e-05, + "loss": 0.0262, + "step": 12140 + }, + { + "action_loss": 0.0054193236865103245, + "epoch": 10.917266187050359, + "step": 12140 + }, + { + "epoch": 10.917266187050359, + "step": 12140, + "torque_loss": 0.12005052715539932 + }, + { + "epoch": 10.917266187050359, + "force_loss": 0.004963604733347893, + "step": 12140 + }, + { + "epoch": 10.926258992805755, + "grad_norm": 0.24158206582069397, + "learning_rate": 9.377677740090777e-05, + "loss": 0.0319, + "step": 12150 + }, + { + "action_loss": 0.009313946589827538, + "epoch": 10.926258992805755, + "step": 12150 + }, + { + "epoch": 10.926258992805755, + "step": 12150, + "torque_loss": 0.1337345987558365 + }, + { + "epoch": 10.926258992805755, + "force_loss": 0.012692399322986603, + "step": 12150 + }, + { + "epoch": 10.93525179856115, + "grad_norm": 0.5572620034217834, + "learning_rate": 9.376345608461588e-05, + "loss": 0.0266, + "step": 12160 + }, + { + "action_loss": 0.010906982235610485, + "epoch": 10.93525179856115, + "step": 12160 + }, + { + "epoch": 10.93525179856115, + "step": 12160, + "torque_loss": 0.1310039907693863 + }, + { + "epoch": 10.93525179856115, + "force_loss": 0.008720236830413342, + "step": 12160 + }, + { + "epoch": 10.944244604316546, + "grad_norm": 0.4573909044265747, + "learning_rate": 9.375012147414155e-05, + "loss": 0.0228, + "step": 12170 + }, + { + "action_loss": 0.011558172293007374, + "epoch": 10.944244604316546, + "step": 12170 + }, + { + "epoch": 10.944244604316546, + "step": 12170, + "torque_loss": 0.17224471271038055 + }, + { + "epoch": 10.944244604316546, + "force_loss": 0.01524873822927475, + "step": 12170 + }, + { + "epoch": 10.953237410071942, + "grad_norm": 0.40970471501350403, + "learning_rate": 9.373677357353545e-05, + "loss": 0.0263, + "step": 12180 + }, + { + "action_loss": 0.019229477271437645, + "epoch": 10.953237410071942, + "step": 12180 + }, + { + "epoch": 10.953237410071942, + "step": 12180, + "torque_loss": 0.1303432136774063 + }, + { + "epoch": 10.953237410071942, + "force_loss": 0.02540399134159088, + "step": 12180 + }, + { + "epoch": 10.962230215827338, + "grad_norm": 0.6801599860191345, + "learning_rate": 9.372341238685237e-05, + "loss": 0.0251, + "step": 12190 + }, + { + "action_loss": 0.005424709524959326, + "epoch": 10.962230215827338, + "step": 12190 + }, + { + "epoch": 10.962230215827338, + "step": 12190, + "torque_loss": 0.17813409864902496 + }, + { + "epoch": 10.962230215827338, + "force_loss": 0.004139252007007599, + "step": 12190 + }, + { + "epoch": 10.971223021582734, + "grad_norm": 0.6432246565818787, + "learning_rate": 9.371003791815102e-05, + "loss": 0.0228, + "step": 12200 + }, + { + "action_loss": 0.004331761505454779, + "epoch": 10.971223021582734, + "step": 12200 + }, + { + "epoch": 10.971223021582734, + "step": 12200, + "torque_loss": 0.10015038400888443 + }, + { + "epoch": 10.971223021582734, + "force_loss": 0.004639073740690947, + "step": 12200 + }, + { + "epoch": 10.98021582733813, + "grad_norm": 0.6928061246871948, + "learning_rate": 9.369665017149429e-05, + "loss": 0.0251, + "step": 12210 + }, + { + "action_loss": 0.01653742603957653, + "epoch": 10.98021582733813, + "step": 12210 + }, + { + "epoch": 10.98021582733813, + "step": 12210, + "torque_loss": 0.1767071932554245 + }, + { + "epoch": 10.98021582733813, + "force_loss": 0.008227099664509296, + "step": 12210 + }, + { + "epoch": 10.989208633093526, + "grad_norm": 0.3463161587715149, + "learning_rate": 9.368324915094895e-05, + "loss": 0.0228, + "step": 12220 + }, + { + "action_loss": 0.025904029607772827, + "epoch": 10.989208633093526, + "step": 12220 + }, + { + "epoch": 10.989208633093526, + "step": 12220, + "torque_loss": 0.1508151888847351 + }, + { + "epoch": 10.989208633093526, + "force_loss": 0.03892321512103081, + "step": 12220 + }, + { + "epoch": 10.998201438848922, + "grad_norm": 0.6688510775566101, + "learning_rate": 9.366983486058591e-05, + "loss": 0.0313, + "step": 12230 + }, + { + "action_loss": 0.004591356497257948, + "epoch": 10.998201438848922, + "step": 12230 + }, + { + "epoch": 10.998201438848922, + "step": 12230, + "torque_loss": 0.1349913328886032 + }, + { + "epoch": 10.998201438848922, + "force_loss": 0.004875443410128355, + "step": 12230 + }, + { + "epoch": 11.007194244604317, + "grad_norm": 0.6722777485847473, + "learning_rate": 9.365640730448009e-05, + "loss": 0.0343, + "step": 12240 + }, + { + "action_loss": 0.010542180389165878, + "epoch": 11.007194244604317, + "step": 12240 + }, + { + "epoch": 11.007194244604317, + "step": 12240, + "torque_loss": 0.1542787104845047 + }, + { + "epoch": 11.007194244604317, + "force_loss": 0.010751006193459034, + "step": 12240 + }, + { + "epoch": 11.016187050359711, + "grad_norm": 0.8943272829055786, + "learning_rate": 9.36429664867104e-05, + "loss": 0.0338, + "step": 12250 + }, + { + "action_loss": 0.007278565317392349, + "epoch": 11.016187050359711, + "step": 12250 + }, + { + "epoch": 11.016187050359711, + "step": 12250, + "torque_loss": 0.11434619873762131 + }, + { + "epoch": 11.016187050359711, + "force_loss": 0.007861269637942314, + "step": 12250 + }, + { + "epoch": 11.025179856115107, + "grad_norm": 0.31568488478660583, + "learning_rate": 9.362951241135982e-05, + "loss": 0.0297, + "step": 12260 + }, + { + "action_loss": 0.010769858956336975, + "epoch": 11.025179856115107, + "step": 12260 + }, + { + "epoch": 11.025179856115107, + "step": 12260, + "torque_loss": 0.13687936961650848 + }, + { + "epoch": 11.025179856115107, + "force_loss": 0.010664626955986023, + "step": 12260 + }, + { + "epoch": 11.034172661870503, + "grad_norm": 0.8092395663261414, + "learning_rate": 9.361604508251534e-05, + "loss": 0.026, + "step": 12270 + }, + { + "action_loss": 0.007826884277164936, + "epoch": 11.034172661870503, + "step": 12270 + }, + { + "epoch": 11.034172661870503, + "step": 12270, + "torque_loss": 0.1676703691482544 + }, + { + "epoch": 11.034172661870503, + "force_loss": 0.007892599329352379, + "step": 12270 + }, + { + "epoch": 11.043165467625899, + "grad_norm": 0.4204530417919159, + "learning_rate": 9.360256450426799e-05, + "loss": 0.0254, + "step": 12280 + }, + { + "action_loss": 0.01079513132572174, + "epoch": 11.043165467625899, + "step": 12280 + }, + { + "epoch": 11.043165467625899, + "step": 12280, + "torque_loss": 0.16246967017650604 + }, + { + "epoch": 11.043165467625899, + "force_loss": 0.00886585097759962, + "step": 12280 + }, + { + "epoch": 11.052158273381295, + "grad_norm": 0.5435256361961365, + "learning_rate": 9.358907068071279e-05, + "loss": 0.0254, + "step": 12290 + }, + { + "action_loss": 0.005063696298748255, + "epoch": 11.052158273381295, + "step": 12290 + }, + { + "epoch": 11.052158273381295, + "step": 12290, + "torque_loss": 0.13453452289104462 + }, + { + "epoch": 11.052158273381295, + "force_loss": 0.0044000535272061825, + "step": 12290 + }, + { + "epoch": 11.06115107913669, + "grad_norm": 0.328739196062088, + "learning_rate": 9.357556361594882e-05, + "loss": 0.0226, + "step": 12300 + }, + { + "action_loss": 0.004865929950028658, + "epoch": 11.06115107913669, + "step": 12300 + }, + { + "epoch": 11.06115107913669, + "step": 12300, + "torque_loss": 0.13972128927707672 + }, + { + "epoch": 11.06115107913669, + "force_loss": 0.0032887046691030264, + "step": 12300 + }, + { + "epoch": 11.070143884892087, + "grad_norm": 0.6979000568389893, + "learning_rate": 9.356204331407917e-05, + "loss": 0.0266, + "step": 12310 + }, + { + "action_loss": 0.037070319056510925, + "epoch": 11.070143884892087, + "step": 12310 + }, + { + "epoch": 11.070143884892087, + "step": 12310, + "torque_loss": 0.1636311560869217 + }, + { + "epoch": 11.070143884892087, + "force_loss": 0.04909073933959007, + "step": 12310 + }, + { + "epoch": 11.079136690647482, + "grad_norm": 0.599552571773529, + "learning_rate": 9.354850977921094e-05, + "loss": 0.0312, + "step": 12320 + }, + { + "action_loss": 0.008449557237327099, + "epoch": 11.079136690647482, + "step": 12320 + }, + { + "epoch": 11.079136690647482, + "step": 12320, + "torque_loss": 0.12021370977163315 + }, + { + "epoch": 11.079136690647482, + "force_loss": 0.00739591708406806, + "step": 12320 + }, + { + "epoch": 11.088129496402878, + "grad_norm": 0.3833772838115692, + "learning_rate": 9.353496301545529e-05, + "loss": 0.0263, + "step": 12330 + }, + { + "action_loss": 0.010584007017314434, + "epoch": 11.088129496402878, + "step": 12330 + }, + { + "epoch": 11.088129496402878, + "step": 12330, + "torque_loss": 0.13035576045513153 + }, + { + "epoch": 11.088129496402878, + "force_loss": 0.01557536143809557, + "step": 12330 + }, + { + "epoch": 11.097122302158274, + "grad_norm": 0.6187864542007446, + "learning_rate": 9.352140302692733e-05, + "loss": 0.0269, + "step": 12340 + }, + { + "action_loss": 0.018573325127363205, + "epoch": 11.097122302158274, + "step": 12340 + }, + { + "epoch": 11.097122302158274, + "step": 12340, + "torque_loss": 0.17013008892536163 + }, + { + "epoch": 11.097122302158274, + "force_loss": 0.024161985144019127, + "step": 12340 + }, + { + "epoch": 11.10611510791367, + "grad_norm": 0.9494650363922119, + "learning_rate": 9.350782981774627e-05, + "loss": 0.0285, + "step": 12350 + }, + { + "action_loss": 0.011366683058440685, + "epoch": 11.10611510791367, + "step": 12350 + }, + { + "epoch": 11.10611510791367, + "step": 12350, + "torque_loss": 0.15562209486961365 + }, + { + "epoch": 11.10611510791367, + "force_loss": 0.010943818837404251, + "step": 12350 + }, + { + "epoch": 11.115107913669064, + "grad_norm": 0.8779278993606567, + "learning_rate": 9.349424339203526e-05, + "loss": 0.0271, + "step": 12360 + }, + { + "action_loss": 0.014526404440402985, + "epoch": 11.115107913669064, + "step": 12360 + }, + { + "epoch": 11.115107913669064, + "step": 12360, + "torque_loss": 0.13063812255859375 + }, + { + "epoch": 11.115107913669064, + "force_loss": 0.015571859665215015, + "step": 12360 + }, + { + "epoch": 11.12410071942446, + "grad_norm": 0.806794285774231, + "learning_rate": 9.34806437539215e-05, + "loss": 0.0289, + "step": 12370 + }, + { + "action_loss": 0.01971241645514965, + "epoch": 11.12410071942446, + "step": 12370 + }, + { + "epoch": 11.12410071942446, + "step": 12370, + "torque_loss": 0.15413321554660797 + }, + { + "epoch": 11.12410071942446, + "force_loss": 0.025549190118908882, + "step": 12370 + }, + { + "epoch": 11.133093525179856, + "grad_norm": 0.6933779120445251, + "learning_rate": 9.346703090753622e-05, + "loss": 0.0299, + "step": 12380 + }, + { + "action_loss": 0.011384930461645126, + "epoch": 11.133093525179856, + "step": 12380 + }, + { + "epoch": 11.133093525179856, + "step": 12380, + "torque_loss": 0.17711804807186127 + }, + { + "epoch": 11.133093525179856, + "force_loss": 0.011022083461284637, + "step": 12380 + }, + { + "epoch": 11.142086330935252, + "grad_norm": 0.6616423726081848, + "learning_rate": 9.345340485701461e-05, + "loss": 0.0296, + "step": 12390 + }, + { + "action_loss": 0.010563376359641552, + "epoch": 11.142086330935252, + "step": 12390 + }, + { + "epoch": 11.142086330935252, + "step": 12390, + "torque_loss": 0.12168272584676743 + }, + { + "epoch": 11.142086330935252, + "force_loss": 0.006697139237076044, + "step": 12390 + }, + { + "epoch": 11.151079136690647, + "grad_norm": 0.5267893075942993, + "learning_rate": 9.343976560649595e-05, + "loss": 0.0235, + "step": 12400 + }, + { + "action_loss": 0.008783641271293163, + "epoch": 11.151079136690647, + "step": 12400 + }, + { + "epoch": 11.151079136690647, + "step": 12400, + "torque_loss": 0.15881018340587616 + }, + { + "epoch": 11.151079136690647, + "force_loss": 0.01086476445198059, + "step": 12400 + }, + { + "epoch": 11.160071942446043, + "grad_norm": 0.3190533518791199, + "learning_rate": 9.342611316012344e-05, + "loss": 0.0258, + "step": 12410 + }, + { + "action_loss": 0.014200364239513874, + "epoch": 11.160071942446043, + "step": 12410 + }, + { + "epoch": 11.160071942446043, + "step": 12410, + "torque_loss": 0.15340310335159302 + }, + { + "epoch": 11.160071942446043, + "force_loss": 0.014808292500674725, + "step": 12410 + }, + { + "epoch": 11.16906474820144, + "grad_norm": 0.8420072197914124, + "learning_rate": 9.341244752204437e-05, + "loss": 0.0282, + "step": 12420 + }, + { + "action_loss": 0.011746775358915329, + "epoch": 11.16906474820144, + "step": 12420 + }, + { + "epoch": 11.16906474820144, + "step": 12420, + "torque_loss": 0.14641153812408447 + }, + { + "epoch": 11.16906474820144, + "force_loss": 0.00971127301454544, + "step": 12420 + }, + { + "epoch": 11.178057553956835, + "grad_norm": 0.36430823802948, + "learning_rate": 9.339876869640995e-05, + "loss": 0.0216, + "step": 12430 + }, + { + "action_loss": 0.009064631536602974, + "epoch": 11.178057553956835, + "step": 12430 + }, + { + "epoch": 11.178057553956835, + "step": 12430, + "torque_loss": 0.13181424140930176 + }, + { + "epoch": 11.178057553956835, + "force_loss": 0.011014536023139954, + "step": 12430 + }, + { + "epoch": 11.18705035971223, + "grad_norm": 0.36544182896614075, + "learning_rate": 9.33850766873755e-05, + "loss": 0.0289, + "step": 12440 + }, + { + "action_loss": 0.02069510892033577, + "epoch": 11.18705035971223, + "step": 12440 + }, + { + "epoch": 11.18705035971223, + "step": 12440, + "torque_loss": 0.1885557919740677 + }, + { + "epoch": 11.18705035971223, + "force_loss": 0.02032717876136303, + "step": 12440 + }, + { + "epoch": 11.196043165467627, + "grad_norm": 0.8094887137413025, + "learning_rate": 9.337137149910028e-05, + "loss": 0.0298, + "step": 12450 + }, + { + "action_loss": 0.0070904274471104145, + "epoch": 11.196043165467627, + "step": 12450 + }, + { + "epoch": 11.196043165467627, + "step": 12450, + "torque_loss": 0.12666967511177063 + }, + { + "epoch": 11.196043165467627, + "force_loss": 0.006781961768865585, + "step": 12450 + }, + { + "epoch": 11.20503597122302, + "grad_norm": 0.6613590121269226, + "learning_rate": 9.335765313574753e-05, + "loss": 0.0242, + "step": 12460 + }, + { + "action_loss": 0.008711927570402622, + "epoch": 11.20503597122302, + "step": 12460 + }, + { + "epoch": 11.20503597122302, + "step": 12460, + "torque_loss": 0.139108344912529 + }, + { + "epoch": 11.20503597122302, + "force_loss": 0.005266461055725813, + "step": 12460 + }, + { + "epoch": 11.214028776978417, + "grad_norm": 0.5062190890312195, + "learning_rate": 9.334392160148457e-05, + "loss": 0.0262, + "step": 12470 + }, + { + "action_loss": 0.008867678232491016, + "epoch": 11.214028776978417, + "step": 12470 + }, + { + "epoch": 11.214028776978417, + "step": 12470, + "torque_loss": 0.12036507576704025 + }, + { + "epoch": 11.214028776978417, + "force_loss": 0.015314076095819473, + "step": 12470 + }, + { + "epoch": 11.223021582733812, + "grad_norm": 0.922947108745575, + "learning_rate": 9.333017690048264e-05, + "loss": 0.0279, + "step": 12480 + }, + { + "action_loss": 0.008514330722391605, + "epoch": 11.223021582733812, + "step": 12480 + }, + { + "epoch": 11.223021582733812, + "step": 12480, + "torque_loss": 0.11133581399917603 + }, + { + "epoch": 11.223021582733812, + "force_loss": 0.006068896502256393, + "step": 12480 + }, + { + "epoch": 11.232014388489208, + "grad_norm": 0.8144896626472473, + "learning_rate": 9.331641903691706e-05, + "loss": 0.0259, + "step": 12490 + }, + { + "action_loss": 0.014711529016494751, + "epoch": 11.232014388489208, + "step": 12490 + }, + { + "epoch": 11.232014388489208, + "step": 12490, + "torque_loss": 0.12662211060523987 + }, + { + "epoch": 11.232014388489208, + "force_loss": 0.012998889200389385, + "step": 12490 + }, + { + "epoch": 11.241007194244604, + "grad_norm": 0.495222270488739, + "learning_rate": 9.330264801496707e-05, + "loss": 0.0343, + "step": 12500 + }, + { + "action_loss": 0.021865472197532654, + "epoch": 11.241007194244604, + "step": 12500 + }, + { + "epoch": 11.241007194244604, + "step": 12500, + "torque_loss": 0.18252168595790863 + }, + { + "epoch": 11.241007194244604, + "force_loss": 0.028112413361668587, + "step": 12500 + }, + { + "epoch": 11.25, + "grad_norm": 0.42036470770835876, + "learning_rate": 9.328886383881594e-05, + "loss": 0.0324, + "step": 12510 + }, + { + "action_loss": 0.019588559865951538, + "epoch": 11.25, + "step": 12510 + }, + { + "epoch": 11.25, + "step": 12510, + "torque_loss": 0.21246810257434845 + }, + { + "epoch": 11.25, + "force_loss": 0.00969478115439415, + "step": 12510 + }, + { + "epoch": 11.258992805755396, + "grad_norm": 0.49514368176460266, + "learning_rate": 9.327506651265095e-05, + "loss": 0.0338, + "step": 12520 + }, + { + "action_loss": 0.01497744768857956, + "epoch": 11.258992805755396, + "step": 12520 + }, + { + "epoch": 11.258992805755396, + "step": 12520, + "torque_loss": 0.12542618811130524 + }, + { + "epoch": 11.258992805755396, + "force_loss": 0.009002608247101307, + "step": 12520 + }, + { + "epoch": 11.267985611510792, + "grad_norm": 0.5654804706573486, + "learning_rate": 9.326125604066338e-05, + "loss": 0.0287, + "step": 12530 + }, + { + "action_loss": 0.005585464183241129, + "epoch": 11.267985611510792, + "step": 12530 + }, + { + "epoch": 11.267985611510792, + "step": 12530, + "torque_loss": 0.10648307204246521 + }, + { + "epoch": 11.267985611510792, + "force_loss": 0.00592239061370492, + "step": 12530 + }, + { + "epoch": 11.276978417266188, + "grad_norm": 0.49654582142829895, + "learning_rate": 9.324743242704847e-05, + "loss": 0.0258, + "step": 12540 + }, + { + "action_loss": 0.01231973897665739, + "epoch": 11.276978417266188, + "step": 12540 + }, + { + "epoch": 11.276978417266188, + "step": 12540, + "torque_loss": 0.19205810129642487 + }, + { + "epoch": 11.276978417266188, + "force_loss": 0.011897188611328602, + "step": 12540 + }, + { + "epoch": 11.285971223021583, + "grad_norm": 0.4989960789680481, + "learning_rate": 9.323359567600546e-05, + "loss": 0.0309, + "step": 12550 + }, + { + "action_loss": 0.014130854047834873, + "epoch": 11.285971223021583, + "step": 12550 + }, + { + "epoch": 11.285971223021583, + "step": 12550, + "torque_loss": 0.13981622457504272 + }, + { + "epoch": 11.285971223021583, + "force_loss": 0.018393585458397865, + "step": 12550 + }, + { + "epoch": 11.29496402877698, + "grad_norm": 0.693494975566864, + "learning_rate": 9.321974579173761e-05, + "loss": 0.0252, + "step": 12560 + }, + { + "action_loss": 0.007720166817307472, + "epoch": 11.29496402877698, + "step": 12560 + }, + { + "epoch": 11.29496402877698, + "step": 12560, + "torque_loss": 0.1686241775751114 + }, + { + "epoch": 11.29496402877698, + "force_loss": 0.006033910438418388, + "step": 12560 + }, + { + "epoch": 11.303956834532373, + "grad_norm": 0.44988879561424255, + "learning_rate": 9.320588277845213e-05, + "loss": 0.0345, + "step": 12570 + }, + { + "action_loss": 0.013009771704673767, + "epoch": 11.303956834532373, + "step": 12570 + }, + { + "epoch": 11.303956834532373, + "step": 12570, + "torque_loss": 0.18443401157855988 + }, + { + "epoch": 11.303956834532373, + "force_loss": 0.014809790067374706, + "step": 12570 + }, + { + "epoch": 11.31294964028777, + "grad_norm": 0.3342968225479126, + "learning_rate": 9.319200664036026e-05, + "loss": 0.0257, + "step": 12580 + }, + { + "action_loss": 0.004076090175658464, + "epoch": 11.31294964028777, + "step": 12580 + }, + { + "epoch": 11.31294964028777, + "step": 12580, + "torque_loss": 0.12488287687301636 + }, + { + "epoch": 11.31294964028777, + "force_loss": 0.004519579000771046, + "step": 12580 + }, + { + "epoch": 11.321942446043165, + "grad_norm": 0.6651833057403564, + "learning_rate": 9.31781173816772e-05, + "loss": 0.0245, + "step": 12590 + }, + { + "action_loss": 0.011581075377762318, + "epoch": 11.321942446043165, + "step": 12590 + }, + { + "epoch": 11.321942446043165, + "step": 12590, + "torque_loss": 0.13377225399017334 + }, + { + "epoch": 11.321942446043165, + "force_loss": 0.014463149011135101, + "step": 12590 + }, + { + "epoch": 11.33093525179856, + "grad_norm": 0.33726048469543457, + "learning_rate": 9.316421500662212e-05, + "loss": 0.0249, + "step": 12600 + }, + { + "action_loss": 0.0035350958351045847, + "epoch": 11.33093525179856, + "step": 12600 + }, + { + "epoch": 11.33093525179856, + "step": 12600, + "torque_loss": 0.08888860791921616 + }, + { + "epoch": 11.33093525179856, + "force_loss": 0.004291305784136057, + "step": 12600 + }, + { + "epoch": 11.339928057553957, + "grad_norm": 0.34957993030548096, + "learning_rate": 9.31502995194182e-05, + "loss": 0.0263, + "step": 12610 + }, + { + "action_loss": 0.009594803676009178, + "epoch": 11.339928057553957, + "step": 12610 + }, + { + "epoch": 11.339928057553957, + "step": 12610, + "torque_loss": 0.17818152904510498 + }, + { + "epoch": 11.339928057553957, + "force_loss": 0.009814041666686535, + "step": 12610 + }, + { + "epoch": 11.348920863309353, + "grad_norm": 0.3459634482860565, + "learning_rate": 9.31363709242926e-05, + "loss": 0.0276, + "step": 12620 + }, + { + "action_loss": 0.012794236652553082, + "epoch": 11.348920863309353, + "step": 12620 + }, + { + "epoch": 11.348920863309353, + "step": 12620, + "torque_loss": 0.14862807095050812 + }, + { + "epoch": 11.348920863309353, + "force_loss": 0.008390388451516628, + "step": 12620 + }, + { + "epoch": 11.357913669064748, + "grad_norm": 0.31086641550064087, + "learning_rate": 9.312242922547647e-05, + "loss": 0.0209, + "step": 12630 + }, + { + "action_loss": 0.008075055666267872, + "epoch": 11.357913669064748, + "step": 12630 + }, + { + "epoch": 11.357913669064748, + "step": 12630, + "torque_loss": 0.1388295441865921 + }, + { + "epoch": 11.357913669064748, + "force_loss": 0.010876013897359371, + "step": 12630 + }, + { + "epoch": 11.366906474820144, + "grad_norm": 0.515978991985321, + "learning_rate": 9.310847442720492e-05, + "loss": 0.0281, + "step": 12640 + }, + { + "action_loss": 0.01441405713558197, + "epoch": 11.366906474820144, + "step": 12640 + }, + { + "epoch": 11.366906474820144, + "step": 12640, + "torque_loss": 0.1357070356607437 + }, + { + "epoch": 11.366906474820144, + "force_loss": 0.008753783069550991, + "step": 12640 + }, + { + "epoch": 11.37589928057554, + "grad_norm": 0.443697452545166, + "learning_rate": 9.309450653371706e-05, + "loss": 0.0264, + "step": 12650 + }, + { + "action_loss": 0.014335624873638153, + "epoch": 11.37589928057554, + "step": 12650 + }, + { + "epoch": 11.37589928057554, + "step": 12650, + "torque_loss": 0.13199064135551453 + }, + { + "epoch": 11.37589928057554, + "force_loss": 0.015366551466286182, + "step": 12650 + }, + { + "epoch": 11.384892086330936, + "grad_norm": 0.8964945673942566, + "learning_rate": 9.308052554925595e-05, + "loss": 0.0264, + "step": 12660 + }, + { + "action_loss": 0.02808235026896, + "epoch": 11.384892086330936, + "step": 12660 + }, + { + "epoch": 11.384892086330936, + "step": 12660, + "torque_loss": 0.14010505378246307 + }, + { + "epoch": 11.384892086330936, + "force_loss": 0.03326769545674324, + "step": 12660 + }, + { + "epoch": 11.39388489208633, + "grad_norm": 0.5438146591186523, + "learning_rate": 9.306653147806867e-05, + "loss": 0.0276, + "step": 12670 + }, + { + "action_loss": 0.0042573525570333, + "epoch": 11.39388489208633, + "step": 12670 + }, + { + "epoch": 11.39388489208633, + "step": 12670, + "torque_loss": 0.10584481805562973 + }, + { + "epoch": 11.39388489208633, + "force_loss": 0.004664700012654066, + "step": 12670 + }, + { + "epoch": 11.402877697841726, + "grad_norm": 0.3931425213813782, + "learning_rate": 9.305252432440622e-05, + "loss": 0.0258, + "step": 12680 + }, + { + "action_loss": 0.005934089422225952, + "epoch": 11.402877697841726, + "step": 12680 + }, + { + "epoch": 11.402877697841726, + "step": 12680, + "torque_loss": 0.10848554223775864 + }, + { + "epoch": 11.402877697841726, + "force_loss": 0.011862953193485737, + "step": 12680 + }, + { + "epoch": 11.411870503597122, + "grad_norm": 0.4016677439212799, + "learning_rate": 9.303850409252361e-05, + "loss": 0.0237, + "step": 12690 + }, + { + "action_loss": 0.008334841579198837, + "epoch": 11.411870503597122, + "step": 12690 + }, + { + "epoch": 11.411870503597122, + "step": 12690, + "torque_loss": 0.11518295854330063 + }, + { + "epoch": 11.411870503597122, + "force_loss": 0.0063664354383945465, + "step": 12690 + }, + { + "epoch": 11.420863309352518, + "grad_norm": 0.502417266368866, + "learning_rate": 9.302447078667985e-05, + "loss": 0.0236, + "step": 12700 + }, + { + "action_loss": 0.00721868546679616, + "epoch": 11.420863309352518, + "step": 12700 + }, + { + "epoch": 11.420863309352518, + "step": 12700, + "torque_loss": 0.11511284857988358 + }, + { + "epoch": 11.420863309352518, + "force_loss": 0.0068063754588365555, + "step": 12700 + }, + { + "epoch": 11.429856115107913, + "grad_norm": 0.7590480446815491, + "learning_rate": 9.301042441113783e-05, + "loss": 0.025, + "step": 12710 + }, + { + "action_loss": 0.008520825766026974, + "epoch": 11.429856115107913, + "step": 12710 + }, + { + "epoch": 11.429856115107913, + "step": 12710, + "torque_loss": 0.15272606909275055 + }, + { + "epoch": 11.429856115107913, + "force_loss": 0.007586202118545771, + "step": 12710 + }, + { + "epoch": 11.43884892086331, + "grad_norm": 0.3182271718978882, + "learning_rate": 9.299636497016451e-05, + "loss": 0.0304, + "step": 12720 + }, + { + "action_loss": 0.042789313942193985, + "epoch": 11.43884892086331, + "step": 12720 + }, + { + "epoch": 11.43884892086331, + "step": 12720, + "torque_loss": 0.20183898508548737 + }, + { + "epoch": 11.43884892086331, + "force_loss": 0.060008030384778976, + "step": 12720 + }, + { + "epoch": 11.447841726618705, + "grad_norm": 0.46768414974212646, + "learning_rate": 9.298229246803076e-05, + "loss": 0.0268, + "step": 12730 + }, + { + "action_loss": 0.008419915102422237, + "epoch": 11.447841726618705, + "step": 12730 + }, + { + "epoch": 11.447841726618705, + "step": 12730, + "torque_loss": 0.09191560000181198 + }, + { + "epoch": 11.447841726618705, + "force_loss": 0.005917018745094538, + "step": 12730 + }, + { + "epoch": 11.456834532374101, + "grad_norm": 0.7658388018608093, + "learning_rate": 9.296820690901144e-05, + "loss": 0.0294, + "step": 12740 + }, + { + "action_loss": 0.004596956539899111, + "epoch": 11.456834532374101, + "step": 12740 + }, + { + "epoch": 11.456834532374101, + "step": 12740, + "torque_loss": 0.11617550998926163 + }, + { + "epoch": 11.456834532374101, + "force_loss": 0.004909941460937262, + "step": 12740 + }, + { + "epoch": 11.465827338129497, + "grad_norm": 0.41740214824676514, + "learning_rate": 9.295410829738539e-05, + "loss": 0.0254, + "step": 12750 + }, + { + "action_loss": 0.009858358651399612, + "epoch": 11.465827338129497, + "step": 12750 + }, + { + "epoch": 11.465827338129497, + "step": 12750, + "torque_loss": 0.14883457124233246 + }, + { + "epoch": 11.465827338129497, + "force_loss": 0.011447202414274216, + "step": 12750 + }, + { + "epoch": 11.474820143884893, + "grad_norm": 0.5770699977874756, + "learning_rate": 9.293999663743535e-05, + "loss": 0.0262, + "step": 12760 + }, + { + "action_loss": 0.010206750594079494, + "epoch": 11.474820143884893, + "step": 12760 + }, + { + "epoch": 11.474820143884893, + "step": 12760, + "torque_loss": 0.185093954205513 + }, + { + "epoch": 11.474820143884893, + "force_loss": 0.018447376787662506, + "step": 12760 + }, + { + "epoch": 11.483812949640289, + "grad_norm": 0.5014813542366028, + "learning_rate": 9.292587193344813e-05, + "loss": 0.0303, + "step": 12770 + }, + { + "action_loss": 0.009192741475999355, + "epoch": 11.483812949640289, + "step": 12770 + }, + { + "epoch": 11.483812949640289, + "step": 12770, + "torque_loss": 0.14501263201236725 + }, + { + "epoch": 11.483812949640289, + "force_loss": 0.007580628152936697, + "step": 12770 + }, + { + "epoch": 11.492805755395683, + "grad_norm": 0.44411325454711914, + "learning_rate": 9.291173418971437e-05, + "loss": 0.0259, + "step": 12780 + }, + { + "action_loss": 0.0186526607722044, + "epoch": 11.492805755395683, + "step": 12780 + }, + { + "epoch": 11.492805755395683, + "step": 12780, + "torque_loss": 0.17377136647701263 + }, + { + "epoch": 11.492805755395683, + "force_loss": 0.014825411140918732, + "step": 12780 + }, + { + "epoch": 11.501798561151078, + "grad_norm": 0.4844873547554016, + "learning_rate": 9.28975834105288e-05, + "loss": 0.0278, + "step": 12790 + }, + { + "action_loss": 0.020406672731041908, + "epoch": 11.501798561151078, + "step": 12790 + }, + { + "epoch": 11.501798561151078, + "step": 12790, + "torque_loss": 0.1572578400373459 + }, + { + "epoch": 11.501798561151078, + "force_loss": 0.016273969784379005, + "step": 12790 + }, + { + "epoch": 11.510791366906474, + "grad_norm": 0.40174224972724915, + "learning_rate": 9.288341960019004e-05, + "loss": 0.0263, + "step": 12800 + }, + { + "action_loss": 0.004163198638707399, + "epoch": 11.510791366906474, + "step": 12800 + }, + { + "epoch": 11.510791366906474, + "step": 12800, + "torque_loss": 0.09697920083999634 + }, + { + "epoch": 11.510791366906474, + "force_loss": 0.0047917976044118404, + "step": 12800 + }, + { + "epoch": 11.51978417266187, + "grad_norm": 0.5060114860534668, + "learning_rate": 9.286924276300067e-05, + "loss": 0.0281, + "step": 12810 + }, + { + "action_loss": 0.00661865109577775, + "epoch": 11.51978417266187, + "step": 12810 + }, + { + "epoch": 11.51978417266187, + "step": 12810, + "torque_loss": 0.12898625433444977 + }, + { + "epoch": 11.51978417266187, + "force_loss": 0.010716870427131653, + "step": 12810 + }, + { + "epoch": 11.528776978417266, + "grad_norm": 0.5622050762176514, + "learning_rate": 9.285505290326726e-05, + "loss": 0.0256, + "step": 12820 + }, + { + "action_loss": 0.004694139119237661, + "epoch": 11.528776978417266, + "step": 12820 + }, + { + "epoch": 11.528776978417266, + "step": 12820, + "torque_loss": 0.10522476583719254 + }, + { + "epoch": 11.528776978417266, + "force_loss": 0.005791978910565376, + "step": 12820 + }, + { + "epoch": 11.537769784172662, + "grad_norm": 0.49937760829925537, + "learning_rate": 9.284085002530027e-05, + "loss": 0.0226, + "step": 12830 + }, + { + "action_loss": 0.0157787948846817, + "epoch": 11.537769784172662, + "step": 12830 + }, + { + "epoch": 11.537769784172662, + "step": 12830, + "torque_loss": 0.14168919622898102 + }, + { + "epoch": 11.537769784172662, + "force_loss": 0.013614925555884838, + "step": 12830 + }, + { + "epoch": 11.546762589928058, + "grad_norm": 0.48086875677108765, + "learning_rate": 9.282663413341422e-05, + "loss": 0.0314, + "step": 12840 + }, + { + "action_loss": 0.00636838236823678, + "epoch": 11.546762589928058, + "step": 12840 + }, + { + "epoch": 11.546762589928058, + "step": 12840, + "torque_loss": 0.15166963636875153 + }, + { + "epoch": 11.546762589928058, + "force_loss": 0.006735137198120356, + "step": 12840 + }, + { + "epoch": 11.555755395683454, + "grad_norm": 0.4735391139984131, + "learning_rate": 9.281240523192747e-05, + "loss": 0.0235, + "step": 12850 + }, + { + "action_loss": 0.013812369666993618, + "epoch": 11.555755395683454, + "step": 12850 + }, + { + "epoch": 11.555755395683454, + "step": 12850, + "torque_loss": 0.1245051696896553 + }, + { + "epoch": 11.555755395683454, + "force_loss": 0.016836203634738922, + "step": 12850 + }, + { + "epoch": 11.56474820143885, + "grad_norm": 0.49611353874206543, + "learning_rate": 9.279816332516242e-05, + "loss": 0.0232, + "step": 12860 + }, + { + "action_loss": 0.018690388649702072, + "epoch": 11.56474820143885, + "step": 12860 + }, + { + "epoch": 11.56474820143885, + "step": 12860, + "torque_loss": 0.14858348667621613 + }, + { + "epoch": 11.56474820143885, + "force_loss": 0.017054811120033264, + "step": 12860 + }, + { + "epoch": 11.573741007194245, + "grad_norm": 0.6356492638587952, + "learning_rate": 9.278390841744536e-05, + "loss": 0.0288, + "step": 12870 + }, + { + "action_loss": 0.005330819170922041, + "epoch": 11.573741007194245, + "step": 12870 + }, + { + "epoch": 11.573741007194245, + "step": 12870, + "torque_loss": 0.11260536313056946 + }, + { + "epoch": 11.573741007194245, + "force_loss": 0.004948218818753958, + "step": 12870 + }, + { + "epoch": 11.582733812949641, + "grad_norm": 0.7857191562652588, + "learning_rate": 9.276964051310658e-05, + "loss": 0.0259, + "step": 12880 + }, + { + "action_loss": 0.010961071588099003, + "epoch": 11.582733812949641, + "step": 12880 + }, + { + "epoch": 11.582733812949641, + "step": 12880, + "torque_loss": 0.1459680199623108 + }, + { + "epoch": 11.582733812949641, + "force_loss": 0.010086453519761562, + "step": 12880 + }, + { + "epoch": 11.591726618705035, + "grad_norm": 0.7839841842651367, + "learning_rate": 9.275535961648027e-05, + "loss": 0.0221, + "step": 12890 + }, + { + "action_loss": 0.005960522685199976, + "epoch": 11.591726618705035, + "step": 12890 + }, + { + "epoch": 11.591726618705035, + "step": 12890, + "torque_loss": 0.15388502180576324 + }, + { + "epoch": 11.591726618705035, + "force_loss": 0.0074750198982656, + "step": 12890 + }, + { + "epoch": 11.600719424460431, + "grad_norm": 0.4410642385482788, + "learning_rate": 9.274106573190459e-05, + "loss": 0.0227, + "step": 12900 + }, + { + "action_loss": 0.009813723154366016, + "epoch": 11.600719424460431, + "step": 12900 + }, + { + "epoch": 11.600719424460431, + "step": 12900, + "torque_loss": 0.1576903611421585 + }, + { + "epoch": 11.600719424460431, + "force_loss": 0.00767472991719842, + "step": 12900 + }, + { + "epoch": 11.609712230215827, + "grad_norm": 0.4177224934101105, + "learning_rate": 9.272675886372168e-05, + "loss": 0.0223, + "step": 12910 + }, + { + "action_loss": 0.013900388963520527, + "epoch": 11.609712230215827, + "step": 12910 + }, + { + "epoch": 11.609712230215827, + "step": 12910, + "torque_loss": 0.13374271988868713 + }, + { + "epoch": 11.609712230215827, + "force_loss": 0.006211420521140099, + "step": 12910 + }, + { + "epoch": 11.618705035971223, + "grad_norm": 0.42956387996673584, + "learning_rate": 9.271243901627754e-05, + "loss": 0.0261, + "step": 12920 + }, + { + "action_loss": 0.007312507834285498, + "epoch": 11.618705035971223, + "step": 12920 + }, + { + "epoch": 11.618705035971223, + "step": 12920, + "torque_loss": 0.12143468111753464 + }, + { + "epoch": 11.618705035971223, + "force_loss": 0.006631376687437296, + "step": 12920 + }, + { + "epoch": 11.627697841726619, + "grad_norm": 1.1085774898529053, + "learning_rate": 9.269810619392219e-05, + "loss": 0.0234, + "step": 12930 + }, + { + "action_loss": 0.0069969408214092255, + "epoch": 11.627697841726619, + "step": 12930 + }, + { + "epoch": 11.627697841726619, + "step": 12930, + "torque_loss": 0.07154836505651474 + }, + { + "epoch": 11.627697841726619, + "force_loss": 0.005450129508972168, + "step": 12930 + }, + { + "epoch": 11.636690647482014, + "grad_norm": 0.5631125569343567, + "learning_rate": 9.268376040100955e-05, + "loss": 0.0296, + "step": 12940 + }, + { + "action_loss": 0.014250400476157665, + "epoch": 11.636690647482014, + "step": 12940 + }, + { + "epoch": 11.636690647482014, + "step": 12940, + "torque_loss": 0.1898178607225418 + }, + { + "epoch": 11.636690647482014, + "force_loss": 0.01934933476150036, + "step": 12940 + }, + { + "epoch": 11.64568345323741, + "grad_norm": 0.6192155480384827, + "learning_rate": 9.266940164189752e-05, + "loss": 0.0225, + "step": 12950 + }, + { + "action_loss": 0.022436590865254402, + "epoch": 11.64568345323741, + "step": 12950 + }, + { + "epoch": 11.64568345323741, + "step": 12950, + "torque_loss": 0.11736515909433365 + }, + { + "epoch": 11.64568345323741, + "force_loss": 0.01960713043808937, + "step": 12950 + }, + { + "epoch": 11.654676258992806, + "grad_norm": 0.6199759244918823, + "learning_rate": 9.265502992094787e-05, + "loss": 0.0266, + "step": 12960 + }, + { + "action_loss": 0.0043710218742489815, + "epoch": 11.654676258992806, + "step": 12960 + }, + { + "epoch": 11.654676258992806, + "step": 12960, + "torque_loss": 0.10056054592132568 + }, + { + "epoch": 11.654676258992806, + "force_loss": 0.004458553157746792, + "step": 12960 + }, + { + "epoch": 11.663669064748202, + "grad_norm": 0.4860585033893585, + "learning_rate": 9.264064524252638e-05, + "loss": 0.0229, + "step": 12970 + }, + { + "action_loss": 0.003602929413318634, + "epoch": 11.663669064748202, + "step": 12970 + }, + { + "epoch": 11.663669064748202, + "step": 12970, + "torque_loss": 0.13326184451580048 + }, + { + "epoch": 11.663669064748202, + "force_loss": 0.004667218774557114, + "step": 12970 + }, + { + "epoch": 11.672661870503598, + "grad_norm": 0.3669048547744751, + "learning_rate": 9.262624761100271e-05, + "loss": 0.0267, + "step": 12980 + }, + { + "action_loss": 0.005022808909416199, + "epoch": 11.672661870503598, + "step": 12980 + }, + { + "epoch": 11.672661870503598, + "step": 12980, + "torque_loss": 0.10578330606222153 + }, + { + "epoch": 11.672661870503598, + "force_loss": 0.006402730941772461, + "step": 12980 + }, + { + "epoch": 11.681654676258994, + "grad_norm": 0.7687297463417053, + "learning_rate": 9.261183703075051e-05, + "loss": 0.0245, + "step": 12990 + }, + { + "action_loss": 0.015614025294780731, + "epoch": 11.681654676258994, + "step": 12990 + }, + { + "epoch": 11.681654676258994, + "step": 12990, + "torque_loss": 0.1742136925458908 + }, + { + "epoch": 11.681654676258994, + "force_loss": 0.012966055423021317, + "step": 12990 + }, + { + "epoch": 11.690647482014388, + "grad_norm": 1.1082276105880737, + "learning_rate": 9.259741350614733e-05, + "loss": 0.0243, + "step": 13000 + }, + { + "action_loss": 0.0065927873365581036, + "epoch": 11.690647482014388, + "step": 13000 + }, + { + "epoch": 11.690647482014388, + "step": 13000, + "torque_loss": 0.15699200332164764 + }, + { + "epoch": 11.690647482014388, + "force_loss": 0.0077374279499053955, + "step": 13000 + }, + { + "epoch": 11.699640287769784, + "grad_norm": 0.3770151734352112, + "learning_rate": 9.258297704157464e-05, + "loss": 0.0279, + "step": 13010 + }, + { + "action_loss": 0.005304222460836172, + "epoch": 11.699640287769784, + "step": 13010 + }, + { + "epoch": 11.699640287769784, + "step": 13010, + "torque_loss": 0.11752446740865707 + }, + { + "epoch": 11.699640287769784, + "force_loss": 0.0047838119789958, + "step": 13010 + }, + { + "epoch": 11.70863309352518, + "grad_norm": 0.43294763565063477, + "learning_rate": 9.256852764141786e-05, + "loss": 0.0265, + "step": 13020 + }, + { + "action_loss": 0.010032732039690018, + "epoch": 11.70863309352518, + "step": 13020 + }, + { + "epoch": 11.70863309352518, + "step": 13020, + "torque_loss": 0.1645011454820633 + }, + { + "epoch": 11.70863309352518, + "force_loss": 0.014812429435551167, + "step": 13020 + }, + { + "epoch": 11.717625899280575, + "grad_norm": 0.5424838066101074, + "learning_rate": 9.255406531006634e-05, + "loss": 0.0241, + "step": 13030 + }, + { + "action_loss": 0.008985103107988834, + "epoch": 11.717625899280575, + "step": 13030 + }, + { + "epoch": 11.717625899280575, + "step": 13030, + "torque_loss": 0.14888577163219452 + }, + { + "epoch": 11.717625899280575, + "force_loss": 0.011289309710264206, + "step": 13030 + }, + { + "epoch": 11.726618705035971, + "grad_norm": 0.34489864110946655, + "learning_rate": 9.253959005191335e-05, + "loss": 0.036, + "step": 13040 + }, + { + "action_loss": 0.010893319733440876, + "epoch": 11.726618705035971, + "step": 13040 + }, + { + "epoch": 11.726618705035971, + "step": 13040, + "torque_loss": 0.16150763630867004 + }, + { + "epoch": 11.726618705035971, + "force_loss": 0.00955308135598898, + "step": 13040 + }, + { + "epoch": 11.735611510791367, + "grad_norm": 0.5194714069366455, + "learning_rate": 9.25251018713561e-05, + "loss": 0.0299, + "step": 13050 + }, + { + "action_loss": 0.007421460468322039, + "epoch": 11.735611510791367, + "step": 13050 + }, + { + "epoch": 11.735611510791367, + "step": 13050, + "torque_loss": 0.14697431027889252 + }, + { + "epoch": 11.735611510791367, + "force_loss": 0.010507498867809772, + "step": 13050 + }, + { + "epoch": 11.744604316546763, + "grad_norm": 0.5162916779518127, + "learning_rate": 9.251060077279571e-05, + "loss": 0.0236, + "step": 13060 + }, + { + "action_loss": 0.02863156795501709, + "epoch": 11.744604316546763, + "step": 13060 + }, + { + "epoch": 11.744604316546763, + "step": 13060, + "torque_loss": 0.18968182802200317 + }, + { + "epoch": 11.744604316546763, + "force_loss": 0.05160766467452049, + "step": 13060 + }, + { + "epoch": 11.753597122302159, + "grad_norm": 0.43360474705696106, + "learning_rate": 9.249608676063724e-05, + "loss": 0.0313, + "step": 13070 + }, + { + "action_loss": 0.009612124413251877, + "epoch": 11.753597122302159, + "step": 13070 + }, + { + "epoch": 11.753597122302159, + "step": 13070, + "torque_loss": 0.13511134684085846 + }, + { + "epoch": 11.753597122302159, + "force_loss": 0.010957022197544575, + "step": 13070 + }, + { + "epoch": 11.762589928057555, + "grad_norm": 0.3442375361919403, + "learning_rate": 9.248155983928964e-05, + "loss": 0.0215, + "step": 13080 + }, + { + "action_loss": 0.008205555379390717, + "epoch": 11.762589928057555, + "step": 13080 + }, + { + "epoch": 11.762589928057555, + "step": 13080, + "torque_loss": 0.09010195732116699 + }, + { + "epoch": 11.762589928057555, + "force_loss": 0.009156804531812668, + "step": 13080 + }, + { + "epoch": 11.77158273381295, + "grad_norm": 0.521977961063385, + "learning_rate": 9.246702001316583e-05, + "loss": 0.0251, + "step": 13090 + }, + { + "action_loss": 0.01836331933736801, + "epoch": 11.77158273381295, + "step": 13090 + }, + { + "epoch": 11.77158273381295, + "step": 13090, + "torque_loss": 0.19094258546829224 + }, + { + "epoch": 11.77158273381295, + "force_loss": 0.01802162639796734, + "step": 13090 + }, + { + "epoch": 11.780575539568346, + "grad_norm": 0.7244899868965149, + "learning_rate": 9.245246728668262e-05, + "loss": 0.0265, + "step": 13100 + }, + { + "action_loss": 0.010238493792712688, + "epoch": 11.780575539568346, + "step": 13100 + }, + { + "epoch": 11.780575539568346, + "step": 13100, + "torque_loss": 0.13114742934703827 + }, + { + "epoch": 11.780575539568346, + "force_loss": 0.011190635152161121, + "step": 13100 + }, + { + "epoch": 11.78956834532374, + "grad_norm": 0.7381954789161682, + "learning_rate": 9.243790166426073e-05, + "loss": 0.0297, + "step": 13110 + }, + { + "action_loss": 0.028110459446907043, + "epoch": 11.78956834532374, + "step": 13110 + }, + { + "epoch": 11.78956834532374, + "step": 13110, + "torque_loss": 0.14432667195796967 + }, + { + "epoch": 11.78956834532374, + "force_loss": 0.024785900488495827, + "step": 13110 + }, + { + "epoch": 11.798561151079136, + "grad_norm": 0.5881648659706116, + "learning_rate": 9.242332315032484e-05, + "loss": 0.0267, + "step": 13120 + }, + { + "action_loss": 0.007070481777191162, + "epoch": 11.798561151079136, + "step": 13120 + }, + { + "epoch": 11.798561151079136, + "step": 13120, + "torque_loss": 0.11087200790643692 + }, + { + "epoch": 11.798561151079136, + "force_loss": 0.007086490746587515, + "step": 13120 + }, + { + "epoch": 11.807553956834532, + "grad_norm": 0.5413809418678284, + "learning_rate": 9.240873174930349e-05, + "loss": 0.0325, + "step": 13130 + }, + { + "action_loss": 0.004751246888190508, + "epoch": 11.807553956834532, + "step": 13130 + }, + { + "epoch": 11.807553956834532, + "step": 13130, + "torque_loss": 0.1028279960155487 + }, + { + "epoch": 11.807553956834532, + "force_loss": 0.005823200102895498, + "step": 13130 + }, + { + "epoch": 11.816546762589928, + "grad_norm": 0.634873628616333, + "learning_rate": 9.239412746562917e-05, + "loss": 0.031, + "step": 13140 + }, + { + "action_loss": 0.019993731752038002, + "epoch": 11.816546762589928, + "step": 13140 + }, + { + "epoch": 11.816546762589928, + "step": 13140, + "torque_loss": 0.1469378024339676 + }, + { + "epoch": 11.816546762589928, + "force_loss": 0.010980661027133465, + "step": 13140 + }, + { + "epoch": 11.825539568345324, + "grad_norm": 0.4987242817878723, + "learning_rate": 9.237951030373828e-05, + "loss": 0.0343, + "step": 13150 + }, + { + "action_loss": 0.016738135367631912, + "epoch": 11.825539568345324, + "step": 13150 + }, + { + "epoch": 11.825539568345324, + "step": 13150, + "torque_loss": 0.16462506353855133 + }, + { + "epoch": 11.825539568345324, + "force_loss": 0.012665067799389362, + "step": 13150 + }, + { + "epoch": 11.83453237410072, + "grad_norm": 0.6414262652397156, + "learning_rate": 9.236488026807113e-05, + "loss": 0.0328, + "step": 13160 + }, + { + "action_loss": 0.006865693721920252, + "epoch": 11.83453237410072, + "step": 13160 + }, + { + "epoch": 11.83453237410072, + "step": 13160, + "torque_loss": 0.12536965310573578 + }, + { + "epoch": 11.83453237410072, + "force_loss": 0.00795132014900446, + "step": 13160 + }, + { + "epoch": 11.843525179856115, + "grad_norm": 0.4860328435897827, + "learning_rate": 9.235023736307193e-05, + "loss": 0.0293, + "step": 13170 + }, + { + "action_loss": 0.006817122921347618, + "epoch": 11.843525179856115, + "step": 13170 + }, + { + "epoch": 11.843525179856115, + "step": 13170, + "torque_loss": 0.12449777126312256 + }, + { + "epoch": 11.843525179856115, + "force_loss": 0.0076742880046367645, + "step": 13170 + }, + { + "epoch": 11.852517985611511, + "grad_norm": 0.6348298788070679, + "learning_rate": 9.233558159318881e-05, + "loss": 0.0263, + "step": 13180 + }, + { + "action_loss": 0.0048419334925711155, + "epoch": 11.852517985611511, + "step": 13180 + }, + { + "epoch": 11.852517985611511, + "step": 13180, + "torque_loss": 0.15208874642848969 + }, + { + "epoch": 11.852517985611511, + "force_loss": 0.006664233282208443, + "step": 13180 + }, + { + "epoch": 11.861510791366907, + "grad_norm": 0.449191153049469, + "learning_rate": 9.232091296287382e-05, + "loss": 0.024, + "step": 13190 + }, + { + "action_loss": 0.0073325857520103455, + "epoch": 11.861510791366907, + "step": 13190 + }, + { + "epoch": 11.861510791366907, + "step": 13190, + "torque_loss": 0.13216394186019897 + }, + { + "epoch": 11.861510791366907, + "force_loss": 0.010304437950253487, + "step": 13190 + }, + { + "epoch": 11.870503597122303, + "grad_norm": 1.0398387908935547, + "learning_rate": 9.230623147658288e-05, + "loss": 0.0274, + "step": 13200 + }, + { + "action_loss": 0.010095992125570774, + "epoch": 11.870503597122303, + "step": 13200 + }, + { + "epoch": 11.870503597122303, + "step": 13200, + "torque_loss": 0.09582933038473129 + }, + { + "epoch": 11.870503597122303, + "force_loss": 0.007539154961705208, + "step": 13200 + }, + { + "epoch": 11.879496402877697, + "grad_norm": 0.30475932359695435, + "learning_rate": 9.229153713877586e-05, + "loss": 0.0249, + "step": 13210 + }, + { + "action_loss": 0.01730423979461193, + "epoch": 11.879496402877697, + "step": 13210 + }, + { + "epoch": 11.879496402877697, + "step": 13210, + "torque_loss": 0.1708080768585205 + }, + { + "epoch": 11.879496402877697, + "force_loss": 0.01646292582154274, + "step": 13210 + }, + { + "epoch": 11.888489208633093, + "grad_norm": 0.47347113490104675, + "learning_rate": 9.227682995391649e-05, + "loss": 0.0304, + "step": 13220 + }, + { + "action_loss": 0.026272406801581383, + "epoch": 11.888489208633093, + "step": 13220 + }, + { + "epoch": 11.888489208633093, + "step": 13220, + "torque_loss": 0.15797697007656097 + }, + { + "epoch": 11.888489208633093, + "force_loss": 0.019032509997487068, + "step": 13220 + }, + { + "epoch": 11.897482014388489, + "grad_norm": 0.420165479183197, + "learning_rate": 9.226210992647243e-05, + "loss": 0.0269, + "step": 13230 + }, + { + "action_loss": 0.008027888834476471, + "epoch": 11.897482014388489, + "step": 13230 + }, + { + "epoch": 11.897482014388489, + "step": 13230, + "torque_loss": 0.13189497590065002 + }, + { + "epoch": 11.897482014388489, + "force_loss": 0.005640607327222824, + "step": 13230 + }, + { + "epoch": 11.906474820143885, + "grad_norm": 0.5762290954589844, + "learning_rate": 9.224737706091525e-05, + "loss": 0.024, + "step": 13240 + }, + { + "action_loss": 0.017476534470915794, + "epoch": 11.906474820143885, + "step": 13240 + }, + { + "epoch": 11.906474820143885, + "step": 13240, + "torque_loss": 0.206381693482399 + }, + { + "epoch": 11.906474820143885, + "force_loss": 0.01907743513584137, + "step": 13240 + }, + { + "epoch": 11.91546762589928, + "grad_norm": 0.8608695268630981, + "learning_rate": 9.223263136172039e-05, + "loss": 0.0324, + "step": 13250 + }, + { + "action_loss": 0.014742610044777393, + "epoch": 11.91546762589928, + "step": 13250 + }, + { + "epoch": 11.91546762589928, + "step": 13250, + "torque_loss": 0.18341481685638428 + }, + { + "epoch": 11.91546762589928, + "force_loss": 0.015031460672616959, + "step": 13250 + }, + { + "epoch": 11.924460431654676, + "grad_norm": 0.5124015212059021, + "learning_rate": 9.22178728333672e-05, + "loss": 0.0269, + "step": 13260 + }, + { + "action_loss": 0.01137368381023407, + "epoch": 11.924460431654676, + "step": 13260 + }, + { + "epoch": 11.924460431654676, + "step": 13260, + "torque_loss": 0.11460211127996445 + }, + { + "epoch": 11.924460431654676, + "force_loss": 0.01490174699574709, + "step": 13260 + }, + { + "epoch": 11.933453237410072, + "grad_norm": 0.5288162231445312, + "learning_rate": 9.220310148033897e-05, + "loss": 0.0287, + "step": 13270 + }, + { + "action_loss": 0.0071401894092559814, + "epoch": 11.933453237410072, + "step": 13270 + }, + { + "epoch": 11.933453237410072, + "step": 13270, + "torque_loss": 0.10579943656921387 + }, + { + "epoch": 11.933453237410072, + "force_loss": 0.005128185730427504, + "step": 13270 + }, + { + "epoch": 11.942446043165468, + "grad_norm": 0.7475045323371887, + "learning_rate": 9.21883173071228e-05, + "loss": 0.0258, + "step": 13280 + }, + { + "action_loss": 0.005564589519053698, + "epoch": 11.942446043165468, + "step": 13280 + }, + { + "epoch": 11.942446043165468, + "step": 13280, + "torque_loss": 0.13392579555511475 + }, + { + "epoch": 11.942446043165468, + "force_loss": 0.005843350198119879, + "step": 13280 + }, + { + "epoch": 11.951438848920864, + "grad_norm": 0.3970196545124054, + "learning_rate": 9.217352031820976e-05, + "loss": 0.0216, + "step": 13290 + }, + { + "action_loss": 0.02049397863447666, + "epoch": 11.951438848920864, + "step": 13290 + }, + { + "epoch": 11.951438848920864, + "step": 13290, + "torque_loss": 0.12116687744855881 + }, + { + "epoch": 11.951438848920864, + "force_loss": 0.029718657955527306, + "step": 13290 + }, + { + "epoch": 11.96043165467626, + "grad_norm": 0.41319262981414795, + "learning_rate": 9.215871051809477e-05, + "loss": 0.0259, + "step": 13300 + }, + { + "action_loss": 0.004336673300713301, + "epoch": 11.96043165467626, + "step": 13300 + }, + { + "epoch": 11.96043165467626, + "step": 13300, + "torque_loss": 0.11368455737829208 + }, + { + "epoch": 11.96043165467626, + "force_loss": 0.004420553799718618, + "step": 13300 + }, + { + "epoch": 11.969424460431654, + "grad_norm": 0.7107324600219727, + "learning_rate": 9.214388791127666e-05, + "loss": 0.0257, + "step": 13310 + }, + { + "action_loss": 0.010378026403486729, + "epoch": 11.969424460431654, + "step": 13310 + }, + { + "epoch": 11.969424460431654, + "step": 13310, + "torque_loss": 0.15950961410999298 + }, + { + "epoch": 11.969424460431654, + "force_loss": 0.011539414525032043, + "step": 13310 + }, + { + "epoch": 11.97841726618705, + "grad_norm": 0.40262076258659363, + "learning_rate": 9.212905250225814e-05, + "loss": 0.0253, + "step": 13320 + }, + { + "action_loss": 0.0076032900251448154, + "epoch": 11.97841726618705, + "step": 13320 + }, + { + "epoch": 11.97841726618705, + "step": 13320, + "torque_loss": 0.18420910835266113 + }, + { + "epoch": 11.97841726618705, + "force_loss": 0.0048744529485702515, + "step": 13320 + }, + { + "epoch": 11.987410071942445, + "grad_norm": 0.660062313079834, + "learning_rate": 9.211420429554583e-05, + "loss": 0.0266, + "step": 13330 + }, + { + "action_loss": 0.004844684153795242, + "epoch": 11.987410071942445, + "step": 13330 + }, + { + "epoch": 11.987410071942445, + "step": 13330, + "torque_loss": 0.1788283735513687 + }, + { + "epoch": 11.987410071942445, + "force_loss": 0.006775469984859228, + "step": 13330 + }, + { + "epoch": 11.996402877697841, + "grad_norm": 1.07358717918396, + "learning_rate": 9.209934329565022e-05, + "loss": 0.0268, + "step": 13340 + }, + { + "action_loss": 0.0032872071024030447, + "epoch": 11.996402877697841, + "step": 13340 + }, + { + "epoch": 11.996402877697841, + "step": 13340, + "torque_loss": 0.133455291390419 + }, + { + "epoch": 11.996402877697841, + "force_loss": 0.004084920976310968, + "step": 13340 + }, + { + "epoch": 12.005395683453237, + "grad_norm": 0.6031844615936279, + "learning_rate": 9.208446950708568e-05, + "loss": 0.0223, + "step": 13350 + }, + { + "action_loss": 0.016608580946922302, + "epoch": 12.005395683453237, + "step": 13350 + }, + { + "epoch": 12.005395683453237, + "step": 13350, + "torque_loss": 0.19303618371486664 + }, + { + "epoch": 12.005395683453237, + "force_loss": 0.021011648699641228, + "step": 13350 + }, + { + "epoch": 12.014388489208633, + "grad_norm": 0.5078012943267822, + "learning_rate": 9.20695829343705e-05, + "loss": 0.0279, + "step": 13360 + }, + { + "action_loss": 0.004569401498883963, + "epoch": 12.014388489208633, + "step": 13360 + }, + { + "epoch": 12.014388489208633, + "step": 13360, + "torque_loss": 0.13210737705230713 + }, + { + "epoch": 12.014388489208633, + "force_loss": 0.0033373974729329348, + "step": 13360 + }, + { + "epoch": 12.023381294964029, + "grad_norm": 0.5666680335998535, + "learning_rate": 9.205468358202678e-05, + "loss": 0.0226, + "step": 13370 + }, + { + "action_loss": 0.008932110853493214, + "epoch": 12.023381294964029, + "step": 13370 + }, + { + "epoch": 12.023381294964029, + "step": 13370, + "torque_loss": 0.13694554567337036 + }, + { + "epoch": 12.023381294964029, + "force_loss": 0.00886202696710825, + "step": 13370 + }, + { + "epoch": 12.032374100719425, + "grad_norm": 0.5579889416694641, + "learning_rate": 9.203977145458059e-05, + "loss": 0.0286, + "step": 13380 + }, + { + "action_loss": 0.0067925299517810345, + "epoch": 12.032374100719425, + "step": 13380 + }, + { + "epoch": 12.032374100719425, + "step": 13380, + "torque_loss": 0.2041359692811966 + }, + { + "epoch": 12.032374100719425, + "force_loss": 0.008642517030239105, + "step": 13380 + }, + { + "epoch": 12.04136690647482, + "grad_norm": 0.5864238142967224, + "learning_rate": 9.202484655656182e-05, + "loss": 0.0259, + "step": 13390 + }, + { + "action_loss": 0.013879512436687946, + "epoch": 12.04136690647482, + "step": 13390 + }, + { + "epoch": 12.04136690647482, + "step": 13390, + "torque_loss": 0.16266094148159027 + }, + { + "epoch": 12.04136690647482, + "force_loss": 0.021635999903082848, + "step": 13390 + }, + { + "epoch": 12.050359712230216, + "grad_norm": 0.5106484889984131, + "learning_rate": 9.200990889250427e-05, + "loss": 0.0275, + "step": 13400 + }, + { + "action_loss": 0.005630366504192352, + "epoch": 12.050359712230216, + "step": 13400 + }, + { + "epoch": 12.050359712230216, + "step": 13400, + "torque_loss": 0.1035974845290184 + }, + { + "epoch": 12.050359712230216, + "force_loss": 0.008144255727529526, + "step": 13400 + }, + { + "epoch": 12.059352517985612, + "grad_norm": 0.3532458245754242, + "learning_rate": 9.19949584669456e-05, + "loss": 0.0226, + "step": 13410 + }, + { + "action_loss": 0.004766761790961027, + "epoch": 12.059352517985612, + "step": 13410 + }, + { + "epoch": 12.059352517985612, + "step": 13410, + "torque_loss": 0.152730792760849 + }, + { + "epoch": 12.059352517985612, + "force_loss": 0.0050053163431584835, + "step": 13410 + }, + { + "epoch": 12.068345323741006, + "grad_norm": 0.5839273929595947, + "learning_rate": 9.197999528442738e-05, + "loss": 0.0258, + "step": 13420 + }, + { + "action_loss": 0.006152821704745293, + "epoch": 12.068345323741006, + "step": 13420 + }, + { + "epoch": 12.068345323741006, + "step": 13420, + "torque_loss": 0.1193094328045845 + }, + { + "epoch": 12.068345323741006, + "force_loss": 0.005353698972612619, + "step": 13420 + }, + { + "epoch": 12.077338129496402, + "grad_norm": 0.278118759393692, + "learning_rate": 9.196501934949499e-05, + "loss": 0.0236, + "step": 13430 + }, + { + "action_loss": 0.0039503020234405994, + "epoch": 12.077338129496402, + "step": 13430 + }, + { + "epoch": 12.077338129496402, + "step": 13430, + "torque_loss": 0.0906924232840538 + }, + { + "epoch": 12.077338129496402, + "force_loss": 0.005831554066389799, + "step": 13430 + }, + { + "epoch": 12.086330935251798, + "grad_norm": 0.8984665870666504, + "learning_rate": 9.195003066669776e-05, + "loss": 0.0248, + "step": 13440 + }, + { + "action_loss": 0.017666278406977654, + "epoch": 12.086330935251798, + "step": 13440 + }, + { + "epoch": 12.086330935251798, + "step": 13440, + "torque_loss": 0.15089526772499084 + }, + { + "epoch": 12.086330935251798, + "force_loss": 0.01574471779167652, + "step": 13440 + }, + { + "epoch": 12.095323741007194, + "grad_norm": 0.6333910822868347, + "learning_rate": 9.193502924058884e-05, + "loss": 0.0237, + "step": 13450 + }, + { + "action_loss": 0.0071053714491426945, + "epoch": 12.095323741007194, + "step": 13450 + }, + { + "epoch": 12.095323741007194, + "step": 13450, + "torque_loss": 0.1242046132683754 + }, + { + "epoch": 12.095323741007194, + "force_loss": 0.008712325245141983, + "step": 13450 + }, + { + "epoch": 12.10431654676259, + "grad_norm": 0.6001847982406616, + "learning_rate": 9.192001507572526e-05, + "loss": 0.0267, + "step": 13460 + }, + { + "action_loss": 0.011919531039893627, + "epoch": 12.10431654676259, + "step": 13460 + }, + { + "epoch": 12.10431654676259, + "step": 13460, + "torque_loss": 0.1875322312116623 + }, + { + "epoch": 12.10431654676259, + "force_loss": 0.011915062554180622, + "step": 13460 + }, + { + "epoch": 12.113309352517986, + "grad_norm": 0.6838002800941467, + "learning_rate": 9.190498817666793e-05, + "loss": 0.0316, + "step": 13470 + }, + { + "action_loss": 0.00992722436785698, + "epoch": 12.113309352517986, + "step": 13470 + }, + { + "epoch": 12.113309352517986, + "step": 13470, + "torque_loss": 0.15418122708797455 + }, + { + "epoch": 12.113309352517986, + "force_loss": 0.012280096299946308, + "step": 13470 + }, + { + "epoch": 12.122302158273381, + "grad_norm": 0.5721536874771118, + "learning_rate": 9.188994854798163e-05, + "loss": 0.0245, + "step": 13480 + }, + { + "action_loss": 0.007268022745847702, + "epoch": 12.122302158273381, + "step": 13480 + }, + { + "epoch": 12.122302158273381, + "step": 13480, + "torque_loss": 0.11003028601408005 + }, + { + "epoch": 12.122302158273381, + "force_loss": 0.007329506333917379, + "step": 13480 + }, + { + "epoch": 12.131294964028777, + "grad_norm": 0.4797108769416809, + "learning_rate": 9.187489619423499e-05, + "loss": 0.0237, + "step": 13490 + }, + { + "action_loss": 0.004335633013397455, + "epoch": 12.131294964028777, + "step": 13490 + }, + { + "epoch": 12.131294964028777, + "step": 13490, + "torque_loss": 0.19956548511981964 + }, + { + "epoch": 12.131294964028777, + "force_loss": 0.0036118945572525263, + "step": 13490 + }, + { + "epoch": 12.140287769784173, + "grad_norm": 0.4975634813308716, + "learning_rate": 9.185983112000056e-05, + "loss": 0.0232, + "step": 13500 + }, + { + "action_loss": 0.006885308772325516, + "epoch": 12.140287769784173, + "step": 13500 + }, + { + "epoch": 12.140287769784173, + "step": 13500, + "torque_loss": 0.10912331193685532 + }, + { + "epoch": 12.140287769784173, + "force_loss": 0.0068674408830702305, + "step": 13500 + }, + { + "epoch": 12.149280575539569, + "grad_norm": 0.6214430928230286, + "learning_rate": 9.184475332985464e-05, + "loss": 0.0215, + "step": 13510 + }, + { + "action_loss": 0.004478698596358299, + "epoch": 12.149280575539569, + "step": 13510 + }, + { + "epoch": 12.149280575539569, + "step": 13510, + "torque_loss": 0.11821621656417847 + }, + { + "epoch": 12.149280575539569, + "force_loss": 0.004517280030995607, + "step": 13510 + }, + { + "epoch": 12.158273381294965, + "grad_norm": 0.22256101667881012, + "learning_rate": 9.182966282837754e-05, + "loss": 0.0248, + "step": 13520 + }, + { + "action_loss": 0.0061637721955776215, + "epoch": 12.158273381294965, + "step": 13520 + }, + { + "epoch": 12.158273381294965, + "step": 13520, + "torque_loss": 0.1224641278386116 + }, + { + "epoch": 12.158273381294965, + "force_loss": 0.006317870691418648, + "step": 13520 + }, + { + "epoch": 12.167266187050359, + "grad_norm": 0.23535418510437012, + "learning_rate": 9.18145596201533e-05, + "loss": 0.0206, + "step": 13530 + }, + { + "action_loss": 0.015792125836014748, + "epoch": 12.167266187050359, + "step": 13530 + }, + { + "epoch": 12.167266187050359, + "step": 13530, + "torque_loss": 0.12630845606327057 + }, + { + "epoch": 12.167266187050359, + "force_loss": 0.00916248094290495, + "step": 13530 + }, + { + "epoch": 12.176258992805755, + "grad_norm": 0.5914263725280762, + "learning_rate": 9.179944370976991e-05, + "loss": 0.0242, + "step": 13540 + }, + { + "action_loss": 0.010050378739833832, + "epoch": 12.176258992805755, + "step": 13540 + }, + { + "epoch": 12.176258992805755, + "step": 13540, + "torque_loss": 0.12712135910987854 + }, + { + "epoch": 12.176258992805755, + "force_loss": 0.006975242402404547, + "step": 13540 + }, + { + "epoch": 12.18525179856115, + "grad_norm": 0.5053604245185852, + "learning_rate": 9.178431510181918e-05, + "loss": 0.0208, + "step": 13550 + }, + { + "action_loss": 0.00847268383949995, + "epoch": 12.18525179856115, + "step": 13550 + }, + { + "epoch": 12.18525179856115, + "step": 13550, + "torque_loss": 0.06895231455564499 + }, + { + "epoch": 12.18525179856115, + "force_loss": 0.005697391927242279, + "step": 13550 + }, + { + "epoch": 12.194244604316546, + "grad_norm": 0.5936676859855652, + "learning_rate": 9.176917380089675e-05, + "loss": 0.0269, + "step": 13560 + }, + { + "action_loss": 0.007517187390476465, + "epoch": 12.194244604316546, + "step": 13560 + }, + { + "epoch": 12.194244604316546, + "step": 13560, + "torque_loss": 0.12213506549596786 + }, + { + "epoch": 12.194244604316546, + "force_loss": 0.0050654965452849865, + "step": 13560 + }, + { + "epoch": 12.203237410071942, + "grad_norm": 0.5953434109687805, + "learning_rate": 9.175401981160219e-05, + "loss": 0.0277, + "step": 13570 + }, + { + "action_loss": 0.006305044982582331, + "epoch": 12.203237410071942, + "step": 13570 + }, + { + "epoch": 12.203237410071942, + "step": 13570, + "torque_loss": 0.10977330058813095 + }, + { + "epoch": 12.203237410071942, + "force_loss": 0.005491024348884821, + "step": 13570 + }, + { + "epoch": 12.212230215827338, + "grad_norm": 0.40034228563308716, + "learning_rate": 9.173885313853885e-05, + "loss": 0.0241, + "step": 13580 + }, + { + "action_loss": 0.006344802677631378, + "epoch": 12.212230215827338, + "step": 13580 + }, + { + "epoch": 12.212230215827338, + "step": 13580, + "torque_loss": 0.149864062666893 + }, + { + "epoch": 12.212230215827338, + "force_loss": 0.005465306341648102, + "step": 13580 + }, + { + "epoch": 12.221223021582734, + "grad_norm": 0.60642409324646, + "learning_rate": 9.172367378631398e-05, + "loss": 0.0316, + "step": 13590 + }, + { + "action_loss": 0.018529774621129036, + "epoch": 12.221223021582734, + "step": 13590 + }, + { + "epoch": 12.221223021582734, + "step": 13590, + "torque_loss": 0.14816080033779144 + }, + { + "epoch": 12.221223021582734, + "force_loss": 0.014012637548148632, + "step": 13590 + }, + { + "epoch": 12.23021582733813, + "grad_norm": 0.6717132925987244, + "learning_rate": 9.170848175953866e-05, + "loss": 0.0335, + "step": 13600 + }, + { + "action_loss": 0.005674402695149183, + "epoch": 12.23021582733813, + "step": 13600 + }, + { + "epoch": 12.23021582733813, + "step": 13600, + "torque_loss": 0.10468759387731552 + }, + { + "epoch": 12.23021582733813, + "force_loss": 0.00724840210750699, + "step": 13600 + }, + { + "epoch": 12.239208633093526, + "grad_norm": 0.33241763710975647, + "learning_rate": 9.169327706282784e-05, + "loss": 0.0262, + "step": 13610 + }, + { + "action_loss": 0.0071988957934081554, + "epoch": 12.239208633093526, + "step": 13610 + }, + { + "epoch": 12.239208633093526, + "step": 13610, + "torque_loss": 0.11864650249481201 + }, + { + "epoch": 12.239208633093526, + "force_loss": 0.009233271703124046, + "step": 13610 + }, + { + "epoch": 12.248201438848922, + "grad_norm": 0.5763484239578247, + "learning_rate": 9.167805970080029e-05, + "loss": 0.0234, + "step": 13620 + }, + { + "action_loss": 0.01656474359333515, + "epoch": 12.248201438848922, + "step": 13620 + }, + { + "epoch": 12.248201438848922, + "step": 13620, + "torque_loss": 0.12728768587112427 + }, + { + "epoch": 12.248201438848922, + "force_loss": 0.020039258524775505, + "step": 13620 + }, + { + "epoch": 12.257194244604317, + "grad_norm": 0.38775160908699036, + "learning_rate": 9.166282967807864e-05, + "loss": 0.0251, + "step": 13630 + }, + { + "action_loss": 0.009864387102425098, + "epoch": 12.257194244604317, + "step": 13630 + }, + { + "epoch": 12.257194244604317, + "step": 13630, + "torque_loss": 0.09635195881128311 + }, + { + "epoch": 12.257194244604317, + "force_loss": 0.00766908610239625, + "step": 13630 + }, + { + "epoch": 12.266187050359711, + "grad_norm": 0.47758951783180237, + "learning_rate": 9.16475869992894e-05, + "loss": 0.0214, + "step": 13640 + }, + { + "action_loss": 0.009075481444597244, + "epoch": 12.266187050359711, + "step": 13640 + }, + { + "epoch": 12.266187050359711, + "step": 13640, + "torque_loss": 0.12635932862758636 + }, + { + "epoch": 12.266187050359711, + "force_loss": 0.006310591474175453, + "step": 13640 + }, + { + "epoch": 12.275179856115107, + "grad_norm": 0.5335314273834229, + "learning_rate": 9.163233166906284e-05, + "loss": 0.022, + "step": 13650 + }, + { + "action_loss": 0.00784197449684143, + "epoch": 12.275179856115107, + "step": 13650 + }, + { + "epoch": 12.275179856115107, + "step": 13650, + "torque_loss": 0.1303221732378006 + }, + { + "epoch": 12.275179856115107, + "force_loss": 0.005011549219489098, + "step": 13650 + }, + { + "epoch": 12.284172661870503, + "grad_norm": 0.5810086131095886, + "learning_rate": 9.161706369203317e-05, + "loss": 0.0253, + "step": 13660 + }, + { + "action_loss": 0.004765343386679888, + "epoch": 12.284172661870503, + "step": 13660 + }, + { + "epoch": 12.284172661870503, + "step": 13660, + "torque_loss": 0.0924394354224205 + }, + { + "epoch": 12.284172661870503, + "force_loss": 0.006362430285662413, + "step": 13660 + }, + { + "epoch": 12.293165467625899, + "grad_norm": 0.6272698640823364, + "learning_rate": 9.16017830728384e-05, + "loss": 0.022, + "step": 13670 + }, + { + "action_loss": 0.0043923030607402325, + "epoch": 12.293165467625899, + "step": 13670 + }, + { + "epoch": 12.293165467625899, + "step": 13670, + "torque_loss": 0.13075794279575348 + }, + { + "epoch": 12.293165467625899, + "force_loss": 0.003732785815373063, + "step": 13670 + }, + { + "epoch": 12.302158273381295, + "grad_norm": 0.7331401705741882, + "learning_rate": 9.158648981612035e-05, + "loss": 0.0252, + "step": 13680 + }, + { + "action_loss": 0.008111704140901566, + "epoch": 12.302158273381295, + "step": 13680 + }, + { + "epoch": 12.302158273381295, + "step": 13680, + "torque_loss": 0.12140810489654541 + }, + { + "epoch": 12.302158273381295, + "force_loss": 0.008820204995572567, + "step": 13680 + }, + { + "epoch": 12.31115107913669, + "grad_norm": 0.74886554479599, + "learning_rate": 9.157118392652472e-05, + "loss": 0.0241, + "step": 13690 + }, + { + "action_loss": 0.01673649065196514, + "epoch": 12.31115107913669, + "step": 13690 + }, + { + "epoch": 12.31115107913669, + "step": 13690, + "torque_loss": 0.14195752143859863 + }, + { + "epoch": 12.31115107913669, + "force_loss": 0.015183858573436737, + "step": 13690 + }, + { + "epoch": 12.320143884892087, + "grad_norm": 0.3879823088645935, + "learning_rate": 9.155586540870104e-05, + "loss": 0.0218, + "step": 13700 + }, + { + "action_loss": 0.009805773384869099, + "epoch": 12.320143884892087, + "step": 13700 + }, + { + "epoch": 12.320143884892087, + "step": 13700, + "torque_loss": 0.16158168017864227 + }, + { + "epoch": 12.320143884892087, + "force_loss": 0.007298277225345373, + "step": 13700 + }, + { + "epoch": 12.329136690647482, + "grad_norm": 0.4145404100418091, + "learning_rate": 9.154053426730267e-05, + "loss": 0.0254, + "step": 13710 + }, + { + "action_loss": 0.00787991564720869, + "epoch": 12.329136690647482, + "step": 13710 + }, + { + "epoch": 12.329136690647482, + "step": 13710, + "torque_loss": 0.12812145054340363 + }, + { + "epoch": 12.329136690647482, + "force_loss": 0.007166024763137102, + "step": 13710 + }, + { + "epoch": 12.338129496402878, + "grad_norm": 0.2867864966392517, + "learning_rate": 9.15251905069868e-05, + "loss": 0.0289, + "step": 13720 + }, + { + "action_loss": 0.007813703268766403, + "epoch": 12.338129496402878, + "step": 13720 + }, + { + "epoch": 12.338129496402878, + "step": 13720, + "torque_loss": 0.17746762931346893 + }, + { + "epoch": 12.338129496402878, + "force_loss": 0.011906628496944904, + "step": 13720 + }, + { + "epoch": 12.347122302158274, + "grad_norm": 0.33069443702697754, + "learning_rate": 9.150983413241446e-05, + "loss": 0.025, + "step": 13730 + }, + { + "action_loss": 0.011023983359336853, + "epoch": 12.347122302158274, + "step": 13730 + }, + { + "epoch": 12.347122302158274, + "step": 13730, + "torque_loss": 0.15857093036174774 + }, + { + "epoch": 12.347122302158274, + "force_loss": 0.009825992397964, + "step": 13730 + }, + { + "epoch": 12.35611510791367, + "grad_norm": 0.4829291105270386, + "learning_rate": 9.149446514825051e-05, + "loss": 0.0299, + "step": 13740 + }, + { + "action_loss": 0.005325306672602892, + "epoch": 12.35611510791367, + "step": 13740 + }, + { + "epoch": 12.35611510791367, + "step": 13740, + "torque_loss": 0.11463263630867004 + }, + { + "epoch": 12.35611510791367, + "force_loss": 0.005153453443199396, + "step": 13740 + }, + { + "epoch": 12.365107913669064, + "grad_norm": 0.6793624758720398, + "learning_rate": 9.147908355916365e-05, + "loss": 0.0278, + "step": 13750 + }, + { + "action_loss": 0.036259062588214874, + "epoch": 12.365107913669064, + "step": 13750 + }, + { + "epoch": 12.365107913669064, + "step": 13750, + "torque_loss": 0.20039959251880646 + }, + { + "epoch": 12.365107913669064, + "force_loss": 0.04130266234278679, + "step": 13750 + }, + { + "epoch": 12.37410071942446, + "grad_norm": 0.26231881976127625, + "learning_rate": 9.146368936982642e-05, + "loss": 0.0306, + "step": 13760 + }, + { + "action_loss": 0.013152661733329296, + "epoch": 12.37410071942446, + "step": 13760 + }, + { + "epoch": 12.37410071942446, + "step": 13760, + "torque_loss": 0.15253083407878876 + }, + { + "epoch": 12.37410071942446, + "force_loss": 0.013423360884189606, + "step": 13760 + }, + { + "epoch": 12.383093525179856, + "grad_norm": 0.3501366972923279, + "learning_rate": 9.144828258491511e-05, + "loss": 0.0277, + "step": 13770 + }, + { + "action_loss": 0.014741982333362103, + "epoch": 12.383093525179856, + "step": 13770 + }, + { + "epoch": 12.383093525179856, + "step": 13770, + "torque_loss": 0.14439605176448822 + }, + { + "epoch": 12.383093525179856, + "force_loss": 0.015146692283451557, + "step": 13770 + }, + { + "epoch": 12.392086330935252, + "grad_norm": 0.5469654202461243, + "learning_rate": 9.143286320910996e-05, + "loss": 0.0295, + "step": 13780 + }, + { + "action_loss": 0.011230617761611938, + "epoch": 12.392086330935252, + "step": 13780 + }, + { + "epoch": 12.392086330935252, + "step": 13780, + "torque_loss": 0.14415091276168823 + }, + { + "epoch": 12.392086330935252, + "force_loss": 0.007145017385482788, + "step": 13780 + }, + { + "epoch": 12.401079136690647, + "grad_norm": 0.5919626355171204, + "learning_rate": 9.141743124709491e-05, + "loss": 0.0258, + "step": 13790 + }, + { + "action_loss": 0.011638651601970196, + "epoch": 12.401079136690647, + "step": 13790 + }, + { + "epoch": 12.401079136690647, + "step": 13790, + "torque_loss": 0.17612363398075104 + }, + { + "epoch": 12.401079136690647, + "force_loss": 0.007354178931564093, + "step": 13790 + }, + { + "epoch": 12.410071942446043, + "grad_norm": 0.725797176361084, + "learning_rate": 9.140198670355784e-05, + "loss": 0.0252, + "step": 13800 + }, + { + "action_loss": 0.016116155311465263, + "epoch": 12.410071942446043, + "step": 13800 + }, + { + "epoch": 12.410071942446043, + "step": 13800, + "torque_loss": 0.1170927956700325 + }, + { + "epoch": 12.410071942446043, + "force_loss": 0.011015300638973713, + "step": 13800 + }, + { + "epoch": 12.41906474820144, + "grad_norm": 0.7040033936500549, + "learning_rate": 9.138652958319034e-05, + "loss": 0.0282, + "step": 13810 + }, + { + "action_loss": 0.0031529993284493685, + "epoch": 12.41906474820144, + "step": 13810 + }, + { + "epoch": 12.41906474820144, + "step": 13810, + "torque_loss": 0.1225736141204834 + }, + { + "epoch": 12.41906474820144, + "force_loss": 0.005469519644975662, + "step": 13810 + }, + { + "epoch": 12.428057553956835, + "grad_norm": 0.5092853307723999, + "learning_rate": 9.137105989068791e-05, + "loss": 0.0213, + "step": 13820 + }, + { + "action_loss": 0.01232543308287859, + "epoch": 12.428057553956835, + "step": 13820 + }, + { + "epoch": 12.428057553956835, + "step": 13820, + "torque_loss": 0.14313648641109467 + }, + { + "epoch": 12.428057553956835, + "force_loss": 0.02127975784242153, + "step": 13820 + }, + { + "epoch": 12.43705035971223, + "grad_norm": 0.4835895895957947, + "learning_rate": 9.135557763074983e-05, + "loss": 0.0327, + "step": 13830 + }, + { + "action_loss": 0.006219408009201288, + "epoch": 12.43705035971223, + "step": 13830 + }, + { + "epoch": 12.43705035971223, + "step": 13830, + "torque_loss": 0.1067761704325676 + }, + { + "epoch": 12.43705035971223, + "force_loss": 0.006293964106589556, + "step": 13830 + }, + { + "epoch": 12.446043165467627, + "grad_norm": 0.9791175127029419, + "learning_rate": 9.13400828080792e-05, + "loss": 0.0274, + "step": 13840 + }, + { + "action_loss": 0.0038767734076827765, + "epoch": 12.446043165467627, + "step": 13840 + }, + { + "epoch": 12.446043165467627, + "step": 13840, + "torque_loss": 0.09044162184000015 + }, + { + "epoch": 12.446043165467627, + "force_loss": 0.00449793878942728, + "step": 13840 + }, + { + "epoch": 12.45503597122302, + "grad_norm": 0.5134682655334473, + "learning_rate": 9.132457542738292e-05, + "loss": 0.0254, + "step": 13850 + }, + { + "action_loss": 0.005738839972764254, + "epoch": 12.45503597122302, + "step": 13850 + }, + { + "epoch": 12.45503597122302, + "step": 13850, + "torque_loss": 0.14481998980045319 + }, + { + "epoch": 12.45503597122302, + "force_loss": 0.005820055957883596, + "step": 13850 + }, + { + "epoch": 12.464028776978417, + "grad_norm": 0.29155710339546204, + "learning_rate": 9.130905549337174e-05, + "loss": 0.0216, + "step": 13860 + }, + { + "action_loss": 0.01345804426819086, + "epoch": 12.464028776978417, + "step": 13860 + }, + { + "epoch": 12.464028776978417, + "step": 13860, + "torque_loss": 0.14391465485095978 + }, + { + "epoch": 12.464028776978417, + "force_loss": 0.007132288068532944, + "step": 13860 + }, + { + "epoch": 12.473021582733812, + "grad_norm": 0.3183196783065796, + "learning_rate": 9.129352301076021e-05, + "loss": 0.0248, + "step": 13870 + }, + { + "action_loss": 0.009230182506144047, + "epoch": 12.473021582733812, + "step": 13870 + }, + { + "epoch": 12.473021582733812, + "step": 13870, + "torque_loss": 0.14582642912864685 + }, + { + "epoch": 12.473021582733812, + "force_loss": 0.008144509047269821, + "step": 13870 + }, + { + "epoch": 12.482014388489208, + "grad_norm": 0.6510902047157288, + "learning_rate": 9.127797798426668e-05, + "loss": 0.0247, + "step": 13880 + }, + { + "action_loss": 0.024639353156089783, + "epoch": 12.482014388489208, + "step": 13880 + }, + { + "epoch": 12.482014388489208, + "step": 13880, + "torque_loss": 0.16467545926570892 + }, + { + "epoch": 12.482014388489208, + "force_loss": 0.03665347024798393, + "step": 13880 + }, + { + "epoch": 12.491007194244604, + "grad_norm": 0.42777055501937866, + "learning_rate": 9.126242041861333e-05, + "loss": 0.0293, + "step": 13890 + }, + { + "action_loss": 0.01301325112581253, + "epoch": 12.491007194244604, + "step": 13890 + }, + { + "epoch": 12.491007194244604, + "step": 13890, + "torque_loss": 0.14655320346355438 + }, + { + "epoch": 12.491007194244604, + "force_loss": 0.008423476479947567, + "step": 13890 + }, + { + "epoch": 12.5, + "grad_norm": 0.42942848801612854, + "learning_rate": 9.124685031852611e-05, + "loss": 0.0279, + "step": 13900 + }, + { + "action_loss": 0.0063703334890306, + "epoch": 12.5, + "step": 13900 + }, + { + "epoch": 12.5, + "step": 13900, + "torque_loss": 0.1505795270204544 + }, + { + "epoch": 12.5, + "force_loss": 0.007232634350657463, + "step": 13900 + }, + { + "epoch": 12.508992805755396, + "grad_norm": 0.6118781566619873, + "learning_rate": 9.123126768873482e-05, + "loss": 0.0264, + "step": 13910 + }, + { + "action_loss": 0.009435432031750679, + "epoch": 12.508992805755396, + "step": 13910 + }, + { + "epoch": 12.508992805755396, + "step": 13910, + "torque_loss": 0.12627960741519928 + }, + { + "epoch": 12.508992805755396, + "force_loss": 0.00904346164315939, + "step": 13910 + }, + { + "epoch": 12.517985611510792, + "grad_norm": 0.5503523945808411, + "learning_rate": 9.121567253397308e-05, + "loss": 0.0279, + "step": 13920 + }, + { + "action_loss": 0.004087453242391348, + "epoch": 12.517985611510792, + "step": 13920 + }, + { + "epoch": 12.517985611510792, + "step": 13920, + "torque_loss": 0.10465057939291 + }, + { + "epoch": 12.517985611510792, + "force_loss": 0.007370705250650644, + "step": 13920 + }, + { + "epoch": 12.526978417266188, + "grad_norm": 0.5884429812431335, + "learning_rate": 9.120006485897824e-05, + "loss": 0.0223, + "step": 13930 + }, + { + "action_loss": 0.004481791984289885, + "epoch": 12.526978417266188, + "step": 13930 + }, + { + "epoch": 12.526978417266188, + "step": 13930, + "torque_loss": 0.10764376074075699 + }, + { + "epoch": 12.526978417266188, + "force_loss": 0.00729639595374465, + "step": 13930 + }, + { + "epoch": 12.535971223021583, + "grad_norm": 0.5687770247459412, + "learning_rate": 9.118444466849152e-05, + "loss": 0.0309, + "step": 13940 + }, + { + "action_loss": 0.01760326884686947, + "epoch": 12.535971223021583, + "step": 13940 + }, + { + "epoch": 12.535971223021583, + "step": 13940, + "torque_loss": 0.12070111185312271 + }, + { + "epoch": 12.535971223021583, + "force_loss": 0.021414687857031822, + "step": 13940 + }, + { + "epoch": 12.54496402877698, + "grad_norm": 0.40998855233192444, + "learning_rate": 9.116881196725793e-05, + "loss": 0.0249, + "step": 13950 + }, + { + "action_loss": 0.005727668758481741, + "epoch": 12.54496402877698, + "step": 13950 + }, + { + "epoch": 12.54496402877698, + "step": 13950, + "torque_loss": 0.10952623933553696 + }, + { + "epoch": 12.54496402877698, + "force_loss": 0.005893405992537737, + "step": 13950 + }, + { + "epoch": 12.553956834532373, + "grad_norm": 0.41071924567222595, + "learning_rate": 9.115316676002627e-05, + "loss": 0.0251, + "step": 13960 + }, + { + "action_loss": 0.015424457378685474, + "epoch": 12.553956834532373, + "step": 13960 + }, + { + "epoch": 12.553956834532373, + "step": 13960, + "torque_loss": 0.15265995264053345 + }, + { + "epoch": 12.553956834532373, + "force_loss": 0.012465615756809711, + "step": 13960 + }, + { + "epoch": 12.56294964028777, + "grad_norm": 0.40640953183174133, + "learning_rate": 9.113750905154911e-05, + "loss": 0.0262, + "step": 13970 + }, + { + "action_loss": 0.007475083228200674, + "epoch": 12.56294964028777, + "step": 13970 + }, + { + "epoch": 12.56294964028777, + "step": 13970, + "torque_loss": 0.14392529428005219 + }, + { + "epoch": 12.56294964028777, + "force_loss": 0.013792566955089569, + "step": 13970 + }, + { + "epoch": 12.571942446043165, + "grad_norm": 0.49539658427238464, + "learning_rate": 9.112183884658289e-05, + "loss": 0.0299, + "step": 13980 + }, + { + "action_loss": 0.007323987782001495, + "epoch": 12.571942446043165, + "step": 13980 + }, + { + "epoch": 12.571942446043165, + "step": 13980, + "torque_loss": 0.1938466578722 + }, + { + "epoch": 12.571942446043165, + "force_loss": 0.008375290781259537, + "step": 13980 + }, + { + "epoch": 12.58093525179856, + "grad_norm": 0.466087281703949, + "learning_rate": 9.11061561498878e-05, + "loss": 0.021, + "step": 13990 + }, + { + "action_loss": 0.02233838103711605, + "epoch": 12.58093525179856, + "step": 13990 + }, + { + "epoch": 12.58093525179856, + "step": 13990, + "torque_loss": 0.15092484652996063 + }, + { + "epoch": 12.58093525179856, + "force_loss": 0.018663665279746056, + "step": 13990 + }, + { + "epoch": 12.589928057553957, + "grad_norm": 0.7940137386322021, + "learning_rate": 9.109046096622779e-05, + "loss": 0.0282, + "step": 14000 + }, + { + "action_loss": 0.014001191593706608, + "epoch": 12.589928057553957, + "step": 14000 + }, + { + "epoch": 12.589928057553957, + "step": 14000, + "torque_loss": 0.1394633799791336 + }, + { + "epoch": 12.589928057553957, + "force_loss": 0.013311732560396194, + "step": 14000 + }, + { + "epoch": 12.598920863309353, + "grad_norm": 0.6612755656242371, + "learning_rate": 9.107475330037069e-05, + "loss": 0.0263, + "step": 14010 + }, + { + "action_loss": 0.01692165620625019, + "epoch": 12.598920863309353, + "step": 14010 + }, + { + "epoch": 12.598920863309353, + "step": 14010, + "torque_loss": 0.1343950480222702 + }, + { + "epoch": 12.598920863309353, + "force_loss": 0.010985836386680603, + "step": 14010 + }, + { + "epoch": 12.607913669064748, + "grad_norm": 0.38929954171180725, + "learning_rate": 9.105903315708806e-05, + "loss": 0.0267, + "step": 14020 + }, + { + "action_loss": 0.006525618489831686, + "epoch": 12.607913669064748, + "step": 14020 + }, + { + "epoch": 12.607913669064748, + "step": 14020, + "torque_loss": 0.1254248023033142 + }, + { + "epoch": 12.607913669064748, + "force_loss": 0.006911094766110182, + "step": 14020 + }, + { + "epoch": 12.616906474820144, + "grad_norm": 0.3836361765861511, + "learning_rate": 9.104330054115524e-05, + "loss": 0.0238, + "step": 14030 + }, + { + "action_loss": 0.006212487816810608, + "epoch": 12.616906474820144, + "step": 14030 + }, + { + "epoch": 12.616906474820144, + "step": 14030, + "torque_loss": 0.10357270389795303 + }, + { + "epoch": 12.616906474820144, + "force_loss": 0.005381166469305754, + "step": 14030 + }, + { + "epoch": 12.62589928057554, + "grad_norm": 0.547963559627533, + "learning_rate": 9.102755545735141e-05, + "loss": 0.0218, + "step": 14040 + }, + { + "action_loss": 0.006468549370765686, + "epoch": 12.62589928057554, + "step": 14040 + }, + { + "epoch": 12.62589928057554, + "step": 14040, + "torque_loss": 0.10992669314146042 + }, + { + "epoch": 12.62589928057554, + "force_loss": 0.0067933350801467896, + "step": 14040 + }, + { + "epoch": 12.634892086330936, + "grad_norm": 0.3229418992996216, + "learning_rate": 9.10117979104595e-05, + "loss": 0.0259, + "step": 14050 + }, + { + "action_loss": 0.011358804069459438, + "epoch": 12.634892086330936, + "step": 14050 + }, + { + "epoch": 12.634892086330936, + "step": 14050, + "torque_loss": 0.1362975686788559 + }, + { + "epoch": 12.634892086330936, + "force_loss": 0.013899271376430988, + "step": 14050 + }, + { + "epoch": 12.64388489208633, + "grad_norm": 0.4975723624229431, + "learning_rate": 9.099602790526624e-05, + "loss": 0.0274, + "step": 14060 + }, + { + "action_loss": 0.007108205463737249, + "epoch": 12.64388489208633, + "step": 14060 + }, + { + "epoch": 12.64388489208633, + "step": 14060, + "torque_loss": 0.13089169561862946 + }, + { + "epoch": 12.64388489208633, + "force_loss": 0.006654033903032541, + "step": 14060 + }, + { + "epoch": 12.652877697841726, + "grad_norm": 0.5616057515144348, + "learning_rate": 9.098024544656212e-05, + "loss": 0.0219, + "step": 14070 + }, + { + "action_loss": 0.005977492779493332, + "epoch": 12.652877697841726, + "step": 14070 + }, + { + "epoch": 12.652877697841726, + "step": 14070, + "torque_loss": 0.13386978209018707 + }, + { + "epoch": 12.652877697841726, + "force_loss": 0.009287810884416103, + "step": 14070 + }, + { + "epoch": 12.661870503597122, + "grad_norm": 0.6663073301315308, + "learning_rate": 9.096445053914148e-05, + "loss": 0.0336, + "step": 14080 + }, + { + "action_loss": 0.00646496145054698, + "epoch": 12.661870503597122, + "step": 14080 + }, + { + "epoch": 12.661870503597122, + "step": 14080, + "torque_loss": 0.12621334195137024 + }, + { + "epoch": 12.661870503597122, + "force_loss": 0.006149812135845423, + "step": 14080 + }, + { + "epoch": 12.670863309352518, + "grad_norm": 0.2959502637386322, + "learning_rate": 9.094864318780236e-05, + "loss": 0.0275, + "step": 14090 + }, + { + "action_loss": 0.04846276715397835, + "epoch": 12.670863309352518, + "step": 14090 + }, + { + "epoch": 12.670863309352518, + "step": 14090, + "torque_loss": 0.2021758109331131 + }, + { + "epoch": 12.670863309352518, + "force_loss": 0.0776110514998436, + "step": 14090 + }, + { + "epoch": 12.679856115107913, + "grad_norm": 0.4802953898906708, + "learning_rate": 9.093282339734663e-05, + "loss": 0.0366, + "step": 14100 + }, + { + "action_loss": 0.007498738821595907, + "epoch": 12.679856115107913, + "step": 14100 + }, + { + "epoch": 12.679856115107913, + "step": 14100, + "torque_loss": 0.12763415277004242 + }, + { + "epoch": 12.679856115107913, + "force_loss": 0.009072769433259964, + "step": 14100 + }, + { + "epoch": 12.68884892086331, + "grad_norm": 0.30895718932151794, + "learning_rate": 9.091699117257992e-05, + "loss": 0.0257, + "step": 14110 + }, + { + "action_loss": 0.005022708792239428, + "epoch": 12.68884892086331, + "step": 14110 + }, + { + "epoch": 12.68884892086331, + "step": 14110, + "torque_loss": 0.15535520017147064 + }, + { + "epoch": 12.68884892086331, + "force_loss": 0.005461633671075106, + "step": 14110 + }, + { + "epoch": 12.697841726618705, + "grad_norm": 0.46144697070121765, + "learning_rate": 9.090114651831163e-05, + "loss": 0.0247, + "step": 14120 + }, + { + "action_loss": 0.017287636175751686, + "epoch": 12.697841726618705, + "step": 14120 + }, + { + "epoch": 12.697841726618705, + "step": 14120, + "torque_loss": 0.1857975721359253 + }, + { + "epoch": 12.697841726618705, + "force_loss": 0.016247255727648735, + "step": 14120 + }, + { + "epoch": 12.706834532374101, + "grad_norm": 0.6360578536987305, + "learning_rate": 9.088528943935497e-05, + "loss": 0.0292, + "step": 14130 + }, + { + "action_loss": 0.016388682648539543, + "epoch": 12.706834532374101, + "step": 14130 + }, + { + "epoch": 12.706834532374101, + "step": 14130, + "torque_loss": 0.12045708298683167 + }, + { + "epoch": 12.706834532374101, + "force_loss": 0.012151663191616535, + "step": 14130 + }, + { + "epoch": 12.715827338129497, + "grad_norm": 0.7302191853523254, + "learning_rate": 9.086941994052689e-05, + "loss": 0.025, + "step": 14140 + }, + { + "action_loss": 0.008230630308389664, + "epoch": 12.715827338129497, + "step": 14140 + }, + { + "epoch": 12.715827338129497, + "step": 14140, + "torque_loss": 0.13785457611083984 + }, + { + "epoch": 12.715827338129497, + "force_loss": 0.007931435480713844, + "step": 14140 + }, + { + "epoch": 12.724820143884893, + "grad_norm": 0.36570969223976135, + "learning_rate": 9.085353802664813e-05, + "loss": 0.0233, + "step": 14150 + }, + { + "action_loss": 0.008267399854958057, + "epoch": 12.724820143884893, + "step": 14150 + }, + { + "epoch": 12.724820143884893, + "step": 14150, + "torque_loss": 0.1470588594675064 + }, + { + "epoch": 12.724820143884893, + "force_loss": 0.010002859868109226, + "step": 14150 + }, + { + "epoch": 12.733812949640289, + "grad_norm": 0.4242355525493622, + "learning_rate": 9.08376437025432e-05, + "loss": 0.0254, + "step": 14160 + }, + { + "action_loss": 0.006476314272731543, + "epoch": 12.733812949640289, + "step": 14160 + }, + { + "epoch": 12.733812949640289, + "step": 14160, + "torque_loss": 0.12720048427581787 + }, + { + "epoch": 12.733812949640289, + "force_loss": 0.011430182494223118, + "step": 14160 + }, + { + "epoch": 12.742805755395683, + "grad_norm": 0.4574201703071594, + "learning_rate": 9.082173697304035e-05, + "loss": 0.0263, + "step": 14170 + }, + { + "action_loss": 0.015163346193730831, + "epoch": 12.742805755395683, + "step": 14170 + }, + { + "epoch": 12.742805755395683, + "step": 14170, + "torque_loss": 0.15634046494960785 + }, + { + "epoch": 12.742805755395683, + "force_loss": 0.015708336606621742, + "step": 14170 + }, + { + "epoch": 12.751798561151078, + "grad_norm": 0.22592872381210327, + "learning_rate": 9.080581784297166e-05, + "loss": 0.0212, + "step": 14180 + }, + { + "action_loss": 0.016290761530399323, + "epoch": 12.751798561151078, + "step": 14180 + }, + { + "epoch": 12.751798561151078, + "step": 14180, + "torque_loss": 0.13783107697963715 + }, + { + "epoch": 12.751798561151078, + "force_loss": 0.0128251938149333, + "step": 14180 + }, + { + "epoch": 12.760791366906474, + "grad_norm": 0.6080386638641357, + "learning_rate": 9.078988631717291e-05, + "loss": 0.0247, + "step": 14190 + }, + { + "action_loss": 0.010328296571969986, + "epoch": 12.760791366906474, + "step": 14190 + }, + { + "epoch": 12.760791366906474, + "step": 14190, + "torque_loss": 0.12926451861858368 + }, + { + "epoch": 12.760791366906474, + "force_loss": 0.013141573406755924, + "step": 14190 + }, + { + "epoch": 12.76978417266187, + "grad_norm": 0.6275120377540588, + "learning_rate": 9.077394240048369e-05, + "loss": 0.0245, + "step": 14200 + }, + { + "action_loss": 0.004028057213872671, + "epoch": 12.76978417266187, + "step": 14200 + }, + { + "epoch": 12.76978417266187, + "step": 14200, + "torque_loss": 0.147388756275177 + }, + { + "epoch": 12.76978417266187, + "force_loss": 0.005528328474611044, + "step": 14200 + }, + { + "epoch": 12.778776978417266, + "grad_norm": 0.38772672414779663, + "learning_rate": 9.075798609774736e-05, + "loss": 0.0286, + "step": 14210 + }, + { + "action_loss": 0.004324526991695166, + "epoch": 12.778776978417266, + "step": 14210 + }, + { + "epoch": 12.778776978417266, + "step": 14210, + "torque_loss": 0.13769835233688354 + }, + { + "epoch": 12.778776978417266, + "force_loss": 0.0045209601521492004, + "step": 14210 + }, + { + "epoch": 12.787769784172662, + "grad_norm": 0.5099405646324158, + "learning_rate": 9.0742017413811e-05, + "loss": 0.0232, + "step": 14220 + }, + { + "action_loss": 0.008415845222771168, + "epoch": 12.787769784172662, + "step": 14220 + }, + { + "epoch": 12.787769784172662, + "step": 14220, + "torque_loss": 0.14204971492290497 + }, + { + "epoch": 12.787769784172662, + "force_loss": 0.010749172419309616, + "step": 14220 + }, + { + "epoch": 12.796762589928058, + "grad_norm": 0.6851628422737122, + "learning_rate": 9.072603635352548e-05, + "loss": 0.0258, + "step": 14230 + }, + { + "action_loss": 0.005719442386180162, + "epoch": 12.796762589928058, + "step": 14230 + }, + { + "epoch": 12.796762589928058, + "step": 14230, + "torque_loss": 0.15508398413658142 + }, + { + "epoch": 12.796762589928058, + "force_loss": 0.006898975465446711, + "step": 14230 + }, + { + "epoch": 12.805755395683454, + "grad_norm": 0.4878329932689667, + "learning_rate": 9.071004292174541e-05, + "loss": 0.0282, + "step": 14240 + }, + { + "action_loss": 0.0068457648158073425, + "epoch": 12.805755395683454, + "step": 14240 + }, + { + "epoch": 12.805755395683454, + "step": 14240, + "torque_loss": 0.08456694334745407 + }, + { + "epoch": 12.805755395683454, + "force_loss": 0.008580955676734447, + "step": 14240 + }, + { + "epoch": 12.81474820143885, + "grad_norm": 0.9357104301452637, + "learning_rate": 9.06940371233292e-05, + "loss": 0.024, + "step": 14250 + }, + { + "action_loss": 0.0034057635348290205, + "epoch": 12.81474820143885, + "step": 14250 + }, + { + "epoch": 12.81474820143885, + "step": 14250, + "torque_loss": 0.15382985770702362 + }, + { + "epoch": 12.81474820143885, + "force_loss": 0.003341132774949074, + "step": 14250 + }, + { + "epoch": 12.823741007194245, + "grad_norm": 0.34692761301994324, + "learning_rate": 9.067801896313898e-05, + "loss": 0.0279, + "step": 14260 + }, + { + "action_loss": 0.005263553466647863, + "epoch": 12.823741007194245, + "step": 14260 + }, + { + "epoch": 12.823741007194245, + "step": 14260, + "torque_loss": 0.10076197981834412 + }, + { + "epoch": 12.823741007194245, + "force_loss": 0.008891968987882137, + "step": 14260 + }, + { + "epoch": 12.832733812949641, + "grad_norm": 0.3128192126750946, + "learning_rate": 9.066198844604064e-05, + "loss": 0.0265, + "step": 14270 + }, + { + "action_loss": 0.007821241393685341, + "epoch": 12.832733812949641, + "step": 14270 + }, + { + "epoch": 12.832733812949641, + "step": 14270, + "torque_loss": 0.12226840853691101 + }, + { + "epoch": 12.832733812949641, + "force_loss": 0.005744075868278742, + "step": 14270 + }, + { + "epoch": 12.841726618705035, + "grad_norm": 0.42730021476745605, + "learning_rate": 9.06459455769038e-05, + "loss": 0.033, + "step": 14280 + }, + { + "action_loss": 0.0123290391638875, + "epoch": 12.841726618705035, + "step": 14280 + }, + { + "epoch": 12.841726618705035, + "step": 14280, + "torque_loss": 0.2124076634645462 + }, + { + "epoch": 12.841726618705035, + "force_loss": 0.012979678809642792, + "step": 14280 + }, + { + "epoch": 12.850719424460431, + "grad_norm": 0.7423769235610962, + "learning_rate": 9.062989036060193e-05, + "loss": 0.0338, + "step": 14290 + }, + { + "action_loss": 0.013494514860212803, + "epoch": 12.850719424460431, + "step": 14290 + }, + { + "epoch": 12.850719424460431, + "step": 14290, + "torque_loss": 0.14486967027187347 + }, + { + "epoch": 12.850719424460431, + "force_loss": 0.008547775447368622, + "step": 14290 + }, + { + "epoch": 12.859712230215827, + "grad_norm": 0.26264509558677673, + "learning_rate": 9.061382280201212e-05, + "loss": 0.023, + "step": 14300 + }, + { + "action_loss": 0.03842094540596008, + "epoch": 12.859712230215827, + "step": 14300 + }, + { + "epoch": 12.859712230215827, + "step": 14300, + "torque_loss": 0.19657373428344727 + }, + { + "epoch": 12.859712230215827, + "force_loss": 0.050675153732299805, + "step": 14300 + }, + { + "epoch": 12.868705035971223, + "grad_norm": 0.5091500878334045, + "learning_rate": 9.059774290601528e-05, + "loss": 0.0309, + "step": 14310 + }, + { + "action_loss": 0.005208694841712713, + "epoch": 12.868705035971223, + "step": 14310 + }, + { + "epoch": 12.868705035971223, + "step": 14310, + "torque_loss": 0.13491962850093842 + }, + { + "epoch": 12.868705035971223, + "force_loss": 0.006779221352189779, + "step": 14310 + }, + { + "epoch": 12.877697841726619, + "grad_norm": 0.3828144967556, + "learning_rate": 9.058165067749606e-05, + "loss": 0.0247, + "step": 14320 + }, + { + "action_loss": 0.012771953828632832, + "epoch": 12.877697841726619, + "step": 14320 + }, + { + "epoch": 12.877697841726619, + "step": 14320, + "torque_loss": 0.16346320509910583 + }, + { + "epoch": 12.877697841726619, + "force_loss": 0.011284224689006805, + "step": 14320 + }, + { + "epoch": 12.886690647482014, + "grad_norm": 0.5729163885116577, + "learning_rate": 9.056554612134288e-05, + "loss": 0.029, + "step": 14330 + }, + { + "action_loss": 0.009330385364592075, + "epoch": 12.886690647482014, + "step": 14330 + }, + { + "epoch": 12.886690647482014, + "step": 14330, + "torque_loss": 0.1821797639131546 + }, + { + "epoch": 12.886690647482014, + "force_loss": 0.006371926516294479, + "step": 14330 + }, + { + "epoch": 12.89568345323741, + "grad_norm": 0.4588429927825928, + "learning_rate": 9.054942924244785e-05, + "loss": 0.0264, + "step": 14340 + }, + { + "action_loss": 0.020296970382332802, + "epoch": 12.89568345323741, + "step": 14340 + }, + { + "epoch": 12.89568345323741, + "step": 14340, + "torque_loss": 0.14829859137535095 + }, + { + "epoch": 12.89568345323741, + "force_loss": 0.025701550766825676, + "step": 14340 + }, + { + "epoch": 12.904676258992806, + "grad_norm": 0.3945634067058563, + "learning_rate": 9.053330004570686e-05, + "loss": 0.0273, + "step": 14350 + }, + { + "action_loss": 0.006152736488729715, + "epoch": 12.904676258992806, + "step": 14350 + }, + { + "epoch": 12.904676258992806, + "step": 14350, + "torque_loss": 0.18100863695144653 + }, + { + "epoch": 12.904676258992806, + "force_loss": 0.007932188920676708, + "step": 14350 + }, + { + "epoch": 12.913669064748202, + "grad_norm": 0.5920173525810242, + "learning_rate": 9.051715853601955e-05, + "loss": 0.0251, + "step": 14360 + }, + { + "action_loss": 0.013649355620145798, + "epoch": 12.913669064748202, + "step": 14360 + }, + { + "epoch": 12.913669064748202, + "step": 14360, + "torque_loss": 0.14411768317222595 + }, + { + "epoch": 12.913669064748202, + "force_loss": 0.010054810903966427, + "step": 14360 + }, + { + "epoch": 12.922661870503598, + "grad_norm": 0.36797231435775757, + "learning_rate": 9.050100471828926e-05, + "loss": 0.0306, + "step": 14370 + }, + { + "action_loss": 0.009905297309160233, + "epoch": 12.922661870503598, + "step": 14370 + }, + { + "epoch": 12.922661870503598, + "step": 14370, + "torque_loss": 0.15208782255649567 + }, + { + "epoch": 12.922661870503598, + "force_loss": 0.015537414699792862, + "step": 14370 + }, + { + "epoch": 12.931654676258994, + "grad_norm": 0.33273354172706604, + "learning_rate": 9.048483859742311e-05, + "loss": 0.0293, + "step": 14380 + }, + { + "action_loss": 0.009125870652496815, + "epoch": 12.931654676258994, + "step": 14380 + }, + { + "epoch": 12.931654676258994, + "step": 14380, + "torque_loss": 0.1439599245786667 + }, + { + "epoch": 12.931654676258994, + "force_loss": 0.011200557462871075, + "step": 14380 + }, + { + "epoch": 12.940647482014388, + "grad_norm": 0.46101492643356323, + "learning_rate": 9.046866017833193e-05, + "loss": 0.0215, + "step": 14390 + }, + { + "action_loss": 0.011300328187644482, + "epoch": 12.940647482014388, + "step": 14390 + }, + { + "epoch": 12.940647482014388, + "step": 14390, + "torque_loss": 0.13503290712833405 + }, + { + "epoch": 12.940647482014388, + "force_loss": 0.009805439971387386, + "step": 14390 + }, + { + "epoch": 12.949640287769784, + "grad_norm": 0.3314456641674042, + "learning_rate": 9.045246946593029e-05, + "loss": 0.0254, + "step": 14400 + }, + { + "action_loss": 0.005221536383032799, + "epoch": 12.949640287769784, + "step": 14400 + }, + { + "epoch": 12.949640287769784, + "step": 14400, + "torque_loss": 0.14278076589107513 + }, + { + "epoch": 12.949640287769784, + "force_loss": 0.00457338010892272, + "step": 14400 + }, + { + "epoch": 12.95863309352518, + "grad_norm": 0.710391104221344, + "learning_rate": 9.043626646513652e-05, + "loss": 0.0233, + "step": 14410 + }, + { + "action_loss": 0.004302104935050011, + "epoch": 12.95863309352518, + "step": 14410 + }, + { + "epoch": 12.95863309352518, + "step": 14410, + "torque_loss": 0.14447474479675293 + }, + { + "epoch": 12.95863309352518, + "force_loss": 0.0045445640571415424, + "step": 14410 + }, + { + "epoch": 12.967625899280575, + "grad_norm": 0.6171999573707581, + "learning_rate": 9.042005118087267e-05, + "loss": 0.0225, + "step": 14420 + }, + { + "action_loss": 0.002514268271625042, + "epoch": 12.967625899280575, + "step": 14420 + }, + { + "epoch": 12.967625899280575, + "step": 14420, + "torque_loss": 0.09670746326446533 + }, + { + "epoch": 12.967625899280575, + "force_loss": 0.0036105215549468994, + "step": 14420 + }, + { + "epoch": 12.976618705035971, + "grad_norm": 0.441057950258255, + "learning_rate": 9.040382361806448e-05, + "loss": 0.0281, + "step": 14430 + }, + { + "action_loss": 0.004521968308836222, + "epoch": 12.976618705035971, + "step": 14430 + }, + { + "epoch": 12.976618705035971, + "step": 14430, + "torque_loss": 0.13695268332958221 + }, + { + "epoch": 12.976618705035971, + "force_loss": 0.005111285485327244, + "step": 14430 + }, + { + "epoch": 12.985611510791367, + "grad_norm": 0.36860525608062744, + "learning_rate": 9.038758378164148e-05, + "loss": 0.0317, + "step": 14440 + }, + { + "action_loss": 0.008770656771957874, + "epoch": 12.985611510791367, + "step": 14440 + }, + { + "epoch": 12.985611510791367, + "step": 14440, + "torque_loss": 0.13073667883872986 + }, + { + "epoch": 12.985611510791367, + "force_loss": 0.0072477455250918865, + "step": 14440 + }, + { + "epoch": 12.994604316546763, + "grad_norm": 0.42620325088500977, + "learning_rate": 9.037133167653691e-05, + "loss": 0.0245, + "step": 14450 + }, + { + "action_loss": 0.017914768308401108, + "epoch": 12.994604316546763, + "step": 14450 + }, + { + "epoch": 12.994604316546763, + "step": 14450, + "torque_loss": 0.15631122887134552 + }, + { + "epoch": 12.994604316546763, + "force_loss": 0.012198311276733875, + "step": 14450 + }, + { + "epoch": 13.003597122302159, + "grad_norm": 0.3327738344669342, + "learning_rate": 9.035506730768771e-05, + "loss": 0.0245, + "step": 14460 + }, + { + "action_loss": 0.0034663667902350426, + "epoch": 13.003597122302159, + "step": 14460 + }, + { + "epoch": 13.003597122302159, + "step": 14460, + "torque_loss": 0.10766801983118057 + }, + { + "epoch": 13.003597122302159, + "force_loss": 0.0042332750745117664, + "step": 14460 + }, + { + "epoch": 13.012589928057555, + "grad_norm": 0.4596000611782074, + "learning_rate": 9.033879068003458e-05, + "loss": 0.0242, + "step": 14470 + }, + { + "action_loss": 0.007420101668685675, + "epoch": 13.012589928057555, + "step": 14470 + }, + { + "epoch": 13.012589928057555, + "step": 14470, + "torque_loss": 0.12554533779621124 + }, + { + "epoch": 13.012589928057555, + "force_loss": 0.00997112225741148, + "step": 14470 + }, + { + "epoch": 13.02158273381295, + "grad_norm": 0.441500723361969, + "learning_rate": 9.032250179852193e-05, + "loss": 0.0255, + "step": 14480 + }, + { + "action_loss": 0.007700768765062094, + "epoch": 13.02158273381295, + "step": 14480 + }, + { + "epoch": 13.02158273381295, + "step": 14480, + "torque_loss": 0.12382280081510544 + }, + { + "epoch": 13.02158273381295, + "force_loss": 0.004308726638555527, + "step": 14480 + }, + { + "epoch": 13.030575539568344, + "grad_norm": 0.26572030782699585, + "learning_rate": 9.030620066809787e-05, + "loss": 0.0228, + "step": 14490 + }, + { + "action_loss": 0.006130896508693695, + "epoch": 13.030575539568344, + "step": 14490 + }, + { + "epoch": 13.030575539568344, + "step": 14490, + "torque_loss": 0.11724263429641724 + }, + { + "epoch": 13.030575539568344, + "force_loss": 0.006546203047037125, + "step": 14490 + }, + { + "epoch": 13.03956834532374, + "grad_norm": 0.32110661268234253, + "learning_rate": 9.028988729371428e-05, + "loss": 0.0233, + "step": 14500 + }, + { + "action_loss": 0.011450658552348614, + "epoch": 13.03956834532374, + "step": 14500 + }, + { + "epoch": 13.03956834532374, + "step": 14500, + "torque_loss": 0.15253585577011108 + }, + { + "epoch": 13.03956834532374, + "force_loss": 0.017569176852703094, + "step": 14500 + }, + { + "epoch": 13.048561151079136, + "grad_norm": 0.3928966522216797, + "learning_rate": 9.027356168032673e-05, + "loss": 0.0256, + "step": 14510 + }, + { + "action_loss": 0.004405443090945482, + "epoch": 13.048561151079136, + "step": 14510 + }, + { + "epoch": 13.048561151079136, + "step": 14510, + "torque_loss": 0.12579254806041718 + }, + { + "epoch": 13.048561151079136, + "force_loss": 0.005160333588719368, + "step": 14510 + }, + { + "epoch": 13.057553956834532, + "grad_norm": 0.677519679069519, + "learning_rate": 9.02572238328945e-05, + "loss": 0.0274, + "step": 14520 + }, + { + "action_loss": 0.011080168187618256, + "epoch": 13.057553956834532, + "step": 14520 + }, + { + "epoch": 13.057553956834532, + "step": 14520, + "torque_loss": 0.09257916361093521 + }, + { + "epoch": 13.057553956834532, + "force_loss": 0.009711046703159809, + "step": 14520 + }, + { + "epoch": 13.066546762589928, + "grad_norm": 0.4754875600337982, + "learning_rate": 9.02408737563806e-05, + "loss": 0.0264, + "step": 14530 + }, + { + "action_loss": 0.006562229245901108, + "epoch": 13.066546762589928, + "step": 14530 + }, + { + "epoch": 13.066546762589928, + "step": 14530, + "torque_loss": 0.15128637850284576 + }, + { + "epoch": 13.066546762589928, + "force_loss": 0.006391929928213358, + "step": 14530 + }, + { + "epoch": 13.075539568345324, + "grad_norm": 0.3993264138698578, + "learning_rate": 9.022451145575174e-05, + "loss": 0.0249, + "step": 14540 + }, + { + "action_loss": 0.003944835159927607, + "epoch": 13.075539568345324, + "step": 14540 + }, + { + "epoch": 13.075539568345324, + "step": 14540, + "torque_loss": 0.1241079792380333 + }, + { + "epoch": 13.075539568345324, + "force_loss": 0.004877950064837933, + "step": 14540 + }, + { + "epoch": 13.08453237410072, + "grad_norm": 0.8384110927581787, + "learning_rate": 9.02081369359784e-05, + "loss": 0.0233, + "step": 14550 + }, + { + "action_loss": 0.0061592659913003445, + "epoch": 13.08453237410072, + "step": 14550 + }, + { + "epoch": 13.08453237410072, + "step": 14550, + "torque_loss": 0.09389689564704895 + }, + { + "epoch": 13.08453237410072, + "force_loss": 0.007377520203590393, + "step": 14550 + }, + { + "epoch": 13.093525179856115, + "grad_norm": 0.5473024845123291, + "learning_rate": 9.019175020203465e-05, + "loss": 0.0214, + "step": 14560 + }, + { + "action_loss": 0.00808979943394661, + "epoch": 13.093525179856115, + "step": 14560 + }, + { + "epoch": 13.093525179856115, + "step": 14560, + "torque_loss": 0.18886379897594452 + }, + { + "epoch": 13.093525179856115, + "force_loss": 0.0075078862719237804, + "step": 14560 + }, + { + "epoch": 13.102517985611511, + "grad_norm": 0.8063585162162781, + "learning_rate": 9.017535125889842e-05, + "loss": 0.0212, + "step": 14570 + }, + { + "action_loss": 0.01819506473839283, + "epoch": 13.102517985611511, + "step": 14570 + }, + { + "epoch": 13.102517985611511, + "step": 14570, + "torque_loss": 0.15776431560516357 + }, + { + "epoch": 13.102517985611511, + "force_loss": 0.01641392707824707, + "step": 14570 + }, + { + "epoch": 13.111510791366907, + "grad_norm": 0.34871530532836914, + "learning_rate": 9.015894011155124e-05, + "loss": 0.0272, + "step": 14580 + }, + { + "action_loss": 0.018761910498142242, + "epoch": 13.111510791366907, + "step": 14580 + }, + { + "epoch": 13.111510791366907, + "step": 14580, + "torque_loss": 0.17186866700649261 + }, + { + "epoch": 13.111510791366907, + "force_loss": 0.01540849357843399, + "step": 14580 + }, + { + "epoch": 13.120503597122303, + "grad_norm": 0.5542654395103455, + "learning_rate": 9.014251676497838e-05, + "loss": 0.0297, + "step": 14590 + }, + { + "action_loss": 0.0072064208798110485, + "epoch": 13.120503597122303, + "step": 14590 + }, + { + "epoch": 13.120503597122303, + "step": 14590, + "torque_loss": 0.11462417989969254 + }, + { + "epoch": 13.120503597122303, + "force_loss": 0.007690413389354944, + "step": 14590 + }, + { + "epoch": 13.129496402877697, + "grad_norm": 0.3077492415904999, + "learning_rate": 9.012608122416884e-05, + "loss": 0.024, + "step": 14600 + }, + { + "action_loss": 0.006153901573270559, + "epoch": 13.129496402877697, + "step": 14600 + }, + { + "epoch": 13.129496402877697, + "step": 14600, + "torque_loss": 0.1357250064611435 + }, + { + "epoch": 13.129496402877697, + "force_loss": 0.006215449422597885, + "step": 14600 + }, + { + "epoch": 13.138489208633093, + "grad_norm": 0.3804683983325958, + "learning_rate": 9.010963349411529e-05, + "loss": 0.0277, + "step": 14610 + }, + { + "action_loss": 0.028855418786406517, + "epoch": 13.138489208633093, + "step": 14610 + }, + { + "epoch": 13.138489208633093, + "step": 14610, + "torque_loss": 0.14814309775829315 + }, + { + "epoch": 13.138489208633093, + "force_loss": 0.024809643626213074, + "step": 14610 + }, + { + "epoch": 13.147482014388489, + "grad_norm": 0.3638586699962616, + "learning_rate": 9.00931735798141e-05, + "loss": 0.0279, + "step": 14620 + }, + { + "action_loss": 0.007410066667944193, + "epoch": 13.147482014388489, + "step": 14620 + }, + { + "epoch": 13.147482014388489, + "step": 14620, + "torque_loss": 0.11786165088415146 + }, + { + "epoch": 13.147482014388489, + "force_loss": 0.004824485629796982, + "step": 14620 + }, + { + "epoch": 13.156474820143885, + "grad_norm": 0.40190863609313965, + "learning_rate": 9.00767014862654e-05, + "loss": 0.0227, + "step": 14630 + }, + { + "action_loss": 0.0038182036951184273, + "epoch": 13.156474820143885, + "step": 14630 + }, + { + "epoch": 13.156474820143885, + "step": 14630, + "torque_loss": 0.13455848395824432 + }, + { + "epoch": 13.156474820143885, + "force_loss": 0.003665941534563899, + "step": 14630 + }, + { + "epoch": 13.16546762589928, + "grad_norm": 0.4742845296859741, + "learning_rate": 9.006021721847295e-05, + "loss": 0.0229, + "step": 14640 + }, + { + "action_loss": 0.017086701467633247, + "epoch": 13.16546762589928, + "step": 14640 + }, + { + "epoch": 13.16546762589928, + "step": 14640, + "torque_loss": 0.16399197280406952 + }, + { + "epoch": 13.16546762589928, + "force_loss": 0.0141557976603508, + "step": 14640 + }, + { + "epoch": 13.174460431654676, + "grad_norm": 0.6930900812149048, + "learning_rate": 9.004372078144423e-05, + "loss": 0.0255, + "step": 14650 + }, + { + "action_loss": 0.017907066270709038, + "epoch": 13.174460431654676, + "step": 14650 + }, + { + "epoch": 13.174460431654676, + "step": 14650, + "torque_loss": 0.16293351352214813 + }, + { + "epoch": 13.174460431654676, + "force_loss": 0.024945497512817383, + "step": 14650 + }, + { + "epoch": 13.183453237410072, + "grad_norm": 0.5228962898254395, + "learning_rate": 9.002721218019043e-05, + "loss": 0.0265, + "step": 14660 + }, + { + "action_loss": 0.006672768387943506, + "epoch": 13.183453237410072, + "step": 14660 + }, + { + "epoch": 13.183453237410072, + "step": 14660, + "torque_loss": 0.11546932905912399 + }, + { + "epoch": 13.183453237410072, + "force_loss": 0.007314169779419899, + "step": 14660 + }, + { + "epoch": 13.192446043165468, + "grad_norm": 0.40475955605506897, + "learning_rate": 9.001069141972642e-05, + "loss": 0.0233, + "step": 14670 + }, + { + "action_loss": 0.012470702640712261, + "epoch": 13.192446043165468, + "step": 14670 + }, + { + "epoch": 13.192446043165468, + "step": 14670, + "torque_loss": 0.09179221838712692 + }, + { + "epoch": 13.192446043165468, + "force_loss": 0.024287141859531403, + "step": 14670 + }, + { + "epoch": 13.201438848920864, + "grad_norm": 0.40371808409690857, + "learning_rate": 8.99941585050708e-05, + "loss": 0.0274, + "step": 14680 + }, + { + "action_loss": 0.010434582829475403, + "epoch": 13.201438848920864, + "step": 14680 + }, + { + "epoch": 13.201438848920864, + "step": 14680, + "torque_loss": 0.16278213262557983 + }, + { + "epoch": 13.201438848920864, + "force_loss": 0.011291473172605038, + "step": 14680 + }, + { + "epoch": 13.21043165467626, + "grad_norm": 0.431395947933197, + "learning_rate": 8.997761344124578e-05, + "loss": 0.0262, + "step": 14690 + }, + { + "action_loss": 0.004833641927689314, + "epoch": 13.21043165467626, + "step": 14690 + }, + { + "epoch": 13.21043165467626, + "step": 14690, + "torque_loss": 0.16232754290103912 + }, + { + "epoch": 13.21043165467626, + "force_loss": 0.004413580056279898, + "step": 14690 + }, + { + "epoch": 13.219424460431656, + "grad_norm": 0.315735399723053, + "learning_rate": 8.996105623327737e-05, + "loss": 0.0224, + "step": 14700 + }, + { + "action_loss": 0.008078006096184254, + "epoch": 13.219424460431656, + "step": 14700 + }, + { + "epoch": 13.219424460431656, + "step": 14700, + "torque_loss": 0.13483105599880219 + }, + { + "epoch": 13.219424460431656, + "force_loss": 0.0045743403024971485, + "step": 14700 + }, + { + "epoch": 13.22841726618705, + "grad_norm": 0.3824552297592163, + "learning_rate": 8.994448688619517e-05, + "loss": 0.0235, + "step": 14710 + }, + { + "action_loss": 0.006124551873654127, + "epoch": 13.22841726618705, + "step": 14710 + }, + { + "epoch": 13.22841726618705, + "step": 14710, + "torque_loss": 0.11799757927656174 + }, + { + "epoch": 13.22841726618705, + "force_loss": 0.006544084753841162, + "step": 14710 + }, + { + "epoch": 13.237410071942445, + "grad_norm": 0.3821107745170593, + "learning_rate": 8.992790540503253e-05, + "loss": 0.0245, + "step": 14720 + }, + { + "action_loss": 0.007243847008794546, + "epoch": 13.237410071942445, + "step": 14720 + }, + { + "epoch": 13.237410071942445, + "step": 14720, + "torque_loss": 0.1174304261803627 + }, + { + "epoch": 13.237410071942445, + "force_loss": 0.004242307972162962, + "step": 14720 + }, + { + "epoch": 13.246402877697841, + "grad_norm": 0.2759091556072235, + "learning_rate": 8.991131179482648e-05, + "loss": 0.0219, + "step": 14730 + }, + { + "action_loss": 0.006294190883636475, + "epoch": 13.246402877697841, + "step": 14730 + }, + { + "epoch": 13.246402877697841, + "step": 14730, + "torque_loss": 0.13765421509742737 + }, + { + "epoch": 13.246402877697841, + "force_loss": 0.00725896842777729, + "step": 14730 + }, + { + "epoch": 13.255395683453237, + "grad_norm": 0.4401913583278656, + "learning_rate": 8.989470606061768e-05, + "loss": 0.0273, + "step": 14740 + }, + { + "action_loss": 0.009603387676179409, + "epoch": 13.255395683453237, + "step": 14740 + }, + { + "epoch": 13.255395683453237, + "step": 14740, + "torque_loss": 0.13843569159507751 + }, + { + "epoch": 13.255395683453237, + "force_loss": 0.007131265010684729, + "step": 14740 + }, + { + "epoch": 13.264388489208633, + "grad_norm": 0.4701067805290222, + "learning_rate": 8.987808820745056e-05, + "loss": 0.0267, + "step": 14750 + }, + { + "action_loss": 0.011600494384765625, + "epoch": 13.264388489208633, + "step": 14750 + }, + { + "epoch": 13.264388489208633, + "step": 14750, + "torque_loss": 0.1349010467529297 + }, + { + "epoch": 13.264388489208633, + "force_loss": 0.016512449830770493, + "step": 14750 + }, + { + "epoch": 13.273381294964029, + "grad_norm": 0.5634354948997498, + "learning_rate": 8.986145824037315e-05, + "loss": 0.0235, + "step": 14760 + }, + { + "action_loss": 0.018544087186455727, + "epoch": 13.273381294964029, + "step": 14760 + }, + { + "epoch": 13.273381294964029, + "step": 14760, + "torque_loss": 0.13523080945014954 + }, + { + "epoch": 13.273381294964029, + "force_loss": 0.02288237400352955, + "step": 14760 + }, + { + "epoch": 13.282374100719425, + "grad_norm": 0.4378294050693512, + "learning_rate": 8.984481616443721e-05, + "loss": 0.0336, + "step": 14770 + }, + { + "action_loss": 0.010487220250070095, + "epoch": 13.282374100719425, + "step": 14770 + }, + { + "epoch": 13.282374100719425, + "step": 14770, + "torque_loss": 0.15143419802188873 + }, + { + "epoch": 13.282374100719425, + "force_loss": 0.00698075070977211, + "step": 14770 + }, + { + "epoch": 13.29136690647482, + "grad_norm": 0.3537493944168091, + "learning_rate": 8.982816198469815e-05, + "loss": 0.023, + "step": 14780 + }, + { + "action_loss": 0.0047593191266059875, + "epoch": 13.29136690647482, + "step": 14780 + }, + { + "epoch": 13.29136690647482, + "step": 14780, + "torque_loss": 0.10496661812067032 + }, + { + "epoch": 13.29136690647482, + "force_loss": 0.003831305541098118, + "step": 14780 + }, + { + "epoch": 13.300359712230216, + "grad_norm": 0.21889355778694153, + "learning_rate": 8.98114957062151e-05, + "loss": 0.0269, + "step": 14790 + }, + { + "action_loss": 0.007127129938453436, + "epoch": 13.300359712230216, + "step": 14790 + }, + { + "epoch": 13.300359712230216, + "step": 14790, + "torque_loss": 0.14200273156166077 + }, + { + "epoch": 13.300359712230216, + "force_loss": 0.013411667197942734, + "step": 14790 + }, + { + "epoch": 13.309352517985612, + "grad_norm": 0.3099175691604614, + "learning_rate": 8.97948173340508e-05, + "loss": 0.0221, + "step": 14800 + }, + { + "action_loss": 0.006687544751912355, + "epoch": 13.309352517985612, + "step": 14800 + }, + { + "epoch": 13.309352517985612, + "step": 14800, + "torque_loss": 0.14193305373191833 + }, + { + "epoch": 13.309352517985612, + "force_loss": 0.0054398588836193085, + "step": 14800 + }, + { + "epoch": 13.318345323741006, + "grad_norm": 0.6632286906242371, + "learning_rate": 8.977812687327172e-05, + "loss": 0.0202, + "step": 14810 + }, + { + "action_loss": 0.03508426994085312, + "epoch": 13.318345323741006, + "step": 14810 + }, + { + "epoch": 13.318345323741006, + "step": 14810, + "torque_loss": 0.1740388721227646 + }, + { + "epoch": 13.318345323741006, + "force_loss": 0.02941388450562954, + "step": 14810 + }, + { + "epoch": 13.327338129496402, + "grad_norm": 0.5804286599159241, + "learning_rate": 8.976142432894798e-05, + "loss": 0.0261, + "step": 14820 + }, + { + "action_loss": 0.006049271672964096, + "epoch": 13.327338129496402, + "step": 14820 + }, + { + "epoch": 13.327338129496402, + "step": 14820, + "torque_loss": 0.10105837136507034 + }, + { + "epoch": 13.327338129496402, + "force_loss": 0.006374318152666092, + "step": 14820 + }, + { + "epoch": 13.336330935251798, + "grad_norm": 0.5489843487739563, + "learning_rate": 8.974470970615336e-05, + "loss": 0.0271, + "step": 14830 + }, + { + "action_loss": 0.019841017201542854, + "epoch": 13.336330935251798, + "step": 14830 + }, + { + "epoch": 13.336330935251798, + "step": 14830, + "torque_loss": 0.1333627551794052 + }, + { + "epoch": 13.336330935251798, + "force_loss": 0.018949931487441063, + "step": 14830 + }, + { + "epoch": 13.345323741007194, + "grad_norm": 0.5089920163154602, + "learning_rate": 8.972798300996534e-05, + "loss": 0.0284, + "step": 14840 + }, + { + "action_loss": 0.004613576456904411, + "epoch": 13.345323741007194, + "step": 14840 + }, + { + "epoch": 13.345323741007194, + "step": 14840, + "torque_loss": 0.12690582871437073 + }, + { + "epoch": 13.345323741007194, + "force_loss": 0.006062695290893316, + "step": 14840 + }, + { + "epoch": 13.35431654676259, + "grad_norm": 0.5182651281356812, + "learning_rate": 8.971124424546504e-05, + "loss": 0.023, + "step": 14850 + }, + { + "action_loss": 0.006306968629360199, + "epoch": 13.35431654676259, + "step": 14850 + }, + { + "epoch": 13.35431654676259, + "step": 14850, + "torque_loss": 0.1303333193063736 + }, + { + "epoch": 13.35431654676259, + "force_loss": 0.009413695894181728, + "step": 14850 + }, + { + "epoch": 13.363309352517986, + "grad_norm": 0.28933581709861755, + "learning_rate": 8.969449341773724e-05, + "loss": 0.0257, + "step": 14860 + }, + { + "action_loss": 0.009394045919179916, + "epoch": 13.363309352517986, + "step": 14860 + }, + { + "epoch": 13.363309352517986, + "step": 14860, + "torque_loss": 0.1231849193572998 + }, + { + "epoch": 13.363309352517986, + "force_loss": 0.010514688678085804, + "step": 14860 + }, + { + "epoch": 13.372302158273381, + "grad_norm": 0.5404540300369263, + "learning_rate": 8.967773053187042e-05, + "loss": 0.0298, + "step": 14870 + }, + { + "action_loss": 0.007335355039685965, + "epoch": 13.372302158273381, + "step": 14870 + }, + { + "epoch": 13.372302158273381, + "step": 14870, + "torque_loss": 0.09684888273477554 + }, + { + "epoch": 13.372302158273381, + "force_loss": 0.010095199570059776, + "step": 14870 + }, + { + "epoch": 13.381294964028777, + "grad_norm": 0.2733374834060669, + "learning_rate": 8.966095559295668e-05, + "loss": 0.0242, + "step": 14880 + }, + { + "action_loss": 0.004407978150993586, + "epoch": 13.381294964028777, + "step": 14880 + }, + { + "epoch": 13.381294964028777, + "step": 14880, + "torque_loss": 0.16121593117713928 + }, + { + "epoch": 13.381294964028777, + "force_loss": 0.00505354767665267, + "step": 14880 + }, + { + "epoch": 13.390287769784173, + "grad_norm": 0.3442193567752838, + "learning_rate": 8.964416860609184e-05, + "loss": 0.0268, + "step": 14890 + }, + { + "action_loss": 0.008913670666515827, + "epoch": 13.390287769784173, + "step": 14890 + }, + { + "epoch": 13.390287769784173, + "step": 14890, + "torque_loss": 0.10350557416677475 + }, + { + "epoch": 13.390287769784173, + "force_loss": 0.008378827013075352, + "step": 14890 + }, + { + "epoch": 13.399280575539569, + "grad_norm": 0.7161759734153748, + "learning_rate": 8.962736957637532e-05, + "loss": 0.0211, + "step": 14900 + }, + { + "action_loss": 0.002452212618663907, + "epoch": 13.399280575539569, + "step": 14900 + }, + { + "epoch": 13.399280575539569, + "step": 14900, + "torque_loss": 0.0714641883969307 + }, + { + "epoch": 13.399280575539569, + "force_loss": 0.0027752381283789873, + "step": 14900 + }, + { + "epoch": 13.408273381294965, + "grad_norm": 0.23730121552944183, + "learning_rate": 8.96105585089102e-05, + "loss": 0.0188, + "step": 14910 + }, + { + "action_loss": 0.03291613236069679, + "epoch": 13.408273381294965, + "step": 14910 + }, + { + "epoch": 13.408273381294965, + "step": 14910, + "torque_loss": 0.1959151029586792 + }, + { + "epoch": 13.408273381294965, + "force_loss": 0.024787189438939095, + "step": 14910 + }, + { + "epoch": 13.417266187050359, + "grad_norm": 0.32745203375816345, + "learning_rate": 8.959373540880329e-05, + "loss": 0.0307, + "step": 14920 + }, + { + "action_loss": 0.008861460722982883, + "epoch": 13.417266187050359, + "step": 14920 + }, + { + "epoch": 13.417266187050359, + "step": 14920, + "torque_loss": 0.1019737720489502 + }, + { + "epoch": 13.417266187050359, + "force_loss": 0.0068197958171367645, + "step": 14920 + }, + { + "epoch": 13.426258992805755, + "grad_norm": 0.3431726396083832, + "learning_rate": 8.957690028116495e-05, + "loss": 0.0268, + "step": 14930 + }, + { + "action_loss": 0.007505863904953003, + "epoch": 13.426258992805755, + "step": 14930 + }, + { + "epoch": 13.426258992805755, + "step": 14930, + "torque_loss": 0.12415751814842224 + }, + { + "epoch": 13.426258992805755, + "force_loss": 0.0055229924619197845, + "step": 14930 + }, + { + "epoch": 13.43525179856115, + "grad_norm": 0.2953338027000427, + "learning_rate": 8.956005313110928e-05, + "loss": 0.0234, + "step": 14940 + }, + { + "action_loss": 0.0041083176620304585, + "epoch": 13.43525179856115, + "step": 14940 + }, + { + "epoch": 13.43525179856115, + "step": 14940, + "torque_loss": 0.16240069270133972 + }, + { + "epoch": 13.43525179856115, + "force_loss": 0.005068450700491667, + "step": 14940 + }, + { + "epoch": 13.444244604316546, + "grad_norm": 0.3561532199382782, + "learning_rate": 8.9543193963754e-05, + "loss": 0.0265, + "step": 14950 + }, + { + "action_loss": 0.011054660193622112, + "epoch": 13.444244604316546, + "step": 14950 + }, + { + "epoch": 13.444244604316546, + "step": 14950, + "torque_loss": 0.11108561605215073 + }, + { + "epoch": 13.444244604316546, + "force_loss": 0.009395153261721134, + "step": 14950 + }, + { + "epoch": 13.453237410071942, + "grad_norm": 0.6054490208625793, + "learning_rate": 8.952632278422048e-05, + "loss": 0.0247, + "step": 14960 + }, + { + "action_loss": 0.010276337154209614, + "epoch": 13.453237410071942, + "step": 14960 + }, + { + "epoch": 13.453237410071942, + "step": 14960, + "torque_loss": 0.11921677738428116 + }, + { + "epoch": 13.453237410071942, + "force_loss": 0.008038091473281384, + "step": 14960 + }, + { + "epoch": 13.462230215827338, + "grad_norm": 0.42296022176742554, + "learning_rate": 8.95094395976337e-05, + "loss": 0.0289, + "step": 14970 + }, + { + "action_loss": 0.011814437806606293, + "epoch": 13.462230215827338, + "step": 14970 + }, + { + "epoch": 13.462230215827338, + "step": 14970, + "torque_loss": 0.15883596241474152 + }, + { + "epoch": 13.462230215827338, + "force_loss": 0.0067948270589113235, + "step": 14970 + }, + { + "epoch": 13.471223021582734, + "grad_norm": 0.36728227138519287, + "learning_rate": 8.949254440912239e-05, + "loss": 0.0271, + "step": 14980 + }, + { + "action_loss": 0.00897889118641615, + "epoch": 13.471223021582734, + "step": 14980 + }, + { + "epoch": 13.471223021582734, + "step": 14980, + "torque_loss": 0.14344914257526398 + }, + { + "epoch": 13.471223021582734, + "force_loss": 0.0068875751458108425, + "step": 14980 + }, + { + "epoch": 13.48021582733813, + "grad_norm": 0.5201298594474792, + "learning_rate": 8.94756372238188e-05, + "loss": 0.027, + "step": 14990 + }, + { + "action_loss": 0.012513409368693829, + "epoch": 13.48021582733813, + "step": 14990 + }, + { + "epoch": 13.48021582733813, + "step": 14990, + "torque_loss": 0.15048789978027344 + }, + { + "epoch": 13.48021582733813, + "force_loss": 0.010743759572505951, + "step": 14990 + }, + { + "epoch": 13.489208633093526, + "grad_norm": 0.4812636375427246, + "learning_rate": 8.945871804685892e-05, + "loss": 0.0249, + "step": 15000 + }, + { + "action_loss": 0.011180832982063293, + "epoch": 13.489208633093526, + "step": 15000 + }, + { + "epoch": 13.489208633093526, + "step": 15000, + "torque_loss": 0.18805503845214844 + }, + { + "epoch": 13.489208633093526, + "force_loss": 0.0050537544302642345, + "step": 15000 + }, + { + "epoch": 13.498201438848922, + "grad_norm": 0.2539704740047455, + "learning_rate": 8.944178688338236e-05, + "loss": 0.023, + "step": 15010 + }, + { + "action_loss": 0.018569903448224068, + "epoch": 13.498201438848922, + "step": 15010 + }, + { + "epoch": 13.498201438848922, + "step": 15010, + "torque_loss": 0.11188825964927673 + }, + { + "epoch": 13.498201438848922, + "force_loss": 0.017913958057761192, + "step": 15010 + }, + { + "epoch": 13.507194244604317, + "grad_norm": 0.5525025725364685, + "learning_rate": 8.942484373853233e-05, + "loss": 0.0206, + "step": 15020 + }, + { + "action_loss": 0.00969319324940443, + "epoch": 13.507194244604317, + "step": 15020 + }, + { + "epoch": 13.507194244604317, + "step": 15020, + "torque_loss": 0.13361899554729462 + }, + { + "epoch": 13.507194244604317, + "force_loss": 0.0065063959918916225, + "step": 15020 + }, + { + "epoch": 13.516187050359711, + "grad_norm": 0.6648332476615906, + "learning_rate": 8.940788861745572e-05, + "loss": 0.0295, + "step": 15030 + }, + { + "action_loss": 0.004434612579643726, + "epoch": 13.516187050359711, + "step": 15030 + }, + { + "epoch": 13.516187050359711, + "step": 15030, + "torque_loss": 0.12967358529567719 + }, + { + "epoch": 13.516187050359711, + "force_loss": 0.00821138545870781, + "step": 15030 + }, + { + "epoch": 13.525179856115107, + "grad_norm": 0.4235135614871979, + "learning_rate": 8.939092152530308e-05, + "loss": 0.0242, + "step": 15040 + }, + { + "action_loss": 0.007038809359073639, + "epoch": 13.525179856115107, + "step": 15040 + }, + { + "epoch": 13.525179856115107, + "step": 15040, + "torque_loss": 0.1582469940185547 + }, + { + "epoch": 13.525179856115107, + "force_loss": 0.007739563938230276, + "step": 15040 + }, + { + "epoch": 13.534172661870503, + "grad_norm": 0.6255474090576172, + "learning_rate": 8.937394246722853e-05, + "loss": 0.0233, + "step": 15050 + }, + { + "action_loss": 0.011083395220339298, + "epoch": 13.534172661870503, + "step": 15050 + }, + { + "epoch": 13.534172661870503, + "step": 15050, + "torque_loss": 0.15567262470722198 + }, + { + "epoch": 13.534172661870503, + "force_loss": 0.008961132727563381, + "step": 15050 + }, + { + "epoch": 13.543165467625899, + "grad_norm": 0.6343432664871216, + "learning_rate": 8.935695144838984e-05, + "loss": 0.03, + "step": 15060 + }, + { + "action_loss": 0.01805730350315571, + "epoch": 13.543165467625899, + "step": 15060 + }, + { + "epoch": 13.543165467625899, + "step": 15060, + "torque_loss": 0.13492028415203094 + }, + { + "epoch": 13.543165467625899, + "force_loss": 0.012581147253513336, + "step": 15060 + }, + { + "epoch": 13.552158273381295, + "grad_norm": 1.0415568351745605, + "learning_rate": 8.933994847394849e-05, + "loss": 0.0242, + "step": 15070 + }, + { + "action_loss": 0.007867749780416489, + "epoch": 13.552158273381295, + "step": 15070 + }, + { + "epoch": 13.552158273381295, + "step": 15070, + "torque_loss": 0.14760714769363403 + }, + { + "epoch": 13.552158273381295, + "force_loss": 0.006027327850461006, + "step": 15070 + }, + { + "epoch": 13.56115107913669, + "grad_norm": 0.2590728998184204, + "learning_rate": 8.932293354906949e-05, + "loss": 0.0243, + "step": 15080 + }, + { + "action_loss": 0.0067918128333985806, + "epoch": 13.56115107913669, + "step": 15080 + }, + { + "epoch": 13.56115107913669, + "step": 15080, + "torque_loss": 0.16374565660953522 + }, + { + "epoch": 13.56115107913669, + "force_loss": 0.0048575554974377155, + "step": 15080 + }, + { + "epoch": 13.570143884892087, + "grad_norm": 0.3504115641117096, + "learning_rate": 8.930590667892153e-05, + "loss": 0.0255, + "step": 15090 + }, + { + "action_loss": 0.009288015775382519, + "epoch": 13.570143884892087, + "step": 15090 + }, + { + "epoch": 13.570143884892087, + "step": 15090, + "torque_loss": 0.10502278804779053 + }, + { + "epoch": 13.570143884892087, + "force_loss": 0.008320000022649765, + "step": 15090 + }, + { + "epoch": 13.579136690647482, + "grad_norm": 0.4551716148853302, + "learning_rate": 8.928886786867696e-05, + "loss": 0.0231, + "step": 15100 + }, + { + "action_loss": 0.005145413335412741, + "epoch": 13.579136690647482, + "step": 15100 + }, + { + "epoch": 13.579136690647482, + "step": 15100, + "torque_loss": 0.1395842432975769 + }, + { + "epoch": 13.579136690647482, + "force_loss": 0.007424239069223404, + "step": 15100 + }, + { + "epoch": 13.588129496402878, + "grad_norm": 0.33234086632728577, + "learning_rate": 8.927181712351168e-05, + "loss": 0.0205, + "step": 15110 + }, + { + "action_loss": 0.007719407323747873, + "epoch": 13.588129496402878, + "step": 15110 + }, + { + "epoch": 13.588129496402878, + "step": 15110, + "torque_loss": 0.12911628186702728 + }, + { + "epoch": 13.588129496402878, + "force_loss": 0.00613104784861207, + "step": 15110 + }, + { + "epoch": 13.597122302158274, + "grad_norm": 0.6952317953109741, + "learning_rate": 8.925475444860527e-05, + "loss": 0.0243, + "step": 15120 + }, + { + "action_loss": 0.005236349534243345, + "epoch": 13.597122302158274, + "step": 15120 + }, + { + "epoch": 13.597122302158274, + "step": 15120, + "torque_loss": 0.09986937791109085 + }, + { + "epoch": 13.597122302158274, + "force_loss": 0.007467932999134064, + "step": 15120 + }, + { + "epoch": 13.60611510791367, + "grad_norm": 0.2264104187488556, + "learning_rate": 8.923767984914092e-05, + "loss": 0.0202, + "step": 15130 + }, + { + "action_loss": 0.005853123497217894, + "epoch": 13.60611510791367, + "step": 15130 + }, + { + "epoch": 13.60611510791367, + "step": 15130, + "torque_loss": 0.14523430168628693 + }, + { + "epoch": 13.60611510791367, + "force_loss": 0.004577724263072014, + "step": 15130 + }, + { + "epoch": 13.615107913669064, + "grad_norm": 0.7617133259773254, + "learning_rate": 8.922059333030545e-05, + "loss": 0.0324, + "step": 15140 + }, + { + "action_loss": 0.003778591752052307, + "epoch": 13.615107913669064, + "step": 15140 + }, + { + "epoch": 13.615107913669064, + "step": 15140, + "torque_loss": 0.09954670071601868 + }, + { + "epoch": 13.615107913669064, + "force_loss": 0.003781534731388092, + "step": 15140 + }, + { + "epoch": 13.62410071942446, + "grad_norm": 0.3433564603328705, + "learning_rate": 8.920349489728928e-05, + "loss": 0.0242, + "step": 15150 + }, + { + "action_loss": 0.0068816691637039185, + "epoch": 13.62410071942446, + "step": 15150 + }, + { + "epoch": 13.62410071942446, + "step": 15150, + "torque_loss": 0.16625277698040009 + }, + { + "epoch": 13.62410071942446, + "force_loss": 0.006247811019420624, + "step": 15150 + }, + { + "epoch": 13.633093525179856, + "grad_norm": 0.8180539011955261, + "learning_rate": 8.918638455528646e-05, + "loss": 0.0315, + "step": 15160 + }, + { + "action_loss": 0.006157733500003815, + "epoch": 13.633093525179856, + "step": 15160 + }, + { + "epoch": 13.633093525179856, + "step": 15160, + "torque_loss": 0.12670379877090454 + }, + { + "epoch": 13.633093525179856, + "force_loss": 0.01235104352235794, + "step": 15160 + }, + { + "epoch": 13.642086330935252, + "grad_norm": 0.24847963452339172, + "learning_rate": 8.916926230949468e-05, + "loss": 0.0262, + "step": 15170 + }, + { + "action_loss": 0.007512775715440512, + "epoch": 13.642086330935252, + "step": 15170 + }, + { + "epoch": 13.642086330935252, + "step": 15170, + "torque_loss": 0.13621143996715546 + }, + { + "epoch": 13.642086330935252, + "force_loss": 0.0039144582115113735, + "step": 15170 + }, + { + "epoch": 13.651079136690647, + "grad_norm": 0.6130836009979248, + "learning_rate": 8.915212816511522e-05, + "loss": 0.0238, + "step": 15180 + }, + { + "action_loss": 0.006106710527092218, + "epoch": 13.651079136690647, + "step": 15180 + }, + { + "epoch": 13.651079136690647, + "step": 15180, + "torque_loss": 0.1378335952758789 + }, + { + "epoch": 13.651079136690647, + "force_loss": 0.004762370139360428, + "step": 15180 + }, + { + "epoch": 13.660071942446043, + "grad_norm": 0.513279914855957, + "learning_rate": 8.913498212735296e-05, + "loss": 0.0266, + "step": 15190 + }, + { + "action_loss": 0.011448142118752003, + "epoch": 13.660071942446043, + "step": 15190 + }, + { + "epoch": 13.660071942446043, + "step": 15190, + "torque_loss": 0.17824681103229523 + }, + { + "epoch": 13.660071942446043, + "force_loss": 0.022146375849843025, + "step": 15190 + }, + { + "epoch": 13.66906474820144, + "grad_norm": 0.5906609296798706, + "learning_rate": 8.911782420141643e-05, + "loss": 0.0266, + "step": 15200 + }, + { + "action_loss": 0.005149825010448694, + "epoch": 13.66906474820144, + "step": 15200 + }, + { + "epoch": 13.66906474820144, + "step": 15200, + "torque_loss": 0.0891626700758934 + }, + { + "epoch": 13.66906474820144, + "force_loss": 0.004900491330772638, + "step": 15200 + }, + { + "epoch": 13.678057553956835, + "grad_norm": 0.48827168345451355, + "learning_rate": 8.910065439251775e-05, + "loss": 0.0212, + "step": 15210 + }, + { + "action_loss": 0.00458289822563529, + "epoch": 13.678057553956835, + "step": 15210 + }, + { + "epoch": 13.678057553956835, + "step": 15210, + "torque_loss": 0.10856308788061142 + }, + { + "epoch": 13.678057553956835, + "force_loss": 0.004713992588222027, + "step": 15210 + }, + { + "epoch": 13.68705035971223, + "grad_norm": 0.9654158353805542, + "learning_rate": 8.908347270587268e-05, + "loss": 0.0225, + "step": 15220 + }, + { + "action_loss": 0.005796946119517088, + "epoch": 13.68705035971223, + "step": 15220 + }, + { + "epoch": 13.68705035971223, + "step": 15220, + "torque_loss": 0.11669004708528519 + }, + { + "epoch": 13.68705035971223, + "force_loss": 0.011185649782419205, + "step": 15220 + }, + { + "epoch": 13.696043165467627, + "grad_norm": 0.463498592376709, + "learning_rate": 8.906627914670054e-05, + "loss": 0.0247, + "step": 15230 + }, + { + "action_loss": 0.004784261807799339, + "epoch": 13.696043165467627, + "step": 15230 + }, + { + "epoch": 13.696043165467627, + "step": 15230, + "torque_loss": 0.16224533319473267 + }, + { + "epoch": 13.696043165467627, + "force_loss": 0.004794577602297068, + "step": 15230 + }, + { + "epoch": 13.70503597122302, + "grad_norm": 0.2059027999639511, + "learning_rate": 8.904907372022427e-05, + "loss": 0.0229, + "step": 15240 + }, + { + "action_loss": 0.006131669040769339, + "epoch": 13.70503597122302, + "step": 15240 + }, + { + "epoch": 13.70503597122302, + "step": 15240, + "torque_loss": 0.12671232223510742 + }, + { + "epoch": 13.70503597122302, + "force_loss": 0.009363076649606228, + "step": 15240 + }, + { + "epoch": 13.714028776978417, + "grad_norm": 0.612990140914917, + "learning_rate": 8.903185643167042e-05, + "loss": 0.0274, + "step": 15250 + }, + { + "action_loss": 0.0032046486157923937, + "epoch": 13.714028776978417, + "step": 15250 + }, + { + "epoch": 13.714028776978417, + "step": 15250, + "torque_loss": 0.053358182311058044 + }, + { + "epoch": 13.714028776978417, + "force_loss": 0.005081131588667631, + "step": 15250 + }, + { + "epoch": 13.723021582733812, + "grad_norm": 0.304829478263855, + "learning_rate": 8.901462728626919e-05, + "loss": 0.0221, + "step": 15260 + }, + { + "action_loss": 0.018701009452342987, + "epoch": 13.723021582733812, + "step": 15260 + }, + { + "epoch": 13.723021582733812, + "step": 15260, + "torque_loss": 0.14688244462013245 + }, + { + "epoch": 13.723021582733812, + "force_loss": 0.027699897065758705, + "step": 15260 + }, + { + "epoch": 13.732014388489208, + "grad_norm": 0.704147458076477, + "learning_rate": 8.899738628925429e-05, + "loss": 0.0275, + "step": 15270 + }, + { + "action_loss": 0.005055636633187532, + "epoch": 13.732014388489208, + "step": 15270 + }, + { + "epoch": 13.732014388489208, + "step": 15270, + "torque_loss": 0.14347197115421295 + }, + { + "epoch": 13.732014388489208, + "force_loss": 0.005784823093563318, + "step": 15270 + }, + { + "epoch": 13.741007194244604, + "grad_norm": 0.33921340107917786, + "learning_rate": 8.898013344586312e-05, + "loss": 0.0214, + "step": 15280 + }, + { + "action_loss": 0.03477925434708595, + "epoch": 13.741007194244604, + "step": 15280 + }, + { + "epoch": 13.741007194244604, + "step": 15280, + "torque_loss": 0.15228581428527832 + }, + { + "epoch": 13.741007194244604, + "force_loss": 0.04001857712864876, + "step": 15280 + }, + { + "epoch": 13.75, + "grad_norm": 0.4462069272994995, + "learning_rate": 8.896286876133661e-05, + "loss": 0.0282, + "step": 15290 + }, + { + "action_loss": 0.004876232240349054, + "epoch": 13.75, + "step": 15290 + }, + { + "epoch": 13.75, + "step": 15290, + "torque_loss": 0.13058489561080933 + }, + { + "epoch": 13.75, + "force_loss": 0.006025476381182671, + "step": 15290 + }, + { + "epoch": 13.758992805755396, + "grad_norm": 0.7754341959953308, + "learning_rate": 8.894559224091933e-05, + "loss": 0.0238, + "step": 15300 + }, + { + "action_loss": 0.018312720581889153, + "epoch": 13.758992805755396, + "step": 15300 + }, + { + "epoch": 13.758992805755396, + "step": 15300, + "torque_loss": 0.15676982700824738 + }, + { + "epoch": 13.758992805755396, + "force_loss": 0.02750256098806858, + "step": 15300 + }, + { + "epoch": 13.767985611510792, + "grad_norm": 0.41330650448799133, + "learning_rate": 8.892830388985942e-05, + "loss": 0.0264, + "step": 15310 + }, + { + "action_loss": 0.00818963348865509, + "epoch": 13.767985611510792, + "step": 15310 + }, + { + "epoch": 13.767985611510792, + "step": 15310, + "torque_loss": 0.18316282331943512 + }, + { + "epoch": 13.767985611510792, + "force_loss": 0.009668399579823017, + "step": 15310 + }, + { + "epoch": 13.776978417266188, + "grad_norm": 0.4735620319843292, + "learning_rate": 8.891100371340864e-05, + "loss": 0.0235, + "step": 15320 + }, + { + "action_loss": 0.01410482544451952, + "epoch": 13.776978417266188, + "step": 15320 + }, + { + "epoch": 13.776978417266188, + "step": 15320, + "torque_loss": 0.11079797893762589 + }, + { + "epoch": 13.776978417266188, + "force_loss": 0.008774803020060062, + "step": 15320 + }, + { + "epoch": 13.785971223021583, + "grad_norm": 0.556950569152832, + "learning_rate": 8.889369171682231e-05, + "loss": 0.0279, + "step": 15330 + }, + { + "action_loss": 0.006459081545472145, + "epoch": 13.785971223021583, + "step": 15330 + }, + { + "epoch": 13.785971223021583, + "step": 15330, + "torque_loss": 0.18019777536392212 + }, + { + "epoch": 13.785971223021583, + "force_loss": 0.006488729268312454, + "step": 15330 + }, + { + "epoch": 13.79496402877698, + "grad_norm": 0.5908033847808838, + "learning_rate": 8.887636790535936e-05, + "loss": 0.0226, + "step": 15340 + }, + { + "action_loss": 0.005060199648141861, + "epoch": 13.79496402877698, + "step": 15340 + }, + { + "epoch": 13.79496402877698, + "step": 15340, + "torque_loss": 0.1453528255224228 + }, + { + "epoch": 13.79496402877698, + "force_loss": 0.004749844316393137, + "step": 15340 + }, + { + "epoch": 13.803956834532373, + "grad_norm": 0.2986871004104614, + "learning_rate": 8.885903228428231e-05, + "loss": 0.0227, + "step": 15350 + }, + { + "action_loss": 0.01119461003690958, + "epoch": 13.803956834532373, + "step": 15350 + }, + { + "epoch": 13.803956834532373, + "step": 15350, + "torque_loss": 0.15257616341114044 + }, + { + "epoch": 13.803956834532373, + "force_loss": 0.009376834146678448, + "step": 15350 + }, + { + "epoch": 13.81294964028777, + "grad_norm": 0.5822528004646301, + "learning_rate": 8.884168485885727e-05, + "loss": 0.0228, + "step": 15360 + }, + { + "action_loss": 0.005400268826633692, + "epoch": 13.81294964028777, + "step": 15360 + }, + { + "epoch": 13.81294964028777, + "step": 15360, + "torque_loss": 0.1534426063299179 + }, + { + "epoch": 13.81294964028777, + "force_loss": 0.0056499093770980835, + "step": 15360 + }, + { + "epoch": 13.821942446043165, + "grad_norm": 0.3519933521747589, + "learning_rate": 8.882432563435393e-05, + "loss": 0.0255, + "step": 15370 + }, + { + "action_loss": 0.008613456971943378, + "epoch": 13.821942446043165, + "step": 15370 + }, + { + "epoch": 13.821942446043165, + "step": 15370, + "torque_loss": 0.16823451220989227 + }, + { + "epoch": 13.821942446043165, + "force_loss": 0.009782267734408379, + "step": 15370 + }, + { + "epoch": 13.83093525179856, + "grad_norm": 0.5148043036460876, + "learning_rate": 8.880695461604556e-05, + "loss": 0.0223, + "step": 15380 + }, + { + "action_loss": 0.0024448649492114782, + "epoch": 13.83093525179856, + "step": 15380 + }, + { + "epoch": 13.83093525179856, + "step": 15380, + "torque_loss": 0.13449786603450775 + }, + { + "epoch": 13.83093525179856, + "force_loss": 0.004522038623690605, + "step": 15380 + }, + { + "epoch": 13.839928057553957, + "grad_norm": 0.6680033802986145, + "learning_rate": 8.878957180920901e-05, + "loss": 0.023, + "step": 15390 + }, + { + "action_loss": 0.004556481260806322, + "epoch": 13.839928057553957, + "step": 15390 + }, + { + "epoch": 13.839928057553957, + "step": 15390, + "torque_loss": 0.13199438154697418 + }, + { + "epoch": 13.839928057553957, + "force_loss": 0.005707425530999899, + "step": 15390 + }, + { + "epoch": 13.848920863309353, + "grad_norm": 0.41075780987739563, + "learning_rate": 8.877217721912473e-05, + "loss": 0.0234, + "step": 15400 + }, + { + "action_loss": 0.005182987544685602, + "epoch": 13.848920863309353, + "step": 15400 + }, + { + "epoch": 13.848920863309353, + "step": 15400, + "torque_loss": 0.11845013499259949 + }, + { + "epoch": 13.848920863309353, + "force_loss": 0.006118921097368002, + "step": 15400 + }, + { + "epoch": 13.857913669064748, + "grad_norm": 0.420786052942276, + "learning_rate": 8.875477085107673e-05, + "loss": 0.0238, + "step": 15410 + }, + { + "action_loss": 0.0063112773932516575, + "epoch": 13.857913669064748, + "step": 15410 + }, + { + "epoch": 13.857913669064748, + "step": 15410, + "torque_loss": 0.12901552021503448 + }, + { + "epoch": 13.857913669064748, + "force_loss": 0.0066695548593997955, + "step": 15410 + }, + { + "epoch": 13.866906474820144, + "grad_norm": 0.4857789874076843, + "learning_rate": 8.87373527103526e-05, + "loss": 0.0263, + "step": 15420 + }, + { + "action_loss": 0.0046065147034823895, + "epoch": 13.866906474820144, + "step": 15420 + }, + { + "epoch": 13.866906474820144, + "step": 15420, + "torque_loss": 0.10553174465894699 + }, + { + "epoch": 13.866906474820144, + "force_loss": 0.005900380667299032, + "step": 15420 + }, + { + "epoch": 13.87589928057554, + "grad_norm": 0.5139258503913879, + "learning_rate": 8.871992280224353e-05, + "loss": 0.0236, + "step": 15430 + }, + { + "action_loss": 0.006548629608005285, + "epoch": 13.87589928057554, + "step": 15430 + }, + { + "epoch": 13.87589928057554, + "step": 15430, + "torque_loss": 0.14469869434833527 + }, + { + "epoch": 13.87589928057554, + "force_loss": 0.008571171201765537, + "step": 15430 + }, + { + "epoch": 13.884892086330936, + "grad_norm": 0.4033961296081543, + "learning_rate": 8.870248113204422e-05, + "loss": 0.0229, + "step": 15440 + }, + { + "action_loss": 0.0070077404379844666, + "epoch": 13.884892086330936, + "step": 15440 + }, + { + "epoch": 13.884892086330936, + "step": 15440, + "torque_loss": 0.12000685185194016 + }, + { + "epoch": 13.884892086330936, + "force_loss": 0.007769885007292032, + "step": 15440 + }, + { + "epoch": 13.89388489208633, + "grad_norm": 0.9563060998916626, + "learning_rate": 8.868502770505306e-05, + "loss": 0.0259, + "step": 15450 + }, + { + "action_loss": 0.011506802402436733, + "epoch": 13.89388489208633, + "step": 15450 + }, + { + "epoch": 13.89388489208633, + "step": 15450, + "torque_loss": 0.13790783286094666 + }, + { + "epoch": 13.89388489208633, + "force_loss": 0.008600750006735325, + "step": 15450 + }, + { + "epoch": 13.902877697841726, + "grad_norm": 0.46110257506370544, + "learning_rate": 8.86675625265719e-05, + "loss": 0.029, + "step": 15460 + }, + { + "action_loss": 0.00926098134368658, + "epoch": 13.902877697841726, + "step": 15460 + }, + { + "epoch": 13.902877697841726, + "step": 15460, + "torque_loss": 0.17907463014125824 + }, + { + "epoch": 13.902877697841726, + "force_loss": 0.008339088410139084, + "step": 15460 + }, + { + "epoch": 13.911870503597122, + "grad_norm": 0.3648589551448822, + "learning_rate": 8.865008560190618e-05, + "loss": 0.0227, + "step": 15470 + }, + { + "action_loss": 0.03231241554021835, + "epoch": 13.911870503597122, + "step": 15470 + }, + { + "epoch": 13.911870503597122, + "step": 15470, + "torque_loss": 0.19119863212108612 + }, + { + "epoch": 13.911870503597122, + "force_loss": 0.03968670964241028, + "step": 15470 + }, + { + "epoch": 13.920863309352518, + "grad_norm": 0.29791513085365295, + "learning_rate": 8.863259693636496e-05, + "loss": 0.0261, + "step": 15480 + }, + { + "action_loss": 0.012710523791611195, + "epoch": 13.920863309352518, + "step": 15480 + }, + { + "epoch": 13.920863309352518, + "step": 15480, + "torque_loss": 0.13339431583881378 + }, + { + "epoch": 13.920863309352518, + "force_loss": 0.012615206651389599, + "step": 15480 + }, + { + "epoch": 13.929856115107913, + "grad_norm": 0.44263288378715515, + "learning_rate": 8.861509653526083e-05, + "loss": 0.0241, + "step": 15490 + }, + { + "action_loss": 0.01084295567125082, + "epoch": 13.929856115107913, + "step": 15490 + }, + { + "epoch": 13.929856115107913, + "step": 15490, + "torque_loss": 0.19021831452846527 + }, + { + "epoch": 13.929856115107913, + "force_loss": 0.013560633175075054, + "step": 15490 + }, + { + "epoch": 13.93884892086331, + "grad_norm": 0.20355476438999176, + "learning_rate": 8.859758440390993e-05, + "loss": 0.0248, + "step": 15500 + }, + { + "action_loss": 0.005911046639084816, + "epoch": 13.93884892086331, + "step": 15500 + }, + { + "epoch": 13.93884892086331, + "step": 15500, + "torque_loss": 0.13253700733184814 + }, + { + "epoch": 13.93884892086331, + "force_loss": 0.011428029276430607, + "step": 15500 + }, + { + "epoch": 13.947841726618705, + "grad_norm": 0.4832836091518402, + "learning_rate": 8.858006054763202e-05, + "loss": 0.0213, + "step": 15510 + }, + { + "action_loss": 0.007079319562762976, + "epoch": 13.947841726618705, + "step": 15510 + }, + { + "epoch": 13.947841726618705, + "step": 15510, + "torque_loss": 0.11854541301727295 + }, + { + "epoch": 13.947841726618705, + "force_loss": 0.009166515432298183, + "step": 15510 + }, + { + "epoch": 13.956834532374101, + "grad_norm": 0.36489251255989075, + "learning_rate": 8.856252497175035e-05, + "loss": 0.025, + "step": 15520 + }, + { + "action_loss": 0.005048893857747316, + "epoch": 13.956834532374101, + "step": 15520 + }, + { + "epoch": 13.956834532374101, + "step": 15520, + "torque_loss": 0.1286717802286148 + }, + { + "epoch": 13.956834532374101, + "force_loss": 0.004200175404548645, + "step": 15520 + }, + { + "epoch": 13.965827338129497, + "grad_norm": 0.2418166548013687, + "learning_rate": 8.854497768159178e-05, + "loss": 0.0196, + "step": 15530 + }, + { + "action_loss": 0.012202930636703968, + "epoch": 13.965827338129497, + "step": 15530 + }, + { + "epoch": 13.965827338129497, + "step": 15530, + "torque_loss": 0.1705620288848877 + }, + { + "epoch": 13.965827338129497, + "force_loss": 0.007922484539449215, + "step": 15530 + }, + { + "epoch": 13.974820143884893, + "grad_norm": 0.5926933288574219, + "learning_rate": 8.852741868248671e-05, + "loss": 0.0229, + "step": 15540 + }, + { + "action_loss": 0.010377899743616581, + "epoch": 13.974820143884893, + "step": 15540 + }, + { + "epoch": 13.974820143884893, + "step": 15540, + "torque_loss": 0.16820472478866577 + }, + { + "epoch": 13.974820143884893, + "force_loss": 0.009452663362026215, + "step": 15540 + }, + { + "epoch": 13.983812949640289, + "grad_norm": 0.9483239054679871, + "learning_rate": 8.85098479797691e-05, + "loss": 0.0263, + "step": 15550 + }, + { + "action_loss": 0.011345763690769672, + "epoch": 13.983812949640289, + "step": 15550 + }, + { + "epoch": 13.983812949640289, + "step": 15550, + "torque_loss": 0.17303144931793213 + }, + { + "epoch": 13.983812949640289, + "force_loss": 0.005850790534168482, + "step": 15550 + }, + { + "epoch": 13.992805755395683, + "grad_norm": 0.4478403329849243, + "learning_rate": 8.849226557877646e-05, + "loss": 0.0249, + "step": 15560 + }, + { + "action_loss": 0.007818459533154964, + "epoch": 13.992805755395683, + "step": 15560 + }, + { + "epoch": 13.992805755395683, + "step": 15560, + "torque_loss": 0.12763364613056183 + }, + { + "epoch": 13.992805755395683, + "force_loss": 0.007843210361897945, + "step": 15560 + }, + { + "epoch": 14.001798561151078, + "grad_norm": 0.8684555888175964, + "learning_rate": 8.84746714848499e-05, + "loss": 0.0252, + "step": 15570 + }, + { + "action_loss": 0.004722162615507841, + "epoch": 14.001798561151078, + "step": 15570 + }, + { + "epoch": 14.001798561151078, + "step": 15570, + "torque_loss": 0.13987940549850464 + }, + { + "epoch": 14.001798561151078, + "force_loss": 0.007056157570332289, + "step": 15570 + }, + { + "epoch": 14.010791366906474, + "grad_norm": 0.23410508036613464, + "learning_rate": 8.845706570333397e-05, + "loss": 0.0219, + "step": 15580 + }, + { + "action_loss": 0.007552033755928278, + "epoch": 14.010791366906474, + "step": 15580 + }, + { + "epoch": 14.010791366906474, + "step": 15580, + "torque_loss": 0.13435308635234833 + }, + { + "epoch": 14.010791366906474, + "force_loss": 0.010103273205459118, + "step": 15580 + }, + { + "epoch": 14.01978417266187, + "grad_norm": 0.2973777651786804, + "learning_rate": 8.84394482395769e-05, + "loss": 0.0289, + "step": 15590 + }, + { + "action_loss": 0.005697654094547033, + "epoch": 14.01978417266187, + "step": 15590 + }, + { + "epoch": 14.01978417266187, + "step": 15590, + "torque_loss": 0.13407254219055176 + }, + { + "epoch": 14.01978417266187, + "force_loss": 0.012538072653114796, + "step": 15590 + }, + { + "epoch": 14.028776978417266, + "grad_norm": 0.5000838041305542, + "learning_rate": 8.842181909893038e-05, + "loss": 0.0193, + "step": 15600 + }, + { + "action_loss": 0.009854399599134922, + "epoch": 14.028776978417266, + "step": 15600 + }, + { + "epoch": 14.028776978417266, + "step": 15600, + "torque_loss": 0.14689792692661285 + }, + { + "epoch": 14.028776978417266, + "force_loss": 0.011832110583782196, + "step": 15600 + }, + { + "epoch": 14.037769784172662, + "grad_norm": 0.21835018694400787, + "learning_rate": 8.840417828674969e-05, + "loss": 0.0239, + "step": 15610 + }, + { + "action_loss": 0.006035699043422937, + "epoch": 14.037769784172662, + "step": 15610 + }, + { + "epoch": 14.037769784172662, + "step": 15610, + "torque_loss": 0.12497264891862869 + }, + { + "epoch": 14.037769784172662, + "force_loss": 0.008065945468842983, + "step": 15610 + }, + { + "epoch": 14.046762589928058, + "grad_norm": 0.32977935671806335, + "learning_rate": 8.838652580839364e-05, + "loss": 0.0261, + "step": 15620 + }, + { + "action_loss": 0.008962932042777538, + "epoch": 14.046762589928058, + "step": 15620 + }, + { + "epoch": 14.046762589928058, + "step": 15620, + "torque_loss": 0.14133070409297943 + }, + { + "epoch": 14.046762589928058, + "force_loss": 0.008453644812107086, + "step": 15620 + }, + { + "epoch": 14.055755395683454, + "grad_norm": 0.5109555125236511, + "learning_rate": 8.836886166922458e-05, + "loss": 0.0322, + "step": 15630 + }, + { + "action_loss": 0.0034796353429555893, + "epoch": 14.055755395683454, + "step": 15630 + }, + { + "epoch": 14.055755395683454, + "step": 15630, + "torque_loss": 0.07112327218055725 + }, + { + "epoch": 14.055755395683454, + "force_loss": 0.0038140921387821436, + "step": 15630 + }, + { + "epoch": 14.06474820143885, + "grad_norm": 0.34478431940078735, + "learning_rate": 8.835118587460844e-05, + "loss": 0.0232, + "step": 15640 + }, + { + "action_loss": 0.009331970475614071, + "epoch": 14.06474820143885, + "step": 15640 + }, + { + "epoch": 14.06474820143885, + "step": 15640, + "torque_loss": 0.1184937134385109 + }, + { + "epoch": 14.06474820143885, + "force_loss": 0.014365400187671185, + "step": 15640 + }, + { + "epoch": 14.073741007194245, + "grad_norm": 0.7019870281219482, + "learning_rate": 8.83334984299146e-05, + "loss": 0.031, + "step": 15650 + }, + { + "action_loss": 0.009050595574080944, + "epoch": 14.073741007194245, + "step": 15650 + }, + { + "epoch": 14.073741007194245, + "step": 15650, + "torque_loss": 0.13808287680149078 + }, + { + "epoch": 14.073741007194245, + "force_loss": 0.008266493678092957, + "step": 15650 + }, + { + "epoch": 14.082733812949641, + "grad_norm": 0.691999077796936, + "learning_rate": 8.83157993405161e-05, + "loss": 0.0315, + "step": 15660 + }, + { + "action_loss": 0.004204514902085066, + "epoch": 14.082733812949641, + "step": 15660 + }, + { + "epoch": 14.082733812949641, + "step": 15660, + "torque_loss": 0.15880849957466125 + }, + { + "epoch": 14.082733812949641, + "force_loss": 0.004555519670248032, + "step": 15660 + }, + { + "epoch": 14.091726618705035, + "grad_norm": 0.6082274317741394, + "learning_rate": 8.829808861178943e-05, + "loss": 0.0248, + "step": 15670 + }, + { + "action_loss": 0.0051100715063512325, + "epoch": 14.091726618705035, + "step": 15670 + }, + { + "epoch": 14.091726618705035, + "step": 15670, + "torque_loss": 0.08284465223550797 + }, + { + "epoch": 14.091726618705035, + "force_loss": 0.011337089352309704, + "step": 15670 + }, + { + "epoch": 14.100719424460431, + "grad_norm": 0.4260847568511963, + "learning_rate": 8.828036624911464e-05, + "loss": 0.0271, + "step": 15680 + }, + { + "action_loss": 0.007330385968089104, + "epoch": 14.100719424460431, + "step": 15680 + }, + { + "epoch": 14.100719424460431, + "step": 15680, + "torque_loss": 0.1403549164533615 + }, + { + "epoch": 14.100719424460431, + "force_loss": 0.009846248663961887, + "step": 15680 + }, + { + "epoch": 14.109712230215827, + "grad_norm": 0.3712562918663025, + "learning_rate": 8.826263225787532e-05, + "loss": 0.0247, + "step": 15690 + }, + { + "action_loss": 0.004076914396136999, + "epoch": 14.109712230215827, + "step": 15690 + }, + { + "epoch": 14.109712230215827, + "step": 15690, + "torque_loss": 0.17920346558094025 + }, + { + "epoch": 14.109712230215827, + "force_loss": 0.004728124476969242, + "step": 15690 + }, + { + "epoch": 14.118705035971223, + "grad_norm": 0.487091988325119, + "learning_rate": 8.824488664345858e-05, + "loss": 0.022, + "step": 15700 + }, + { + "action_loss": 0.020175985991954803, + "epoch": 14.118705035971223, + "step": 15700 + }, + { + "epoch": 14.118705035971223, + "step": 15700, + "torque_loss": 0.1243928000330925 + }, + { + "epoch": 14.118705035971223, + "force_loss": 0.01809723861515522, + "step": 15700 + }, + { + "epoch": 14.127697841726619, + "grad_norm": 0.5377398729324341, + "learning_rate": 8.822712941125508e-05, + "loss": 0.0222, + "step": 15710 + }, + { + "action_loss": 0.00976415816694498, + "epoch": 14.127697841726619, + "step": 15710 + }, + { + "epoch": 14.127697841726619, + "step": 15710, + "torque_loss": 0.16083715856075287 + }, + { + "epoch": 14.127697841726619, + "force_loss": 0.014677882194519043, + "step": 15710 + }, + { + "epoch": 14.136690647482014, + "grad_norm": 0.7602187991142273, + "learning_rate": 8.820936056665898e-05, + "loss": 0.0252, + "step": 15720 + }, + { + "action_loss": 0.005407609045505524, + "epoch": 14.136690647482014, + "step": 15720 + }, + { + "epoch": 14.136690647482014, + "step": 15720, + "torque_loss": 0.1260979324579239 + }, + { + "epoch": 14.136690647482014, + "force_loss": 0.003310616360977292, + "step": 15720 + }, + { + "epoch": 14.14568345323741, + "grad_norm": 0.2339577078819275, + "learning_rate": 8.819158011506801e-05, + "loss": 0.0246, + "step": 15730 + }, + { + "action_loss": 0.004923509433865547, + "epoch": 14.14568345323741, + "step": 15730 + }, + { + "epoch": 14.14568345323741, + "step": 15730, + "torque_loss": 0.09556185454130173 + }, + { + "epoch": 14.14568345323741, + "force_loss": 0.00431869737803936, + "step": 15730 + }, + { + "epoch": 14.154676258992806, + "grad_norm": 0.2566169202327728, + "learning_rate": 8.81737880618834e-05, + "loss": 0.0229, + "step": 15740 + }, + { + "action_loss": 0.007789431139826775, + "epoch": 14.154676258992806, + "step": 15740 + }, + { + "epoch": 14.154676258992806, + "step": 15740, + "torque_loss": 0.15237168967723846 + }, + { + "epoch": 14.154676258992806, + "force_loss": 0.009500320069491863, + "step": 15740 + }, + { + "epoch": 14.163669064748202, + "grad_norm": 0.24414263665676117, + "learning_rate": 8.815598441250987e-05, + "loss": 0.0269, + "step": 15750 + }, + { + "action_loss": 0.0037577971816062927, + "epoch": 14.163669064748202, + "step": 15750 + }, + { + "epoch": 14.163669064748202, + "step": 15750, + "torque_loss": 0.11859043687582016 + }, + { + "epoch": 14.163669064748202, + "force_loss": 0.0034807927440851927, + "step": 15750 + }, + { + "epoch": 14.172661870503598, + "grad_norm": 0.3403555750846863, + "learning_rate": 8.813816917235576e-05, + "loss": 0.025, + "step": 15760 + }, + { + "action_loss": 0.014798753894865513, + "epoch": 14.172661870503598, + "step": 15760 + }, + { + "epoch": 14.172661870503598, + "step": 15760, + "torque_loss": 0.149000883102417 + }, + { + "epoch": 14.172661870503598, + "force_loss": 0.024646872654557228, + "step": 15760 + }, + { + "epoch": 14.181654676258994, + "grad_norm": 0.33511635661125183, + "learning_rate": 8.812034234683282e-05, + "loss": 0.0253, + "step": 15770 + }, + { + "action_loss": 0.009019267745316029, + "epoch": 14.181654676258994, + "step": 15770 + }, + { + "epoch": 14.181654676258994, + "step": 15770, + "torque_loss": 0.14723806083202362 + }, + { + "epoch": 14.181654676258994, + "force_loss": 0.01763060688972473, + "step": 15770 + }, + { + "epoch": 14.190647482014388, + "grad_norm": 0.5340002775192261, + "learning_rate": 8.810250394135637e-05, + "loss": 0.024, + "step": 15780 + }, + { + "action_loss": 0.01773165911436081, + "epoch": 14.190647482014388, + "step": 15780 + }, + { + "epoch": 14.190647482014388, + "step": 15780, + "torque_loss": 0.09388729184865952 + }, + { + "epoch": 14.190647482014388, + "force_loss": 0.022907018661499023, + "step": 15780 + }, + { + "epoch": 14.199640287769784, + "grad_norm": 0.5116387605667114, + "learning_rate": 8.808465396134529e-05, + "loss": 0.0266, + "step": 15790 + }, + { + "action_loss": 0.014380519278347492, + "epoch": 14.199640287769784, + "step": 15790 + }, + { + "epoch": 14.199640287769784, + "step": 15790, + "torque_loss": 0.11947671324014664 + }, + { + "epoch": 14.199640287769784, + "force_loss": 0.02432125248014927, + "step": 15790 + }, + { + "epoch": 14.20863309352518, + "grad_norm": 0.5784803032875061, + "learning_rate": 8.806679241222189e-05, + "loss": 0.0221, + "step": 15800 + }, + { + "action_loss": 0.007958601228892803, + "epoch": 14.20863309352518, + "step": 15800 + }, + { + "epoch": 14.20863309352518, + "step": 15800, + "torque_loss": 0.12594085931777954 + }, + { + "epoch": 14.20863309352518, + "force_loss": 0.00979639682918787, + "step": 15800 + }, + { + "epoch": 14.217625899280575, + "grad_norm": 0.6483083367347717, + "learning_rate": 8.804891929941203e-05, + "loss": 0.0288, + "step": 15810 + }, + { + "action_loss": 0.008797496557235718, + "epoch": 14.217625899280575, + "step": 15810 + }, + { + "epoch": 14.217625899280575, + "step": 15810, + "torque_loss": 0.1706986427307129 + }, + { + "epoch": 14.217625899280575, + "force_loss": 0.011068209074437618, + "step": 15810 + }, + { + "epoch": 14.226618705035971, + "grad_norm": 0.41875794529914856, + "learning_rate": 8.803103462834514e-05, + "loss": 0.0279, + "step": 15820 + }, + { + "action_loss": 0.00846771989017725, + "epoch": 14.226618705035971, + "step": 15820 + }, + { + "epoch": 14.226618705035971, + "step": 15820, + "torque_loss": 0.25680890679359436 + }, + { + "epoch": 14.226618705035971, + "force_loss": 0.004131515976041555, + "step": 15820 + }, + { + "epoch": 14.235611510791367, + "grad_norm": 0.4922666847705841, + "learning_rate": 8.801313840445408e-05, + "loss": 0.0255, + "step": 15830 + }, + { + "action_loss": 0.00815120991319418, + "epoch": 14.235611510791367, + "step": 15830 + }, + { + "epoch": 14.235611510791367, + "step": 15830, + "torque_loss": 0.10232289880514145 + }, + { + "epoch": 14.235611510791367, + "force_loss": 0.006058324594050646, + "step": 15830 + }, + { + "epoch": 14.244604316546763, + "grad_norm": 0.38295236229896545, + "learning_rate": 8.799523063317524e-05, + "loss": 0.0229, + "step": 15840 + }, + { + "action_loss": 0.00865989550948143, + "epoch": 14.244604316546763, + "step": 15840 + }, + { + "epoch": 14.244604316546763, + "step": 15840, + "torque_loss": 0.1718813180923462 + }, + { + "epoch": 14.244604316546763, + "force_loss": 0.006196597125381231, + "step": 15840 + }, + { + "epoch": 14.253597122302159, + "grad_norm": 0.29465678334236145, + "learning_rate": 8.797731131994854e-05, + "loss": 0.0227, + "step": 15850 + }, + { + "action_loss": 0.02060581184923649, + "epoch": 14.253597122302159, + "step": 15850 + }, + { + "epoch": 14.253597122302159, + "step": 15850, + "torque_loss": 0.15192629396915436 + }, + { + "epoch": 14.253597122302159, + "force_loss": 0.04282194375991821, + "step": 15850 + }, + { + "epoch": 14.262589928057555, + "grad_norm": 0.3826133906841278, + "learning_rate": 8.795938047021739e-05, + "loss": 0.0281, + "step": 15860 + }, + { + "action_loss": 0.006589151918888092, + "epoch": 14.262589928057555, + "step": 15860 + }, + { + "epoch": 14.262589928057555, + "step": 15860, + "torque_loss": 0.11835622787475586 + }, + { + "epoch": 14.262589928057555, + "force_loss": 0.0061923484317958355, + "step": 15860 + }, + { + "epoch": 14.27158273381295, + "grad_norm": 0.48784953355789185, + "learning_rate": 8.794143808942872e-05, + "loss": 0.0244, + "step": 15870 + }, + { + "action_loss": 0.010280195623636246, + "epoch": 14.27158273381295, + "step": 15870 + }, + { + "epoch": 14.27158273381295, + "step": 15870, + "torque_loss": 0.1272284835577011 + }, + { + "epoch": 14.27158273381295, + "force_loss": 0.0068427324295043945, + "step": 15870 + }, + { + "epoch": 14.280575539568344, + "grad_norm": 0.5823810696601868, + "learning_rate": 8.792348418303296e-05, + "loss": 0.0257, + "step": 15880 + }, + { + "action_loss": 0.028448795899748802, + "epoch": 14.280575539568344, + "step": 15880 + }, + { + "epoch": 14.280575539568344, + "step": 15880, + "torque_loss": 0.16948986053466797 + }, + { + "epoch": 14.280575539568344, + "force_loss": 0.02727888710796833, + "step": 15880 + }, + { + "epoch": 14.28956834532374, + "grad_norm": 0.7188531756401062, + "learning_rate": 8.790551875648398e-05, + "loss": 0.0299, + "step": 15890 + }, + { + "action_loss": 0.019151948392391205, + "epoch": 14.28956834532374, + "step": 15890 + }, + { + "epoch": 14.28956834532374, + "step": 15890, + "torque_loss": 0.12116354703903198 + }, + { + "epoch": 14.28956834532374, + "force_loss": 0.020395763218402863, + "step": 15890 + }, + { + "epoch": 14.298561151079136, + "grad_norm": 0.4301486015319824, + "learning_rate": 8.788754181523926e-05, + "loss": 0.0314, + "step": 15900 + }, + { + "action_loss": 0.03381485119462013, + "epoch": 14.298561151079136, + "step": 15900 + }, + { + "epoch": 14.298561151079136, + "step": 15900, + "torque_loss": 0.20251469314098358 + }, + { + "epoch": 14.298561151079136, + "force_loss": 0.049152523279190063, + "step": 15900 + }, + { + "epoch": 14.307553956834532, + "grad_norm": 0.4857807159423828, + "learning_rate": 8.78695533647597e-05, + "loss": 0.03, + "step": 15910 + }, + { + "action_loss": 0.01003540400415659, + "epoch": 14.307553956834532, + "step": 15910 + }, + { + "epoch": 14.307553956834532, + "step": 15910, + "torque_loss": 0.15890617668628693 + }, + { + "epoch": 14.307553956834532, + "force_loss": 0.008113347925245762, + "step": 15910 + }, + { + "epoch": 14.316546762589928, + "grad_norm": 0.5157390236854553, + "learning_rate": 8.785155341050972e-05, + "loss": 0.035, + "step": 15920 + }, + { + "action_loss": 0.012707020156085491, + "epoch": 14.316546762589928, + "step": 15920 + }, + { + "epoch": 14.316546762589928, + "step": 15920, + "torque_loss": 0.15442773699760437 + }, + { + "epoch": 14.316546762589928, + "force_loss": 0.009181453846395016, + "step": 15920 + }, + { + "epoch": 14.325539568345324, + "grad_norm": 0.2399837076663971, + "learning_rate": 8.783354195795721e-05, + "loss": 0.0263, + "step": 15930 + }, + { + "action_loss": 0.012498520314693451, + "epoch": 14.325539568345324, + "step": 15930 + }, + { + "epoch": 14.325539568345324, + "step": 15930, + "torque_loss": 0.11486140638589859 + }, + { + "epoch": 14.325539568345324, + "force_loss": 0.01330383587628603, + "step": 15930 + }, + { + "epoch": 14.33453237410072, + "grad_norm": 0.6676514744758606, + "learning_rate": 8.78155190125736e-05, + "loss": 0.0259, + "step": 15940 + }, + { + "action_loss": 0.00856395810842514, + "epoch": 14.33453237410072, + "step": 15940 + }, + { + "epoch": 14.33453237410072, + "step": 15940, + "torque_loss": 0.1485954225063324 + }, + { + "epoch": 14.33453237410072, + "force_loss": 0.010877604596316814, + "step": 15940 + }, + { + "epoch": 14.343525179856115, + "grad_norm": 0.45051026344299316, + "learning_rate": 8.779748457983378e-05, + "loss": 0.0308, + "step": 15950 + }, + { + "action_loss": 0.0058227889239788055, + "epoch": 14.343525179856115, + "step": 15950 + }, + { + "epoch": 14.343525179856115, + "step": 15950, + "torque_loss": 0.1357569545507431 + }, + { + "epoch": 14.343525179856115, + "force_loss": 0.005941580981016159, + "step": 15950 + }, + { + "epoch": 14.352517985611511, + "grad_norm": 0.3117270767688751, + "learning_rate": 8.777943866521612e-05, + "loss": 0.0343, + "step": 15960 + }, + { + "action_loss": 0.007253460586071014, + "epoch": 14.352517985611511, + "step": 15960 + }, + { + "epoch": 14.352517985611511, + "step": 15960, + "torque_loss": 0.15881727635860443 + }, + { + "epoch": 14.352517985611511, + "force_loss": 0.006982345134019852, + "step": 15960 + }, + { + "epoch": 14.361510791366907, + "grad_norm": 0.5149706602096558, + "learning_rate": 8.77613812742025e-05, + "loss": 0.0226, + "step": 15970 + }, + { + "action_loss": 0.010725936852395535, + "epoch": 14.361510791366907, + "step": 15970 + }, + { + "epoch": 14.361510791366907, + "step": 15970, + "torque_loss": 0.13262148201465607 + }, + { + "epoch": 14.361510791366907, + "force_loss": 0.009241418913006783, + "step": 15970 + }, + { + "epoch": 14.370503597122303, + "grad_norm": 0.7514430284500122, + "learning_rate": 8.774331241227829e-05, + "loss": 0.0296, + "step": 15980 + }, + { + "action_loss": 0.010539411567151546, + "epoch": 14.370503597122303, + "step": 15980 + }, + { + "epoch": 14.370503597122303, + "step": 15980, + "torque_loss": 0.12404807657003403 + }, + { + "epoch": 14.370503597122303, + "force_loss": 0.013775442726910114, + "step": 15980 + }, + { + "epoch": 14.379496402877697, + "grad_norm": 0.29295358061790466, + "learning_rate": 8.772523208493232e-05, + "loss": 0.0228, + "step": 15990 + }, + { + "action_loss": 0.014459223486483097, + "epoch": 14.379496402877697, + "step": 15990 + }, + { + "epoch": 14.379496402877697, + "step": 15990, + "torque_loss": 0.1487133502960205 + }, + { + "epoch": 14.379496402877697, + "force_loss": 0.009827100671827793, + "step": 15990 + }, + { + "epoch": 14.388489208633093, + "grad_norm": 0.6131527423858643, + "learning_rate": 8.770714029765692e-05, + "loss": 0.0311, + "step": 16000 + }, + { + "action_loss": 0.015181586146354675, + "epoch": 14.388489208633093, + "step": 16000 + }, + { + "epoch": 14.388489208633093, + "step": 16000, + "torque_loss": 0.14069007337093353 + }, + { + "epoch": 14.388489208633093, + "force_loss": 0.011419035494327545, + "step": 16000 + }, + { + "epoch": 14.397482014388489, + "grad_norm": 0.4668498933315277, + "learning_rate": 8.768903705594789e-05, + "loss": 0.0303, + "step": 16010 + }, + { + "action_loss": 0.01304984837770462, + "epoch": 14.397482014388489, + "step": 16010 + }, + { + "epoch": 14.397482014388489, + "step": 16010, + "torque_loss": 0.16862152516841888 + }, + { + "epoch": 14.397482014388489, + "force_loss": 0.011156275868415833, + "step": 16010 + }, + { + "epoch": 14.406474820143885, + "grad_norm": 0.44027066230773926, + "learning_rate": 8.767092236530453e-05, + "loss": 0.0277, + "step": 16020 + }, + { + "action_loss": 0.016446055844426155, + "epoch": 14.406474820143885, + "step": 16020 + }, + { + "epoch": 14.406474820143885, + "step": 16020, + "torque_loss": 0.14097067713737488 + }, + { + "epoch": 14.406474820143885, + "force_loss": 0.016288094222545624, + "step": 16020 + }, + { + "epoch": 14.41546762589928, + "grad_norm": 0.6568413376808167, + "learning_rate": 8.76527962312296e-05, + "loss": 0.031, + "step": 16030 + }, + { + "action_loss": 0.006981673184782267, + "epoch": 14.41546762589928, + "step": 16030 + }, + { + "epoch": 14.41546762589928, + "step": 16030, + "torque_loss": 0.12521032989025116 + }, + { + "epoch": 14.41546762589928, + "force_loss": 0.006815902888774872, + "step": 16030 + }, + { + "epoch": 14.424460431654676, + "grad_norm": 0.33631667494773865, + "learning_rate": 8.763465865922934e-05, + "loss": 0.024, + "step": 16040 + }, + { + "action_loss": 0.031969260424375534, + "epoch": 14.424460431654676, + "step": 16040 + }, + { + "epoch": 14.424460431654676, + "step": 16040, + "torque_loss": 0.1744951754808426 + }, + { + "epoch": 14.424460431654676, + "force_loss": 0.035487834364175797, + "step": 16040 + }, + { + "epoch": 14.433453237410072, + "grad_norm": 0.5534899234771729, + "learning_rate": 8.761650965481347e-05, + "loss": 0.0299, + "step": 16050 + }, + { + "action_loss": 0.011798910796642303, + "epoch": 14.433453237410072, + "step": 16050 + }, + { + "epoch": 14.433453237410072, + "step": 16050, + "torque_loss": 0.1327945739030838 + }, + { + "epoch": 14.433453237410072, + "force_loss": 0.007753260433673859, + "step": 16050 + }, + { + "epoch": 14.442446043165468, + "grad_norm": 0.4077386260032654, + "learning_rate": 8.759834922349516e-05, + "loss": 0.0248, + "step": 16060 + }, + { + "action_loss": 0.006675384473055601, + "epoch": 14.442446043165468, + "step": 16060 + }, + { + "epoch": 14.442446043165468, + "step": 16060, + "torque_loss": 0.16668426990509033 + }, + { + "epoch": 14.442446043165468, + "force_loss": 0.0039557721465826035, + "step": 16060 + }, + { + "epoch": 14.451438848920864, + "grad_norm": 0.3269275426864624, + "learning_rate": 8.758017737079108e-05, + "loss": 0.0261, + "step": 16070 + }, + { + "action_loss": 0.007361941039562225, + "epoch": 14.451438848920864, + "step": 16070 + }, + { + "epoch": 14.451438848920864, + "step": 16070, + "torque_loss": 0.11449036747217178 + }, + { + "epoch": 14.451438848920864, + "force_loss": 0.005952224135398865, + "step": 16070 + }, + { + "epoch": 14.46043165467626, + "grad_norm": 0.5325619578361511, + "learning_rate": 8.756199410222137e-05, + "loss": 0.0205, + "step": 16080 + }, + { + "action_loss": 0.0062446617521345615, + "epoch": 14.46043165467626, + "step": 16080 + }, + { + "epoch": 14.46043165467626, + "step": 16080, + "torque_loss": 0.14131304621696472 + }, + { + "epoch": 14.46043165467626, + "force_loss": 0.0054467604495584965, + "step": 16080 + }, + { + "epoch": 14.469424460431656, + "grad_norm": 0.5006517767906189, + "learning_rate": 8.754379942330963e-05, + "loss": 0.0241, + "step": 16090 + }, + { + "action_loss": 0.006374505814164877, + "epoch": 14.469424460431656, + "step": 16090 + }, + { + "epoch": 14.469424460431656, + "step": 16090, + "torque_loss": 0.14624978601932526 + }, + { + "epoch": 14.469424460431656, + "force_loss": 0.005361600313335657, + "step": 16090 + }, + { + "epoch": 14.47841726618705, + "grad_norm": 0.2686760127544403, + "learning_rate": 8.75255933395829e-05, + "loss": 0.0224, + "step": 16100 + }, + { + "action_loss": 0.01720951683819294, + "epoch": 14.47841726618705, + "step": 16100 + }, + { + "epoch": 14.47841726618705, + "step": 16100, + "torque_loss": 0.16719822585582733 + }, + { + "epoch": 14.47841726618705, + "force_loss": 0.014178097248077393, + "step": 16100 + }, + { + "epoch": 14.487410071942445, + "grad_norm": 0.4230107069015503, + "learning_rate": 8.750737585657171e-05, + "loss": 0.0265, + "step": 16110 + }, + { + "action_loss": 0.006234128028154373, + "epoch": 14.487410071942445, + "step": 16110 + }, + { + "epoch": 14.487410071942445, + "step": 16110, + "torque_loss": 0.10787993669509888 + }, + { + "epoch": 14.487410071942445, + "force_loss": 0.0037304898723959923, + "step": 16110 + }, + { + "epoch": 14.496402877697841, + "grad_norm": 0.6370156407356262, + "learning_rate": 8.748914697981008e-05, + "loss": 0.0247, + "step": 16120 + }, + { + "action_loss": 0.008517253212630749, + "epoch": 14.496402877697841, + "step": 16120 + }, + { + "epoch": 14.496402877697841, + "step": 16120, + "torque_loss": 0.08847641944885254 + }, + { + "epoch": 14.496402877697841, + "force_loss": 0.0049628303386271, + "step": 16120 + }, + { + "epoch": 14.505395683453237, + "grad_norm": 0.7003411650657654, + "learning_rate": 8.747090671483542e-05, + "loss": 0.0249, + "step": 16130 + }, + { + "action_loss": 0.002361086430028081, + "epoch": 14.505395683453237, + "step": 16130 + }, + { + "epoch": 14.505395683453237, + "step": 16130, + "torque_loss": 0.07902330905199051 + }, + { + "epoch": 14.505395683453237, + "force_loss": 0.004729801788926125, + "step": 16130 + }, + { + "epoch": 14.514388489208633, + "grad_norm": 0.34488117694854736, + "learning_rate": 8.745265506718869e-05, + "loss": 0.019, + "step": 16140 + }, + { + "action_loss": 0.004949633032083511, + "epoch": 14.514388489208633, + "step": 16140 + }, + { + "epoch": 14.514388489208633, + "step": 16140, + "torque_loss": 0.1301463097333908 + }, + { + "epoch": 14.514388489208633, + "force_loss": 0.006746729835867882, + "step": 16140 + }, + { + "epoch": 14.523381294964029, + "grad_norm": 0.8607816696166992, + "learning_rate": 8.74343920424142e-05, + "loss": 0.0245, + "step": 16150 + }, + { + "action_loss": 0.009195582009851933, + "epoch": 14.523381294964029, + "step": 16150 + }, + { + "epoch": 14.523381294964029, + "step": 16150, + "torque_loss": 0.12722842395305634 + }, + { + "epoch": 14.523381294964029, + "force_loss": 0.01190421637147665, + "step": 16150 + }, + { + "epoch": 14.532374100719425, + "grad_norm": 0.46893593668937683, + "learning_rate": 8.741611764605982e-05, + "loss": 0.0236, + "step": 16160 + }, + { + "action_loss": 0.005657311528921127, + "epoch": 14.532374100719425, + "step": 16160 + }, + { + "epoch": 14.532374100719425, + "step": 16160, + "torque_loss": 0.1434476524591446 + }, + { + "epoch": 14.532374100719425, + "force_loss": 0.005069083068519831, + "step": 16160 + }, + { + "epoch": 14.54136690647482, + "grad_norm": 0.28299471735954285, + "learning_rate": 8.739783188367682e-05, + "loss": 0.0245, + "step": 16170 + }, + { + "action_loss": 0.005101411137729883, + "epoch": 14.54136690647482, + "step": 16170 + }, + { + "epoch": 14.54136690647482, + "step": 16170, + "torque_loss": 0.11687228083610535 + }, + { + "epoch": 14.54136690647482, + "force_loss": 0.004526472184807062, + "step": 16170 + }, + { + "epoch": 14.550359712230216, + "grad_norm": 0.5864328145980835, + "learning_rate": 8.737953476081991e-05, + "loss": 0.0236, + "step": 16180 + }, + { + "action_loss": 0.00856483168900013, + "epoch": 14.550359712230216, + "step": 16180 + }, + { + "epoch": 14.550359712230216, + "step": 16180, + "torque_loss": 0.13133420050144196 + }, + { + "epoch": 14.550359712230216, + "force_loss": 0.009303194470703602, + "step": 16180 + }, + { + "epoch": 14.559352517985612, + "grad_norm": 0.36528876423835754, + "learning_rate": 8.73612262830473e-05, + "loss": 0.0233, + "step": 16190 + }, + { + "action_loss": 0.0048843990080058575, + "epoch": 14.559352517985612, + "step": 16190 + }, + { + "epoch": 14.559352517985612, + "step": 16190, + "torque_loss": 0.10311051458120346 + }, + { + "epoch": 14.559352517985612, + "force_loss": 0.004061442334204912, + "step": 16190 + }, + { + "epoch": 14.568345323741006, + "grad_norm": 0.362733393907547, + "learning_rate": 8.734290645592061e-05, + "loss": 0.0198, + "step": 16200 + }, + { + "action_loss": 0.00472608907148242, + "epoch": 14.568345323741006, + "step": 16200 + }, + { + "epoch": 14.568345323741006, + "step": 16200, + "torque_loss": 0.16307905316352844 + }, + { + "epoch": 14.568345323741006, + "force_loss": 0.0038498062640428543, + "step": 16200 + }, + { + "epoch": 14.577338129496402, + "grad_norm": 0.6847972273826599, + "learning_rate": 8.732457528500493e-05, + "loss": 0.0233, + "step": 16210 + }, + { + "action_loss": 0.006037202198058367, + "epoch": 14.577338129496402, + "step": 16210 + }, + { + "epoch": 14.577338129496402, + "step": 16210, + "torque_loss": 0.18439090251922607 + }, + { + "epoch": 14.577338129496402, + "force_loss": 0.0037185444962233305, + "step": 16210 + }, + { + "epoch": 14.586330935251798, + "grad_norm": 0.578105092048645, + "learning_rate": 8.730623277586875e-05, + "loss": 0.0245, + "step": 16220 + }, + { + "action_loss": 0.009703953750431538, + "epoch": 14.586330935251798, + "step": 16220 + }, + { + "epoch": 14.586330935251798, + "step": 16220, + "torque_loss": 0.11771911382675171 + }, + { + "epoch": 14.586330935251798, + "force_loss": 0.008038971573114395, + "step": 16220 + }, + { + "epoch": 14.595323741007194, + "grad_norm": 0.3750212490558624, + "learning_rate": 8.72878789340841e-05, + "loss": 0.0217, + "step": 16230 + }, + { + "action_loss": 0.00477495975792408, + "epoch": 14.595323741007194, + "step": 16230 + }, + { + "epoch": 14.595323741007194, + "step": 16230, + "torque_loss": 0.11629226803779602 + }, + { + "epoch": 14.595323741007194, + "force_loss": 0.0035047137644141912, + "step": 16230 + }, + { + "epoch": 14.60431654676259, + "grad_norm": 0.31527113914489746, + "learning_rate": 8.726951376522635e-05, + "loss": 0.0256, + "step": 16240 + }, + { + "action_loss": 0.007542117964476347, + "epoch": 14.60431654676259, + "step": 16240 + }, + { + "epoch": 14.60431654676259, + "step": 16240, + "torque_loss": 0.12231564521789551 + }, + { + "epoch": 14.60431654676259, + "force_loss": 0.005131407640874386, + "step": 16240 + }, + { + "epoch": 14.613309352517986, + "grad_norm": 0.4643120765686035, + "learning_rate": 8.725113727487435e-05, + "loss": 0.0223, + "step": 16250 + }, + { + "action_loss": 0.01127786934375763, + "epoch": 14.613309352517986, + "step": 16250 + }, + { + "epoch": 14.613309352517986, + "step": 16250, + "torque_loss": 0.13514098525047302 + }, + { + "epoch": 14.613309352517986, + "force_loss": 0.007229645270854235, + "step": 16250 + }, + { + "epoch": 14.622302158273381, + "grad_norm": 0.33437561988830566, + "learning_rate": 8.723274946861042e-05, + "loss": 0.0215, + "step": 16260 + }, + { + "action_loss": 0.004695317707955837, + "epoch": 14.622302158273381, + "step": 16260 + }, + { + "epoch": 14.622302158273381, + "step": 16260, + "torque_loss": 0.11406657844781876 + }, + { + "epoch": 14.622302158273381, + "force_loss": 0.007661603856831789, + "step": 16260 + }, + { + "epoch": 14.631294964028777, + "grad_norm": 0.7634636163711548, + "learning_rate": 8.721435035202026e-05, + "loss": 0.0232, + "step": 16270 + }, + { + "action_loss": 0.015687547624111176, + "epoch": 14.631294964028777, + "step": 16270 + }, + { + "epoch": 14.631294964028777, + "step": 16270, + "torque_loss": 0.16938884556293488 + }, + { + "epoch": 14.631294964028777, + "force_loss": 0.02431638538837433, + "step": 16270 + }, + { + "epoch": 14.640287769784173, + "grad_norm": 0.38326090574264526, + "learning_rate": 8.719593993069306e-05, + "loss": 0.0304, + "step": 16280 + }, + { + "action_loss": 0.00968262366950512, + "epoch": 14.640287769784173, + "step": 16280 + }, + { + "epoch": 14.640287769784173, + "step": 16280, + "torque_loss": 0.10673090070486069 + }, + { + "epoch": 14.640287769784173, + "force_loss": 0.01634608395397663, + "step": 16280 + }, + { + "epoch": 14.649280575539569, + "grad_norm": 0.7706455588340759, + "learning_rate": 8.717751821022139e-05, + "loss": 0.0256, + "step": 16290 + }, + { + "action_loss": 0.008624917827546597, + "epoch": 14.649280575539569, + "step": 16290 + }, + { + "epoch": 14.649280575539569, + "step": 16290, + "torque_loss": 0.1214180588722229 + }, + { + "epoch": 14.649280575539569, + "force_loss": 0.011329042725265026, + "step": 16290 + }, + { + "epoch": 14.658273381294965, + "grad_norm": 0.42331382632255554, + "learning_rate": 8.715908519620134e-05, + "loss": 0.0255, + "step": 16300 + }, + { + "action_loss": 0.023016376420855522, + "epoch": 14.658273381294965, + "step": 16300 + }, + { + "epoch": 14.658273381294965, + "step": 16300, + "torque_loss": 0.15137328207492828 + }, + { + "epoch": 14.658273381294965, + "force_loss": 0.021671846508979797, + "step": 16300 + }, + { + "epoch": 14.667266187050359, + "grad_norm": 0.33794134855270386, + "learning_rate": 8.71406408942323e-05, + "loss": 0.0262, + "step": 16310 + }, + { + "action_loss": 0.010126424953341484, + "epoch": 14.667266187050359, + "step": 16310 + }, + { + "epoch": 14.667266187050359, + "step": 16310, + "torque_loss": 0.10819221287965775 + }, + { + "epoch": 14.667266187050359, + "force_loss": 0.007030328270047903, + "step": 16310 + }, + { + "epoch": 14.676258992805755, + "grad_norm": 0.2426094263792038, + "learning_rate": 8.712218530991723e-05, + "loss": 0.0211, + "step": 16320 + }, + { + "action_loss": 0.011374286375939846, + "epoch": 14.676258992805755, + "step": 16320 + }, + { + "epoch": 14.676258992805755, + "step": 16320, + "torque_loss": 0.1683548241853714 + }, + { + "epoch": 14.676258992805755, + "force_loss": 0.013959120959043503, + "step": 16320 + }, + { + "epoch": 14.68525179856115, + "grad_norm": 0.7456856966018677, + "learning_rate": 8.710371844886241e-05, + "loss": 0.0226, + "step": 16330 + }, + { + "action_loss": 0.014729071408510208, + "epoch": 14.68525179856115, + "step": 16330 + }, + { + "epoch": 14.68525179856115, + "step": 16330, + "torque_loss": 0.1768568903207779 + }, + { + "epoch": 14.68525179856115, + "force_loss": 0.01182624977082014, + "step": 16330 + }, + { + "epoch": 14.694244604316546, + "grad_norm": 0.4690254032611847, + "learning_rate": 8.708524031667758e-05, + "loss": 0.0238, + "step": 16340 + }, + { + "action_loss": 0.015546525828540325, + "epoch": 14.694244604316546, + "step": 16340 + }, + { + "epoch": 14.694244604316546, + "step": 16340, + "torque_loss": 0.14213943481445312 + }, + { + "epoch": 14.694244604316546, + "force_loss": 0.01066828053444624, + "step": 16340 + }, + { + "epoch": 14.703237410071942, + "grad_norm": 0.4124860465526581, + "learning_rate": 8.706675091897592e-05, + "loss": 0.0273, + "step": 16350 + }, + { + "action_loss": 0.008112015202641487, + "epoch": 14.703237410071942, + "step": 16350 + }, + { + "epoch": 14.703237410071942, + "step": 16350, + "torque_loss": 0.1799161434173584 + }, + { + "epoch": 14.703237410071942, + "force_loss": 0.004709279630333185, + "step": 16350 + }, + { + "epoch": 14.712230215827338, + "grad_norm": 0.2928280532360077, + "learning_rate": 8.704825026137404e-05, + "loss": 0.0258, + "step": 16360 + }, + { + "action_loss": 0.011181623674929142, + "epoch": 14.712230215827338, + "step": 16360 + }, + { + "epoch": 14.712230215827338, + "step": 16360, + "torque_loss": 0.15154887735843658 + }, + { + "epoch": 14.712230215827338, + "force_loss": 0.011326193809509277, + "step": 16360 + }, + { + "epoch": 14.721223021582734, + "grad_norm": 0.2906883955001831, + "learning_rate": 8.702973834949192e-05, + "loss": 0.025, + "step": 16370 + }, + { + "action_loss": 0.010558349080383778, + "epoch": 14.721223021582734, + "step": 16370 + }, + { + "epoch": 14.721223021582734, + "step": 16370, + "torque_loss": 0.13773809373378754 + }, + { + "epoch": 14.721223021582734, + "force_loss": 0.011783388443291187, + "step": 16370 + }, + { + "epoch": 14.73021582733813, + "grad_norm": 0.7011801600456238, + "learning_rate": 8.701121518895301e-05, + "loss": 0.0252, + "step": 16380 + }, + { + "action_loss": 0.011409520171582699, + "epoch": 14.73021582733813, + "step": 16380 + }, + { + "epoch": 14.73021582733813, + "step": 16380, + "torque_loss": 0.15905313193798065 + }, + { + "epoch": 14.73021582733813, + "force_loss": 0.017366239801049232, + "step": 16380 + }, + { + "epoch": 14.739208633093526, + "grad_norm": 0.4976460635662079, + "learning_rate": 8.699268078538414e-05, + "loss": 0.0277, + "step": 16390 + }, + { + "action_loss": 0.00357674527913332, + "epoch": 14.739208633093526, + "step": 16390 + }, + { + "epoch": 14.739208633093526, + "step": 16390, + "torque_loss": 0.1501704305410385 + }, + { + "epoch": 14.739208633093526, + "force_loss": 0.0039963372983038425, + "step": 16390 + }, + { + "epoch": 14.748201438848922, + "grad_norm": 0.4516439437866211, + "learning_rate": 8.69741351444156e-05, + "loss": 0.0223, + "step": 16400 + }, + { + "action_loss": 0.011151435784995556, + "epoch": 14.748201438848922, + "step": 16400 + }, + { + "epoch": 14.748201438848922, + "step": 16400, + "torque_loss": 0.1500273495912552 + }, + { + "epoch": 14.748201438848922, + "force_loss": 0.010211797431111336, + "step": 16400 + }, + { + "epoch": 14.757194244604317, + "grad_norm": 0.42409786581993103, + "learning_rate": 8.695557827168101e-05, + "loss": 0.022, + "step": 16410 + }, + { + "action_loss": 0.007657613139599562, + "epoch": 14.757194244604317, + "step": 16410 + }, + { + "epoch": 14.757194244604317, + "step": 16410, + "torque_loss": 0.1123104989528656 + }, + { + "epoch": 14.757194244604317, + "force_loss": 0.00562768941745162, + "step": 16410 + }, + { + "epoch": 14.766187050359711, + "grad_norm": 0.3427654504776001, + "learning_rate": 8.693701017281753e-05, + "loss": 0.0254, + "step": 16420 + }, + { + "action_loss": 0.013873311690986156, + "epoch": 14.766187050359711, + "step": 16420 + }, + { + "epoch": 14.766187050359711, + "step": 16420, + "torque_loss": 0.17615483701229095 + }, + { + "epoch": 14.766187050359711, + "force_loss": 0.024800434708595276, + "step": 16420 + }, + { + "epoch": 14.775179856115107, + "grad_norm": 1.050300121307373, + "learning_rate": 8.691843085346563e-05, + "loss": 0.0283, + "step": 16430 + }, + { + "action_loss": 0.006628282833844423, + "epoch": 14.775179856115107, + "step": 16430 + }, + { + "epoch": 14.775179856115107, + "step": 16430, + "torque_loss": 0.09419536590576172 + }, + { + "epoch": 14.775179856115107, + "force_loss": 0.006657319143414497, + "step": 16430 + }, + { + "epoch": 14.784172661870503, + "grad_norm": 0.5347671508789062, + "learning_rate": 8.689984031926919e-05, + "loss": 0.0226, + "step": 16440 + }, + { + "action_loss": 0.008300053887069225, + "epoch": 14.784172661870503, + "step": 16440 + }, + { + "epoch": 14.784172661870503, + "step": 16440, + "torque_loss": 0.14323776960372925 + }, + { + "epoch": 14.784172661870503, + "force_loss": 0.007797125726938248, + "step": 16440 + }, + { + "epoch": 14.793165467625899, + "grad_norm": 0.32349851727485657, + "learning_rate": 8.688123857587555e-05, + "loss": 0.0242, + "step": 16450 + }, + { + "action_loss": 0.0037973488215357065, + "epoch": 14.793165467625899, + "step": 16450 + }, + { + "epoch": 14.793165467625899, + "step": 16450, + "torque_loss": 0.12730592489242554 + }, + { + "epoch": 14.793165467625899, + "force_loss": 0.0064923143945634365, + "step": 16450 + }, + { + "epoch": 14.802158273381295, + "grad_norm": 0.6720380783081055, + "learning_rate": 8.686262562893544e-05, + "loss": 0.0255, + "step": 16460 + }, + { + "action_loss": 0.008057990111410618, + "epoch": 14.802158273381295, + "step": 16460 + }, + { + "epoch": 14.802158273381295, + "step": 16460, + "torque_loss": 0.120577372610569 + }, + { + "epoch": 14.802158273381295, + "force_loss": 0.007687706965953112, + "step": 16460 + }, + { + "epoch": 14.81115107913669, + "grad_norm": 0.25501903891563416, + "learning_rate": 8.684400148410294e-05, + "loss": 0.0212, + "step": 16470 + }, + { + "action_loss": 0.0035678327549248934, + "epoch": 14.81115107913669, + "step": 16470 + }, + { + "epoch": 14.81115107913669, + "step": 16470, + "torque_loss": 0.12409751862287521 + }, + { + "epoch": 14.81115107913669, + "force_loss": 0.006432787049561739, + "step": 16470 + }, + { + "epoch": 14.820143884892087, + "grad_norm": 0.22578421235084534, + "learning_rate": 8.682536614703562e-05, + "loss": 0.0227, + "step": 16480 + }, + { + "action_loss": 0.008843987248837948, + "epoch": 14.820143884892087, + "step": 16480 + }, + { + "epoch": 14.820143884892087, + "step": 16480, + "torque_loss": 0.1574493944644928 + }, + { + "epoch": 14.820143884892087, + "force_loss": 0.011142074130475521, + "step": 16480 + }, + { + "epoch": 14.829136690647482, + "grad_norm": 0.5097362399101257, + "learning_rate": 8.680671962339437e-05, + "loss": 0.0224, + "step": 16490 + }, + { + "action_loss": 0.008273611776530743, + "epoch": 14.829136690647482, + "step": 16490 + }, + { + "epoch": 14.829136690647482, + "step": 16490, + "torque_loss": 0.1288774013519287 + }, + { + "epoch": 14.829136690647482, + "force_loss": 0.010392571799457073, + "step": 16490 + }, + { + "epoch": 14.838129496402878, + "grad_norm": 0.45725390315055847, + "learning_rate": 8.678806191884352e-05, + "loss": 0.0253, + "step": 16500 + }, + { + "action_loss": 0.008720788173377514, + "epoch": 14.838129496402878, + "step": 16500 + }, + { + "epoch": 14.838129496402878, + "step": 16500, + "torque_loss": 0.16255836188793182 + }, + { + "epoch": 14.838129496402878, + "force_loss": 0.008494697511196136, + "step": 16500 + }, + { + "epoch": 14.847122302158274, + "grad_norm": 0.21858061850070953, + "learning_rate": 8.67693930390508e-05, + "loss": 0.0217, + "step": 16510 + }, + { + "action_loss": 0.028219209983944893, + "epoch": 14.847122302158274, + "step": 16510 + }, + { + "epoch": 14.847122302158274, + "step": 16510, + "torque_loss": 0.11343064159154892 + }, + { + "epoch": 14.847122302158274, + "force_loss": 0.021377915516495705, + "step": 16510 + }, + { + "epoch": 14.85611510791367, + "grad_norm": 0.32938069105148315, + "learning_rate": 8.67507129896873e-05, + "loss": 0.0216, + "step": 16520 + }, + { + "action_loss": 0.013413523323833942, + "epoch": 14.85611510791367, + "step": 16520 + }, + { + "epoch": 14.85611510791367, + "step": 16520, + "torque_loss": 0.1496639847755432 + }, + { + "epoch": 14.85611510791367, + "force_loss": 0.009586935862898827, + "step": 16520 + }, + { + "epoch": 14.865107913669064, + "grad_norm": 0.5014426708221436, + "learning_rate": 8.673202177642757e-05, + "loss": 0.0303, + "step": 16530 + }, + { + "action_loss": 0.014454229734838009, + "epoch": 14.865107913669064, + "step": 16530 + }, + { + "epoch": 14.865107913669064, + "step": 16530, + "torque_loss": 0.19893985986709595 + }, + { + "epoch": 14.865107913669064, + "force_loss": 0.01378584373742342, + "step": 16530 + }, + { + "epoch": 14.87410071942446, + "grad_norm": 0.38330456614494324, + "learning_rate": 8.671331940494945e-05, + "loss": 0.0331, + "step": 16540 + }, + { + "action_loss": 0.013897606171667576, + "epoch": 14.87410071942446, + "step": 16540 + }, + { + "epoch": 14.87410071942446, + "step": 16540, + "torque_loss": 0.1659325212240219 + }, + { + "epoch": 14.87410071942446, + "force_loss": 0.013821497559547424, + "step": 16540 + }, + { + "epoch": 14.883093525179856, + "grad_norm": 0.34845295548439026, + "learning_rate": 8.669460588093427e-05, + "loss": 0.0273, + "step": 16550 + }, + { + "action_loss": 0.006512706633657217, + "epoch": 14.883093525179856, + "step": 16550 + }, + { + "epoch": 14.883093525179856, + "step": 16550, + "torque_loss": 0.17432083189487457 + }, + { + "epoch": 14.883093525179856, + "force_loss": 0.007318615447729826, + "step": 16550 + }, + { + "epoch": 14.892086330935252, + "grad_norm": 0.277150958776474, + "learning_rate": 8.667588121006667e-05, + "loss": 0.0231, + "step": 16560 + }, + { + "action_loss": 0.030604219064116478, + "epoch": 14.892086330935252, + "step": 16560 + }, + { + "epoch": 14.892086330935252, + "step": 16560, + "torque_loss": 0.12815971672534943 + }, + { + "epoch": 14.892086330935252, + "force_loss": 0.040705639868974686, + "step": 16560 + }, + { + "epoch": 14.901079136690647, + "grad_norm": 0.4285851716995239, + "learning_rate": 8.665714539803475e-05, + "loss": 0.0293, + "step": 16570 + }, + { + "action_loss": 0.006151033565402031, + "epoch": 14.901079136690647, + "step": 16570 + }, + { + "epoch": 14.901079136690647, + "step": 16570, + "torque_loss": 0.09936177730560303 + }, + { + "epoch": 14.901079136690647, + "force_loss": 0.0033768664579838514, + "step": 16570 + }, + { + "epoch": 14.910071942446043, + "grad_norm": 0.7274307012557983, + "learning_rate": 8.663839845052993e-05, + "loss": 0.0211, + "step": 16580 + }, + { + "action_loss": 0.009478853084146976, + "epoch": 14.910071942446043, + "step": 16580 + }, + { + "epoch": 14.910071942446043, + "step": 16580, + "torque_loss": 0.149641215801239 + }, + { + "epoch": 14.910071942446043, + "force_loss": 0.0051117464900016785, + "step": 16580 + }, + { + "epoch": 14.91906474820144, + "grad_norm": 0.8926911354064941, + "learning_rate": 8.661964037324703e-05, + "loss": 0.0264, + "step": 16590 + }, + { + "action_loss": 0.018504738807678223, + "epoch": 14.91906474820144, + "step": 16590 + }, + { + "epoch": 14.91906474820144, + "step": 16590, + "torque_loss": 0.12704424560070038 + }, + { + "epoch": 14.91906474820144, + "force_loss": 0.021473711356520653, + "step": 16590 + }, + { + "epoch": 14.928057553956835, + "grad_norm": 0.3026418089866638, + "learning_rate": 8.660087117188427e-05, + "loss": 0.0206, + "step": 16600 + }, + { + "action_loss": 0.011007758788764477, + "epoch": 14.928057553956835, + "step": 16600 + }, + { + "epoch": 14.928057553956835, + "step": 16600, + "torque_loss": 0.14828522503376007 + }, + { + "epoch": 14.928057553956835, + "force_loss": 0.008979643695056438, + "step": 16600 + }, + { + "epoch": 14.93705035971223, + "grad_norm": 0.34866297245025635, + "learning_rate": 8.658209085214325e-05, + "loss": 0.0235, + "step": 16610 + }, + { + "action_loss": 0.011805638670921326, + "epoch": 14.93705035971223, + "step": 16610 + }, + { + "epoch": 14.93705035971223, + "step": 16610, + "torque_loss": 0.16651158034801483 + }, + { + "epoch": 14.93705035971223, + "force_loss": 0.010302189737558365, + "step": 16610 + }, + { + "epoch": 14.946043165467627, + "grad_norm": 0.43696945905685425, + "learning_rate": 8.656329941972891e-05, + "loss": 0.0325, + "step": 16620 + }, + { + "action_loss": 0.006582924630492926, + "epoch": 14.946043165467627, + "step": 16620 + }, + { + "epoch": 14.946043165467627, + "step": 16620, + "torque_loss": 0.09430796653032303 + }, + { + "epoch": 14.946043165467627, + "force_loss": 0.006717576179653406, + "step": 16620 + }, + { + "epoch": 14.95503597122302, + "grad_norm": 0.28286707401275635, + "learning_rate": 8.654449688034963e-05, + "loss": 0.026, + "step": 16630 + }, + { + "action_loss": 0.013563995249569416, + "epoch": 14.95503597122302, + "step": 16630 + }, + { + "epoch": 14.95503597122302, + "step": 16630, + "torque_loss": 0.1204179897904396 + }, + { + "epoch": 14.95503597122302, + "force_loss": 0.01628933474421501, + "step": 16630 + }, + { + "epoch": 14.964028776978417, + "grad_norm": 0.27962011098861694, + "learning_rate": 8.652568323971706e-05, + "loss": 0.0259, + "step": 16640 + }, + { + "action_loss": 0.004835537634789944, + "epoch": 14.964028776978417, + "step": 16640 + }, + { + "epoch": 14.964028776978417, + "step": 16640, + "torque_loss": 0.12375181913375854 + }, + { + "epoch": 14.964028776978417, + "force_loss": 0.007731992285698652, + "step": 16640 + }, + { + "epoch": 14.973021582733812, + "grad_norm": 0.2986242175102234, + "learning_rate": 8.650685850354636e-05, + "loss": 0.0229, + "step": 16650 + }, + { + "action_loss": 0.01162691693753004, + "epoch": 14.973021582733812, + "step": 16650 + }, + { + "epoch": 14.973021582733812, + "step": 16650, + "torque_loss": 0.1142582893371582 + }, + { + "epoch": 14.973021582733812, + "force_loss": 0.01357144396752119, + "step": 16650 + }, + { + "epoch": 14.982014388489208, + "grad_norm": 0.40612348914146423, + "learning_rate": 8.648802267755593e-05, + "loss": 0.0271, + "step": 16660 + }, + { + "action_loss": 0.0030443326104432344, + "epoch": 14.982014388489208, + "step": 16660 + }, + { + "epoch": 14.982014388489208, + "step": 16660, + "torque_loss": 0.10475953668355942 + }, + { + "epoch": 14.982014388489208, + "force_loss": 0.0034353325609117746, + "step": 16660 + }, + { + "epoch": 14.991007194244604, + "grad_norm": 0.3785460889339447, + "learning_rate": 8.646917576746764e-05, + "loss": 0.0258, + "step": 16670 + }, + { + "action_loss": 0.006419685203582048, + "epoch": 14.991007194244604, + "step": 16670 + }, + { + "epoch": 14.991007194244604, + "step": 16670, + "torque_loss": 0.13704830408096313 + }, + { + "epoch": 14.991007194244604, + "force_loss": 0.00422352971509099, + "step": 16670 + }, + { + "epoch": 15.0, + "grad_norm": 0.3477333188056946, + "learning_rate": 8.645031777900666e-05, + "loss": 0.0215, + "step": 16680 + }, + { + "action_loss": 0.009861718863248825, + "epoch": 15.0, + "step": 16680 + }, + { + "epoch": 15.0, + "step": 16680, + "torque_loss": 0.11473336815834045 + }, + { + "epoch": 15.0, + "force_loss": 0.008207684382796288, + "step": 16680 + }, + { + "epoch": 15.008992805755396, + "grad_norm": 0.33410927653312683, + "learning_rate": 8.643144871790154e-05, + "loss": 0.0233, + "step": 16690 + }, + { + "action_loss": 0.005458649713546038, + "epoch": 15.008992805755396, + "step": 16690 + }, + { + "epoch": 15.008992805755396, + "step": 16690, + "torque_loss": 0.1323322057723999 + }, + { + "epoch": 15.008992805755396, + "force_loss": 0.004970806650817394, + "step": 16690 + }, + { + "epoch": 15.017985611510792, + "grad_norm": 0.26669541001319885, + "learning_rate": 8.641256858988424e-05, + "loss": 0.0212, + "step": 16700 + }, + { + "action_loss": 0.005379742477089167, + "epoch": 15.017985611510792, + "step": 16700 + }, + { + "epoch": 15.017985611510792, + "step": 16700, + "torque_loss": 0.096858911216259 + }, + { + "epoch": 15.017985611510792, + "force_loss": 0.010163123719394207, + "step": 16700 + }, + { + "epoch": 15.026978417266188, + "grad_norm": 0.47695598006248474, + "learning_rate": 8.639367740069e-05, + "loss": 0.0228, + "step": 16710 + }, + { + "action_loss": 0.00768280727788806, + "epoch": 15.026978417266188, + "step": 16710 + }, + { + "epoch": 15.026978417266188, + "step": 16710, + "torque_loss": 0.12936653196811676 + }, + { + "epoch": 15.026978417266188, + "force_loss": 0.006900383625179529, + "step": 16710 + }, + { + "epoch": 15.035971223021583, + "grad_norm": 0.4819808900356293, + "learning_rate": 8.63747751560575e-05, + "loss": 0.0229, + "step": 16720 + }, + { + "action_loss": 0.005033209454268217, + "epoch": 15.035971223021583, + "step": 16720 + }, + { + "epoch": 15.035971223021583, + "step": 16720, + "torque_loss": 0.11651767045259476 + }, + { + "epoch": 15.035971223021583, + "force_loss": 0.004356353543698788, + "step": 16720 + }, + { + "epoch": 15.04496402877698, + "grad_norm": 0.3256521224975586, + "learning_rate": 8.635586186172871e-05, + "loss": 0.0209, + "step": 16730 + }, + { + "action_loss": 0.004675617441534996, + "epoch": 15.04496402877698, + "step": 16730 + }, + { + "epoch": 15.04496402877698, + "step": 16730, + "torque_loss": 0.14101998507976532 + }, + { + "epoch": 15.04496402877698, + "force_loss": 0.005581811536103487, + "step": 16730 + }, + { + "epoch": 15.053956834532373, + "grad_norm": 0.4358675479888916, + "learning_rate": 8.633693752344902e-05, + "loss": 0.0209, + "step": 16740 + }, + { + "action_loss": 0.004803153220564127, + "epoch": 15.053956834532373, + "step": 16740 + }, + { + "epoch": 15.053956834532373, + "step": 16740, + "torque_loss": 0.12264394015073776 + }, + { + "epoch": 15.053956834532373, + "force_loss": 0.005168326664716005, + "step": 16740 + }, + { + "epoch": 15.06294964028777, + "grad_norm": 0.6398131847381592, + "learning_rate": 8.631800214696713e-05, + "loss": 0.0214, + "step": 16750 + }, + { + "action_loss": 0.0083686001598835, + "epoch": 15.06294964028777, + "step": 16750 + }, + { + "epoch": 15.06294964028777, + "step": 16750, + "torque_loss": 0.1393813043832779 + }, + { + "epoch": 15.06294964028777, + "force_loss": 0.0058719017542898655, + "step": 16750 + }, + { + "epoch": 15.071942446043165, + "grad_norm": 0.22567465901374817, + "learning_rate": 8.629905573803511e-05, + "loss": 0.0231, + "step": 16760 + }, + { + "action_loss": 0.0068164789117872715, + "epoch": 15.071942446043165, + "step": 16760 + }, + { + "epoch": 15.071942446043165, + "step": 16760, + "torque_loss": 0.11044230312108994 + }, + { + "epoch": 15.071942446043165, + "force_loss": 0.005865610670298338, + "step": 16760 + }, + { + "epoch": 15.08093525179856, + "grad_norm": 0.33949726819992065, + "learning_rate": 8.628009830240839e-05, + "loss": 0.0235, + "step": 16770 + }, + { + "action_loss": 0.0038899772334843874, + "epoch": 15.08093525179856, + "step": 16770 + }, + { + "epoch": 15.08093525179856, + "step": 16770, + "torque_loss": 0.0786958560347557 + }, + { + "epoch": 15.08093525179856, + "force_loss": 0.004531762097030878, + "step": 16770 + }, + { + "epoch": 15.089928057553957, + "grad_norm": 0.2313470095396042, + "learning_rate": 8.626112984584571e-05, + "loss": 0.0236, + "step": 16780 + }, + { + "action_loss": 0.004389610607177019, + "epoch": 15.089928057553957, + "step": 16780 + }, + { + "epoch": 15.089928057553957, + "step": 16780, + "torque_loss": 0.09413108974695206 + }, + { + "epoch": 15.089928057553957, + "force_loss": 0.006620475556701422, + "step": 16780 + }, + { + "epoch": 15.098920863309353, + "grad_norm": 0.28301331400871277, + "learning_rate": 8.62421503741092e-05, + "loss": 0.0233, + "step": 16790 + }, + { + "action_loss": 0.004337919410318136, + "epoch": 15.098920863309353, + "step": 16790 + }, + { + "epoch": 15.098920863309353, + "step": 16790, + "torque_loss": 0.1020972952246666 + }, + { + "epoch": 15.098920863309353, + "force_loss": 0.005252324044704437, + "step": 16790 + }, + { + "epoch": 15.107913669064748, + "grad_norm": 0.19795122742652893, + "learning_rate": 8.622315989296432e-05, + "loss": 0.0183, + "step": 16800 + }, + { + "action_loss": 0.0032609812915325165, + "epoch": 15.107913669064748, + "step": 16800 + }, + { + "epoch": 15.107913669064748, + "step": 16800, + "torque_loss": 0.11267898231744766 + }, + { + "epoch": 15.107913669064748, + "force_loss": 0.003354280022904277, + "step": 16800 + }, + { + "epoch": 15.116906474820144, + "grad_norm": 0.24201449751853943, + "learning_rate": 8.62041584081799e-05, + "loss": 0.0228, + "step": 16810 + }, + { + "action_loss": 0.007624324411153793, + "epoch": 15.116906474820144, + "step": 16810 + }, + { + "epoch": 15.116906474820144, + "step": 16810, + "torque_loss": 0.09145904332399368 + }, + { + "epoch": 15.116906474820144, + "force_loss": 0.011103170923888683, + "step": 16810 + }, + { + "epoch": 15.12589928057554, + "grad_norm": 0.4952791631221771, + "learning_rate": 8.618514592552807e-05, + "loss": 0.0233, + "step": 16820 + }, + { + "action_loss": 0.005998767446726561, + "epoch": 15.12589928057554, + "step": 16820 + }, + { + "epoch": 15.12589928057554, + "step": 16820, + "torque_loss": 0.10756029933691025 + }, + { + "epoch": 15.12589928057554, + "force_loss": 0.006025247275829315, + "step": 16820 + }, + { + "epoch": 15.134892086330936, + "grad_norm": 0.3274683654308319, + "learning_rate": 8.616612245078431e-05, + "loss": 0.023, + "step": 16830 + }, + { + "action_loss": 0.014972631819546223, + "epoch": 15.134892086330936, + "step": 16830 + }, + { + "epoch": 15.134892086330936, + "step": 16830, + "torque_loss": 0.12423815578222275 + }, + { + "epoch": 15.134892086330936, + "force_loss": 0.012586652301251888, + "step": 16830 + }, + { + "epoch": 15.14388489208633, + "grad_norm": 0.2552795708179474, + "learning_rate": 8.614708798972746e-05, + "loss": 0.0223, + "step": 16840 + }, + { + "action_loss": 0.009253703989088535, + "epoch": 15.14388489208633, + "step": 16840 + }, + { + "epoch": 15.14388489208633, + "step": 16840, + "torque_loss": 0.1225009635090828 + }, + { + "epoch": 15.14388489208633, + "force_loss": 0.006945467088371515, + "step": 16840 + }, + { + "epoch": 15.152877697841726, + "grad_norm": 0.402065247297287, + "learning_rate": 8.61280425481397e-05, + "loss": 0.0208, + "step": 16850 + }, + { + "action_loss": 0.013394013047218323, + "epoch": 15.152877697841726, + "step": 16850 + }, + { + "epoch": 15.152877697841726, + "step": 16850, + "torque_loss": 0.14408591389656067 + }, + { + "epoch": 15.152877697841726, + "force_loss": 0.016764340922236443, + "step": 16850 + }, + { + "epoch": 15.161870503597122, + "grad_norm": 0.5309786200523376, + "learning_rate": 8.61089861318065e-05, + "loss": 0.0232, + "step": 16860 + }, + { + "action_loss": 0.006734972819685936, + "epoch": 15.161870503597122, + "step": 16860 + }, + { + "epoch": 15.161870503597122, + "step": 16860, + "torque_loss": 0.11887051910161972 + }, + { + "epoch": 15.161870503597122, + "force_loss": 0.004631218034774065, + "step": 16860 + }, + { + "epoch": 15.170863309352518, + "grad_norm": 0.5137143731117249, + "learning_rate": 8.608991874651673e-05, + "loss": 0.019, + "step": 16870 + }, + { + "action_loss": 0.018816007301211357, + "epoch": 15.170863309352518, + "step": 16870 + }, + { + "epoch": 15.170863309352518, + "step": 16870, + "torque_loss": 0.16703300178050995 + }, + { + "epoch": 15.170863309352518, + "force_loss": 0.010347138158977032, + "step": 16870 + }, + { + "epoch": 15.179856115107913, + "grad_norm": 0.2642015814781189, + "learning_rate": 8.607084039806255e-05, + "loss": 0.0236, + "step": 16880 + }, + { + "action_loss": 0.014908102340996265, + "epoch": 15.179856115107913, + "step": 16880 + }, + { + "epoch": 15.179856115107913, + "step": 16880, + "torque_loss": 0.1228601410984993 + }, + { + "epoch": 15.179856115107913, + "force_loss": 0.01598764955997467, + "step": 16880 + }, + { + "epoch": 15.18884892086331, + "grad_norm": 0.6216331124305725, + "learning_rate": 8.605175109223944e-05, + "loss": 0.0218, + "step": 16890 + }, + { + "action_loss": 0.012915591709315777, + "epoch": 15.18884892086331, + "step": 16890 + }, + { + "epoch": 15.18884892086331, + "step": 16890, + "torque_loss": 0.17278863489627838 + }, + { + "epoch": 15.18884892086331, + "force_loss": 0.01144468691200018, + "step": 16890 + }, + { + "epoch": 15.197841726618705, + "grad_norm": 0.5609726905822754, + "learning_rate": 8.603265083484624e-05, + "loss": 0.022, + "step": 16900 + }, + { + "action_loss": 0.004875303711742163, + "epoch": 15.197841726618705, + "step": 16900 + }, + { + "epoch": 15.197841726618705, + "step": 16900, + "torque_loss": 0.14107267558574677 + }, + { + "epoch": 15.197841726618705, + "force_loss": 0.005135141778737307, + "step": 16900 + }, + { + "epoch": 15.206834532374101, + "grad_norm": 0.18485307693481445, + "learning_rate": 8.60135396316851e-05, + "loss": 0.0204, + "step": 16910 + }, + { + "action_loss": 0.006360320840030909, + "epoch": 15.206834532374101, + "step": 16910 + }, + { + "epoch": 15.206834532374101, + "step": 16910, + "torque_loss": 0.11764228343963623 + }, + { + "epoch": 15.206834532374101, + "force_loss": 0.017634594812989235, + "step": 16910 + }, + { + "epoch": 15.215827338129497, + "grad_norm": 0.4924125075340271, + "learning_rate": 8.599441748856152e-05, + "loss": 0.02, + "step": 16920 + }, + { + "action_loss": 0.011829804629087448, + "epoch": 15.215827338129497, + "step": 16920 + }, + { + "epoch": 15.215827338129497, + "step": 16920, + "torque_loss": 0.12550397217273712 + }, + { + "epoch": 15.215827338129497, + "force_loss": 0.014131136238574982, + "step": 16920 + }, + { + "epoch": 15.224820143884893, + "grad_norm": 0.31032586097717285, + "learning_rate": 8.597528441128427e-05, + "loss": 0.0235, + "step": 16930 + }, + { + "action_loss": 0.004942980129271746, + "epoch": 15.224820143884893, + "step": 16930 + }, + { + "epoch": 15.224820143884893, + "step": 16930, + "torque_loss": 0.09666839987039566 + }, + { + "epoch": 15.224820143884893, + "force_loss": 0.003027847735211253, + "step": 16930 + }, + { + "epoch": 15.233812949640289, + "grad_norm": 0.2096644639968872, + "learning_rate": 8.595614040566549e-05, + "loss": 0.021, + "step": 16940 + }, + { + "action_loss": 0.002870767144486308, + "epoch": 15.233812949640289, + "step": 16940 + }, + { + "epoch": 15.233812949640289, + "step": 16940, + "torque_loss": 0.1073073148727417 + }, + { + "epoch": 15.233812949640289, + "force_loss": 0.003411786863580346, + "step": 16940 + }, + { + "epoch": 15.242805755395683, + "grad_norm": 0.4573443830013275, + "learning_rate": 8.593698547752063e-05, + "loss": 0.0198, + "step": 16950 + }, + { + "action_loss": 0.004562361165881157, + "epoch": 15.242805755395683, + "step": 16950 + }, + { + "epoch": 15.242805755395683, + "step": 16950, + "torque_loss": 0.11304136365652084 + }, + { + "epoch": 15.242805755395683, + "force_loss": 0.006221102550625801, + "step": 16950 + }, + { + "epoch": 15.251798561151078, + "grad_norm": 0.461597740650177, + "learning_rate": 8.591781963266843e-05, + "loss": 0.0349, + "step": 16960 + }, + { + "action_loss": 0.003422211855649948, + "epoch": 15.251798561151078, + "step": 16960 + }, + { + "epoch": 15.251798561151078, + "step": 16960, + "torque_loss": 0.12643198668956757 + }, + { + "epoch": 15.251798561151078, + "force_loss": 0.0030089672654867172, + "step": 16960 + }, + { + "epoch": 15.260791366906474, + "grad_norm": 0.422500878572464, + "learning_rate": 8.5898642876931e-05, + "loss": 0.0208, + "step": 16970 + }, + { + "action_loss": 0.025540227070450783, + "epoch": 15.260791366906474, + "step": 16970 + }, + { + "epoch": 15.260791366906474, + "step": 16970, + "torque_loss": 0.12221430987119675 + }, + { + "epoch": 15.260791366906474, + "force_loss": 0.026424996554851532, + "step": 16970 + }, + { + "epoch": 15.26978417266187, + "grad_norm": 0.4246219992637634, + "learning_rate": 8.587945521613369e-05, + "loss": 0.0317, + "step": 16980 + }, + { + "action_loss": 0.00594263756647706, + "epoch": 15.26978417266187, + "step": 16980 + }, + { + "epoch": 15.26978417266187, + "step": 16980, + "torque_loss": 0.10616742819547653 + }, + { + "epoch": 15.26978417266187, + "force_loss": 0.003391991602256894, + "step": 16980 + }, + { + "epoch": 15.278776978417266, + "grad_norm": 0.23231947422027588, + "learning_rate": 8.586025665610524e-05, + "loss": 0.0203, + "step": 16990 + }, + { + "action_loss": 0.011393420398235321, + "epoch": 15.278776978417266, + "step": 16990 + }, + { + "epoch": 15.278776978417266, + "step": 16990, + "torque_loss": 0.12851528823375702 + }, + { + "epoch": 15.278776978417266, + "force_loss": 0.009426434524357319, + "step": 16990 + }, + { + "epoch": 15.287769784172662, + "grad_norm": 0.4100874066352844, + "learning_rate": 8.584104720267765e-05, + "loss": 0.0228, + "step": 17000 + }, + { + "action_loss": 0.010372314602136612, + "epoch": 15.287769784172662, + "step": 17000 + }, + { + "epoch": 15.287769784172662, + "step": 17000, + "torque_loss": 0.10932165384292603 + }, + { + "epoch": 15.287769784172662, + "force_loss": 0.013693836517632008, + "step": 17000 + }, + { + "epoch": 15.296762589928058, + "grad_norm": 0.256041020154953, + "learning_rate": 8.582182686168625e-05, + "loss": 0.0242, + "step": 17010 + }, + { + "action_loss": 0.00900339987128973, + "epoch": 15.296762589928058, + "step": 17010 + }, + { + "epoch": 15.296762589928058, + "step": 17010, + "torque_loss": 0.12491494417190552 + }, + { + "epoch": 15.296762589928058, + "force_loss": 0.009845726191997528, + "step": 17010 + }, + { + "epoch": 15.305755395683454, + "grad_norm": 0.2653682827949524, + "learning_rate": 8.580259563896967e-05, + "loss": 0.0248, + "step": 17020 + }, + { + "action_loss": 0.008330421522259712, + "epoch": 15.305755395683454, + "step": 17020 + }, + { + "epoch": 15.305755395683454, + "step": 17020, + "torque_loss": 0.17204131186008453 + }, + { + "epoch": 15.305755395683454, + "force_loss": 0.006588909775018692, + "step": 17020 + }, + { + "epoch": 15.31474820143885, + "grad_norm": 0.4202973246574402, + "learning_rate": 8.578335354036983e-05, + "loss": 0.0238, + "step": 17030 + }, + { + "action_loss": 0.014478645287454128, + "epoch": 15.31474820143885, + "step": 17030 + }, + { + "epoch": 15.31474820143885, + "step": 17030, + "torque_loss": 0.10385163873434067 + }, + { + "epoch": 15.31474820143885, + "force_loss": 0.008455420844256878, + "step": 17030 + }, + { + "epoch": 15.323741007194245, + "grad_norm": 0.27224811911582947, + "learning_rate": 8.576410057173201e-05, + "loss": 0.0231, + "step": 17040 + }, + { + "action_loss": 0.004931575618684292, + "epoch": 15.323741007194245, + "step": 17040 + }, + { + "epoch": 15.323741007194245, + "step": 17040, + "torque_loss": 0.18013717234134674 + }, + { + "epoch": 15.323741007194245, + "force_loss": 0.006074054166674614, + "step": 17040 + }, + { + "epoch": 15.332733812949641, + "grad_norm": 0.31736278533935547, + "learning_rate": 8.574483673890474e-05, + "loss": 0.0209, + "step": 17050 + }, + { + "action_loss": 0.003609471023082733, + "epoch": 15.332733812949641, + "step": 17050 + }, + { + "epoch": 15.332733812949641, + "step": 17050, + "torque_loss": 0.1309669464826584 + }, + { + "epoch": 15.332733812949641, + "force_loss": 0.005024902056902647, + "step": 17050 + }, + { + "epoch": 15.341726618705035, + "grad_norm": 0.38658303022384644, + "learning_rate": 8.572556204773983e-05, + "loss": 0.0206, + "step": 17060 + }, + { + "action_loss": 0.0042090336792171, + "epoch": 15.341726618705035, + "step": 17060 + }, + { + "epoch": 15.341726618705035, + "step": 17060, + "torque_loss": 0.14667350053787231 + }, + { + "epoch": 15.341726618705035, + "force_loss": 0.00515960855409503, + "step": 17060 + }, + { + "epoch": 15.350719424460431, + "grad_norm": 0.5411636233329773, + "learning_rate": 8.570627650409246e-05, + "loss": 0.0201, + "step": 17070 + }, + { + "action_loss": 0.002961714519187808, + "epoch": 15.350719424460431, + "step": 17070 + }, + { + "epoch": 15.350719424460431, + "step": 17070, + "torque_loss": 0.12429994344711304 + }, + { + "epoch": 15.350719424460431, + "force_loss": 0.003751396434381604, + "step": 17070 + }, + { + "epoch": 15.359712230215827, + "grad_norm": 0.20276440680027008, + "learning_rate": 8.568698011382107e-05, + "loss": 0.0199, + "step": 17080 + }, + { + "action_loss": 0.022901957854628563, + "epoch": 15.359712230215827, + "step": 17080 + }, + { + "epoch": 15.359712230215827, + "step": 17080, + "torque_loss": 0.16564421355724335 + }, + { + "epoch": 15.359712230215827, + "force_loss": 0.02848103828728199, + "step": 17080 + }, + { + "epoch": 15.368705035971223, + "grad_norm": 0.4422018826007843, + "learning_rate": 8.566767288278738e-05, + "loss": 0.0265, + "step": 17090 + }, + { + "action_loss": 0.0053179445676505566, + "epoch": 15.368705035971223, + "step": 17090 + }, + { + "epoch": 15.368705035971223, + "step": 17090, + "torque_loss": 0.14988398551940918 + }, + { + "epoch": 15.368705035971223, + "force_loss": 0.0034148860722780228, + "step": 17090 + }, + { + "epoch": 15.377697841726619, + "grad_norm": 0.29468706250190735, + "learning_rate": 8.56483548168564e-05, + "loss": 0.0216, + "step": 17100 + }, + { + "action_loss": 0.006086260080337524, + "epoch": 15.377697841726619, + "step": 17100 + }, + { + "epoch": 15.377697841726619, + "step": 17100, + "torque_loss": 0.08516178280115128 + }, + { + "epoch": 15.377697841726619, + "force_loss": 0.00398709112778306, + "step": 17100 + }, + { + "epoch": 15.386690647482014, + "grad_norm": 0.3179362416267395, + "learning_rate": 8.562902592189648e-05, + "loss": 0.0212, + "step": 17110 + }, + { + "action_loss": 0.0031117647886276245, + "epoch": 15.386690647482014, + "step": 17110 + }, + { + "epoch": 15.386690647482014, + "step": 17110, + "torque_loss": 0.1303016096353531 + }, + { + "epoch": 15.386690647482014, + "force_loss": 0.004306287970393896, + "step": 17110 + }, + { + "epoch": 15.39568345323741, + "grad_norm": 0.18887697160243988, + "learning_rate": 8.560968620377921e-05, + "loss": 0.021, + "step": 17120 + }, + { + "action_loss": 0.0083672646433115, + "epoch": 15.39568345323741, + "step": 17120 + }, + { + "epoch": 15.39568345323741, + "step": 17120, + "torque_loss": 0.15263231098651886 + }, + { + "epoch": 15.39568345323741, + "force_loss": 0.006824602838605642, + "step": 17120 + }, + { + "epoch": 15.404676258992806, + "grad_norm": 0.5132378935813904, + "learning_rate": 8.559033566837951e-05, + "loss": 0.0238, + "step": 17130 + }, + { + "action_loss": 0.0025514133740216494, + "epoch": 15.404676258992806, + "step": 17130 + }, + { + "epoch": 15.404676258992806, + "step": 17130, + "torque_loss": 0.1241193637251854 + }, + { + "epoch": 15.404676258992806, + "force_loss": 0.002342240186408162, + "step": 17130 + }, + { + "epoch": 15.413669064748202, + "grad_norm": 0.5962299704551697, + "learning_rate": 8.557097432157551e-05, + "loss": 0.0249, + "step": 17140 + }, + { + "action_loss": 0.007867868058383465, + "epoch": 15.413669064748202, + "step": 17140 + }, + { + "epoch": 15.413669064748202, + "step": 17140, + "torque_loss": 0.12188458442687988 + }, + { + "epoch": 15.413669064748202, + "force_loss": 0.008350138552486897, + "step": 17140 + }, + { + "epoch": 15.422661870503598, + "grad_norm": 0.3489937484264374, + "learning_rate": 8.555160216924872e-05, + "loss": 0.0204, + "step": 17150 + }, + { + "action_loss": 0.013648436404764652, + "epoch": 15.422661870503598, + "step": 17150 + }, + { + "epoch": 15.422661870503598, + "step": 17150, + "torque_loss": 0.11687064170837402 + }, + { + "epoch": 15.422661870503598, + "force_loss": 0.011117999441921711, + "step": 17150 + }, + { + "epoch": 15.431654676258994, + "grad_norm": 0.6030886769294739, + "learning_rate": 8.55322192172839e-05, + "loss": 0.0228, + "step": 17160 + }, + { + "action_loss": 0.004624569788575172, + "epoch": 15.431654676258994, + "step": 17160 + }, + { + "epoch": 15.431654676258994, + "step": 17160, + "torque_loss": 0.1281377524137497 + }, + { + "epoch": 15.431654676258994, + "force_loss": 0.0034989865962415934, + "step": 17160 + }, + { + "epoch": 15.440647482014388, + "grad_norm": 0.2881278693675995, + "learning_rate": 8.551282547156902e-05, + "loss": 0.02, + "step": 17170 + }, + { + "action_loss": 0.010662811808288097, + "epoch": 15.440647482014388, + "step": 17170 + }, + { + "epoch": 15.440647482014388, + "step": 17170, + "torque_loss": 0.16588138043880463 + }, + { + "epoch": 15.440647482014388, + "force_loss": 0.007802148815244436, + "step": 17170 + }, + { + "epoch": 15.449640287769784, + "grad_norm": 0.8668406009674072, + "learning_rate": 8.549342093799544e-05, + "loss": 0.0236, + "step": 17180 + }, + { + "action_loss": 0.009070406667888165, + "epoch": 15.449640287769784, + "step": 17180 + }, + { + "epoch": 15.449640287769784, + "step": 17180, + "torque_loss": 0.15621691942214966 + }, + { + "epoch": 15.449640287769784, + "force_loss": 0.017987458035349846, + "step": 17180 + }, + { + "epoch": 15.45863309352518, + "grad_norm": 0.33376264572143555, + "learning_rate": 8.547400562245773e-05, + "loss": 0.0252, + "step": 17190 + }, + { + "action_loss": 0.019972102716565132, + "epoch": 15.45863309352518, + "step": 17190 + }, + { + "epoch": 15.45863309352518, + "step": 17190, + "torque_loss": 0.10239126533269882 + }, + { + "epoch": 15.45863309352518, + "force_loss": 0.014145704917609692, + "step": 17190 + }, + { + "epoch": 15.467625899280575, + "grad_norm": 0.35148489475250244, + "learning_rate": 8.545457953085374e-05, + "loss": 0.0266, + "step": 17200 + }, + { + "action_loss": 0.005947902332991362, + "epoch": 15.467625899280575, + "step": 17200 + }, + { + "epoch": 15.467625899280575, + "step": 17200, + "torque_loss": 0.11391753703355789 + }, + { + "epoch": 15.467625899280575, + "force_loss": 0.005423885304480791, + "step": 17200 + }, + { + "epoch": 15.476618705035971, + "grad_norm": 0.25836482644081116, + "learning_rate": 8.543514266908463e-05, + "loss": 0.0243, + "step": 17210 + }, + { + "action_loss": 0.00973950419574976, + "epoch": 15.476618705035971, + "step": 17210 + }, + { + "epoch": 15.476618705035971, + "step": 17210, + "torque_loss": 0.12382733821868896 + }, + { + "epoch": 15.476618705035971, + "force_loss": 0.004502378404140472, + "step": 17210 + }, + { + "epoch": 15.485611510791367, + "grad_norm": 0.41935157775878906, + "learning_rate": 8.541569504305478e-05, + "loss": 0.0257, + "step": 17220 + }, + { + "action_loss": 0.004793783184140921, + "epoch": 15.485611510791367, + "step": 17220 + }, + { + "epoch": 15.485611510791367, + "step": 17220, + "torque_loss": 0.11694937944412231 + }, + { + "epoch": 15.485611510791367, + "force_loss": 0.008017126470804214, + "step": 17220 + }, + { + "epoch": 15.494604316546763, + "grad_norm": 0.3241329491138458, + "learning_rate": 8.539623665867187e-05, + "loss": 0.0237, + "step": 17230 + }, + { + "action_loss": 0.03657834231853485, + "epoch": 15.494604316546763, + "step": 17230 + }, + { + "epoch": 15.494604316546763, + "step": 17230, + "torque_loss": 0.13412193953990936 + }, + { + "epoch": 15.494604316546763, + "force_loss": 0.02088378369808197, + "step": 17230 + }, + { + "epoch": 15.503597122302159, + "grad_norm": 0.3684811592102051, + "learning_rate": 8.537676752184685e-05, + "loss": 0.0256, + "step": 17240 + }, + { + "action_loss": 0.006749139633029699, + "epoch": 15.503597122302159, + "step": 17240 + }, + { + "epoch": 15.503597122302159, + "step": 17240, + "torque_loss": 0.19073623418807983 + }, + { + "epoch": 15.503597122302159, + "force_loss": 0.006471940781921148, + "step": 17240 + }, + { + "epoch": 15.512589928057555, + "grad_norm": 0.46356040239334106, + "learning_rate": 8.53572876384939e-05, + "loss": 0.0223, + "step": 17250 + }, + { + "action_loss": 0.005972277373075485, + "epoch": 15.512589928057555, + "step": 17250 + }, + { + "epoch": 15.512589928057555, + "step": 17250, + "torque_loss": 0.11475479602813721 + }, + { + "epoch": 15.512589928057555, + "force_loss": 0.006718062330037355, + "step": 17250 + }, + { + "epoch": 15.52158273381295, + "grad_norm": 0.5765788555145264, + "learning_rate": 8.533779701453056e-05, + "loss": 0.0233, + "step": 17260 + }, + { + "action_loss": 0.026722656562924385, + "epoch": 15.52158273381295, + "step": 17260 + }, + { + "epoch": 15.52158273381295, + "step": 17260, + "torque_loss": 0.159148171544075 + }, + { + "epoch": 15.52158273381295, + "force_loss": 0.02406766451895237, + "step": 17260 + }, + { + "epoch": 15.530575539568346, + "grad_norm": 0.4047594368457794, + "learning_rate": 8.53182956558775e-05, + "loss": 0.0241, + "step": 17270 + }, + { + "action_loss": 0.003972392529249191, + "epoch": 15.530575539568346, + "step": 17270 + }, + { + "epoch": 15.530575539568346, + "step": 17270, + "torque_loss": 0.1654411107301712 + }, + { + "epoch": 15.530575539568346, + "force_loss": 0.004558879882097244, + "step": 17270 + }, + { + "epoch": 15.53956834532374, + "grad_norm": 0.2250213921070099, + "learning_rate": 8.529878356845877e-05, + "loss": 0.021, + "step": 17280 + }, + { + "action_loss": 0.006601112429052591, + "epoch": 15.53956834532374, + "step": 17280 + }, + { + "epoch": 15.53956834532374, + "step": 17280, + "torque_loss": 0.13410568237304688 + }, + { + "epoch": 15.53956834532374, + "force_loss": 0.01160566508769989, + "step": 17280 + }, + { + "epoch": 15.548561151079136, + "grad_norm": 0.6944681406021118, + "learning_rate": 8.527926075820158e-05, + "loss": 0.0254, + "step": 17290 + }, + { + "action_loss": 0.0055807214230299, + "epoch": 15.548561151079136, + "step": 17290 + }, + { + "epoch": 15.548561151079136, + "step": 17290, + "torque_loss": 0.08707667142152786 + }, + { + "epoch": 15.548561151079136, + "force_loss": 0.003927208017557859, + "step": 17290 + }, + { + "epoch": 15.557553956834532, + "grad_norm": 0.6500819325447083, + "learning_rate": 8.525972723103648e-05, + "loss": 0.0251, + "step": 17300 + }, + { + "action_loss": 0.004456877708435059, + "epoch": 15.557553956834532, + "step": 17300 + }, + { + "epoch": 15.557553956834532, + "step": 17300, + "torque_loss": 0.1163586750626564 + }, + { + "epoch": 15.557553956834532, + "force_loss": 0.003028958337381482, + "step": 17300 + }, + { + "epoch": 15.566546762589928, + "grad_norm": 0.3894810378551483, + "learning_rate": 8.524018299289722e-05, + "loss": 0.0207, + "step": 17310 + }, + { + "action_loss": 0.011233396828174591, + "epoch": 15.566546762589928, + "step": 17310 + }, + { + "epoch": 15.566546762589928, + "step": 17310, + "torque_loss": 0.14090825617313385 + }, + { + "epoch": 15.566546762589928, + "force_loss": 0.006974514573812485, + "step": 17310 + }, + { + "epoch": 15.575539568345324, + "grad_norm": 0.4652196764945984, + "learning_rate": 8.522062804972083e-05, + "loss": 0.0241, + "step": 17320 + }, + { + "action_loss": 0.007066812366247177, + "epoch": 15.575539568345324, + "step": 17320 + }, + { + "epoch": 15.575539568345324, + "step": 17320, + "torque_loss": 0.10062506794929504 + }, + { + "epoch": 15.575539568345324, + "force_loss": 0.006861022207885981, + "step": 17320 + }, + { + "epoch": 15.58453237410072, + "grad_norm": 0.35327690839767456, + "learning_rate": 8.520106240744759e-05, + "loss": 0.022, + "step": 17330 + }, + { + "action_loss": 0.005429770797491074, + "epoch": 15.58453237410072, + "step": 17330 + }, + { + "epoch": 15.58453237410072, + "step": 17330, + "torque_loss": 0.12060769647359848 + }, + { + "epoch": 15.58453237410072, + "force_loss": 0.0049434867687523365, + "step": 17330 + }, + { + "epoch": 15.593525179856115, + "grad_norm": 0.4524448812007904, + "learning_rate": 8.518148607202102e-05, + "loss": 0.0259, + "step": 17340 + }, + { + "action_loss": 0.0075156292878091335, + "epoch": 15.593525179856115, + "step": 17340 + }, + { + "epoch": 15.593525179856115, + "step": 17340, + "torque_loss": 0.1567445546388626 + }, + { + "epoch": 15.593525179856115, + "force_loss": 0.004765639081597328, + "step": 17340 + }, + { + "epoch": 15.602517985611511, + "grad_norm": 0.45095622539520264, + "learning_rate": 8.51618990493879e-05, + "loss": 0.0254, + "step": 17350 + }, + { + "action_loss": 0.012920449487864971, + "epoch": 15.602517985611511, + "step": 17350 + }, + { + "epoch": 15.602517985611511, + "step": 17350, + "torque_loss": 0.17410211265087128 + }, + { + "epoch": 15.602517985611511, + "force_loss": 0.006048997398465872, + "step": 17350 + }, + { + "epoch": 15.611510791366907, + "grad_norm": 0.42172640562057495, + "learning_rate": 8.514230134549823e-05, + "loss": 0.0279, + "step": 17360 + }, + { + "action_loss": 0.013524171896278858, + "epoch": 15.611510791366907, + "step": 17360 + }, + { + "epoch": 15.611510791366907, + "step": 17360, + "torque_loss": 0.159952774643898 + }, + { + "epoch": 15.611510791366907, + "force_loss": 0.013305045664310455, + "step": 17360 + }, + { + "epoch": 15.620503597122303, + "grad_norm": 0.4205578863620758, + "learning_rate": 8.51226929663053e-05, + "loss": 0.0236, + "step": 17370 + }, + { + "action_loss": 0.012213137932121754, + "epoch": 15.620503597122303, + "step": 17370 + }, + { + "epoch": 15.620503597122303, + "step": 17370, + "torque_loss": 0.12412973493337631 + }, + { + "epoch": 15.620503597122303, + "force_loss": 0.020799381658434868, + "step": 17370 + }, + { + "epoch": 15.629496402877697, + "grad_norm": 0.7652576565742493, + "learning_rate": 8.51030739177656e-05, + "loss": 0.0225, + "step": 17380 + }, + { + "action_loss": 0.0036580178420990705, + "epoch": 15.629496402877697, + "step": 17380 + }, + { + "epoch": 15.629496402877697, + "step": 17380, + "torque_loss": 0.08535952121019363 + }, + { + "epoch": 15.629496402877697, + "force_loss": 0.004635924007743597, + "step": 17380 + }, + { + "epoch": 15.638489208633093, + "grad_norm": 0.26303476095199585, + "learning_rate": 8.508344420583889e-05, + "loss": 0.0247, + "step": 17390 + }, + { + "action_loss": 0.009836475364863873, + "epoch": 15.638489208633093, + "step": 17390 + }, + { + "epoch": 15.638489208633093, + "step": 17390, + "torque_loss": 0.12285927683115005 + }, + { + "epoch": 15.638489208633093, + "force_loss": 0.014383415691554546, + "step": 17390 + }, + { + "epoch": 15.647482014388489, + "grad_norm": 0.26400992274284363, + "learning_rate": 8.506380383648816e-05, + "loss": 0.0221, + "step": 17400 + }, + { + "action_loss": 0.006597287952899933, + "epoch": 15.647482014388489, + "step": 17400 + }, + { + "epoch": 15.647482014388489, + "step": 17400, + "torque_loss": 0.13825170695781708 + }, + { + "epoch": 15.647482014388489, + "force_loss": 0.004263477399945259, + "step": 17400 + }, + { + "epoch": 15.656474820143885, + "grad_norm": 0.3148339092731476, + "learning_rate": 8.504415281567963e-05, + "loss": 0.0214, + "step": 17410 + }, + { + "action_loss": 0.004615604877471924, + "epoch": 15.656474820143885, + "step": 17410 + }, + { + "epoch": 15.656474820143885, + "step": 17410, + "torque_loss": 0.10837849229574203 + }, + { + "epoch": 15.656474820143885, + "force_loss": 0.0037073504645377398, + "step": 17410 + }, + { + "epoch": 15.66546762589928, + "grad_norm": 0.3267367482185364, + "learning_rate": 8.502449114938275e-05, + "loss": 0.0212, + "step": 17420 + }, + { + "action_loss": 0.0038669221103191376, + "epoch": 15.66546762589928, + "step": 17420 + }, + { + "epoch": 15.66546762589928, + "step": 17420, + "torque_loss": 0.11064451187849045 + }, + { + "epoch": 15.66546762589928, + "force_loss": 0.0056570447050035, + "step": 17420 + }, + { + "epoch": 15.674460431654676, + "grad_norm": 0.7484758496284485, + "learning_rate": 8.500481884357025e-05, + "loss": 0.0248, + "step": 17430 + }, + { + "action_loss": 0.008798401802778244, + "epoch": 15.674460431654676, + "step": 17430 + }, + { + "epoch": 15.674460431654676, + "step": 17430, + "torque_loss": 0.1286756694316864 + }, + { + "epoch": 15.674460431654676, + "force_loss": 0.00599707430228591, + "step": 17430 + }, + { + "epoch": 15.683453237410072, + "grad_norm": 0.7126136422157288, + "learning_rate": 8.498513590421801e-05, + "loss": 0.025, + "step": 17440 + }, + { + "action_loss": 0.009946263395249844, + "epoch": 15.683453237410072, + "step": 17440 + }, + { + "epoch": 15.683453237410072, + "step": 17440, + "torque_loss": 0.1469493955373764 + }, + { + "epoch": 15.683453237410072, + "force_loss": 0.022330351173877716, + "step": 17440 + }, + { + "epoch": 15.692446043165468, + "grad_norm": 0.4793100357055664, + "learning_rate": 8.496544233730522e-05, + "loss": 0.0247, + "step": 17450 + }, + { + "action_loss": 0.005858680699020624, + "epoch": 15.692446043165468, + "step": 17450 + }, + { + "epoch": 15.692446043165468, + "step": 17450, + "torque_loss": 0.12280535697937012 + }, + { + "epoch": 15.692446043165468, + "force_loss": 0.0036531102377921343, + "step": 17450 + }, + { + "epoch": 15.701438848920864, + "grad_norm": 0.31725069880485535, + "learning_rate": 8.494573814881426e-05, + "loss": 0.0234, + "step": 17460 + }, + { + "action_loss": 0.005597864743322134, + "epoch": 15.701438848920864, + "step": 17460 + }, + { + "epoch": 15.701438848920864, + "step": 17460, + "torque_loss": 0.14102140069007874 + }, + { + "epoch": 15.701438848920864, + "force_loss": 0.004687194246798754, + "step": 17460 + }, + { + "epoch": 15.71043165467626, + "grad_norm": 0.35858169198036194, + "learning_rate": 8.492602334473074e-05, + "loss": 0.0203, + "step": 17470 + }, + { + "action_loss": 0.02361586131155491, + "epoch": 15.71043165467626, + "step": 17470 + }, + { + "epoch": 15.71043165467626, + "step": 17470, + "torque_loss": 0.2368074655532837 + }, + { + "epoch": 15.71043165467626, + "force_loss": 0.020131416618824005, + "step": 17470 + }, + { + "epoch": 15.719424460431654, + "grad_norm": 0.43791642785072327, + "learning_rate": 8.49062979310435e-05, + "loss": 0.0259, + "step": 17480 + }, + { + "action_loss": 0.015914402902126312, + "epoch": 15.719424460431654, + "step": 17480 + }, + { + "epoch": 15.719424460431654, + "step": 17480, + "torque_loss": 0.1638699322938919 + }, + { + "epoch": 15.719424460431654, + "force_loss": 0.009551900438964367, + "step": 17480 + }, + { + "epoch": 15.72841726618705, + "grad_norm": 0.17880485951900482, + "learning_rate": 8.488656191374458e-05, + "loss": 0.0255, + "step": 17490 + }, + { + "action_loss": 0.005156724713742733, + "epoch": 15.72841726618705, + "step": 17490 + }, + { + "epoch": 15.72841726618705, + "step": 17490, + "torque_loss": 0.09796721488237381 + }, + { + "epoch": 15.72841726618705, + "force_loss": 0.00782252661883831, + "step": 17490 + }, + { + "epoch": 15.737410071942445, + "grad_norm": 0.3123587965965271, + "learning_rate": 8.48668152988293e-05, + "loss": 0.0228, + "step": 17500 + }, + { + "action_loss": 0.005358940456062555, + "epoch": 15.737410071942445, + "step": 17500 + }, + { + "epoch": 15.737410071942445, + "step": 17500, + "torque_loss": 0.15764956176280975 + }, + { + "epoch": 15.737410071942445, + "force_loss": 0.006400526966899633, + "step": 17500 + }, + { + "epoch": 15.746402877697841, + "grad_norm": 0.2304433435201645, + "learning_rate": 8.484705809229612e-05, + "loss": 0.0226, + "step": 17510 + }, + { + "action_loss": 0.015522944740951061, + "epoch": 15.746402877697841, + "step": 17510 + }, + { + "epoch": 15.746402877697841, + "step": 17510, + "torque_loss": 0.17891037464141846 + }, + { + "epoch": 15.746402877697841, + "force_loss": 0.02278672158718109, + "step": 17510 + }, + { + "epoch": 15.755395683453237, + "grad_norm": 0.9582626223564148, + "learning_rate": 8.482729030014677e-05, + "loss": 0.0263, + "step": 17520 + }, + { + "action_loss": 0.0069336616434156895, + "epoch": 15.755395683453237, + "step": 17520 + }, + { + "epoch": 15.755395683453237, + "step": 17520, + "torque_loss": 0.13514092564582825 + }, + { + "epoch": 15.755395683453237, + "force_loss": 0.004314844962209463, + "step": 17520 + }, + { + "epoch": 15.764388489208633, + "grad_norm": 0.3698276877403259, + "learning_rate": 8.48075119283862e-05, + "loss": 0.0224, + "step": 17530 + }, + { + "action_loss": 0.013405833393335342, + "epoch": 15.764388489208633, + "step": 17530 + }, + { + "epoch": 15.764388489208633, + "step": 17530, + "torque_loss": 0.13743506371974945 + }, + { + "epoch": 15.764388489208633, + "force_loss": 0.02497536689043045, + "step": 17530 + }, + { + "epoch": 15.773381294964029, + "grad_norm": 0.5154282450675964, + "learning_rate": 8.478772298302254e-05, + "loss": 0.0237, + "step": 17540 + }, + { + "action_loss": 0.0063224430195987225, + "epoch": 15.773381294964029, + "step": 17540 + }, + { + "epoch": 15.773381294964029, + "step": 17540, + "torque_loss": 0.13486050069332123 + }, + { + "epoch": 15.773381294964029, + "force_loss": 0.01241572480648756, + "step": 17540 + }, + { + "epoch": 15.782374100719425, + "grad_norm": 0.41657909750938416, + "learning_rate": 8.476792347006716e-05, + "loss": 0.0278, + "step": 17550 + }, + { + "action_loss": 0.004343951120972633, + "epoch": 15.782374100719425, + "step": 17550 + }, + { + "epoch": 15.782374100719425, + "step": 17550, + "torque_loss": 0.09667249768972397 + }, + { + "epoch": 15.782374100719425, + "force_loss": 0.004796222317963839, + "step": 17550 + }, + { + "epoch": 15.79136690647482, + "grad_norm": 0.5779966711997986, + "learning_rate": 8.474811339553462e-05, + "loss": 0.0245, + "step": 17560 + }, + { + "action_loss": 0.007796111050993204, + "epoch": 15.79136690647482, + "step": 17560 + }, + { + "epoch": 15.79136690647482, + "step": 17560, + "torque_loss": 0.12458092719316483 + }, + { + "epoch": 15.79136690647482, + "force_loss": 0.010667835362255573, + "step": 17560 + }, + { + "epoch": 15.800359712230216, + "grad_norm": 0.8348226547241211, + "learning_rate": 8.47282927654427e-05, + "loss": 0.0327, + "step": 17570 + }, + { + "action_loss": 0.004865535534918308, + "epoch": 15.800359712230216, + "step": 17570 + }, + { + "epoch": 15.800359712230216, + "step": 17570, + "torque_loss": 0.09535191208124161 + }, + { + "epoch": 15.800359712230216, + "force_loss": 0.005711412522941828, + "step": 17570 + }, + { + "epoch": 15.809352517985612, + "grad_norm": 0.4629124104976654, + "learning_rate": 8.470846158581238e-05, + "loss": 0.0225, + "step": 17580 + }, + { + "action_loss": 0.010356389917433262, + "epoch": 15.809352517985612, + "step": 17580 + }, + { + "epoch": 15.809352517985612, + "step": 17580, + "torque_loss": 0.13352568447589874 + }, + { + "epoch": 15.809352517985612, + "force_loss": 0.0201144777238369, + "step": 17580 + }, + { + "epoch": 15.818345323741006, + "grad_norm": 0.45410624146461487, + "learning_rate": 8.468861986266787e-05, + "loss": 0.0246, + "step": 17590 + }, + { + "action_loss": 0.020586155354976654, + "epoch": 15.818345323741006, + "step": 17590 + }, + { + "epoch": 15.818345323741006, + "step": 17590, + "torque_loss": 0.15210437774658203 + }, + { + "epoch": 15.818345323741006, + "force_loss": 0.02451533079147339, + "step": 17590 + }, + { + "epoch": 15.827338129496402, + "grad_norm": 0.2987782657146454, + "learning_rate": 8.466876760203654e-05, + "loss": 0.0255, + "step": 17600 + }, + { + "action_loss": 0.052640676498413086, + "epoch": 15.827338129496402, + "step": 17600 + }, + { + "epoch": 15.827338129496402, + "step": 17600, + "torque_loss": 0.21721559762954712 + }, + { + "epoch": 15.827338129496402, + "force_loss": 0.05710431933403015, + "step": 17600 + }, + { + "epoch": 15.836330935251798, + "grad_norm": 0.35147571563720703, + "learning_rate": 8.464890480994898e-05, + "loss": 0.0307, + "step": 17610 + }, + { + "action_loss": 0.006250986363738775, + "epoch": 15.836330935251798, + "step": 17610 + }, + { + "epoch": 15.836330935251798, + "step": 17610, + "torque_loss": 0.13218986988067627 + }, + { + "epoch": 15.836330935251798, + "force_loss": 0.011742700822651386, + "step": 17610 + }, + { + "epoch": 15.845323741007194, + "grad_norm": 0.27377206087112427, + "learning_rate": 8.462903149243899e-05, + "loss": 0.0217, + "step": 17620 + }, + { + "action_loss": 0.005419587716460228, + "epoch": 15.845323741007194, + "step": 17620 + }, + { + "epoch": 15.845323741007194, + "step": 17620, + "torque_loss": 0.10276583582162857 + }, + { + "epoch": 15.845323741007194, + "force_loss": 0.00598743325099349, + "step": 17620 + }, + { + "epoch": 15.85431654676259, + "grad_norm": 0.27440932393074036, + "learning_rate": 8.460914765554357e-05, + "loss": 0.0215, + "step": 17630 + }, + { + "action_loss": 0.006008013617247343, + "epoch": 15.85431654676259, + "step": 17630 + }, + { + "epoch": 15.85431654676259, + "step": 17630, + "torque_loss": 0.135933056473732 + }, + { + "epoch": 15.85431654676259, + "force_loss": 0.004844170529395342, + "step": 17630 + }, + { + "epoch": 15.863309352517986, + "grad_norm": 0.4325622022151947, + "learning_rate": 8.458925330530288e-05, + "loss": 0.03, + "step": 17640 + }, + { + "action_loss": 0.004168189596384764, + "epoch": 15.863309352517986, + "step": 17640 + }, + { + "epoch": 15.863309352517986, + "step": 17640, + "torque_loss": 0.15087778866291046 + }, + { + "epoch": 15.863309352517986, + "force_loss": 0.0033332640305161476, + "step": 17640 + }, + { + "epoch": 15.872302158273381, + "grad_norm": 0.440936416387558, + "learning_rate": 8.456934844776032e-05, + "loss": 0.0234, + "step": 17650 + }, + { + "action_loss": 0.006525044795125723, + "epoch": 15.872302158273381, + "step": 17650 + }, + { + "epoch": 15.872302158273381, + "step": 17650, + "torque_loss": 0.1332537978887558 + }, + { + "epoch": 15.872302158273381, + "force_loss": 0.011580362915992737, + "step": 17650 + }, + { + "epoch": 15.881294964028777, + "grad_norm": 1.0107548236846924, + "learning_rate": 8.454943308896246e-05, + "loss": 0.0271, + "step": 17660 + }, + { + "action_loss": 0.01941235363483429, + "epoch": 15.881294964028777, + "step": 17660 + }, + { + "epoch": 15.881294964028777, + "step": 17660, + "torque_loss": 0.17526470124721527 + }, + { + "epoch": 15.881294964028777, + "force_loss": 0.016152264550328255, + "step": 17660 + }, + { + "epoch": 15.890287769784173, + "grad_norm": 0.3381480872631073, + "learning_rate": 8.452950723495905e-05, + "loss": 0.0232, + "step": 17670 + }, + { + "action_loss": 0.008447933942079544, + "epoch": 15.890287769784173, + "step": 17670 + }, + { + "epoch": 15.890287769784173, + "step": 17670, + "torque_loss": 0.13460573554039001 + }, + { + "epoch": 15.890287769784173, + "force_loss": 0.006527024786919355, + "step": 17670 + }, + { + "epoch": 15.899280575539569, + "grad_norm": 0.626726508140564, + "learning_rate": 8.450957089180303e-05, + "loss": 0.0245, + "step": 17680 + }, + { + "action_loss": 0.0064415582455694675, + "epoch": 15.899280575539569, + "step": 17680 + }, + { + "epoch": 15.899280575539569, + "step": 17680, + "torque_loss": 0.1289413571357727 + }, + { + "epoch": 15.899280575539569, + "force_loss": 0.004623195622116327, + "step": 17680 + }, + { + "epoch": 15.908273381294965, + "grad_norm": 0.735294759273529, + "learning_rate": 8.448962406555055e-05, + "loss": 0.0235, + "step": 17690 + }, + { + "action_loss": 0.024541936814785004, + "epoch": 15.908273381294965, + "step": 17690 + }, + { + "epoch": 15.908273381294965, + "step": 17690, + "torque_loss": 0.14886687695980072 + }, + { + "epoch": 15.908273381294965, + "force_loss": 0.024909773841500282, + "step": 17690 + }, + { + "epoch": 15.917266187050359, + "grad_norm": 0.33747783303260803, + "learning_rate": 8.446966676226093e-05, + "loss": 0.0274, + "step": 17700 + }, + { + "action_loss": 0.004488543141633272, + "epoch": 15.917266187050359, + "step": 17700 + }, + { + "epoch": 15.917266187050359, + "step": 17700, + "torque_loss": 0.11510869115591049 + }, + { + "epoch": 15.917266187050359, + "force_loss": 0.0038391754496842623, + "step": 17700 + }, + { + "epoch": 15.926258992805755, + "grad_norm": 0.5885621309280396, + "learning_rate": 8.444969898799667e-05, + "loss": 0.0273, + "step": 17710 + }, + { + "action_loss": 0.01990235038101673, + "epoch": 15.926258992805755, + "step": 17710 + }, + { + "epoch": 15.926258992805755, + "step": 17710, + "torque_loss": 0.13860540091991425 + }, + { + "epoch": 15.926258992805755, + "force_loss": 0.018544703722000122, + "step": 17710 + }, + { + "epoch": 15.93525179856115, + "grad_norm": 0.7729743719100952, + "learning_rate": 8.442972074882343e-05, + "loss": 0.0253, + "step": 17720 + }, + { + "action_loss": 0.01067747175693512, + "epoch": 15.93525179856115, + "step": 17720 + }, + { + "epoch": 15.93525179856115, + "step": 17720, + "torque_loss": 0.128219872713089 + }, + { + "epoch": 15.93525179856115, + "force_loss": 0.009551775641739368, + "step": 17720 + }, + { + "epoch": 15.944244604316546, + "grad_norm": 0.6176245212554932, + "learning_rate": 8.44097320508101e-05, + "loss": 0.0241, + "step": 17730 + }, + { + "action_loss": 0.005875390022993088, + "epoch": 15.944244604316546, + "step": 17730 + }, + { + "epoch": 15.944244604316546, + "step": 17730, + "torque_loss": 0.1448507159948349 + }, + { + "epoch": 15.944244604316546, + "force_loss": 0.005019858945161104, + "step": 17730 + }, + { + "epoch": 15.953237410071942, + "grad_norm": 0.23681005835533142, + "learning_rate": 8.43897329000287e-05, + "loss": 0.0194, + "step": 17740 + }, + { + "action_loss": 0.010999500751495361, + "epoch": 15.953237410071942, + "step": 17740 + }, + { + "epoch": 15.953237410071942, + "step": 17740, + "torque_loss": 0.12151765078306198 + }, + { + "epoch": 15.953237410071942, + "force_loss": 0.007289711385965347, + "step": 17740 + }, + { + "epoch": 15.962230215827338, + "grad_norm": 0.29228949546813965, + "learning_rate": 8.436972330255448e-05, + "loss": 0.0223, + "step": 17750 + }, + { + "action_loss": 0.005345213692635298, + "epoch": 15.962230215827338, + "step": 17750 + }, + { + "epoch": 15.962230215827338, + "step": 17750, + "torque_loss": 0.11086401343345642 + }, + { + "epoch": 15.962230215827338, + "force_loss": 0.0031974802259355783, + "step": 17750 + }, + { + "epoch": 15.971223021582734, + "grad_norm": 0.4384174346923828, + "learning_rate": 8.434970326446579e-05, + "loss": 0.025, + "step": 17760 + }, + { + "action_loss": 0.007547078654170036, + "epoch": 15.971223021582734, + "step": 17760 + }, + { + "epoch": 15.971223021582734, + "step": 17760, + "torque_loss": 0.08681625127792358 + }, + { + "epoch": 15.971223021582734, + "force_loss": 0.00496362941339612, + "step": 17760 + }, + { + "epoch": 15.98021582733813, + "grad_norm": 0.23871272802352905, + "learning_rate": 8.432967279184418e-05, + "loss": 0.0214, + "step": 17770 + }, + { + "action_loss": 0.009915996342897415, + "epoch": 15.98021582733813, + "step": 17770 + }, + { + "epoch": 15.98021582733813, + "step": 17770, + "torque_loss": 0.10791996866464615 + }, + { + "epoch": 15.98021582733813, + "force_loss": 0.0038088730070739985, + "step": 17770 + }, + { + "epoch": 15.989208633093526, + "grad_norm": 0.5143328309059143, + "learning_rate": 8.430963189077441e-05, + "loss": 0.0199, + "step": 17780 + }, + { + "action_loss": 0.004039436113089323, + "epoch": 15.989208633093526, + "step": 17780 + }, + { + "epoch": 15.989208633093526, + "step": 17780, + "torque_loss": 0.15265363454818726 + }, + { + "epoch": 15.989208633093526, + "force_loss": 0.0063683390617370605, + "step": 17780 + }, + { + "epoch": 15.998201438848922, + "grad_norm": 0.5695040822029114, + "learning_rate": 8.428958056734437e-05, + "loss": 0.0207, + "step": 17790 + }, + { + "action_loss": 0.007465250324457884, + "epoch": 15.998201438848922, + "step": 17790 + }, + { + "epoch": 15.998201438848922, + "step": 17790, + "torque_loss": 0.13948342204093933 + }, + { + "epoch": 15.998201438848922, + "force_loss": 0.005301920231431723, + "step": 17790 + }, + { + "epoch": 16.007194244604317, + "grad_norm": 0.3909229040145874, + "learning_rate": 8.426951882764513e-05, + "loss": 0.0314, + "step": 17800 + }, + { + "action_loss": 0.013685323297977448, + "epoch": 16.007194244604317, + "step": 17800 + }, + { + "epoch": 16.007194244604317, + "step": 17800, + "torque_loss": 0.16999734938144684 + }, + { + "epoch": 16.007194244604317, + "force_loss": 0.019315673038363457, + "step": 17800 + }, + { + "epoch": 16.01618705035971, + "grad_norm": 0.33626413345336914, + "learning_rate": 8.424944667777089e-05, + "loss": 0.0214, + "step": 17810 + }, + { + "action_loss": 0.00577952666208148, + "epoch": 16.01618705035971, + "step": 17810 + }, + { + "epoch": 16.01618705035971, + "step": 17810, + "torque_loss": 0.1201549544930458 + }, + { + "epoch": 16.01618705035971, + "force_loss": 0.007053063716739416, + "step": 17810 + }, + { + "epoch": 16.02517985611511, + "grad_norm": 0.48775607347488403, + "learning_rate": 8.422936412381905e-05, + "loss": 0.0216, + "step": 17820 + }, + { + "action_loss": 0.0107901431620121, + "epoch": 16.02517985611511, + "step": 17820 + }, + { + "epoch": 16.02517985611511, + "step": 17820, + "torque_loss": 0.15228381752967834 + }, + { + "epoch": 16.02517985611511, + "force_loss": 0.0058626290410757065, + "step": 17820 + }, + { + "epoch": 16.034172661870503, + "grad_norm": 0.3989713191986084, + "learning_rate": 8.420927117189017e-05, + "loss": 0.0222, + "step": 17830 + }, + { + "action_loss": 0.01462673768401146, + "epoch": 16.034172661870503, + "step": 17830 + }, + { + "epoch": 16.034172661870503, + "step": 17830, + "torque_loss": 0.18573270738124847 + }, + { + "epoch": 16.034172661870503, + "force_loss": 0.010324918664991856, + "step": 17830 + }, + { + "epoch": 16.0431654676259, + "grad_norm": 0.3277093768119812, + "learning_rate": 8.418916782808795e-05, + "loss": 0.0241, + "step": 17840 + }, + { + "action_loss": 0.006221791263669729, + "epoch": 16.0431654676259, + "step": 17840 + }, + { + "epoch": 16.0431654676259, + "step": 17840, + "torque_loss": 0.10587122291326523 + }, + { + "epoch": 16.0431654676259, + "force_loss": 0.005029916763305664, + "step": 17840 + }, + { + "epoch": 16.052158273381295, + "grad_norm": 0.2585214376449585, + "learning_rate": 8.416905409851926e-05, + "loss": 0.0214, + "step": 17850 + }, + { + "action_loss": 0.0056740292347967625, + "epoch": 16.052158273381295, + "step": 17850 + }, + { + "epoch": 16.052158273381295, + "step": 17850, + "torque_loss": 0.16839216649532318 + }, + { + "epoch": 16.052158273381295, + "force_loss": 0.005034435074776411, + "step": 17850 + }, + { + "epoch": 16.06115107913669, + "grad_norm": 0.24692974984645844, + "learning_rate": 8.41489299892941e-05, + "loss": 0.02, + "step": 17860 + }, + { + "action_loss": 0.017583243548870087, + "epoch": 16.06115107913669, + "step": 17860 + }, + { + "epoch": 16.06115107913669, + "step": 17860, + "torque_loss": 0.19254548847675323 + }, + { + "epoch": 16.06115107913669, + "force_loss": 0.013399135321378708, + "step": 17860 + }, + { + "epoch": 16.070143884892087, + "grad_norm": 0.2261732518672943, + "learning_rate": 8.412879550652566e-05, + "loss": 0.0223, + "step": 17870 + }, + { + "action_loss": 0.006178155075758696, + "epoch": 16.070143884892087, + "step": 17870 + }, + { + "epoch": 16.070143884892087, + "step": 17870, + "torque_loss": 0.13173258304595947 + }, + { + "epoch": 16.070143884892087, + "force_loss": 0.006183135788887739, + "step": 17870 + }, + { + "epoch": 16.07913669064748, + "grad_norm": 0.4054189622402191, + "learning_rate": 8.410865065633029e-05, + "loss": 0.0224, + "step": 17880 + }, + { + "action_loss": 0.005918122828006744, + "epoch": 16.07913669064748, + "step": 17880 + }, + { + "epoch": 16.07913669064748, + "step": 17880, + "torque_loss": 0.084866464138031 + }, + { + "epoch": 16.07913669064748, + "force_loss": 0.0033826958388090134, + "step": 17880 + }, + { + "epoch": 16.08812949640288, + "grad_norm": 0.6187392473220825, + "learning_rate": 8.408849544482742e-05, + "loss": 0.0248, + "step": 17890 + }, + { + "action_loss": 0.007516726851463318, + "epoch": 16.08812949640288, + "step": 17890 + }, + { + "epoch": 16.08812949640288, + "step": 17890, + "torque_loss": 0.1508045345544815 + }, + { + "epoch": 16.08812949640288, + "force_loss": 0.011879724450409412, + "step": 17890 + }, + { + "epoch": 16.097122302158272, + "grad_norm": 0.6458420753479004, + "learning_rate": 8.406832987813968e-05, + "loss": 0.023, + "step": 17900 + }, + { + "action_loss": 0.012775298207998276, + "epoch": 16.097122302158272, + "step": 17900 + }, + { + "epoch": 16.097122302158272, + "step": 17900, + "torque_loss": 0.1499585658311844 + }, + { + "epoch": 16.097122302158272, + "force_loss": 0.02126883901655674, + "step": 17900 + }, + { + "epoch": 16.10611510791367, + "grad_norm": 0.8367826342582703, + "learning_rate": 8.404815396239286e-05, + "loss": 0.024, + "step": 17910 + }, + { + "action_loss": 0.0034704997669905424, + "epoch": 16.10611510791367, + "step": 17910 + }, + { + "epoch": 16.10611510791367, + "step": 17910, + "torque_loss": 0.15362930297851562 + }, + { + "epoch": 16.10611510791367, + "force_loss": 0.0028502587229013443, + "step": 17910 + }, + { + "epoch": 16.115107913669064, + "grad_norm": 0.6870766282081604, + "learning_rate": 8.402796770371587e-05, + "loss": 0.0258, + "step": 17920 + }, + { + "action_loss": 0.007104405667632818, + "epoch": 16.115107913669064, + "step": 17920 + }, + { + "epoch": 16.115107913669064, + "step": 17920, + "torque_loss": 0.10150986909866333 + }, + { + "epoch": 16.115107913669064, + "force_loss": 0.012554663233458996, + "step": 17920 + }, + { + "epoch": 16.12410071942446, + "grad_norm": 0.2720673382282257, + "learning_rate": 8.400777110824071e-05, + "loss": 0.0218, + "step": 17930 + }, + { + "action_loss": 0.00911865197122097, + "epoch": 16.12410071942446, + "step": 17930 + }, + { + "epoch": 16.12410071942446, + "step": 17930, + "torque_loss": 0.11493996530771255 + }, + { + "epoch": 16.12410071942446, + "force_loss": 0.008109722286462784, + "step": 17930 + }, + { + "epoch": 16.133093525179856, + "grad_norm": 0.7344787120819092, + "learning_rate": 8.398756418210263e-05, + "loss": 0.0202, + "step": 17940 + }, + { + "action_loss": 0.0041253212839365005, + "epoch": 16.133093525179856, + "step": 17940 + }, + { + "epoch": 16.133093525179856, + "step": 17940, + "torque_loss": 0.1408514380455017 + }, + { + "epoch": 16.133093525179856, + "force_loss": 0.002820750931277871, + "step": 17940 + }, + { + "epoch": 16.142086330935253, + "grad_norm": 0.2454034835100174, + "learning_rate": 8.396734693143993e-05, + "loss": 0.021, + "step": 17950 + }, + { + "action_loss": 0.008053547702729702, + "epoch": 16.142086330935253, + "step": 17950 + }, + { + "epoch": 16.142086330935253, + "step": 17950, + "torque_loss": 0.1375073790550232 + }, + { + "epoch": 16.142086330935253, + "force_loss": 0.006553851068019867, + "step": 17950 + }, + { + "epoch": 16.151079136690647, + "grad_norm": 0.37337568402290344, + "learning_rate": 8.39471193623941e-05, + "loss": 0.0297, + "step": 17960 + }, + { + "action_loss": 0.004111962392926216, + "epoch": 16.151079136690647, + "step": 17960 + }, + { + "epoch": 16.151079136690647, + "step": 17960, + "torque_loss": 0.11576279252767563 + }, + { + "epoch": 16.151079136690647, + "force_loss": 0.004006903152912855, + "step": 17960 + }, + { + "epoch": 16.16007194244604, + "grad_norm": 0.3513309955596924, + "learning_rate": 8.392688148110974e-05, + "loss": 0.0252, + "step": 17970 + }, + { + "action_loss": 0.003856303170323372, + "epoch": 16.16007194244604, + "step": 17970 + }, + { + "epoch": 16.16007194244604, + "step": 17970, + "torque_loss": 0.13178487122058868 + }, + { + "epoch": 16.16007194244604, + "force_loss": 0.0045032622292637825, + "step": 17970 + }, + { + "epoch": 16.16906474820144, + "grad_norm": 0.5031517744064331, + "learning_rate": 8.390663329373456e-05, + "loss": 0.0226, + "step": 17980 + }, + { + "action_loss": 0.007378889247775078, + "epoch": 16.16906474820144, + "step": 17980 + }, + { + "epoch": 16.16906474820144, + "step": 17980, + "torque_loss": 0.13647036254405975 + }, + { + "epoch": 16.16906474820144, + "force_loss": 0.006954573094844818, + "step": 17980 + }, + { + "epoch": 16.178057553956833, + "grad_norm": 0.6193506717681885, + "learning_rate": 8.388637480641944e-05, + "loss": 0.0297, + "step": 17990 + }, + { + "action_loss": 0.015983037650585175, + "epoch": 16.178057553956833, + "step": 17990 + }, + { + "epoch": 16.178057553956833, + "step": 17990, + "torque_loss": 0.1552460938692093 + }, + { + "epoch": 16.178057553956833, + "force_loss": 0.011983555741608143, + "step": 17990 + }, + { + "epoch": 16.18705035971223, + "grad_norm": 0.4312192499637604, + "learning_rate": 8.386610602531837e-05, + "loss": 0.0257, + "step": 18000 + }, + { + "action_loss": 0.007568379398435354, + "epoch": 16.18705035971223, + "step": 18000 + }, + { + "epoch": 16.18705035971223, + "step": 18000, + "torque_loss": 0.11109446734189987 + }, + { + "epoch": 16.18705035971223, + "force_loss": 0.003926304634660482, + "step": 18000 + }, + { + "epoch": 16.196043165467625, + "grad_norm": 0.4163177013397217, + "learning_rate": 8.384582695658847e-05, + "loss": 0.0235, + "step": 18010 + }, + { + "action_loss": 0.015307377092540264, + "epoch": 16.196043165467625, + "step": 18010 + }, + { + "epoch": 16.196043165467625, + "step": 18010, + "torque_loss": 0.101185142993927 + }, + { + "epoch": 16.196043165467625, + "force_loss": 0.02133381925523281, + "step": 18010 + }, + { + "epoch": 16.205035971223023, + "grad_norm": 0.44741207361221313, + "learning_rate": 8.382553760638999e-05, + "loss": 0.0237, + "step": 18020 + }, + { + "action_loss": 0.009381234645843506, + "epoch": 16.205035971223023, + "step": 18020 + }, + { + "epoch": 16.205035971223023, + "step": 18020, + "torque_loss": 0.12503407895565033 + }, + { + "epoch": 16.205035971223023, + "force_loss": 0.00686328811571002, + "step": 18020 + }, + { + "epoch": 16.214028776978417, + "grad_norm": 0.36904048919677734, + "learning_rate": 8.380523798088631e-05, + "loss": 0.0206, + "step": 18030 + }, + { + "action_loss": 0.005536861252039671, + "epoch": 16.214028776978417, + "step": 18030 + }, + { + "epoch": 16.214028776978417, + "step": 18030, + "torque_loss": 0.11668294668197632 + }, + { + "epoch": 16.214028776978417, + "force_loss": 0.005136905703693628, + "step": 18030 + }, + { + "epoch": 16.223021582733814, + "grad_norm": 0.3489348888397217, + "learning_rate": 8.378492808624389e-05, + "loss": 0.0213, + "step": 18040 + }, + { + "action_loss": 0.004436939489096403, + "epoch": 16.223021582733814, + "step": 18040 + }, + { + "epoch": 16.223021582733814, + "step": 18040, + "torque_loss": 0.10620620846748352 + }, + { + "epoch": 16.223021582733814, + "force_loss": 0.004801291972398758, + "step": 18040 + }, + { + "epoch": 16.23201438848921, + "grad_norm": 0.5666268467903137, + "learning_rate": 8.376460792863237e-05, + "loss": 0.0258, + "step": 18050 + }, + { + "action_loss": 0.009859741665422916, + "epoch": 16.23201438848921, + "step": 18050 + }, + { + "epoch": 16.23201438848921, + "step": 18050, + "torque_loss": 0.13223198056221008 + }, + { + "epoch": 16.23201438848921, + "force_loss": 0.005797772202640772, + "step": 18050 + }, + { + "epoch": 16.241007194244606, + "grad_norm": 0.3594134449958801, + "learning_rate": 8.374427751422444e-05, + "loss": 0.0183, + "step": 18060 + }, + { + "action_loss": 0.005189632531255484, + "epoch": 16.241007194244606, + "step": 18060 + }, + { + "epoch": 16.241007194244606, + "step": 18060, + "torque_loss": 0.10445895791053772 + }, + { + "epoch": 16.241007194244606, + "force_loss": 0.0031314559746533632, + "step": 18060 + }, + { + "epoch": 16.25, + "grad_norm": 0.2947518825531006, + "learning_rate": 8.3723936849196e-05, + "loss": 0.0199, + "step": 18070 + }, + { + "action_loss": 0.004197425674647093, + "epoch": 16.25, + "step": 18070 + }, + { + "epoch": 16.25, + "step": 18070, + "torque_loss": 0.15771900117397308 + }, + { + "epoch": 16.25, + "force_loss": 0.0050789485685527325, + "step": 18070 + }, + { + "epoch": 16.258992805755394, + "grad_norm": 0.34019678831100464, + "learning_rate": 8.370358593972595e-05, + "loss": 0.0208, + "step": 18080 + }, + { + "action_loss": 0.004253249149769545, + "epoch": 16.258992805755394, + "step": 18080 + }, + { + "epoch": 16.258992805755394, + "step": 18080, + "torque_loss": 0.1208881139755249 + }, + { + "epoch": 16.258992805755394, + "force_loss": 0.007525982800871134, + "step": 18080 + }, + { + "epoch": 16.26798561151079, + "grad_norm": 0.4985514283180237, + "learning_rate": 8.36832247919964e-05, + "loss": 0.0195, + "step": 18090 + }, + { + "action_loss": 0.015206550247967243, + "epoch": 16.26798561151079, + "step": 18090 + }, + { + "epoch": 16.26798561151079, + "step": 18090, + "torque_loss": 0.11427616328001022 + }, + { + "epoch": 16.26798561151079, + "force_loss": 0.0120158726349473, + "step": 18090 + }, + { + "epoch": 16.276978417266186, + "grad_norm": 0.4713435769081116, + "learning_rate": 8.36628534121925e-05, + "loss": 0.0244, + "step": 18100 + }, + { + "action_loss": 0.004326095338910818, + "epoch": 16.276978417266186, + "step": 18100 + }, + { + "epoch": 16.276978417266186, + "step": 18100, + "torque_loss": 0.12609809637069702 + }, + { + "epoch": 16.276978417266186, + "force_loss": 0.0025467786472290754, + "step": 18100 + }, + { + "epoch": 16.285971223021583, + "grad_norm": 0.9442657232284546, + "learning_rate": 8.364247180650254e-05, + "loss": 0.0262, + "step": 18110 + }, + { + "action_loss": 0.011486049741506577, + "epoch": 16.285971223021583, + "step": 18110 + }, + { + "epoch": 16.285971223021583, + "step": 18110, + "torque_loss": 0.1206124797463417 + }, + { + "epoch": 16.285971223021583, + "force_loss": 0.011293943971395493, + "step": 18110 + }, + { + "epoch": 16.294964028776977, + "grad_norm": 0.25877389311790466, + "learning_rate": 8.362207998111794e-05, + "loss": 0.0191, + "step": 18120 + }, + { + "action_loss": 0.00729757035151124, + "epoch": 16.294964028776977, + "step": 18120 + }, + { + "epoch": 16.294964028776977, + "step": 18120, + "torque_loss": 0.12804141640663147 + }, + { + "epoch": 16.294964028776977, + "force_loss": 0.0034155212342739105, + "step": 18120 + }, + { + "epoch": 16.303956834532375, + "grad_norm": 0.2360243946313858, + "learning_rate": 8.360167794223318e-05, + "loss": 0.0209, + "step": 18130 + }, + { + "action_loss": 0.009071596898138523, + "epoch": 16.303956834532375, + "step": 18130 + }, + { + "epoch": 16.303956834532375, + "step": 18130, + "torque_loss": 0.17007555067539215 + }, + { + "epoch": 16.303956834532375, + "force_loss": 0.010613895952701569, + "step": 18130 + }, + { + "epoch": 16.31294964028777, + "grad_norm": 0.6451969146728516, + "learning_rate": 8.358126569604586e-05, + "loss": 0.0234, + "step": 18140 + }, + { + "action_loss": 0.004848698619753122, + "epoch": 16.31294964028777, + "step": 18140 + }, + { + "epoch": 16.31294964028777, + "step": 18140, + "torque_loss": 0.1646655946969986 + }, + { + "epoch": 16.31294964028777, + "force_loss": 0.005273627582937479, + "step": 18140 + }, + { + "epoch": 16.321942446043167, + "grad_norm": 0.25522100925445557, + "learning_rate": 8.356084324875668e-05, + "loss": 0.0257, + "step": 18150 + }, + { + "action_loss": 0.0047170682810246944, + "epoch": 16.321942446043167, + "step": 18150 + }, + { + "epoch": 16.321942446043167, + "step": 18150, + "torque_loss": 0.11787024885416031 + }, + { + "epoch": 16.321942446043167, + "force_loss": 0.006361795123666525, + "step": 18150 + }, + { + "epoch": 16.33093525179856, + "grad_norm": 0.3561152219772339, + "learning_rate": 8.354041060656945e-05, + "loss": 0.0198, + "step": 18160 + }, + { + "action_loss": 0.0031788635533303022, + "epoch": 16.33093525179856, + "step": 18160 + }, + { + "epoch": 16.33093525179856, + "step": 18160, + "torque_loss": 0.09880802035331726 + }, + { + "epoch": 16.33093525179856, + "force_loss": 0.002557651838287711, + "step": 18160 + }, + { + "epoch": 16.33992805755396, + "grad_norm": 0.26321348547935486, + "learning_rate": 8.351996777569106e-05, + "loss": 0.0204, + "step": 18170 + }, + { + "action_loss": 0.006902188062667847, + "epoch": 16.33992805755396, + "step": 18170 + }, + { + "epoch": 16.33992805755396, + "step": 18170, + "torque_loss": 0.13893133401870728 + }, + { + "epoch": 16.33992805755396, + "force_loss": 0.006122795399278402, + "step": 18170 + }, + { + "epoch": 16.348920863309353, + "grad_norm": 0.38124632835388184, + "learning_rate": 8.349951476233148e-05, + "loss": 0.0226, + "step": 18180 + }, + { + "action_loss": 0.003286626422777772, + "epoch": 16.348920863309353, + "step": 18180 + }, + { + "epoch": 16.348920863309353, + "step": 18180, + "torque_loss": 0.10841871052980423 + }, + { + "epoch": 16.348920863309353, + "force_loss": 0.003879651427268982, + "step": 18180 + }, + { + "epoch": 16.357913669064747, + "grad_norm": 0.22667431831359863, + "learning_rate": 8.347905157270386e-05, + "loss": 0.0215, + "step": 18190 + }, + { + "action_loss": 0.0055561005137860775, + "epoch": 16.357913669064747, + "step": 18190 + }, + { + "epoch": 16.357913669064747, + "step": 18190, + "torque_loss": 0.15022213757038116 + }, + { + "epoch": 16.357913669064747, + "force_loss": 0.004825991578400135, + "step": 18190 + }, + { + "epoch": 16.366906474820144, + "grad_norm": 0.2512531280517578, + "learning_rate": 8.345857821302432e-05, + "loss": 0.0239, + "step": 18200 + }, + { + "action_loss": 0.008582408539950848, + "epoch": 16.366906474820144, + "step": 18200 + }, + { + "epoch": 16.366906474820144, + "step": 18200, + "torque_loss": 0.16552527248859406 + }, + { + "epoch": 16.366906474820144, + "force_loss": 0.01397332176566124, + "step": 18200 + }, + { + "epoch": 16.37589928057554, + "grad_norm": 0.2697646915912628, + "learning_rate": 8.343809468951213e-05, + "loss": 0.0214, + "step": 18210 + }, + { + "action_loss": 0.006092951167374849, + "epoch": 16.37589928057554, + "step": 18210 + }, + { + "epoch": 16.37589928057554, + "step": 18210, + "torque_loss": 0.1761222928762436 + }, + { + "epoch": 16.37589928057554, + "force_loss": 0.007903401739895344, + "step": 18210 + }, + { + "epoch": 16.384892086330936, + "grad_norm": 0.5763723254203796, + "learning_rate": 8.341760100838965e-05, + "loss": 0.0278, + "step": 18220 + }, + { + "action_loss": 0.009349461644887924, + "epoch": 16.384892086330936, + "step": 18220 + }, + { + "epoch": 16.384892086330936, + "step": 18220, + "torque_loss": 0.13287697732448578 + }, + { + "epoch": 16.384892086330936, + "force_loss": 0.007232888136059046, + "step": 18220 + }, + { + "epoch": 16.39388489208633, + "grad_norm": 0.30847808718681335, + "learning_rate": 8.339709717588233e-05, + "loss": 0.0267, + "step": 18230 + }, + { + "action_loss": 0.007372267544269562, + "epoch": 16.39388489208633, + "step": 18230 + }, + { + "epoch": 16.39388489208633, + "step": 18230, + "torque_loss": 0.1381007581949234 + }, + { + "epoch": 16.39388489208633, + "force_loss": 0.005219774786382914, + "step": 18230 + }, + { + "epoch": 16.402877697841728, + "grad_norm": 0.5407735109329224, + "learning_rate": 8.33765831982187e-05, + "loss": 0.022, + "step": 18240 + }, + { + "action_loss": 0.005897350609302521, + "epoch": 16.402877697841728, + "step": 18240 + }, + { + "epoch": 16.402877697841728, + "step": 18240, + "torque_loss": 0.08377639204263687 + }, + { + "epoch": 16.402877697841728, + "force_loss": 0.004883553367108107, + "step": 18240 + }, + { + "epoch": 16.41187050359712, + "grad_norm": 0.22648492455482483, + "learning_rate": 8.335605908163035e-05, + "loss": 0.0179, + "step": 18250 + }, + { + "action_loss": 0.008108261041343212, + "epoch": 16.41187050359712, + "step": 18250 + }, + { + "epoch": 16.41187050359712, + "step": 18250, + "torque_loss": 0.1093992218375206 + }, + { + "epoch": 16.41187050359712, + "force_loss": 0.004886269569396973, + "step": 18250 + }, + { + "epoch": 16.42086330935252, + "grad_norm": 0.39348939061164856, + "learning_rate": 8.333552483235196e-05, + "loss": 0.021, + "step": 18260 + }, + { + "action_loss": 0.0043386840261518955, + "epoch": 16.42086330935252, + "step": 18260 + }, + { + "epoch": 16.42086330935252, + "step": 18260, + "torque_loss": 0.12040645629167557 + }, + { + "epoch": 16.42086330935252, + "force_loss": 0.004096080083400011, + "step": 18260 + }, + { + "epoch": 16.429856115107913, + "grad_norm": 0.47013697028160095, + "learning_rate": 8.33149804566213e-05, + "loss": 0.0226, + "step": 18270 + }, + { + "action_loss": 0.010142925195395947, + "epoch": 16.429856115107913, + "step": 18270 + }, + { + "epoch": 16.429856115107913, + "step": 18270, + "torque_loss": 0.14123956859111786 + }, + { + "epoch": 16.429856115107913, + "force_loss": 0.006758174393326044, + "step": 18270 + }, + { + "epoch": 16.43884892086331, + "grad_norm": 0.2910516560077667, + "learning_rate": 8.329442596067921e-05, + "loss": 0.0245, + "step": 18280 + }, + { + "action_loss": 0.003754260018467903, + "epoch": 16.43884892086331, + "step": 18280 + }, + { + "epoch": 16.43884892086331, + "step": 18280, + "torque_loss": 0.11502655595541 + }, + { + "epoch": 16.43884892086331, + "force_loss": 0.0023056436330080032, + "step": 18280 + }, + { + "epoch": 16.447841726618705, + "grad_norm": 0.3777787387371063, + "learning_rate": 8.32738613507696e-05, + "loss": 0.0251, + "step": 18290 + }, + { + "action_loss": 0.024292265996336937, + "epoch": 16.447841726618705, + "step": 18290 + }, + { + "epoch": 16.447841726618705, + "step": 18290, + "torque_loss": 0.18905021250247955 + }, + { + "epoch": 16.447841726618705, + "force_loss": 0.032001394778490067, + "step": 18290 + }, + { + "epoch": 16.4568345323741, + "grad_norm": 0.7325795888900757, + "learning_rate": 8.325328663313946e-05, + "loss": 0.0283, + "step": 18300 + }, + { + "action_loss": 0.012889673002064228, + "epoch": 16.4568345323741, + "step": 18300 + }, + { + "epoch": 16.4568345323741, + "step": 18300, + "torque_loss": 0.17282839119434357 + }, + { + "epoch": 16.4568345323741, + "force_loss": 0.023282423615455627, + "step": 18300 + }, + { + "epoch": 16.465827338129497, + "grad_norm": 0.3477124571800232, + "learning_rate": 8.323270181403884e-05, + "loss": 0.0249, + "step": 18310 + }, + { + "action_loss": 0.01892814226448536, + "epoch": 16.465827338129497, + "step": 18310 + }, + { + "epoch": 16.465827338129497, + "step": 18310, + "torque_loss": 0.13234031200408936 + }, + { + "epoch": 16.465827338129497, + "force_loss": 0.017151379957795143, + "step": 18310 + }, + { + "epoch": 16.47482014388489, + "grad_norm": 0.2253669798374176, + "learning_rate": 8.321210689972086e-05, + "loss": 0.0256, + "step": 18320 + }, + { + "action_loss": 0.005118671339005232, + "epoch": 16.47482014388489, + "step": 18320 + }, + { + "epoch": 16.47482014388489, + "step": 18320, + "torque_loss": 0.1254182606935501 + }, + { + "epoch": 16.47482014388489, + "force_loss": 0.00862070545554161, + "step": 18320 + }, + { + "epoch": 16.48381294964029, + "grad_norm": 0.3347220718860626, + "learning_rate": 8.319150189644174e-05, + "loss": 0.0239, + "step": 18330 + }, + { + "action_loss": 0.00308927777223289, + "epoch": 16.48381294964029, + "step": 18330 + }, + { + "epoch": 16.48381294964029, + "step": 18330, + "torque_loss": 0.10159683227539062 + }, + { + "epoch": 16.48381294964029, + "force_loss": 0.004008258227258921, + "step": 18330 + }, + { + "epoch": 16.492805755395683, + "grad_norm": 0.6985332369804382, + "learning_rate": 8.31708868104607e-05, + "loss": 0.0299, + "step": 18340 + }, + { + "action_loss": 0.010480965487658978, + "epoch": 16.492805755395683, + "step": 18340 + }, + { + "epoch": 16.492805755395683, + "step": 18340, + "torque_loss": 0.1386459618806839 + }, + { + "epoch": 16.492805755395683, + "force_loss": 0.007932504639029503, + "step": 18340 + }, + { + "epoch": 16.50179856115108, + "grad_norm": 0.37076595425605774, + "learning_rate": 8.315026164804007e-05, + "loss": 0.0238, + "step": 18350 + }, + { + "action_loss": 0.006796905305236578, + "epoch": 16.50179856115108, + "step": 18350 + }, + { + "epoch": 16.50179856115108, + "step": 18350, + "torque_loss": 0.13150005042552948 + }, + { + "epoch": 16.50179856115108, + "force_loss": 0.0044395411387085915, + "step": 18350 + }, + { + "epoch": 16.510791366906474, + "grad_norm": 1.0180201530456543, + "learning_rate": 8.312962641544524e-05, + "loss": 0.025, + "step": 18360 + }, + { + "action_loss": 0.007877036929130554, + "epoch": 16.510791366906474, + "step": 18360 + }, + { + "epoch": 16.510791366906474, + "step": 18360, + "torque_loss": 0.1178581491112709 + }, + { + "epoch": 16.510791366906474, + "force_loss": 0.009159534238278866, + "step": 18360 + }, + { + "epoch": 16.519784172661872, + "grad_norm": 0.39785414934158325, + "learning_rate": 8.310898111894465e-05, + "loss": 0.0216, + "step": 18370 + }, + { + "action_loss": 0.008548031561076641, + "epoch": 16.519784172661872, + "step": 18370 + }, + { + "epoch": 16.519784172661872, + "step": 18370, + "torque_loss": 0.13693289458751678 + }, + { + "epoch": 16.519784172661872, + "force_loss": 0.016244065016508102, + "step": 18370 + }, + { + "epoch": 16.528776978417266, + "grad_norm": 0.4085237979888916, + "learning_rate": 8.308832576480977e-05, + "loss": 0.0214, + "step": 18380 + }, + { + "action_loss": 0.004474898334592581, + "epoch": 16.528776978417266, + "step": 18380 + }, + { + "epoch": 16.528776978417266, + "step": 18380, + "torque_loss": 0.11087968200445175 + }, + { + "epoch": 16.528776978417266, + "force_loss": 0.004281978588551283, + "step": 18380 + }, + { + "epoch": 16.53776978417266, + "grad_norm": 0.24151866137981415, + "learning_rate": 8.306766035931519e-05, + "loss": 0.0262, + "step": 18390 + }, + { + "action_loss": 0.006306593772023916, + "epoch": 16.53776978417266, + "step": 18390 + }, + { + "epoch": 16.53776978417266, + "step": 18390, + "torque_loss": 0.2008480280637741 + }, + { + "epoch": 16.53776978417266, + "force_loss": 0.011665627360343933, + "step": 18390 + }, + { + "epoch": 16.546762589928058, + "grad_norm": 0.26691973209381104, + "learning_rate": 8.304698490873847e-05, + "loss": 0.0229, + "step": 18400 + }, + { + "action_loss": 0.005176689941436052, + "epoch": 16.546762589928058, + "step": 18400 + }, + { + "epoch": 16.546762589928058, + "step": 18400, + "torque_loss": 0.13689620792865753 + }, + { + "epoch": 16.546762589928058, + "force_loss": 0.0031080376356840134, + "step": 18400 + }, + { + "epoch": 16.555755395683452, + "grad_norm": 0.3118777275085449, + "learning_rate": 8.30262994193603e-05, + "loss": 0.0243, + "step": 18410 + }, + { + "action_loss": 0.0033146876376122236, + "epoch": 16.555755395683452, + "step": 18410 + }, + { + "epoch": 16.555755395683452, + "step": 18410, + "torque_loss": 0.10385149717330933 + }, + { + "epoch": 16.555755395683452, + "force_loss": 0.003780081868171692, + "step": 18410 + }, + { + "epoch": 16.56474820143885, + "grad_norm": 0.4881945252418518, + "learning_rate": 8.300560389746438e-05, + "loss": 0.0222, + "step": 18420 + }, + { + "action_loss": 0.007430974394083023, + "epoch": 16.56474820143885, + "step": 18420 + }, + { + "epoch": 16.56474820143885, + "step": 18420, + "torque_loss": 0.11570519953966141 + }, + { + "epoch": 16.56474820143885, + "force_loss": 0.008030847646296024, + "step": 18420 + }, + { + "epoch": 16.573741007194243, + "grad_norm": 0.7185755968093872, + "learning_rate": 8.298489834933745e-05, + "loss": 0.0263, + "step": 18430 + }, + { + "action_loss": 0.003142673522233963, + "epoch": 16.573741007194243, + "step": 18430 + }, + { + "epoch": 16.573741007194243, + "step": 18430, + "torque_loss": 0.12953804433345795 + }, + { + "epoch": 16.573741007194243, + "force_loss": 0.0025068691466003656, + "step": 18430 + }, + { + "epoch": 16.58273381294964, + "grad_norm": 0.25372314453125, + "learning_rate": 8.296418278126934e-05, + "loss": 0.0283, + "step": 18440 + }, + { + "action_loss": 0.005031744949519634, + "epoch": 16.58273381294964, + "step": 18440 + }, + { + "epoch": 16.58273381294964, + "step": 18440, + "torque_loss": 0.09780289977788925 + }, + { + "epoch": 16.58273381294964, + "force_loss": 0.00647571450099349, + "step": 18440 + }, + { + "epoch": 16.591726618705035, + "grad_norm": 0.2521689832210541, + "learning_rate": 8.294345719955284e-05, + "loss": 0.0223, + "step": 18450 + }, + { + "action_loss": 0.004054040182381868, + "epoch": 16.591726618705035, + "step": 18450 + }, + { + "epoch": 16.591726618705035, + "step": 18450, + "torque_loss": 0.21708768606185913 + }, + { + "epoch": 16.591726618705035, + "force_loss": 0.00399880250915885, + "step": 18450 + }, + { + "epoch": 16.600719424460433, + "grad_norm": 0.4184238314628601, + "learning_rate": 8.29227216104839e-05, + "loss": 0.0228, + "step": 18460 + }, + { + "action_loss": 0.006403022911399603, + "epoch": 16.600719424460433, + "step": 18460 + }, + { + "epoch": 16.600719424460433, + "step": 18460, + "torque_loss": 0.12503254413604736 + }, + { + "epoch": 16.600719424460433, + "force_loss": 0.005357746034860611, + "step": 18460 + }, + { + "epoch": 16.609712230215827, + "grad_norm": 0.34864628314971924, + "learning_rate": 8.290197602036137e-05, + "loss": 0.0236, + "step": 18470 + }, + { + "action_loss": 0.005995148792862892, + "epoch": 16.609712230215827, + "step": 18470 + }, + { + "epoch": 16.609712230215827, + "step": 18470, + "torque_loss": 0.1445033699274063 + }, + { + "epoch": 16.609712230215827, + "force_loss": 0.003798463148996234, + "step": 18470 + }, + { + "epoch": 16.618705035971225, + "grad_norm": 0.4197278618812561, + "learning_rate": 8.288122043548725e-05, + "loss": 0.0203, + "step": 18480 + }, + { + "action_loss": 0.008849951438605785, + "epoch": 16.618705035971225, + "step": 18480 + }, + { + "epoch": 16.618705035971225, + "step": 18480, + "torque_loss": 0.09843218326568604 + }, + { + "epoch": 16.618705035971225, + "force_loss": 0.009366246871650219, + "step": 18480 + }, + { + "epoch": 16.62769784172662, + "grad_norm": 0.3057209849357605, + "learning_rate": 8.286045486216657e-05, + "loss": 0.0254, + "step": 18490 + }, + { + "action_loss": 0.006491031032055616, + "epoch": 16.62769784172662, + "step": 18490 + }, + { + "epoch": 16.62769784172662, + "step": 18490, + "torque_loss": 0.1364649534225464 + }, + { + "epoch": 16.62769784172662, + "force_loss": 0.00570320850238204, + "step": 18490 + }, + { + "epoch": 16.636690647482013, + "grad_norm": 0.49683183431625366, + "learning_rate": 8.283967930670733e-05, + "loss": 0.0232, + "step": 18500 + }, + { + "action_loss": 0.008977101184427738, + "epoch": 16.636690647482013, + "step": 18500 + }, + { + "epoch": 16.636690647482013, + "step": 18500, + "torque_loss": 0.17831246554851532 + }, + { + "epoch": 16.636690647482013, + "force_loss": 0.005677130538970232, + "step": 18500 + }, + { + "epoch": 16.64568345323741, + "grad_norm": 0.25140267610549927, + "learning_rate": 8.281889377542058e-05, + "loss": 0.0248, + "step": 18510 + }, + { + "action_loss": 0.011087329126894474, + "epoch": 16.64568345323741, + "step": 18510 + }, + { + "epoch": 16.64568345323741, + "step": 18510, + "torque_loss": 0.12683016061782837 + }, + { + "epoch": 16.64568345323741, + "force_loss": 0.005519574508070946, + "step": 18510 + }, + { + "epoch": 16.654676258992804, + "grad_norm": 0.7398377656936646, + "learning_rate": 8.279809827462045e-05, + "loss": 0.0213, + "step": 18520 + }, + { + "action_loss": 0.01726895570755005, + "epoch": 16.654676258992804, + "step": 18520 + }, + { + "epoch": 16.654676258992804, + "step": 18520, + "torque_loss": 0.19766275584697723 + }, + { + "epoch": 16.654676258992804, + "force_loss": 0.012134368531405926, + "step": 18520 + }, + { + "epoch": 16.663669064748202, + "grad_norm": 0.5961434841156006, + "learning_rate": 8.277729281062402e-05, + "loss": 0.0267, + "step": 18530 + }, + { + "action_loss": 0.005861076060682535, + "epoch": 16.663669064748202, + "step": 18530 + }, + { + "epoch": 16.663669064748202, + "step": 18530, + "torque_loss": 0.13658632338047028 + }, + { + "epoch": 16.663669064748202, + "force_loss": 0.0032646150793880224, + "step": 18530 + }, + { + "epoch": 16.672661870503596, + "grad_norm": 0.4042477011680603, + "learning_rate": 8.27564773897515e-05, + "loss": 0.0244, + "step": 18540 + }, + { + "action_loss": 0.0073905400931835175, + "epoch": 16.672661870503596, + "step": 18540 + }, + { + "epoch": 16.672661870503596, + "step": 18540, + "torque_loss": 0.11590138077735901 + }, + { + "epoch": 16.672661870503596, + "force_loss": 0.004519374575465918, + "step": 18540 + }, + { + "epoch": 16.681654676258994, + "grad_norm": 0.23871681094169617, + "learning_rate": 8.273565201832602e-05, + "loss": 0.0214, + "step": 18550 + }, + { + "action_loss": 0.01045029517263174, + "epoch": 16.681654676258994, + "step": 18550 + }, + { + "epoch": 16.681654676258994, + "step": 18550, + "torque_loss": 0.15892839431762695 + }, + { + "epoch": 16.681654676258994, + "force_loss": 0.009049165062606335, + "step": 18550 + }, + { + "epoch": 16.690647482014388, + "grad_norm": 0.27389827370643616, + "learning_rate": 8.27148167026738e-05, + "loss": 0.0219, + "step": 18560 + }, + { + "action_loss": 0.007756886538118124, + "epoch": 16.690647482014388, + "step": 18560 + }, + { + "epoch": 16.690647482014388, + "step": 18560, + "torque_loss": 0.1800699681043625 + }, + { + "epoch": 16.690647482014388, + "force_loss": 0.004350780043751001, + "step": 18560 + }, + { + "epoch": 16.699640287769785, + "grad_norm": 0.3494758903980255, + "learning_rate": 8.269397144912405e-05, + "loss": 0.0201, + "step": 18570 + }, + { + "action_loss": 0.005796968936920166, + "epoch": 16.699640287769785, + "step": 18570 + }, + { + "epoch": 16.699640287769785, + "step": 18570, + "torque_loss": 0.13563187420368195 + }, + { + "epoch": 16.699640287769785, + "force_loss": 0.0042900522239506245, + "step": 18570 + }, + { + "epoch": 16.70863309352518, + "grad_norm": 0.1897176206111908, + "learning_rate": 8.267311626400899e-05, + "loss": 0.0203, + "step": 18580 + }, + { + "action_loss": 0.0036982849705964327, + "epoch": 16.70863309352518, + "step": 18580 + }, + { + "epoch": 16.70863309352518, + "step": 18580, + "torque_loss": 0.13515764474868774 + }, + { + "epoch": 16.70863309352518, + "force_loss": 0.004321611020714045, + "step": 18580 + }, + { + "epoch": 16.717625899280577, + "grad_norm": 0.648822546005249, + "learning_rate": 8.26522511536639e-05, + "loss": 0.0266, + "step": 18590 + }, + { + "action_loss": 0.005962255876511335, + "epoch": 16.717625899280577, + "step": 18590 + }, + { + "epoch": 16.717625899280577, + "step": 18590, + "torque_loss": 0.1289035528898239 + }, + { + "epoch": 16.717625899280577, + "force_loss": 0.007715058047324419, + "step": 18590 + }, + { + "epoch": 16.72661870503597, + "grad_norm": 0.6050834059715271, + "learning_rate": 8.263137612442706e-05, + "loss": 0.0228, + "step": 18600 + }, + { + "action_loss": 0.00279523734934628, + "epoch": 16.72661870503597, + "step": 18600 + }, + { + "epoch": 16.72661870503597, + "step": 18600, + "torque_loss": 0.1604495793581009 + }, + { + "epoch": 16.72661870503597, + "force_loss": 0.003315134672448039, + "step": 18600 + }, + { + "epoch": 16.735611510791365, + "grad_norm": 0.5683088898658752, + "learning_rate": 8.261049118263971e-05, + "loss": 0.0236, + "step": 18610 + }, + { + "action_loss": 0.00667849974706769, + "epoch": 16.735611510791365, + "step": 18610 + }, + { + "epoch": 16.735611510791365, + "step": 18610, + "torque_loss": 0.10446900129318237 + }, + { + "epoch": 16.735611510791365, + "force_loss": 0.005121113266795874, + "step": 18610 + }, + { + "epoch": 16.744604316546763, + "grad_norm": 0.312840074300766, + "learning_rate": 8.258959633464619e-05, + "loss": 0.0256, + "step": 18620 + }, + { + "action_loss": 0.011705388315021992, + "epoch": 16.744604316546763, + "step": 18620 + }, + { + "epoch": 16.744604316546763, + "step": 18620, + "torque_loss": 0.139545276761055 + }, + { + "epoch": 16.744604316546763, + "force_loss": 0.01052594929933548, + "step": 18620 + }, + { + "epoch": 16.753597122302157, + "grad_norm": 0.23400932550430298, + "learning_rate": 8.256869158679377e-05, + "loss": 0.0215, + "step": 18630 + }, + { + "action_loss": 0.013369572348892689, + "epoch": 16.753597122302157, + "step": 18630 + }, + { + "epoch": 16.753597122302157, + "step": 18630, + "torque_loss": 0.14347916841506958 + }, + { + "epoch": 16.753597122302157, + "force_loss": 0.02529866062104702, + "step": 18630 + }, + { + "epoch": 16.762589928057555, + "grad_norm": 0.2445773333311081, + "learning_rate": 8.254777694543278e-05, + "loss": 0.0244, + "step": 18640 + }, + { + "action_loss": 0.003455832600593567, + "epoch": 16.762589928057555, + "step": 18640 + }, + { + "epoch": 16.762589928057555, + "step": 18640, + "torque_loss": 0.10403459519147873 + }, + { + "epoch": 16.762589928057555, + "force_loss": 0.002834821818396449, + "step": 18640 + }, + { + "epoch": 16.77158273381295, + "grad_norm": 0.170840322971344, + "learning_rate": 8.252685241691651e-05, + "loss": 0.0223, + "step": 18650 + }, + { + "action_loss": 0.014742475003004074, + "epoch": 16.77158273381295, + "step": 18650 + }, + { + "epoch": 16.77158273381295, + "step": 18650, + "torque_loss": 0.1419486254453659 + }, + { + "epoch": 16.77158273381295, + "force_loss": 0.013319268822669983, + "step": 18650 + }, + { + "epoch": 16.780575539568346, + "grad_norm": 0.25448960065841675, + "learning_rate": 8.250591800760133e-05, + "loss": 0.0259, + "step": 18660 + }, + { + "action_loss": 0.0067754946649074554, + "epoch": 16.780575539568346, + "step": 18660 + }, + { + "epoch": 16.780575539568346, + "step": 18660, + "torque_loss": 0.12209559231996536 + }, + { + "epoch": 16.780575539568346, + "force_loss": 0.006957089062780142, + "step": 18660 + }, + { + "epoch": 16.78956834532374, + "grad_norm": 0.46032240986824036, + "learning_rate": 8.248497372384649e-05, + "loss": 0.0233, + "step": 18670 + }, + { + "action_loss": 0.005185444373637438, + "epoch": 16.78956834532374, + "step": 18670 + }, + { + "epoch": 16.78956834532374, + "step": 18670, + "torque_loss": 0.13454100489616394 + }, + { + "epoch": 16.78956834532374, + "force_loss": 0.004870919976383448, + "step": 18670 + }, + { + "epoch": 16.798561151079138, + "grad_norm": 0.3743554949760437, + "learning_rate": 8.246401957201437e-05, + "loss": 0.0226, + "step": 18680 + }, + { + "action_loss": 0.01026403158903122, + "epoch": 16.798561151079138, + "step": 18680 + }, + { + "epoch": 16.798561151079138, + "step": 18680, + "torque_loss": 0.09702879190444946 + }, + { + "epoch": 16.798561151079138, + "force_loss": 0.00985734909772873, + "step": 18680 + }, + { + "epoch": 16.807553956834532, + "grad_norm": 0.5881900787353516, + "learning_rate": 8.244305555847027e-05, + "loss": 0.0223, + "step": 18690 + }, + { + "action_loss": 0.006126718129962683, + "epoch": 16.807553956834532, + "step": 18690 + }, + { + "epoch": 16.807553956834532, + "step": 18690, + "torque_loss": 0.14007656276226044 + }, + { + "epoch": 16.807553956834532, + "force_loss": 0.005367644131183624, + "step": 18690 + }, + { + "epoch": 16.81654676258993, + "grad_norm": 0.1845822036266327, + "learning_rate": 8.24220816895825e-05, + "loss": 0.0208, + "step": 18700 + }, + { + "action_loss": 0.0038799226749688387, + "epoch": 16.81654676258993, + "step": 18700 + }, + { + "epoch": 16.81654676258993, + "step": 18700, + "torque_loss": 0.14139671623706818 + }, + { + "epoch": 16.81654676258993, + "force_loss": 0.004548170138150454, + "step": 18700 + }, + { + "epoch": 16.825539568345324, + "grad_norm": 0.33281707763671875, + "learning_rate": 8.240109797172237e-05, + "loss": 0.0238, + "step": 18710 + }, + { + "action_loss": 0.007475856691598892, + "epoch": 16.825539568345324, + "step": 18710 + }, + { + "epoch": 16.825539568345324, + "step": 18710, + "torque_loss": 0.12454714626073837 + }, + { + "epoch": 16.825539568345324, + "force_loss": 0.005322887096554041, + "step": 18710 + }, + { + "epoch": 16.834532374100718, + "grad_norm": 0.45379719138145447, + "learning_rate": 8.238010441126416e-05, + "loss": 0.0307, + "step": 18720 + }, + { + "action_loss": 0.00832696445286274, + "epoch": 16.834532374100718, + "step": 18720 + }, + { + "epoch": 16.834532374100718, + "step": 18720, + "torque_loss": 0.14609622955322266 + }, + { + "epoch": 16.834532374100718, + "force_loss": 0.01168263703584671, + "step": 18720 + }, + { + "epoch": 16.843525179856115, + "grad_norm": 0.2713542878627777, + "learning_rate": 8.23591010145852e-05, + "loss": 0.0274, + "step": 18730 + }, + { + "action_loss": 0.023935288190841675, + "epoch": 16.843525179856115, + "step": 18730 + }, + { + "epoch": 16.843525179856115, + "step": 18730, + "torque_loss": 0.16119518876075745 + }, + { + "epoch": 16.843525179856115, + "force_loss": 0.03651067987084389, + "step": 18730 + }, + { + "epoch": 16.85251798561151, + "grad_norm": 0.3632880747318268, + "learning_rate": 8.233808778806571e-05, + "loss": 0.0273, + "step": 18740 + }, + { + "action_loss": 0.007144462782889605, + "epoch": 16.85251798561151, + "step": 18740 + }, + { + "epoch": 16.85251798561151, + "step": 18740, + "torque_loss": 0.18217028677463531 + }, + { + "epoch": 16.85251798561151, + "force_loss": 0.0072443666867911816, + "step": 18740 + }, + { + "epoch": 16.861510791366907, + "grad_norm": 0.4320412874221802, + "learning_rate": 8.231706473808903e-05, + "loss": 0.024, + "step": 18750 + }, + { + "action_loss": 0.010637740604579449, + "epoch": 16.861510791366907, + "step": 18750 + }, + { + "epoch": 16.861510791366907, + "step": 18750, + "torque_loss": 0.15085817873477936 + }, + { + "epoch": 16.861510791366907, + "force_loss": 0.01596071571111679, + "step": 18750 + }, + { + "epoch": 16.8705035971223, + "grad_norm": 0.4479663074016571, + "learning_rate": 8.229603187104133e-05, + "loss": 0.0294, + "step": 18760 + }, + { + "action_loss": 0.004178466275334358, + "epoch": 16.8705035971223, + "step": 18760 + }, + { + "epoch": 16.8705035971223, + "step": 18760, + "torque_loss": 0.08190203458070755 + }, + { + "epoch": 16.8705035971223, + "force_loss": 0.0028483474161475897, + "step": 18760 + }, + { + "epoch": 16.8794964028777, + "grad_norm": 0.780994176864624, + "learning_rate": 8.22749891933119e-05, + "loss": 0.0244, + "step": 18770 + }, + { + "action_loss": 0.023183442652225494, + "epoch": 16.8794964028777, + "step": 18770 + }, + { + "epoch": 16.8794964028777, + "step": 18770, + "torque_loss": 0.18326187133789062 + }, + { + "epoch": 16.8794964028777, + "force_loss": 0.014718101359903812, + "step": 18770 + }, + { + "epoch": 16.888489208633093, + "grad_norm": 0.62701815366745, + "learning_rate": 8.225393671129291e-05, + "loss": 0.029, + "step": 18780 + }, + { + "action_loss": 0.008167502470314503, + "epoch": 16.888489208633093, + "step": 18780 + }, + { + "epoch": 16.888489208633093, + "step": 18780, + "torque_loss": 0.1430051028728485 + }, + { + "epoch": 16.888489208633093, + "force_loss": 0.0054850452579557896, + "step": 18780 + }, + { + "epoch": 16.89748201438849, + "grad_norm": 0.26363375782966614, + "learning_rate": 8.223287443137957e-05, + "loss": 0.0211, + "step": 18790 + }, + { + "action_loss": 0.0046821762807667255, + "epoch": 16.89748201438849, + "step": 18790 + }, + { + "epoch": 16.89748201438849, + "step": 18790, + "torque_loss": 0.12745413184165955 + }, + { + "epoch": 16.89748201438849, + "force_loss": 0.003840351477265358, + "step": 18790 + }, + { + "epoch": 16.906474820143885, + "grad_norm": 0.70247882604599, + "learning_rate": 8.221180235997004e-05, + "loss": 0.0231, + "step": 18800 + }, + { + "action_loss": 0.003294161520898342, + "epoch": 16.906474820143885, + "step": 18800 + }, + { + "epoch": 16.906474820143885, + "step": 18800, + "torque_loss": 0.13857722282409668 + }, + { + "epoch": 16.906474820143885, + "force_loss": 0.00698641873896122, + "step": 18800 + }, + { + "epoch": 16.915467625899282, + "grad_norm": 0.18306425213813782, + "learning_rate": 8.219072050346544e-05, + "loss": 0.0219, + "step": 18810 + }, + { + "action_loss": 0.006904096808284521, + "epoch": 16.915467625899282, + "step": 18810 + }, + { + "epoch": 16.915467625899282, + "step": 18810, + "torque_loss": 0.13770753145217896 + }, + { + "epoch": 16.915467625899282, + "force_loss": 0.009075612761080265, + "step": 18810 + }, + { + "epoch": 16.924460431654676, + "grad_norm": 0.3748214542865753, + "learning_rate": 8.216962886826992e-05, + "loss": 0.0222, + "step": 18820 + }, + { + "action_loss": 0.0046668448485434055, + "epoch": 16.924460431654676, + "step": 18820 + }, + { + "epoch": 16.924460431654676, + "step": 18820, + "torque_loss": 0.17053575813770294 + }, + { + "epoch": 16.924460431654676, + "force_loss": 0.004429507534950972, + "step": 18820 + }, + { + "epoch": 16.93345323741007, + "grad_norm": 0.7287952303886414, + "learning_rate": 8.214852746079054e-05, + "loss": 0.0264, + "step": 18830 + }, + { + "action_loss": 0.006570905912667513, + "epoch": 16.93345323741007, + "step": 18830 + }, + { + "epoch": 16.93345323741007, + "step": 18830, + "torque_loss": 0.16397243738174438 + }, + { + "epoch": 16.93345323741007, + "force_loss": 0.002944026840850711, + "step": 18830 + }, + { + "epoch": 16.942446043165468, + "grad_norm": 0.40586352348327637, + "learning_rate": 8.212741628743732e-05, + "loss": 0.0206, + "step": 18840 + }, + { + "action_loss": 0.0093872444704175, + "epoch": 16.942446043165468, + "step": 18840 + }, + { + "epoch": 16.942446043165468, + "step": 18840, + "torque_loss": 0.1676502227783203 + }, + { + "epoch": 16.942446043165468, + "force_loss": 0.005877931136637926, + "step": 18840 + }, + { + "epoch": 16.951438848920862, + "grad_norm": 0.3222416937351227, + "learning_rate": 8.210629535462333e-05, + "loss": 0.0239, + "step": 18850 + }, + { + "action_loss": 0.006561322137713432, + "epoch": 16.951438848920862, + "step": 18850 + }, + { + "epoch": 16.951438848920862, + "step": 18850, + "torque_loss": 0.08255260437726974 + }, + { + "epoch": 16.951438848920862, + "force_loss": 0.007167476695030928, + "step": 18850 + }, + { + "epoch": 16.96043165467626, + "grad_norm": 0.3633570671081543, + "learning_rate": 8.208516466876453e-05, + "loss": 0.0221, + "step": 18860 + }, + { + "action_loss": 0.009111897088587284, + "epoch": 16.96043165467626, + "step": 18860 + }, + { + "epoch": 16.96043165467626, + "step": 18860, + "torque_loss": 0.1447877287864685 + }, + { + "epoch": 16.96043165467626, + "force_loss": 0.011469370685517788, + "step": 18860 + }, + { + "epoch": 16.969424460431654, + "grad_norm": 0.457709401845932, + "learning_rate": 8.206402423627986e-05, + "loss": 0.0199, + "step": 18870 + }, + { + "action_loss": 0.00664312532171607, + "epoch": 16.969424460431654, + "step": 18870 + }, + { + "epoch": 16.969424460431654, + "step": 18870, + "torque_loss": 0.11739363521337509 + }, + { + "epoch": 16.969424460431654, + "force_loss": 0.011712410487234592, + "step": 18870 + }, + { + "epoch": 16.97841726618705, + "grad_norm": 0.3963375389575958, + "learning_rate": 8.204287406359124e-05, + "loss": 0.0311, + "step": 18880 + }, + { + "action_loss": 0.005666142795234919, + "epoch": 16.97841726618705, + "step": 18880 + }, + { + "epoch": 16.97841726618705, + "step": 18880, + "torque_loss": 0.13782495260238647 + }, + { + "epoch": 16.97841726618705, + "force_loss": 0.007117301691323519, + "step": 18880 + }, + { + "epoch": 16.987410071942445, + "grad_norm": 0.42669326066970825, + "learning_rate": 8.20217141571235e-05, + "loss": 0.0233, + "step": 18890 + }, + { + "action_loss": 0.0026051367167383432, + "epoch": 16.987410071942445, + "step": 18890 + }, + { + "epoch": 16.987410071942445, + "step": 18890, + "torque_loss": 0.10621228069067001 + }, + { + "epoch": 16.987410071942445, + "force_loss": 0.0038599625695496798, + "step": 18890 + }, + { + "epoch": 16.996402877697843, + "grad_norm": 0.2006705105304718, + "learning_rate": 8.200054452330449e-05, + "loss": 0.0187, + "step": 18900 + }, + { + "action_loss": 0.0020557413809001446, + "epoch": 16.996402877697843, + "step": 18900 + }, + { + "epoch": 16.996402877697843, + "step": 18900, + "torque_loss": 0.12934191524982452 + }, + { + "epoch": 16.996402877697843, + "force_loss": 0.0024156435392796993, + "step": 18900 + }, + { + "epoch": 17.005395683453237, + "grad_norm": 0.36919236183166504, + "learning_rate": 8.197936516856499e-05, + "loss": 0.0226, + "step": 18910 + }, + { + "action_loss": 0.007828912697732449, + "epoch": 17.005395683453237, + "step": 18910 + }, + { + "epoch": 17.005395683453237, + "step": 18910, + "torque_loss": 0.13782049715518951 + }, + { + "epoch": 17.005395683453237, + "force_loss": 0.008543119765818119, + "step": 18910 + }, + { + "epoch": 17.014388489208635, + "grad_norm": 0.32674938440322876, + "learning_rate": 8.195817609933871e-05, + "loss": 0.0314, + "step": 18920 + }, + { + "action_loss": 0.005228050053119659, + "epoch": 17.014388489208635, + "step": 18920 + }, + { + "epoch": 17.014388489208635, + "step": 18920, + "torque_loss": 0.11597766727209091 + }, + { + "epoch": 17.014388489208635, + "force_loss": 0.0029003219678997993, + "step": 18920 + }, + { + "epoch": 17.02338129496403, + "grad_norm": 0.4104541838169098, + "learning_rate": 8.193697732206233e-05, + "loss": 0.0209, + "step": 18930 + }, + { + "action_loss": 0.007867767475545406, + "epoch": 17.02338129496403, + "step": 18930 + }, + { + "epoch": 17.02338129496403, + "step": 18930, + "torque_loss": 0.12521807849407196 + }, + { + "epoch": 17.02338129496403, + "force_loss": 0.005270702298730612, + "step": 18930 + }, + { + "epoch": 17.032374100719423, + "grad_norm": 0.4457932710647583, + "learning_rate": 8.19157688431755e-05, + "loss": 0.0259, + "step": 18940 + }, + { + "action_loss": 0.007062478456646204, + "epoch": 17.032374100719423, + "step": 18940 + }, + { + "epoch": 17.032374100719423, + "step": 18940, + "torque_loss": 0.15664128959178925 + }, + { + "epoch": 17.032374100719423, + "force_loss": 0.004922620952129364, + "step": 18940 + }, + { + "epoch": 17.04136690647482, + "grad_norm": 0.8380558490753174, + "learning_rate": 8.189455066912077e-05, + "loss": 0.0229, + "step": 18950 + }, + { + "action_loss": 0.010789395309984684, + "epoch": 17.04136690647482, + "step": 18950 + }, + { + "epoch": 17.04136690647482, + "step": 18950, + "torque_loss": 0.14502167701721191 + }, + { + "epoch": 17.04136690647482, + "force_loss": 0.009628080762922764, + "step": 18950 + }, + { + "epoch": 17.050359712230215, + "grad_norm": 0.4603548049926758, + "learning_rate": 8.187332280634369e-05, + "loss": 0.0238, + "step": 18960 + }, + { + "action_loss": 0.008165935985744, + "epoch": 17.050359712230215, + "step": 18960 + }, + { + "epoch": 17.050359712230215, + "step": 18960, + "torque_loss": 0.15990819036960602 + }, + { + "epoch": 17.050359712230215, + "force_loss": 0.01032984908670187, + "step": 18960 + }, + { + "epoch": 17.059352517985612, + "grad_norm": 0.33527544140815735, + "learning_rate": 8.18520852612927e-05, + "loss": 0.0238, + "step": 18970 + }, + { + "action_loss": 0.006526540964841843, + "epoch": 17.059352517985612, + "step": 18970 + }, + { + "epoch": 17.059352517985612, + "step": 18970, + "torque_loss": 0.17004112899303436 + }, + { + "epoch": 17.059352517985612, + "force_loss": 0.0052053299732506275, + "step": 18970 + }, + { + "epoch": 17.068345323741006, + "grad_norm": 0.46330389380455017, + "learning_rate": 8.183083804041921e-05, + "loss": 0.0269, + "step": 18980 + }, + { + "action_loss": 0.010260792449116707, + "epoch": 17.068345323741006, + "step": 18980 + }, + { + "epoch": 17.068345323741006, + "step": 18980, + "torque_loss": 0.14924046397209167 + }, + { + "epoch": 17.068345323741006, + "force_loss": 0.007903140969574451, + "step": 18980 + }, + { + "epoch": 17.077338129496404, + "grad_norm": 0.3768214285373688, + "learning_rate": 8.180958115017757e-05, + "loss": 0.0211, + "step": 18990 + }, + { + "action_loss": 0.003264196217060089, + "epoch": 17.077338129496404, + "step": 18990 + }, + { + "epoch": 17.077338129496404, + "step": 18990, + "torque_loss": 0.12123546749353409 + }, + { + "epoch": 17.077338129496404, + "force_loss": 0.0032893174793571234, + "step": 18990 + }, + { + "epoch": 17.086330935251798, + "grad_norm": 0.6482235193252563, + "learning_rate": 8.178831459702505e-05, + "loss": 0.0306, + "step": 19000 + }, + { + "action_loss": 0.0057056681253015995, + "epoch": 17.086330935251798, + "step": 19000 + }, + { + "epoch": 17.086330935251798, + "step": 19000, + "torque_loss": 0.14882366359233856 + }, + { + "epoch": 17.086330935251798, + "force_loss": 0.005644096527248621, + "step": 19000 + }, + { + "epoch": 17.095323741007196, + "grad_norm": 0.423765629529953, + "learning_rate": 8.17670383874219e-05, + "loss": 0.0254, + "step": 19010 + }, + { + "action_loss": 0.02130795083940029, + "epoch": 17.095323741007196, + "step": 19010 + }, + { + "epoch": 17.095323741007196, + "step": 19010, + "torque_loss": 0.12126599997282028 + }, + { + "epoch": 17.095323741007196, + "force_loss": 0.014218921773135662, + "step": 19010 + }, + { + "epoch": 17.10431654676259, + "grad_norm": 0.22702963650226593, + "learning_rate": 8.174575252783124e-05, + "loss": 0.0208, + "step": 19020 + }, + { + "action_loss": 0.02861572802066803, + "epoch": 17.10431654676259, + "step": 19020 + }, + { + "epoch": 17.10431654676259, + "step": 19020, + "torque_loss": 0.27991050481796265 + }, + { + "epoch": 17.10431654676259, + "force_loss": 0.05038939043879509, + "step": 19020 + }, + { + "epoch": 17.113309352517987, + "grad_norm": 0.30597561597824097, + "learning_rate": 8.172445702471914e-05, + "loss": 0.0301, + "step": 19030 + }, + { + "action_loss": 0.013531462289392948, + "epoch": 17.113309352517987, + "step": 19030 + }, + { + "epoch": 17.113309352517987, + "step": 19030, + "torque_loss": 0.16070210933685303 + }, + { + "epoch": 17.113309352517987, + "force_loss": 0.005723575595766306, + "step": 19030 + }, + { + "epoch": 17.12230215827338, + "grad_norm": 0.5818365812301636, + "learning_rate": 8.170315188455466e-05, + "loss": 0.0259, + "step": 19040 + }, + { + "action_loss": 0.01446501910686493, + "epoch": 17.12230215827338, + "step": 19040 + }, + { + "epoch": 17.12230215827338, + "step": 19040, + "torque_loss": 0.10388970375061035 + }, + { + "epoch": 17.12230215827338, + "force_loss": 0.012621755711734295, + "step": 19040 + }, + { + "epoch": 17.131294964028775, + "grad_norm": 1.3864644765853882, + "learning_rate": 8.168183711380969e-05, + "loss": 0.0246, + "step": 19050 + }, + { + "action_loss": 0.005052044056355953, + "epoch": 17.131294964028775, + "step": 19050 + }, + { + "epoch": 17.131294964028775, + "step": 19050, + "torque_loss": 0.18764309585094452 + }, + { + "epoch": 17.131294964028775, + "force_loss": 0.004084060899913311, + "step": 19050 + }, + { + "epoch": 17.140287769784173, + "grad_norm": 0.3341905176639557, + "learning_rate": 8.166051271895913e-05, + "loss": 0.0263, + "step": 19060 + }, + { + "action_loss": 0.009814356453716755, + "epoch": 17.140287769784173, + "step": 19060 + }, + { + "epoch": 17.140287769784173, + "step": 19060, + "torque_loss": 0.12652598321437836 + }, + { + "epoch": 17.140287769784173, + "force_loss": 0.005368433892726898, + "step": 19060 + }, + { + "epoch": 17.149280575539567, + "grad_norm": 0.6932147741317749, + "learning_rate": 8.163917870648075e-05, + "loss": 0.0352, + "step": 19070 + }, + { + "action_loss": 0.014251135289669037, + "epoch": 17.149280575539567, + "step": 19070 + }, + { + "epoch": 17.149280575539567, + "step": 19070, + "torque_loss": 0.13219048082828522 + }, + { + "epoch": 17.149280575539567, + "force_loss": 0.0063622742891311646, + "step": 19070 + }, + { + "epoch": 17.158273381294965, + "grad_norm": 0.46337074041366577, + "learning_rate": 8.161783508285526e-05, + "loss": 0.0316, + "step": 19080 + }, + { + "action_loss": 0.05375169590115547, + "epoch": 17.158273381294965, + "step": 19080 + }, + { + "epoch": 17.158273381294965, + "step": 19080, + "torque_loss": 0.16900013387203217 + }, + { + "epoch": 17.158273381294965, + "force_loss": 0.028509723022580147, + "step": 19080 + }, + { + "epoch": 17.16726618705036, + "grad_norm": 0.3895530104637146, + "learning_rate": 8.159648185456628e-05, + "loss": 0.038, + "step": 19090 + }, + { + "action_loss": 0.008286750875413418, + "epoch": 17.16726618705036, + "step": 19090 + }, + { + "epoch": 17.16726618705036, + "step": 19090, + "torque_loss": 0.09403803199529648 + }, + { + "epoch": 17.16726618705036, + "force_loss": 0.0056198700331151485, + "step": 19090 + }, + { + "epoch": 17.176258992805757, + "grad_norm": 0.5421538949012756, + "learning_rate": 8.157511902810038e-05, + "loss": 0.0365, + "step": 19100 + }, + { + "action_loss": 0.012842250056564808, + "epoch": 17.176258992805757, + "step": 19100 + }, + { + "epoch": 17.176258992805757, + "step": 19100, + "torque_loss": 0.13496725261211395 + }, + { + "epoch": 17.176258992805757, + "force_loss": 0.013069468550384045, + "step": 19100 + }, + { + "epoch": 17.18525179856115, + "grad_norm": 0.8404548168182373, + "learning_rate": 8.155374660994701e-05, + "loss": 0.0267, + "step": 19110 + }, + { + "action_loss": 0.007729492615908384, + "epoch": 17.18525179856115, + "step": 19110 + }, + { + "epoch": 17.18525179856115, + "step": 19110, + "torque_loss": 0.11826196312904358 + }, + { + "epoch": 17.18525179856115, + "force_loss": 0.004309921991080046, + "step": 19110 + }, + { + "epoch": 17.194244604316548, + "grad_norm": 0.38744139671325684, + "learning_rate": 8.153236460659857e-05, + "loss": 0.0333, + "step": 19120 + }, + { + "action_loss": 0.011896542273461819, + "epoch": 17.194244604316548, + "step": 19120 + }, + { + "epoch": 17.194244604316548, + "step": 19120, + "torque_loss": 0.1816953867673874 + }, + { + "epoch": 17.194244604316548, + "force_loss": 0.008728374727070332, + "step": 19120 + }, + { + "epoch": 17.203237410071942, + "grad_norm": 0.5814850330352783, + "learning_rate": 8.151097302455031e-05, + "loss": 0.0246, + "step": 19130 + }, + { + "action_loss": 0.008752156980335712, + "epoch": 17.203237410071942, + "step": 19130 + }, + { + "epoch": 17.203237410071942, + "step": 19130, + "torque_loss": 0.14554281532764435 + }, + { + "epoch": 17.203237410071942, + "force_loss": 0.005143011454492807, + "step": 19130 + }, + { + "epoch": 17.21223021582734, + "grad_norm": 0.6042574644088745, + "learning_rate": 8.148957187030044e-05, + "loss": 0.03, + "step": 19140 + }, + { + "action_loss": 0.0060353465378284454, + "epoch": 17.21223021582734, + "step": 19140 + }, + { + "epoch": 17.21223021582734, + "step": 19140, + "torque_loss": 0.17903532087802887 + }, + { + "epoch": 17.21223021582734, + "force_loss": 0.0028662399854511023, + "step": 19140 + }, + { + "epoch": 17.221223021582734, + "grad_norm": 0.42693963646888733, + "learning_rate": 8.146816115035006e-05, + "loss": 0.0263, + "step": 19150 + }, + { + "action_loss": 0.0046659205108881, + "epoch": 17.221223021582734, + "step": 19150 + }, + { + "epoch": 17.221223021582734, + "step": 19150, + "torque_loss": 0.13491903245449066 + }, + { + "epoch": 17.221223021582734, + "force_loss": 0.0027668692637234926, + "step": 19150 + }, + { + "epoch": 17.230215827338128, + "grad_norm": 0.36125826835632324, + "learning_rate": 8.14467408712032e-05, + "loss": 0.0304, + "step": 19160 + }, + { + "action_loss": 0.0030811659526079893, + "epoch": 17.230215827338128, + "step": 19160 + }, + { + "epoch": 17.230215827338128, + "step": 19160, + "torque_loss": 0.11700669676065445 + }, + { + "epoch": 17.230215827338128, + "force_loss": 0.0032936956267803907, + "step": 19160 + }, + { + "epoch": 17.239208633093526, + "grad_norm": 0.3138788938522339, + "learning_rate": 8.142531103936678e-05, + "loss": 0.025, + "step": 19170 + }, + { + "action_loss": 0.006638602819293737, + "epoch": 17.239208633093526, + "step": 19170 + }, + { + "epoch": 17.239208633093526, + "step": 19170, + "torque_loss": 0.09379879385232925 + }, + { + "epoch": 17.239208633093526, + "force_loss": 0.0046833292581140995, + "step": 19170 + }, + { + "epoch": 17.24820143884892, + "grad_norm": 0.1880631446838379, + "learning_rate": 8.14038716613506e-05, + "loss": 0.0186, + "step": 19180 + }, + { + "action_loss": 0.01769532449543476, + "epoch": 17.24820143884892, + "step": 19180 + }, + { + "epoch": 17.24820143884892, + "step": 19180, + "torque_loss": 0.10830444097518921 + }, + { + "epoch": 17.24820143884892, + "force_loss": 0.01403549313545227, + "step": 19180 + }, + { + "epoch": 17.257194244604317, + "grad_norm": 0.4972555637359619, + "learning_rate": 8.138242274366736e-05, + "loss": 0.0234, + "step": 19190 + }, + { + "action_loss": 0.004576841834932566, + "epoch": 17.257194244604317, + "step": 19190 + }, + { + "epoch": 17.257194244604317, + "step": 19190, + "torque_loss": 0.11655453592538834 + }, + { + "epoch": 17.257194244604317, + "force_loss": 0.004046024288982153, + "step": 19190 + }, + { + "epoch": 17.26618705035971, + "grad_norm": 0.4342607855796814, + "learning_rate": 8.136096429283271e-05, + "loss": 0.0263, + "step": 19200 + }, + { + "action_loss": 0.006233396008610725, + "epoch": 17.26618705035971, + "step": 19200 + }, + { + "epoch": 17.26618705035971, + "step": 19200, + "torque_loss": 0.07891140133142471 + }, + { + "epoch": 17.26618705035971, + "force_loss": 0.005459946114569902, + "step": 19200 + }, + { + "epoch": 17.27517985611511, + "grad_norm": 0.6128469705581665, + "learning_rate": 8.133949631536515e-05, + "loss": 0.028, + "step": 19210 + }, + { + "action_loss": 0.012323073111474514, + "epoch": 17.27517985611511, + "step": 19210 + }, + { + "epoch": 17.27517985611511, + "step": 19210, + "torque_loss": 0.13757109642028809 + }, + { + "epoch": 17.27517985611511, + "force_loss": 0.00769123574718833, + "step": 19210 + }, + { + "epoch": 17.284172661870503, + "grad_norm": 0.5576527714729309, + "learning_rate": 8.131801881778607e-05, + "loss": 0.0293, + "step": 19220 + }, + { + "action_loss": 0.007438432425260544, + "epoch": 17.284172661870503, + "step": 19220 + }, + { + "epoch": 17.284172661870503, + "step": 19220, + "torque_loss": 0.0926814153790474 + }, + { + "epoch": 17.284172661870503, + "force_loss": 0.010401430539786816, + "step": 19220 + }, + { + "epoch": 17.2931654676259, + "grad_norm": 0.42516401410102844, + "learning_rate": 8.129653180661978e-05, + "loss": 0.0269, + "step": 19230 + }, + { + "action_loss": 0.006882320623844862, + "epoch": 17.2931654676259, + "step": 19230 + }, + { + "epoch": 17.2931654676259, + "step": 19230, + "torque_loss": 0.1604544073343277 + }, + { + "epoch": 17.2931654676259, + "force_loss": 0.0053317672573029995, + "step": 19230 + }, + { + "epoch": 17.302158273381295, + "grad_norm": 0.2955699563026428, + "learning_rate": 8.127503528839346e-05, + "loss": 0.0261, + "step": 19240 + }, + { + "action_loss": 0.01876833103597164, + "epoch": 17.302158273381295, + "step": 19240 + }, + { + "epoch": 17.302158273381295, + "step": 19240, + "torque_loss": 0.17759306728839874 + }, + { + "epoch": 17.302158273381295, + "force_loss": 0.015544223599135876, + "step": 19240 + }, + { + "epoch": 17.31115107913669, + "grad_norm": 0.5286865234375, + "learning_rate": 8.125352926963721e-05, + "loss": 0.0337, + "step": 19250 + }, + { + "action_loss": 0.004853442776948214, + "epoch": 17.31115107913669, + "step": 19250 + }, + { + "epoch": 17.31115107913669, + "step": 19250, + "torque_loss": 0.08871179074048996 + }, + { + "epoch": 17.31115107913669, + "force_loss": 0.003862435230985284, + "step": 19250 + }, + { + "epoch": 17.320143884892087, + "grad_norm": 0.4135963022708893, + "learning_rate": 8.123201375688395e-05, + "loss": 0.0239, + "step": 19260 + }, + { + "action_loss": 0.0029513679910451174, + "epoch": 17.320143884892087, + "step": 19260 + }, + { + "epoch": 17.320143884892087, + "step": 19260, + "torque_loss": 0.12203123420476913 + }, + { + "epoch": 17.320143884892087, + "force_loss": 0.0028603628743439913, + "step": 19260 + }, + { + "epoch": 17.32913669064748, + "grad_norm": 0.3187497556209564, + "learning_rate": 8.121048875666954e-05, + "loss": 0.0248, + "step": 19270 + }, + { + "action_loss": 0.00624515675008297, + "epoch": 17.32913669064748, + "step": 19270 + }, + { + "epoch": 17.32913669064748, + "step": 19270, + "torque_loss": 0.13843059539794922 + }, + { + "epoch": 17.32913669064748, + "force_loss": 0.0032186105381697416, + "step": 19270 + }, + { + "epoch": 17.33812949640288, + "grad_norm": 0.2930435836315155, + "learning_rate": 8.118895427553274e-05, + "loss": 0.0201, + "step": 19280 + }, + { + "action_loss": 0.00903305597603321, + "epoch": 17.33812949640288, + "step": 19280 + }, + { + "epoch": 17.33812949640288, + "step": 19280, + "torque_loss": 0.1215401291847229 + }, + { + "epoch": 17.33812949640288, + "force_loss": 0.00682861777022481, + "step": 19280 + }, + { + "epoch": 17.347122302158272, + "grad_norm": 0.37510836124420166, + "learning_rate": 8.116741032001511e-05, + "loss": 0.0248, + "step": 19290 + }, + { + "action_loss": 0.0066780210472643375, + "epoch": 17.347122302158272, + "step": 19290 + }, + { + "epoch": 17.347122302158272, + "step": 19290, + "torque_loss": 0.12911011278629303 + }, + { + "epoch": 17.347122302158272, + "force_loss": 0.005036401562392712, + "step": 19290 + }, + { + "epoch": 17.35611510791367, + "grad_norm": 0.5693814754486084, + "learning_rate": 8.114585689666114e-05, + "loss": 0.0251, + "step": 19300 + }, + { + "action_loss": 0.004626793321222067, + "epoch": 17.35611510791367, + "step": 19300 + }, + { + "epoch": 17.35611510791367, + "step": 19300, + "torque_loss": 0.11726341396570206 + }, + { + "epoch": 17.35611510791367, + "force_loss": 0.0034203932154923677, + "step": 19300 + }, + { + "epoch": 17.365107913669064, + "grad_norm": 0.35918116569519043, + "learning_rate": 8.112429401201821e-05, + "loss": 0.0208, + "step": 19310 + }, + { + "action_loss": 0.03519358113408089, + "epoch": 17.365107913669064, + "step": 19310 + }, + { + "epoch": 17.365107913669064, + "step": 19310, + "torque_loss": 0.21112732589244843 + }, + { + "epoch": 17.365107913669064, + "force_loss": 0.02771923877298832, + "step": 19310 + }, + { + "epoch": 17.37410071942446, + "grad_norm": 0.6362349987030029, + "learning_rate": 8.110272167263656e-05, + "loss": 0.0253, + "step": 19320 + }, + { + "action_loss": 0.011003575287759304, + "epoch": 17.37410071942446, + "step": 19320 + }, + { + "epoch": 17.37410071942446, + "step": 19320, + "torque_loss": 0.1277102380990982 + }, + { + "epoch": 17.37410071942446, + "force_loss": 0.007746491581201553, + "step": 19320 + }, + { + "epoch": 17.383093525179856, + "grad_norm": 0.27018672227859497, + "learning_rate": 8.108113988506929e-05, + "loss": 0.0234, + "step": 19330 + }, + { + "action_loss": 0.011912592686712742, + "epoch": 17.383093525179856, + "step": 19330 + }, + { + "epoch": 17.383093525179856, + "step": 19330, + "torque_loss": 0.19017989933490753 + }, + { + "epoch": 17.383093525179856, + "force_loss": 0.015125956386327744, + "step": 19330 + }, + { + "epoch": 17.392086330935253, + "grad_norm": 0.3717755675315857, + "learning_rate": 8.105954865587235e-05, + "loss": 0.0242, + "step": 19340 + }, + { + "action_loss": 0.006878519430756569, + "epoch": 17.392086330935253, + "step": 19340 + }, + { + "epoch": 17.392086330935253, + "step": 19340, + "torque_loss": 0.10891749709844589 + }, + { + "epoch": 17.392086330935253, + "force_loss": 0.004361771512776613, + "step": 19340 + }, + { + "epoch": 17.401079136690647, + "grad_norm": 0.25477027893066406, + "learning_rate": 8.103794799160463e-05, + "loss": 0.0221, + "step": 19350 + }, + { + "action_loss": 0.037407178431749344, + "epoch": 17.401079136690647, + "step": 19350 + }, + { + "epoch": 17.401079136690647, + "step": 19350, + "torque_loss": 0.20230621099472046 + }, + { + "epoch": 17.401079136690647, + "force_loss": 0.04858658090233803, + "step": 19350 + }, + { + "epoch": 17.41007194244604, + "grad_norm": 0.6735423803329468, + "learning_rate": 8.101633789882781e-05, + "loss": 0.026, + "step": 19360 + }, + { + "action_loss": 0.004760397132486105, + "epoch": 17.41007194244604, + "step": 19360 + }, + { + "epoch": 17.41007194244604, + "step": 19360, + "torque_loss": 0.1070166826248169 + }, + { + "epoch": 17.41007194244604, + "force_loss": 0.003806935390457511, + "step": 19360 + }, + { + "epoch": 17.41906474820144, + "grad_norm": 0.4031885862350464, + "learning_rate": 8.099471838410648e-05, + "loss": 0.0193, + "step": 19370 + }, + { + "action_loss": 0.010101772844791412, + "epoch": 17.41906474820144, + "step": 19370 + }, + { + "epoch": 17.41906474820144, + "step": 19370, + "torque_loss": 0.15674875676631927 + }, + { + "epoch": 17.41906474820144, + "force_loss": 0.008931778371334076, + "step": 19370 + }, + { + "epoch": 17.428057553956833, + "grad_norm": 0.4902113378047943, + "learning_rate": 8.097308945400806e-05, + "loss": 0.022, + "step": 19380 + }, + { + "action_loss": 0.00328111182898283, + "epoch": 17.428057553956833, + "step": 19380 + }, + { + "epoch": 17.428057553956833, + "step": 19380, + "torque_loss": 0.08215201646089554 + }, + { + "epoch": 17.428057553956833, + "force_loss": 0.002679927973076701, + "step": 19380 + }, + { + "epoch": 17.43705035971223, + "grad_norm": 0.6395204663276672, + "learning_rate": 8.095145111510288e-05, + "loss": 0.0227, + "step": 19390 + }, + { + "action_loss": 0.005782816559076309, + "epoch": 17.43705035971223, + "step": 19390 + }, + { + "epoch": 17.43705035971223, + "step": 19390, + "torque_loss": 0.10056646913290024 + }, + { + "epoch": 17.43705035971223, + "force_loss": 0.010992948897182941, + "step": 19390 + }, + { + "epoch": 17.446043165467625, + "grad_norm": 0.3419896364212036, + "learning_rate": 8.092980337396406e-05, + "loss": 0.0201, + "step": 19400 + }, + { + "action_loss": 0.010971141047775745, + "epoch": 17.446043165467625, + "step": 19400 + }, + { + "epoch": 17.446043165467625, + "step": 19400, + "torque_loss": 0.15733571350574493 + }, + { + "epoch": 17.446043165467625, + "force_loss": 0.017646750435233116, + "step": 19400 + }, + { + "epoch": 17.455035971223023, + "grad_norm": 0.40668272972106934, + "learning_rate": 8.090814623716763e-05, + "loss": 0.0246, + "step": 19410 + }, + { + "action_loss": 0.00327402469702065, + "epoch": 17.455035971223023, + "step": 19410 + }, + { + "epoch": 17.455035971223023, + "step": 19410, + "torque_loss": 0.11614451557397842 + }, + { + "epoch": 17.455035971223023, + "force_loss": 0.005468995776027441, + "step": 19410 + }, + { + "epoch": 17.464028776978417, + "grad_norm": 0.23284949362277985, + "learning_rate": 8.088647971129246e-05, + "loss": 0.0262, + "step": 19420 + }, + { + "action_loss": 0.025572746992111206, + "epoch": 17.464028776978417, + "step": 19420 + }, + { + "epoch": 17.464028776978417, + "step": 19420, + "torque_loss": 0.1209123507142067 + }, + { + "epoch": 17.464028776978417, + "force_loss": 0.026003820821642876, + "step": 19420 + }, + { + "epoch": 17.473021582733814, + "grad_norm": 0.20005621016025543, + "learning_rate": 8.086480380292026e-05, + "loss": 0.0265, + "step": 19430 + }, + { + "action_loss": 0.0027618110179901123, + "epoch": 17.473021582733814, + "step": 19430 + }, + { + "epoch": 17.473021582733814, + "step": 19430, + "torque_loss": 0.0853128433227539 + }, + { + "epoch": 17.473021582733814, + "force_loss": 0.002930806949734688, + "step": 19430 + }, + { + "epoch": 17.48201438848921, + "grad_norm": 0.4512649178504944, + "learning_rate": 8.084311851863562e-05, + "loss": 0.0228, + "step": 19440 + }, + { + "action_loss": 0.0056221564300358295, + "epoch": 17.48201438848921, + "step": 19440 + }, + { + "epoch": 17.48201438848921, + "step": 19440, + "torque_loss": 0.10249606519937515 + }, + { + "epoch": 17.48201438848921, + "force_loss": 0.003846965730190277, + "step": 19440 + }, + { + "epoch": 17.491007194244606, + "grad_norm": 0.3204338550567627, + "learning_rate": 8.082142386502591e-05, + "loss": 0.022, + "step": 19450 + }, + { + "action_loss": 0.012489952147006989, + "epoch": 17.491007194244606, + "step": 19450 + }, + { + "epoch": 17.491007194244606, + "step": 19450, + "torque_loss": 0.11867877840995789 + }, + { + "epoch": 17.491007194244606, + "force_loss": 0.014584623277187347, + "step": 19450 + }, + { + "epoch": 17.5, + "grad_norm": 0.8133110404014587, + "learning_rate": 8.079971984868145e-05, + "loss": 0.0252, + "step": 19460 + }, + { + "action_loss": 0.005288057029247284, + "epoch": 17.5, + "step": 19460 + }, + { + "epoch": 17.5, + "step": 19460, + "torque_loss": 0.1301623433828354 + }, + { + "epoch": 17.5, + "force_loss": 0.006689710076898336, + "step": 19460 + }, + { + "epoch": 17.508992805755394, + "grad_norm": 0.34816107153892517, + "learning_rate": 8.077800647619532e-05, + "loss": 0.0241, + "step": 19470 + }, + { + "action_loss": 0.004750456660985947, + "epoch": 17.508992805755394, + "step": 19470 + }, + { + "epoch": 17.508992805755394, + "step": 19470, + "torque_loss": 0.1748230904340744 + }, + { + "epoch": 17.508992805755394, + "force_loss": 0.0048001110553741455, + "step": 19470 + }, + { + "epoch": 17.51798561151079, + "grad_norm": 0.2986980676651001, + "learning_rate": 8.075628375416345e-05, + "loss": 0.022, + "step": 19480 + }, + { + "action_loss": 0.004213675856590271, + "epoch": 17.51798561151079, + "step": 19480 + }, + { + "epoch": 17.51798561151079, + "step": 19480, + "torque_loss": 0.11202915757894516 + }, + { + "epoch": 17.51798561151079, + "force_loss": 0.005337308626621962, + "step": 19480 + }, + { + "epoch": 17.526978417266186, + "grad_norm": 0.4014330804347992, + "learning_rate": 8.073455168918464e-05, + "loss": 0.0245, + "step": 19490 + }, + { + "action_loss": 0.006691730115562677, + "epoch": 17.526978417266186, + "step": 19490 + }, + { + "epoch": 17.526978417266186, + "step": 19490, + "torque_loss": 0.16882272064685822 + }, + { + "epoch": 17.526978417266186, + "force_loss": 0.0055504050105810165, + "step": 19490 + }, + { + "epoch": 17.535971223021583, + "grad_norm": 0.2635452449321747, + "learning_rate": 8.071281028786055e-05, + "loss": 0.0208, + "step": 19500 + }, + { + "action_loss": 0.005574541632086039, + "epoch": 17.535971223021583, + "step": 19500 + }, + { + "epoch": 17.535971223021583, + "step": 19500, + "torque_loss": 0.15276120603084564 + }, + { + "epoch": 17.535971223021583, + "force_loss": 0.0036612292751669884, + "step": 19500 + }, + { + "epoch": 17.544964028776977, + "grad_norm": 0.2833903729915619, + "learning_rate": 8.069105955679562e-05, + "loss": 0.0222, + "step": 19510 + }, + { + "action_loss": 0.009073040448129177, + "epoch": 17.544964028776977, + "step": 19510 + }, + { + "epoch": 17.544964028776977, + "step": 19510, + "torque_loss": 0.16100911796092987 + }, + { + "epoch": 17.544964028776977, + "force_loss": 0.005548717454075813, + "step": 19510 + }, + { + "epoch": 17.553956834532375, + "grad_norm": 0.31475210189819336, + "learning_rate": 8.066929950259713e-05, + "loss": 0.0231, + "step": 19520 + }, + { + "action_loss": 0.007994409650564194, + "epoch": 17.553956834532375, + "step": 19520 + }, + { + "epoch": 17.553956834532375, + "step": 19520, + "torque_loss": 0.14138777554035187 + }, + { + "epoch": 17.553956834532375, + "force_loss": 0.010852988809347153, + "step": 19520 + }, + { + "epoch": 17.56294964028777, + "grad_norm": 0.3428254723548889, + "learning_rate": 8.064753013187522e-05, + "loss": 0.0227, + "step": 19530 + }, + { + "action_loss": 0.020793737843632698, + "epoch": 17.56294964028777, + "step": 19530 + }, + { + "epoch": 17.56294964028777, + "step": 19530, + "torque_loss": 0.15864413976669312 + }, + { + "epoch": 17.56294964028777, + "force_loss": 0.018360594287514687, + "step": 19530 + }, + { + "epoch": 17.571942446043167, + "grad_norm": 0.3125179409980774, + "learning_rate": 8.062575145124289e-05, + "loss": 0.0232, + "step": 19540 + }, + { + "action_loss": 0.008672011084854603, + "epoch": 17.571942446043167, + "step": 19540 + }, + { + "epoch": 17.571942446043167, + "step": 19540, + "torque_loss": 0.1733013391494751 + }, + { + "epoch": 17.571942446043167, + "force_loss": 0.005243752151727676, + "step": 19540 + }, + { + "epoch": 17.58093525179856, + "grad_norm": 0.39535972476005554, + "learning_rate": 8.060396346731587e-05, + "loss": 0.0231, + "step": 19550 + }, + { + "action_loss": 0.0033565405756235123, + "epoch": 17.58093525179856, + "step": 19550 + }, + { + "epoch": 17.58093525179856, + "step": 19550, + "torque_loss": 0.11748424917459488 + }, + { + "epoch": 17.58093525179856, + "force_loss": 0.0031257914379239082, + "step": 19550 + }, + { + "epoch": 17.58992805755396, + "grad_norm": 0.5443530678749084, + "learning_rate": 8.058216618671281e-05, + "loss": 0.0196, + "step": 19560 + }, + { + "action_loss": 0.012148463167250156, + "epoch": 17.58992805755396, + "step": 19560 + }, + { + "epoch": 17.58992805755396, + "step": 19560, + "torque_loss": 0.10654151439666748 + }, + { + "epoch": 17.58992805755396, + "force_loss": 0.008956816047430038, + "step": 19560 + }, + { + "epoch": 17.598920863309353, + "grad_norm": 0.30746304988861084, + "learning_rate": 8.056035961605514e-05, + "loss": 0.0192, + "step": 19570 + }, + { + "action_loss": 0.0071060024201869965, + "epoch": 17.598920863309353, + "step": 19570 + }, + { + "epoch": 17.598920863309353, + "step": 19570, + "torque_loss": 0.12469115108251572 + }, + { + "epoch": 17.598920863309353, + "force_loss": 0.004781091120094061, + "step": 19570 + }, + { + "epoch": 17.607913669064747, + "grad_norm": 0.4439001679420471, + "learning_rate": 8.05385437619671e-05, + "loss": 0.028, + "step": 19580 + }, + { + "action_loss": 0.003755138022825122, + "epoch": 17.607913669064747, + "step": 19580 + }, + { + "epoch": 17.607913669064747, + "step": 19580, + "torque_loss": 0.14100150763988495 + }, + { + "epoch": 17.607913669064747, + "force_loss": 0.006742970552295446, + "step": 19580 + }, + { + "epoch": 17.616906474820144, + "grad_norm": 0.30653971433639526, + "learning_rate": 8.05167186310758e-05, + "loss": 0.0201, + "step": 19590 + }, + { + "action_loss": 0.008167531341314316, + "epoch": 17.616906474820144, + "step": 19590 + }, + { + "epoch": 17.616906474820144, + "step": 19590, + "torque_loss": 0.1529424637556076 + }, + { + "epoch": 17.616906474820144, + "force_loss": 0.005340991076081991, + "step": 19590 + }, + { + "epoch": 17.62589928057554, + "grad_norm": 0.3988235294818878, + "learning_rate": 8.049488423001113e-05, + "loss": 0.0228, + "step": 19600 + }, + { + "action_loss": 0.01556698139756918, + "epoch": 17.62589928057554, + "step": 19600 + }, + { + "epoch": 17.62589928057554, + "step": 19600, + "torque_loss": 0.1938236951828003 + }, + { + "epoch": 17.62589928057554, + "force_loss": 0.008753178641200066, + "step": 19600 + }, + { + "epoch": 17.634892086330936, + "grad_norm": 0.4130746126174927, + "learning_rate": 8.047304056540581e-05, + "loss": 0.0251, + "step": 19610 + }, + { + "action_loss": 0.005363933742046356, + "epoch": 17.634892086330936, + "step": 19610 + }, + { + "epoch": 17.634892086330936, + "step": 19610, + "torque_loss": 0.137730672955513 + }, + { + "epoch": 17.634892086330936, + "force_loss": 0.005873801652342081, + "step": 19610 + }, + { + "epoch": 17.64388489208633, + "grad_norm": 0.40095967054367065, + "learning_rate": 8.045118764389534e-05, + "loss": 0.0261, + "step": 19620 + }, + { + "action_loss": 0.002637271536514163, + "epoch": 17.64388489208633, + "step": 19620 + }, + { + "epoch": 17.64388489208633, + "step": 19620, + "torque_loss": 0.12004473805427551 + }, + { + "epoch": 17.64388489208633, + "force_loss": 0.0027399174869060516, + "step": 19620 + }, + { + "epoch": 17.652877697841728, + "grad_norm": 0.23908305168151855, + "learning_rate": 8.042932547211809e-05, + "loss": 0.0182, + "step": 19630 + }, + { + "action_loss": 0.006685630884021521, + "epoch": 17.652877697841728, + "step": 19630 + }, + { + "epoch": 17.652877697841728, + "step": 19630, + "torque_loss": 0.14698709547519684 + }, + { + "epoch": 17.652877697841728, + "force_loss": 0.004281893372535706, + "step": 19630 + }, + { + "epoch": 17.66187050359712, + "grad_norm": 0.5244224071502686, + "learning_rate": 8.04074540567152e-05, + "loss": 0.0229, + "step": 19640 + }, + { + "action_loss": 0.0032203607261180878, + "epoch": 17.66187050359712, + "step": 19640 + }, + { + "epoch": 17.66187050359712, + "step": 19640, + "torque_loss": 0.14612585306167603 + }, + { + "epoch": 17.66187050359712, + "force_loss": 0.0025413024704903364, + "step": 19640 + }, + { + "epoch": 17.67086330935252, + "grad_norm": 0.43039417266845703, + "learning_rate": 8.038557340433063e-05, + "loss": 0.0301, + "step": 19650 + }, + { + "action_loss": 0.009502124972641468, + "epoch": 17.67086330935252, + "step": 19650 + }, + { + "epoch": 17.67086330935252, + "step": 19650, + "torque_loss": 0.1313255876302719 + }, + { + "epoch": 17.67086330935252, + "force_loss": 0.008214607834815979, + "step": 19650 + }, + { + "epoch": 17.679856115107913, + "grad_norm": 0.21753033995628357, + "learning_rate": 8.036368352161115e-05, + "loss": 0.0208, + "step": 19660 + }, + { + "action_loss": 0.01917554996907711, + "epoch": 17.679856115107913, + "step": 19660 + }, + { + "epoch": 17.679856115107913, + "step": 19660, + "torque_loss": 0.13388876616954803 + }, + { + "epoch": 17.679856115107913, + "force_loss": 0.023362377658486366, + "step": 19660 + }, + { + "epoch": 17.68884892086331, + "grad_norm": 0.170255646109581, + "learning_rate": 8.034178441520633e-05, + "loss": 0.0204, + "step": 19670 + }, + { + "action_loss": 0.005621526390314102, + "epoch": 17.68884892086331, + "step": 19670 + }, + { + "epoch": 17.68884892086331, + "step": 19670, + "torque_loss": 0.11105605214834213 + }, + { + "epoch": 17.68884892086331, + "force_loss": 0.00489856768399477, + "step": 19670 + }, + { + "epoch": 17.697841726618705, + "grad_norm": 0.4363051652908325, + "learning_rate": 8.031987609176852e-05, + "loss": 0.0262, + "step": 19680 + }, + { + "action_loss": 0.0037665802519768476, + "epoch": 17.697841726618705, + "step": 19680 + }, + { + "epoch": 17.697841726618705, + "step": 19680, + "torque_loss": 0.10609819740056992 + }, + { + "epoch": 17.697841726618705, + "force_loss": 0.003781653940677643, + "step": 19680 + }, + { + "epoch": 17.7068345323741, + "grad_norm": 0.21985115110874176, + "learning_rate": 8.02979585579529e-05, + "loss": 0.0231, + "step": 19690 + }, + { + "action_loss": 0.007497600745409727, + "epoch": 17.7068345323741, + "step": 19690 + }, + { + "epoch": 17.7068345323741, + "step": 19690, + "torque_loss": 0.10320410877466202 + }, + { + "epoch": 17.7068345323741, + "force_loss": 0.010489988140761852, + "step": 19690 + }, + { + "epoch": 17.715827338129497, + "grad_norm": 0.6059436798095703, + "learning_rate": 8.027603182041745e-05, + "loss": 0.0291, + "step": 19700 + }, + { + "action_loss": 0.007499633356928825, + "epoch": 17.715827338129497, + "step": 19700 + }, + { + "epoch": 17.715827338129497, + "step": 19700, + "torque_loss": 0.12829194962978363 + }, + { + "epoch": 17.715827338129497, + "force_loss": 0.005964389070868492, + "step": 19700 + }, + { + "epoch": 17.72482014388489, + "grad_norm": 0.3505343794822693, + "learning_rate": 8.025409588582292e-05, + "loss": 0.0259, + "step": 19710 + }, + { + "action_loss": 0.006138558033853769, + "epoch": 17.72482014388489, + "step": 19710 + }, + { + "epoch": 17.72482014388489, + "step": 19710, + "torque_loss": 0.14246180653572083 + }, + { + "epoch": 17.72482014388489, + "force_loss": 0.0058523621410131454, + "step": 19710 + }, + { + "epoch": 17.73381294964029, + "grad_norm": 0.7307343482971191, + "learning_rate": 8.023215076083288e-05, + "loss": 0.0234, + "step": 19720 + }, + { + "action_loss": 0.0041977656073868275, + "epoch": 17.73381294964029, + "step": 19720 + }, + { + "epoch": 17.73381294964029, + "step": 19720, + "torque_loss": 0.12356024980545044 + }, + { + "epoch": 17.73381294964029, + "force_loss": 0.010036000050604343, + "step": 19720 + }, + { + "epoch": 17.742805755395683, + "grad_norm": 0.36447811126708984, + "learning_rate": 8.021019645211367e-05, + "loss": 0.0201, + "step": 19730 + }, + { + "action_loss": 0.002569265430793166, + "epoch": 17.742805755395683, + "step": 19730 + }, + { + "epoch": 17.742805755395683, + "step": 19730, + "torque_loss": 0.1548398733139038 + }, + { + "epoch": 17.742805755395683, + "force_loss": 0.003124109236523509, + "step": 19730 + }, + { + "epoch": 17.75179856115108, + "grad_norm": 0.30815255641937256, + "learning_rate": 8.018823296633441e-05, + "loss": 0.0207, + "step": 19740 + }, + { + "action_loss": 0.010665304027497768, + "epoch": 17.75179856115108, + "step": 19740 + }, + { + "epoch": 17.75179856115108, + "step": 19740, + "torque_loss": 0.1426772028207779 + }, + { + "epoch": 17.75179856115108, + "force_loss": 0.013071350753307343, + "step": 19740 + }, + { + "epoch": 17.760791366906474, + "grad_norm": 0.31556180119514465, + "learning_rate": 8.016626031016708e-05, + "loss": 0.0264, + "step": 19750 + }, + { + "action_loss": 0.008767900057137012, + "epoch": 17.760791366906474, + "step": 19750 + }, + { + "epoch": 17.760791366906474, + "step": 19750, + "torque_loss": 0.13452835381031036 + }, + { + "epoch": 17.760791366906474, + "force_loss": 0.011867806315422058, + "step": 19750 + }, + { + "epoch": 17.769784172661872, + "grad_norm": 0.4469786286354065, + "learning_rate": 8.014427849028636e-05, + "loss": 0.0215, + "step": 19760 + }, + { + "action_loss": 0.01100847590714693, + "epoch": 17.769784172661872, + "step": 19760 + }, + { + "epoch": 17.769784172661872, + "step": 19760, + "torque_loss": 0.14512237906455994 + }, + { + "epoch": 17.769784172661872, + "force_loss": 0.011651960201561451, + "step": 19760 + }, + { + "epoch": 17.778776978417266, + "grad_norm": 0.588995099067688, + "learning_rate": 8.012228751336974e-05, + "loss": 0.0243, + "step": 19770 + }, + { + "action_loss": 0.008913128636777401, + "epoch": 17.778776978417266, + "step": 19770 + }, + { + "epoch": 17.778776978417266, + "step": 19770, + "torque_loss": 0.19584007561206818 + }, + { + "epoch": 17.778776978417266, + "force_loss": 0.008015934377908707, + "step": 19770 + }, + { + "epoch": 17.78776978417266, + "grad_norm": 0.4769243597984314, + "learning_rate": 8.01002873860975e-05, + "loss": 0.0317, + "step": 19780 + }, + { + "action_loss": 0.0070003787986934185, + "epoch": 17.78776978417266, + "step": 19780 + }, + { + "epoch": 17.78776978417266, + "step": 19780, + "torque_loss": 0.1274857521057129 + }, + { + "epoch": 17.78776978417266, + "force_loss": 0.006243351846933365, + "step": 19780 + }, + { + "epoch": 17.796762589928058, + "grad_norm": 0.35653167963027954, + "learning_rate": 8.00782781151527e-05, + "loss": 0.0212, + "step": 19790 + }, + { + "action_loss": 0.02327529340982437, + "epoch": 17.796762589928058, + "step": 19790 + }, + { + "epoch": 17.796762589928058, + "step": 19790, + "torque_loss": 0.16975641250610352 + }, + { + "epoch": 17.796762589928058, + "force_loss": 0.01342775672674179, + "step": 19790 + }, + { + "epoch": 17.805755395683452, + "grad_norm": 0.335950642824173, + "learning_rate": 8.005625970722119e-05, + "loss": 0.0263, + "step": 19800 + }, + { + "action_loss": 0.004299199674278498, + "epoch": 17.805755395683452, + "step": 19800 + }, + { + "epoch": 17.805755395683452, + "step": 19800, + "torque_loss": 0.08940321952104568 + }, + { + "epoch": 17.805755395683452, + "force_loss": 0.00388413411565125, + "step": 19800 + }, + { + "epoch": 17.81474820143885, + "grad_norm": 0.441150039434433, + "learning_rate": 8.003423216899158e-05, + "loss": 0.0201, + "step": 19810 + }, + { + "action_loss": 0.0038878542836755514, + "epoch": 17.81474820143885, + "step": 19810 + }, + { + "epoch": 17.81474820143885, + "step": 19810, + "torque_loss": 0.19903464615345 + }, + { + "epoch": 17.81474820143885, + "force_loss": 0.002141745062544942, + "step": 19810 + }, + { + "epoch": 17.823741007194243, + "grad_norm": 0.24629060924053192, + "learning_rate": 8.001219550715522e-05, + "loss": 0.0285, + "step": 19820 + }, + { + "action_loss": 0.008333519101142883, + "epoch": 17.823741007194243, + "step": 19820 + }, + { + "epoch": 17.823741007194243, + "step": 19820, + "torque_loss": 0.10464733093976974 + }, + { + "epoch": 17.823741007194243, + "force_loss": 0.015250998549163342, + "step": 19820 + }, + { + "epoch": 17.83273381294964, + "grad_norm": 0.28278669714927673, + "learning_rate": 7.999014972840632e-05, + "loss": 0.0266, + "step": 19830 + }, + { + "action_loss": 0.0053902133367955685, + "epoch": 17.83273381294964, + "step": 19830 + }, + { + "epoch": 17.83273381294964, + "step": 19830, + "torque_loss": 0.12154543399810791 + }, + { + "epoch": 17.83273381294964, + "force_loss": 0.005527317523956299, + "step": 19830 + }, + { + "epoch": 17.841726618705035, + "grad_norm": 0.25460949540138245, + "learning_rate": 7.996809483944174e-05, + "loss": 0.02, + "step": 19840 + }, + { + "action_loss": 0.020251328125596046, + "epoch": 17.841726618705035, + "step": 19840 + }, + { + "epoch": 17.841726618705035, + "step": 19840, + "torque_loss": 0.16169770061969757 + }, + { + "epoch": 17.841726618705035, + "force_loss": 0.013819930143654346, + "step": 19840 + }, + { + "epoch": 17.850719424460433, + "grad_norm": 0.39666181802749634, + "learning_rate": 7.994603084696124e-05, + "loss": 0.0197, + "step": 19850 + }, + { + "action_loss": 0.014225137419998646, + "epoch": 17.850719424460433, + "step": 19850 + }, + { + "epoch": 17.850719424460433, + "step": 19850, + "torque_loss": 0.1906416416168213 + }, + { + "epoch": 17.850719424460433, + "force_loss": 0.011307370848953724, + "step": 19850 + }, + { + "epoch": 17.859712230215827, + "grad_norm": 0.19151118397712708, + "learning_rate": 7.992395775766724e-05, + "loss": 0.0192, + "step": 19860 + }, + { + "action_loss": 0.005128639284521341, + "epoch": 17.859712230215827, + "step": 19860 + }, + { + "epoch": 17.859712230215827, + "step": 19860, + "torque_loss": 0.11203759163618088 + }, + { + "epoch": 17.859712230215827, + "force_loss": 0.004943978972733021, + "step": 19860 + }, + { + "epoch": 17.868705035971225, + "grad_norm": 0.17765939235687256, + "learning_rate": 7.990187557826497e-05, + "loss": 0.0267, + "step": 19870 + }, + { + "action_loss": 0.011818304657936096, + "epoch": 17.868705035971225, + "step": 19870 + }, + { + "epoch": 17.868705035971225, + "step": 19870, + "torque_loss": 0.1765238642692566 + }, + { + "epoch": 17.868705035971225, + "force_loss": 0.014577222056686878, + "step": 19870 + }, + { + "epoch": 17.87769784172662, + "grad_norm": 0.2867027521133423, + "learning_rate": 7.987978431546242e-05, + "loss": 0.0259, + "step": 19880 + }, + { + "action_loss": 0.008290189318358898, + "epoch": 17.87769784172662, + "step": 19880 + }, + { + "epoch": 17.87769784172662, + "step": 19880, + "torque_loss": 0.1209149956703186 + }, + { + "epoch": 17.87769784172662, + "force_loss": 0.007892039604485035, + "step": 19880 + }, + { + "epoch": 17.886690647482013, + "grad_norm": 0.6111899018287659, + "learning_rate": 7.985768397597031e-05, + "loss": 0.0238, + "step": 19890 + }, + { + "action_loss": 0.008832314051687717, + "epoch": 17.886690647482013, + "step": 19890 + }, + { + "epoch": 17.886690647482013, + "step": 19890, + "torque_loss": 0.13851268589496613 + }, + { + "epoch": 17.886690647482013, + "force_loss": 0.008938915096223354, + "step": 19890 + }, + { + "epoch": 17.89568345323741, + "grad_norm": 0.6564391851425171, + "learning_rate": 7.983557456650216e-05, + "loss": 0.0263, + "step": 19900 + }, + { + "action_loss": 0.022660313174128532, + "epoch": 17.89568345323741, + "step": 19900 + }, + { + "epoch": 17.89568345323741, + "step": 19900, + "torque_loss": 0.16038145124912262 + }, + { + "epoch": 17.89568345323741, + "force_loss": 0.029534464702010155, + "step": 19900 + }, + { + "epoch": 17.904676258992804, + "grad_norm": 0.4128296375274658, + "learning_rate": 7.981345609377422e-05, + "loss": 0.0267, + "step": 19910 + }, + { + "action_loss": 0.018345626071095467, + "epoch": 17.904676258992804, + "step": 19910 + }, + { + "epoch": 17.904676258992804, + "step": 19910, + "torque_loss": 0.1732242852449417 + }, + { + "epoch": 17.904676258992804, + "force_loss": 0.013446040451526642, + "step": 19910 + }, + { + "epoch": 17.913669064748202, + "grad_norm": 0.3570900559425354, + "learning_rate": 7.97913285645055e-05, + "loss": 0.0225, + "step": 19920 + }, + { + "action_loss": 0.011159618385136127, + "epoch": 17.913669064748202, + "step": 19920 + }, + { + "epoch": 17.913669064748202, + "step": 19920, + "torque_loss": 0.1497364044189453 + }, + { + "epoch": 17.913669064748202, + "force_loss": 0.011165213771164417, + "step": 19920 + }, + { + "epoch": 17.922661870503596, + "grad_norm": 0.3876411020755768, + "learning_rate": 7.976919198541776e-05, + "loss": 0.0212, + "step": 19930 + }, + { + "action_loss": 0.007042673882097006, + "epoch": 17.922661870503596, + "step": 19930 + }, + { + "epoch": 17.922661870503596, + "step": 19930, + "torque_loss": 0.1251247674226761 + }, + { + "epoch": 17.922661870503596, + "force_loss": 0.00474808132275939, + "step": 19930 + }, + { + "epoch": 17.931654676258994, + "grad_norm": 0.45121076703071594, + "learning_rate": 7.974704636323548e-05, + "loss": 0.024, + "step": 19940 + }, + { + "action_loss": 0.007809826638549566, + "epoch": 17.931654676258994, + "step": 19940 + }, + { + "epoch": 17.931654676258994, + "step": 19940, + "torque_loss": 0.10121830552816391 + }, + { + "epoch": 17.931654676258994, + "force_loss": 0.0060829236172139645, + "step": 19940 + }, + { + "epoch": 17.940647482014388, + "grad_norm": 0.3448992371559143, + "learning_rate": 7.972489170468597e-05, + "loss": 0.0198, + "step": 19950 + }, + { + "action_loss": 0.004157028626650572, + "epoch": 17.940647482014388, + "step": 19950 + }, + { + "epoch": 17.940647482014388, + "step": 19950, + "torque_loss": 0.12187572568655014 + }, + { + "epoch": 17.940647482014388, + "force_loss": 0.004202096723020077, + "step": 19950 + }, + { + "epoch": 17.949640287769785, + "grad_norm": 0.41944149136543274, + "learning_rate": 7.970272801649918e-05, + "loss": 0.0247, + "step": 19960 + }, + { + "action_loss": 0.006942410487681627, + "epoch": 17.949640287769785, + "step": 19960 + }, + { + "epoch": 17.949640287769785, + "step": 19960, + "torque_loss": 0.14761404693126678 + }, + { + "epoch": 17.949640287769785, + "force_loss": 0.006611744407564402, + "step": 19960 + }, + { + "epoch": 17.95863309352518, + "grad_norm": 0.2848796546459198, + "learning_rate": 7.96805553054079e-05, + "loss": 0.0252, + "step": 19970 + }, + { + "action_loss": 0.007320711854845285, + "epoch": 17.95863309352518, + "step": 19970 + }, + { + "epoch": 17.95863309352518, + "step": 19970, + "torque_loss": 0.1549905389547348 + }, + { + "epoch": 17.95863309352518, + "force_loss": 0.0038721465971320868, + "step": 19970 + }, + { + "epoch": 17.967625899280577, + "grad_norm": 0.5131918787956238, + "learning_rate": 7.965837357814756e-05, + "loss": 0.0233, + "step": 19980 + }, + { + "action_loss": 0.007778391242027283, + "epoch": 17.967625899280577, + "step": 19980 + }, + { + "epoch": 17.967625899280577, + "step": 19980, + "torque_loss": 0.12595798075199127 + }, + { + "epoch": 17.967625899280577, + "force_loss": 0.004015080630779266, + "step": 19980 + }, + { + "epoch": 17.97661870503597, + "grad_norm": 0.23807628452777863, + "learning_rate": 7.963618284145643e-05, + "loss": 0.0248, + "step": 19990 + }, + { + "action_loss": 0.012418706901371479, + "epoch": 17.97661870503597, + "step": 19990 + }, + { + "epoch": 17.97661870503597, + "step": 19990, + "torque_loss": 0.19964194297790527 + }, + { + "epoch": 17.97661870503597, + "force_loss": 0.00856059230864048, + "step": 19990 + }, + { + "epoch": 17.985611510791365, + "grad_norm": 0.3788957893848419, + "learning_rate": 7.961398310207544e-05, + "loss": 0.0256, + "step": 20000 + }, + { + "action_loss": 0.0044211591593921185, + "epoch": 17.985611510791365, + "step": 20000 + }, + { + "epoch": 17.985611510791365, + "step": 20000, + "torque_loss": 0.11479606479406357 + }, + { + "epoch": 17.985611510791365, + "force_loss": 0.0035031491424888372, + "step": 20000 + }, + { + "epoch": 17.994604316546763, + "grad_norm": 0.3506962060928345, + "learning_rate": 7.95917743667483e-05, + "loss": 0.0224, + "step": 20010 + }, + { + "action_loss": 0.004047689493745565, + "epoch": 17.994604316546763, + "step": 20010 + }, + { + "epoch": 17.994604316546763, + "step": 20010, + "torque_loss": 0.14169400930404663 + }, + { + "epoch": 17.994604316546763, + "force_loss": 0.0023170565254986286, + "step": 20010 + }, + { + "epoch": 18.003597122302157, + "grad_norm": 0.5437688827514648, + "learning_rate": 7.956955664222144e-05, + "loss": 0.0234, + "step": 20020 + }, + { + "action_loss": 0.010446433909237385, + "epoch": 18.003597122302157, + "step": 20020 + }, + { + "epoch": 18.003597122302157, + "step": 20020, + "torque_loss": 0.14005039632320404 + }, + { + "epoch": 18.003597122302157, + "force_loss": 0.014068144373595715, + "step": 20020 + }, + { + "epoch": 18.012589928057555, + "grad_norm": 0.19264504313468933, + "learning_rate": 7.954732993524399e-05, + "loss": 0.0237, + "step": 20030 + }, + { + "action_loss": 0.01298863347619772, + "epoch": 18.012589928057555, + "step": 20030 + }, + { + "epoch": 18.012589928057555, + "step": 20030, + "torque_loss": 0.11979428678750992 + }, + { + "epoch": 18.012589928057555, + "force_loss": 0.0061620366759598255, + "step": 20030 + }, + { + "epoch": 18.02158273381295, + "grad_norm": 0.26759570837020874, + "learning_rate": 7.952509425256786e-05, + "loss": 0.024, + "step": 20040 + }, + { + "action_loss": 0.0036432023625820875, + "epoch": 18.02158273381295, + "step": 20040 + }, + { + "epoch": 18.02158273381295, + "step": 20040, + "torque_loss": 0.11801549792289734 + }, + { + "epoch": 18.02158273381295, + "force_loss": 0.003185616573318839, + "step": 20040 + }, + { + "epoch": 18.030575539568346, + "grad_norm": 0.3666815757751465, + "learning_rate": 7.950284960094767e-05, + "loss": 0.0238, + "step": 20050 + }, + { + "action_loss": 0.0030627876985818148, + "epoch": 18.030575539568346, + "step": 20050 + }, + { + "epoch": 18.030575539568346, + "step": 20050, + "torque_loss": 0.12197041511535645 + }, + { + "epoch": 18.030575539568346, + "force_loss": 0.002873636782169342, + "step": 20050 + }, + { + "epoch": 18.03956834532374, + "grad_norm": 0.41907984018325806, + "learning_rate": 7.948059598714076e-05, + "loss": 0.021, + "step": 20060 + }, + { + "action_loss": 0.003818368539214134, + "epoch": 18.03956834532374, + "step": 20060 + }, + { + "epoch": 18.03956834532374, + "step": 20060, + "torque_loss": 0.08319190889596939 + }, + { + "epoch": 18.03956834532374, + "force_loss": 0.006896860897541046, + "step": 20060 + }, + { + "epoch": 18.048561151079138, + "grad_norm": 0.425952285528183, + "learning_rate": 7.945833341790717e-05, + "loss": 0.0259, + "step": 20070 + }, + { + "action_loss": 0.010059972293674946, + "epoch": 18.048561151079138, + "step": 20070 + }, + { + "epoch": 18.048561151079138, + "step": 20070, + "torque_loss": 0.20924358069896698 + }, + { + "epoch": 18.048561151079138, + "force_loss": 0.008788040839135647, + "step": 20070 + }, + { + "epoch": 18.057553956834532, + "grad_norm": 0.6132962703704834, + "learning_rate": 7.94360619000097e-05, + "loss": 0.0278, + "step": 20080 + }, + { + "action_loss": 0.003754944773390889, + "epoch": 18.057553956834532, + "step": 20080 + }, + { + "epoch": 18.057553956834532, + "step": 20080, + "torque_loss": 0.10349229723215103 + }, + { + "epoch": 18.057553956834532, + "force_loss": 0.005687051918357611, + "step": 20080 + }, + { + "epoch": 18.06654676258993, + "grad_norm": 0.3191094398498535, + "learning_rate": 7.941378144021381e-05, + "loss": 0.0218, + "step": 20090 + }, + { + "action_loss": 0.03181866556406021, + "epoch": 18.06654676258993, + "step": 20090 + }, + { + "epoch": 18.06654676258993, + "step": 20090, + "torque_loss": 0.15198683738708496 + }, + { + "epoch": 18.06654676258993, + "force_loss": 0.04333909973502159, + "step": 20090 + }, + { + "epoch": 18.075539568345324, + "grad_norm": 0.2699708044528961, + "learning_rate": 7.939149204528777e-05, + "loss": 0.022, + "step": 20100 + }, + { + "action_loss": 0.011830595321953297, + "epoch": 18.075539568345324, + "step": 20100 + }, + { + "epoch": 18.075539568345324, + "step": 20100, + "torque_loss": 0.18377113342285156 + }, + { + "epoch": 18.075539568345324, + "force_loss": 0.016688520088791847, + "step": 20100 + }, + { + "epoch": 18.084532374100718, + "grad_norm": 0.5913984179496765, + "learning_rate": 7.936919372200246e-05, + "loss": 0.0264, + "step": 20110 + }, + { + "action_loss": 0.006229867693036795, + "epoch": 18.084532374100718, + "step": 20110 + }, + { + "epoch": 18.084532374100718, + "step": 20110, + "torque_loss": 0.1246085986495018 + }, + { + "epoch": 18.084532374100718, + "force_loss": 0.008133932948112488, + "step": 20110 + }, + { + "epoch": 18.093525179856115, + "grad_norm": 0.26789072155952454, + "learning_rate": 7.934688647713158e-05, + "loss": 0.0239, + "step": 20120 + }, + { + "action_loss": 0.003590468317270279, + "epoch": 18.093525179856115, + "step": 20120 + }, + { + "epoch": 18.093525179856115, + "step": 20120, + "torque_loss": 0.14061181247234344 + }, + { + "epoch": 18.093525179856115, + "force_loss": 0.003195495344698429, + "step": 20120 + }, + { + "epoch": 18.10251798561151, + "grad_norm": 0.29276755452156067, + "learning_rate": 7.932457031745143e-05, + "loss": 0.0216, + "step": 20130 + }, + { + "action_loss": 0.06483045220375061, + "epoch": 18.10251798561151, + "step": 20130 + }, + { + "epoch": 18.10251798561151, + "step": 20130, + "torque_loss": 0.1954166144132614 + }, + { + "epoch": 18.10251798561151, + "force_loss": 0.06355533748865128, + "step": 20130 + }, + { + "epoch": 18.111510791366907, + "grad_norm": 0.43592166900634766, + "learning_rate": 7.930224524974108e-05, + "loss": 0.0245, + "step": 20140 + }, + { + "action_loss": 0.014609490521252155, + "epoch": 18.111510791366907, + "step": 20140 + }, + { + "epoch": 18.111510791366907, + "step": 20140, + "torque_loss": 0.17122183740139008 + }, + { + "epoch": 18.111510791366907, + "force_loss": 0.013039768673479557, + "step": 20140 + }, + { + "epoch": 18.1205035971223, + "grad_norm": 0.39575302600860596, + "learning_rate": 7.927991128078232e-05, + "loss": 0.0229, + "step": 20150 + }, + { + "action_loss": 0.006208075676113367, + "epoch": 18.1205035971223, + "step": 20150 + }, + { + "epoch": 18.1205035971223, + "step": 20150, + "torque_loss": 0.15075568854808807 + }, + { + "epoch": 18.1205035971223, + "force_loss": 0.006185729056596756, + "step": 20150 + }, + { + "epoch": 18.1294964028777, + "grad_norm": 0.16428524255752563, + "learning_rate": 7.925756841735958e-05, + "loss": 0.0217, + "step": 20160 + }, + { + "action_loss": 0.012076959013938904, + "epoch": 18.1294964028777, + "step": 20160 + }, + { + "epoch": 18.1294964028777, + "step": 20160, + "torque_loss": 0.13658148050308228 + }, + { + "epoch": 18.1294964028777, + "force_loss": 0.008835576474666595, + "step": 20160 + }, + { + "epoch": 18.138489208633093, + "grad_norm": 0.4757242202758789, + "learning_rate": 7.923521666626008e-05, + "loss": 0.02, + "step": 20170 + }, + { + "action_loss": 0.015908218920230865, + "epoch": 18.138489208633093, + "step": 20170 + }, + { + "epoch": 18.138489208633093, + "step": 20170, + "torque_loss": 0.13368353247642517 + }, + { + "epoch": 18.138489208633093, + "force_loss": 0.022351935505867004, + "step": 20170 + }, + { + "epoch": 18.14748201438849, + "grad_norm": 0.3321283459663391, + "learning_rate": 7.921285603427366e-05, + "loss": 0.0211, + "step": 20180 + }, + { + "action_loss": 0.00522614223882556, + "epoch": 18.14748201438849, + "step": 20180 + }, + { + "epoch": 18.14748201438849, + "step": 20180, + "torque_loss": 0.10176622122526169 + }, + { + "epoch": 18.14748201438849, + "force_loss": 0.0024137133732438087, + "step": 20180 + }, + { + "epoch": 18.156474820143885, + "grad_norm": 0.3831334710121155, + "learning_rate": 7.91904865281929e-05, + "loss": 0.022, + "step": 20190 + }, + { + "action_loss": 0.003217557677999139, + "epoch": 18.156474820143885, + "step": 20190 + }, + { + "epoch": 18.156474820143885, + "step": 20190, + "torque_loss": 0.1516803503036499 + }, + { + "epoch": 18.156474820143885, + "force_loss": 0.0023491624742746353, + "step": 20190 + }, + { + "epoch": 18.165467625899282, + "grad_norm": 0.6456699967384338, + "learning_rate": 7.916810815481307e-05, + "loss": 0.0217, + "step": 20200 + }, + { + "action_loss": 0.004174186848104, + "epoch": 18.165467625899282, + "step": 20200 + }, + { + "epoch": 18.165467625899282, + "step": 20200, + "torque_loss": 0.11544787883758545 + }, + { + "epoch": 18.165467625899282, + "force_loss": 0.00204020575620234, + "step": 20200 + }, + { + "epoch": 18.174460431654676, + "grad_norm": 0.3879193961620331, + "learning_rate": 7.914572092093211e-05, + "loss": 0.0233, + "step": 20210 + }, + { + "action_loss": 0.01638071797788143, + "epoch": 18.174460431654676, + "step": 20210 + }, + { + "epoch": 18.174460431654676, + "step": 20210, + "torque_loss": 0.1639884114265442 + }, + { + "epoch": 18.174460431654676, + "force_loss": 0.025213507935404778, + "step": 20210 + }, + { + "epoch": 18.18345323741007, + "grad_norm": 0.3586960732936859, + "learning_rate": 7.912332483335068e-05, + "loss": 0.0231, + "step": 20220 + }, + { + "action_loss": 0.0165130365639925, + "epoch": 18.18345323741007, + "step": 20220 + }, + { + "epoch": 18.18345323741007, + "step": 20220, + "torque_loss": 0.12713371217250824 + }, + { + "epoch": 18.18345323741007, + "force_loss": 0.019182147458195686, + "step": 20220 + }, + { + "epoch": 18.192446043165468, + "grad_norm": 0.3122924268245697, + "learning_rate": 7.910091989887213e-05, + "loss": 0.0214, + "step": 20230 + }, + { + "action_loss": 0.008303462527692318, + "epoch": 18.192446043165468, + "step": 20230 + }, + { + "epoch": 18.192446043165468, + "step": 20230, + "torque_loss": 0.16290760040283203 + }, + { + "epoch": 18.192446043165468, + "force_loss": 0.008843469433486462, + "step": 20230 + }, + { + "epoch": 18.201438848920862, + "grad_norm": 0.5016494989395142, + "learning_rate": 7.907850612430248e-05, + "loss": 0.0246, + "step": 20240 + }, + { + "action_loss": 0.007878461852669716, + "epoch": 18.201438848920862, + "step": 20240 + }, + { + "epoch": 18.201438848920862, + "step": 20240, + "torque_loss": 0.16675801575183868 + }, + { + "epoch": 18.201438848920862, + "force_loss": 0.007841571234166622, + "step": 20240 + }, + { + "epoch": 18.21043165467626, + "grad_norm": 0.6306307911872864, + "learning_rate": 7.905608351645044e-05, + "loss": 0.0216, + "step": 20250 + }, + { + "action_loss": 0.009518210776150227, + "epoch": 18.21043165467626, + "step": 20250 + }, + { + "epoch": 18.21043165467626, + "step": 20250, + "torque_loss": 0.17719995975494385 + }, + { + "epoch": 18.21043165467626, + "force_loss": 0.010739825665950775, + "step": 20250 + }, + { + "epoch": 18.219424460431654, + "grad_norm": 0.4184080958366394, + "learning_rate": 7.90336520821274e-05, + "loss": 0.023, + "step": 20260 + }, + { + "action_loss": 0.009486421011388302, + "epoch": 18.219424460431654, + "step": 20260 + }, + { + "epoch": 18.219424460431654, + "step": 20260, + "torque_loss": 0.16221953928470612 + }, + { + "epoch": 18.219424460431654, + "force_loss": 0.017822276800870895, + "step": 20260 + }, + { + "epoch": 18.22841726618705, + "grad_norm": 0.19861479103565216, + "learning_rate": 7.901121182814746e-05, + "loss": 0.0203, + "step": 20270 + }, + { + "action_loss": 0.0024029635824263096, + "epoch": 18.22841726618705, + "step": 20270 + }, + { + "epoch": 18.22841726618705, + "step": 20270, + "torque_loss": 0.12884463369846344 + }, + { + "epoch": 18.22841726618705, + "force_loss": 0.0030409630853682756, + "step": 20270 + }, + { + "epoch": 18.237410071942445, + "grad_norm": 0.16220778226852417, + "learning_rate": 7.898876276132736e-05, + "loss": 0.0214, + "step": 20280 + }, + { + "action_loss": 0.004468801897019148, + "epoch": 18.237410071942445, + "step": 20280 + }, + { + "epoch": 18.237410071942445, + "step": 20280, + "torque_loss": 0.11765783280134201 + }, + { + "epoch": 18.237410071942445, + "force_loss": 0.0026512593030929565, + "step": 20280 + }, + { + "epoch": 18.246402877697843, + "grad_norm": 0.2431236356496811, + "learning_rate": 7.896630488848654e-05, + "loss": 0.0192, + "step": 20290 + }, + { + "action_loss": 0.009883382357656956, + "epoch": 18.246402877697843, + "step": 20290 + }, + { + "epoch": 18.246402877697843, + "step": 20290, + "torque_loss": 0.16536231338977814 + }, + { + "epoch": 18.246402877697843, + "force_loss": 0.01941848360002041, + "step": 20290 + }, + { + "epoch": 18.255395683453237, + "grad_norm": 0.29355722665786743, + "learning_rate": 7.89438382164471e-05, + "loss": 0.0228, + "step": 20300 + }, + { + "action_loss": 0.011039885692298412, + "epoch": 18.255395683453237, + "step": 20300 + }, + { + "epoch": 18.255395683453237, + "step": 20300, + "torque_loss": 0.17612044513225555 + }, + { + "epoch": 18.255395683453237, + "force_loss": 0.007762711029499769, + "step": 20300 + }, + { + "epoch": 18.264388489208635, + "grad_norm": 0.40812596678733826, + "learning_rate": 7.892136275203383e-05, + "loss": 0.0216, + "step": 20310 + }, + { + "action_loss": 0.0024956001434475183, + "epoch": 18.264388489208635, + "step": 20310 + }, + { + "epoch": 18.264388489208635, + "step": 20310, + "torque_loss": 0.07250147312879562 + }, + { + "epoch": 18.264388489208635, + "force_loss": 0.004585580434650183, + "step": 20310 + }, + { + "epoch": 18.27338129496403, + "grad_norm": 0.5498508810997009, + "learning_rate": 7.889887850207418e-05, + "loss": 0.0208, + "step": 20320 + }, + { + "action_loss": 0.012911583296954632, + "epoch": 18.27338129496403, + "step": 20320 + }, + { + "epoch": 18.27338129496403, + "step": 20320, + "torque_loss": 0.12616920471191406 + }, + { + "epoch": 18.27338129496403, + "force_loss": 0.013621024787425995, + "step": 20320 + }, + { + "epoch": 18.282374100719423, + "grad_norm": 0.4653564691543579, + "learning_rate": 7.887638547339827e-05, + "loss": 0.0245, + "step": 20330 + }, + { + "action_loss": 0.006385440472513437, + "epoch": 18.282374100719423, + "step": 20330 + }, + { + "epoch": 18.282374100719423, + "step": 20330, + "torque_loss": 0.13101039826869965 + }, + { + "epoch": 18.282374100719423, + "force_loss": 0.005413890350610018, + "step": 20330 + }, + { + "epoch": 18.29136690647482, + "grad_norm": 0.48891064524650574, + "learning_rate": 7.885388367283891e-05, + "loss": 0.0215, + "step": 20340 + }, + { + "action_loss": 0.008322554640471935, + "epoch": 18.29136690647482, + "step": 20340 + }, + { + "epoch": 18.29136690647482, + "step": 20340, + "torque_loss": 0.11535265296697617 + }, + { + "epoch": 18.29136690647482, + "force_loss": 0.011928697116672993, + "step": 20340 + }, + { + "epoch": 18.300359712230215, + "grad_norm": 0.33788347244262695, + "learning_rate": 7.88313731072315e-05, + "loss": 0.0223, + "step": 20350 + }, + { + "action_loss": 0.007125012576580048, + "epoch": 18.300359712230215, + "step": 20350 + }, + { + "epoch": 18.300359712230215, + "step": 20350, + "torque_loss": 0.16455142199993134 + }, + { + "epoch": 18.300359712230215, + "force_loss": 0.009632878936827183, + "step": 20350 + }, + { + "epoch": 18.309352517985612, + "grad_norm": 0.4722294211387634, + "learning_rate": 7.88088537834142e-05, + "loss": 0.0219, + "step": 20360 + }, + { + "action_loss": 0.022035181522369385, + "epoch": 18.309352517985612, + "step": 20360 + }, + { + "epoch": 18.309352517985612, + "step": 20360, + "torque_loss": 0.1472807675600052 + }, + { + "epoch": 18.309352517985612, + "force_loss": 0.029020635411143303, + "step": 20360 + }, + { + "epoch": 18.318345323741006, + "grad_norm": 0.2702522873878479, + "learning_rate": 7.878632570822778e-05, + "loss": 0.0225, + "step": 20370 + }, + { + "action_loss": 0.010567483492195606, + "epoch": 18.318345323741006, + "step": 20370 + }, + { + "epoch": 18.318345323741006, + "step": 20370, + "torque_loss": 0.12350978702306747 + }, + { + "epoch": 18.318345323741006, + "force_loss": 0.015000718645751476, + "step": 20370 + }, + { + "epoch": 18.327338129496404, + "grad_norm": 0.5101580619812012, + "learning_rate": 7.876378888851567e-05, + "loss": 0.022, + "step": 20380 + }, + { + "action_loss": 0.011632178910076618, + "epoch": 18.327338129496404, + "step": 20380 + }, + { + "epoch": 18.327338129496404, + "step": 20380, + "torque_loss": 0.11865854263305664 + }, + { + "epoch": 18.327338129496404, + "force_loss": 0.015955500304698944, + "step": 20380 + }, + { + "epoch": 18.336330935251798, + "grad_norm": 0.6490873694419861, + "learning_rate": 7.874124333112396e-05, + "loss": 0.023, + "step": 20390 + }, + { + "action_loss": 0.01874011754989624, + "epoch": 18.336330935251798, + "step": 20390 + }, + { + "epoch": 18.336330935251798, + "step": 20390, + "torque_loss": 0.07617810368537903 + }, + { + "epoch": 18.336330935251798, + "force_loss": 0.026293203234672546, + "step": 20390 + }, + { + "epoch": 18.345323741007196, + "grad_norm": 0.2603389024734497, + "learning_rate": 7.871868904290138e-05, + "loss": 0.0218, + "step": 20400 + }, + { + "action_loss": 0.018921151757240295, + "epoch": 18.345323741007196, + "step": 20400 + }, + { + "epoch": 18.345323741007196, + "step": 20400, + "torque_loss": 0.15553446114063263 + }, + { + "epoch": 18.345323741007196, + "force_loss": 0.024872565641999245, + "step": 20400 + }, + { + "epoch": 18.35431654676259, + "grad_norm": 0.3065906763076782, + "learning_rate": 7.869612603069935e-05, + "loss": 0.0243, + "step": 20410 + }, + { + "action_loss": 0.005943920463323593, + "epoch": 18.35431654676259, + "step": 20410 + }, + { + "epoch": 18.35431654676259, + "step": 20410, + "torque_loss": 0.13668511807918549 + }, + { + "epoch": 18.35431654676259, + "force_loss": 0.005031559616327286, + "step": 20410 + }, + { + "epoch": 18.363309352517987, + "grad_norm": 0.20989049971103668, + "learning_rate": 7.867355430137192e-05, + "loss": 0.0223, + "step": 20420 + }, + { + "action_loss": 0.003977446351200342, + "epoch": 18.363309352517987, + "step": 20420 + }, + { + "epoch": 18.363309352517987, + "step": 20420, + "torque_loss": 0.08900357037782669 + }, + { + "epoch": 18.363309352517987, + "force_loss": 0.00794149935245514, + "step": 20420 + }, + { + "epoch": 18.37230215827338, + "grad_norm": 0.2944192588329315, + "learning_rate": 7.865097386177577e-05, + "loss": 0.0182, + "step": 20430 + }, + { + "action_loss": 0.0303004402667284, + "epoch": 18.37230215827338, + "step": 20430 + }, + { + "epoch": 18.37230215827338, + "step": 20430, + "torque_loss": 0.16158780455589294 + }, + { + "epoch": 18.37230215827338, + "force_loss": 0.02947285957634449, + "step": 20430 + }, + { + "epoch": 18.381294964028775, + "grad_norm": 0.32577598094940186, + "learning_rate": 7.862838471877023e-05, + "loss": 0.0255, + "step": 20440 + }, + { + "action_loss": 0.017475208267569542, + "epoch": 18.381294964028775, + "step": 20440 + }, + { + "epoch": 18.381294964028775, + "step": 20440, + "torque_loss": 0.16580072045326233 + }, + { + "epoch": 18.381294964028775, + "force_loss": 0.014205612242221832, + "step": 20440 + }, + { + "epoch": 18.390287769784173, + "grad_norm": 0.3017246127128601, + "learning_rate": 7.860578687921731e-05, + "loss": 0.0245, + "step": 20450 + }, + { + "action_loss": 0.010980118997395039, + "epoch": 18.390287769784173, + "step": 20450 + }, + { + "epoch": 18.390287769784173, + "step": 20450, + "torque_loss": 0.12659159302711487 + }, + { + "epoch": 18.390287769784173, + "force_loss": 0.019411476328969002, + "step": 20450 + }, + { + "epoch": 18.399280575539567, + "grad_norm": 0.3701028823852539, + "learning_rate": 7.858318034998164e-05, + "loss": 0.0219, + "step": 20460 + }, + { + "action_loss": 0.0061258901841938496, + "epoch": 18.399280575539567, + "step": 20460 + }, + { + "epoch": 18.399280575539567, + "step": 20460, + "torque_loss": 0.13757547736167908 + }, + { + "epoch": 18.399280575539567, + "force_loss": 0.005352713633328676, + "step": 20460 + }, + { + "epoch": 18.408273381294965, + "grad_norm": 0.2128572314977646, + "learning_rate": 7.856056513793046e-05, + "loss": 0.0196, + "step": 20470 + }, + { + "action_loss": 0.009321019984781742, + "epoch": 18.408273381294965, + "step": 20470 + }, + { + "epoch": 18.408273381294965, + "step": 20470, + "torque_loss": 0.13114409148693085 + }, + { + "epoch": 18.408273381294965, + "force_loss": 0.004438444506376982, + "step": 20470 + }, + { + "epoch": 18.41726618705036, + "grad_norm": 0.31634169816970825, + "learning_rate": 7.85379412499337e-05, + "loss": 0.0221, + "step": 20480 + }, + { + "action_loss": 0.006537764798849821, + "epoch": 18.41726618705036, + "step": 20480 + }, + { + "epoch": 18.41726618705036, + "step": 20480, + "torque_loss": 0.1347230076789856 + }, + { + "epoch": 18.41726618705036, + "force_loss": 0.005477122962474823, + "step": 20480 + }, + { + "epoch": 18.426258992805757, + "grad_norm": 0.23366904258728027, + "learning_rate": 7.851530869286389e-05, + "loss": 0.0219, + "step": 20490 + }, + { + "action_loss": 0.00228289351798594, + "epoch": 18.426258992805757, + "step": 20490 + }, + { + "epoch": 18.426258992805757, + "step": 20490, + "torque_loss": 0.125401109457016 + }, + { + "epoch": 18.426258992805757, + "force_loss": 0.0026794367004185915, + "step": 20490 + }, + { + "epoch": 18.43525179856115, + "grad_norm": 0.597470760345459, + "learning_rate": 7.849266747359619e-05, + "loss": 0.021, + "step": 20500 + }, + { + "action_loss": 0.002604519948363304, + "epoch": 18.43525179856115, + "step": 20500 + }, + { + "epoch": 18.43525179856115, + "step": 20500, + "torque_loss": 0.12323381751775742 + }, + { + "epoch": 18.43525179856115, + "force_loss": 0.002440603682771325, + "step": 20500 + }, + { + "epoch": 18.444244604316548, + "grad_norm": 0.4298843741416931, + "learning_rate": 7.847001759900843e-05, + "loss": 0.0174, + "step": 20510 + }, + { + "action_loss": 0.003394864732399583, + "epoch": 18.444244604316548, + "step": 20510 + }, + { + "epoch": 18.444244604316548, + "step": 20510, + "torque_loss": 0.11813098192214966 + }, + { + "epoch": 18.444244604316548, + "force_loss": 0.004520654212683439, + "step": 20510 + }, + { + "epoch": 18.453237410071942, + "grad_norm": 0.4122294485569, + "learning_rate": 7.844735907598102e-05, + "loss": 0.0272, + "step": 20520 + }, + { + "action_loss": 0.02835668809711933, + "epoch": 18.453237410071942, + "step": 20520 + }, + { + "epoch": 18.453237410071942, + "step": 20520, + "torque_loss": 0.12698209285736084 + }, + { + "epoch": 18.453237410071942, + "force_loss": 0.03437325730919838, + "step": 20520 + }, + { + "epoch": 18.46223021582734, + "grad_norm": 0.23517845571041107, + "learning_rate": 7.842469191139703e-05, + "loss": 0.0218, + "step": 20530 + }, + { + "action_loss": 0.00439832080155611, + "epoch": 18.46223021582734, + "step": 20530 + }, + { + "epoch": 18.46223021582734, + "step": 20530, + "torque_loss": 0.15723256766796112 + }, + { + "epoch": 18.46223021582734, + "force_loss": 0.0030727973207831383, + "step": 20530 + }, + { + "epoch": 18.471223021582734, + "grad_norm": 0.26438984274864197, + "learning_rate": 7.840201611214215e-05, + "loss": 0.0215, + "step": 20540 + }, + { + "action_loss": 0.01083553209900856, + "epoch": 18.471223021582734, + "step": 20540 + }, + { + "epoch": 18.471223021582734, + "step": 20540, + "torque_loss": 0.17683565616607666 + }, + { + "epoch": 18.471223021582734, + "force_loss": 0.026332667097449303, + "step": 20540 + }, + { + "epoch": 18.480215827338128, + "grad_norm": 0.3710186779499054, + "learning_rate": 7.837933168510469e-05, + "loss": 0.0298, + "step": 20550 + }, + { + "action_loss": 0.00446340162307024, + "epoch": 18.480215827338128, + "step": 20550 + }, + { + "epoch": 18.480215827338128, + "step": 20550, + "torque_loss": 0.1782977133989334 + }, + { + "epoch": 18.480215827338128, + "force_loss": 0.003301275661215186, + "step": 20550 + }, + { + "epoch": 18.489208633093526, + "grad_norm": 0.438558429479599, + "learning_rate": 7.835663863717559e-05, + "loss": 0.0251, + "step": 20560 + }, + { + "action_loss": 0.012829662300646305, + "epoch": 18.489208633093526, + "step": 20560 + }, + { + "epoch": 18.489208633093526, + "step": 20560, + "torque_loss": 0.12239912897348404 + }, + { + "epoch": 18.489208633093526, + "force_loss": 0.012078560888767242, + "step": 20560 + }, + { + "epoch": 18.49820143884892, + "grad_norm": 0.33304500579833984, + "learning_rate": 7.833393697524838e-05, + "loss": 0.0264, + "step": 20570 + }, + { + "action_loss": 0.003589646890759468, + "epoch": 18.49820143884892, + "step": 20570 + }, + { + "epoch": 18.49820143884892, + "step": 20570, + "torque_loss": 0.08568349480628967 + }, + { + "epoch": 18.49820143884892, + "force_loss": 0.0027285218238830566, + "step": 20570 + }, + { + "epoch": 18.507194244604317, + "grad_norm": 0.3924681842327118, + "learning_rate": 7.831122670621922e-05, + "loss": 0.0226, + "step": 20580 + }, + { + "action_loss": 0.0037728771567344666, + "epoch": 18.507194244604317, + "step": 20580 + }, + { + "epoch": 18.507194244604317, + "step": 20580, + "torque_loss": 0.11202045530080795 + }, + { + "epoch": 18.507194244604317, + "force_loss": 0.004348220303654671, + "step": 20580 + }, + { + "epoch": 18.51618705035971, + "grad_norm": 0.23032504320144653, + "learning_rate": 7.82885078369869e-05, + "loss": 0.0217, + "step": 20590 + }, + { + "action_loss": 0.0051721553318202496, + "epoch": 18.51618705035971, + "step": 20590 + }, + { + "epoch": 18.51618705035971, + "step": 20590, + "torque_loss": 0.1278543621301651 + }, + { + "epoch": 18.51618705035971, + "force_loss": 0.005688309669494629, + "step": 20590 + }, + { + "epoch": 18.52517985611511, + "grad_norm": 0.4717794358730316, + "learning_rate": 7.826578037445283e-05, + "loss": 0.0205, + "step": 20600 + }, + { + "action_loss": 0.0063773407600820065, + "epoch": 18.52517985611511, + "step": 20600 + }, + { + "epoch": 18.52517985611511, + "step": 20600, + "torque_loss": 0.18620699644088745 + }, + { + "epoch": 18.52517985611511, + "force_loss": 0.008187116123735905, + "step": 20600 + }, + { + "epoch": 18.534172661870503, + "grad_norm": 0.29804527759552, + "learning_rate": 7.824304432552097e-05, + "loss": 0.019, + "step": 20610 + }, + { + "action_loss": 0.013948939740657806, + "epoch": 18.534172661870503, + "step": 20610 + }, + { + "epoch": 18.534172661870503, + "step": 20610, + "torque_loss": 0.19339405000209808 + }, + { + "epoch": 18.534172661870503, + "force_loss": 0.010422687977552414, + "step": 20610 + }, + { + "epoch": 18.5431654676259, + "grad_norm": 0.39517560601234436, + "learning_rate": 7.822029969709798e-05, + "loss": 0.0249, + "step": 20620 + }, + { + "action_loss": 0.00812717154622078, + "epoch": 18.5431654676259, + "step": 20620 + }, + { + "epoch": 18.5431654676259, + "step": 20620, + "torque_loss": 0.07682988047599792 + }, + { + "epoch": 18.5431654676259, + "force_loss": 0.006491567939519882, + "step": 20620 + }, + { + "epoch": 18.552158273381295, + "grad_norm": 0.27267733216285706, + "learning_rate": 7.819754649609306e-05, + "loss": 0.0261, + "step": 20630 + }, + { + "action_loss": 0.004313321318477392, + "epoch": 18.552158273381295, + "step": 20630 + }, + { + "epoch": 18.552158273381295, + "step": 20630, + "torque_loss": 0.11302149295806885 + }, + { + "epoch": 18.552158273381295, + "force_loss": 0.0033273298759013414, + "step": 20630 + }, + { + "epoch": 18.56115107913669, + "grad_norm": 0.22700157761573792, + "learning_rate": 7.817478472941802e-05, + "loss": 0.0232, + "step": 20640 + }, + { + "action_loss": 0.005358824972063303, + "epoch": 18.56115107913669, + "step": 20640 + }, + { + "epoch": 18.56115107913669, + "step": 20640, + "torque_loss": 0.11660012602806091 + }, + { + "epoch": 18.56115107913669, + "force_loss": 0.0028624318074434996, + "step": 20640 + }, + { + "epoch": 18.570143884892087, + "grad_norm": 0.506298303604126, + "learning_rate": 7.815201440398727e-05, + "loss": 0.0183, + "step": 20650 + }, + { + "action_loss": 0.01943872682750225, + "epoch": 18.570143884892087, + "step": 20650 + }, + { + "epoch": 18.570143884892087, + "step": 20650, + "torque_loss": 0.19819898903369904 + }, + { + "epoch": 18.570143884892087, + "force_loss": 0.027110660448670387, + "step": 20650 + }, + { + "epoch": 18.57913669064748, + "grad_norm": 0.26202455163002014, + "learning_rate": 7.812923552671789e-05, + "loss": 0.0242, + "step": 20660 + }, + { + "action_loss": 0.005351244937628508, + "epoch": 18.57913669064748, + "step": 20660 + }, + { + "epoch": 18.57913669064748, + "step": 20660, + "torque_loss": 0.1428813487291336 + }, + { + "epoch": 18.57913669064748, + "force_loss": 0.0048134420067071915, + "step": 20660 + }, + { + "epoch": 18.58812949640288, + "grad_norm": 0.2282099574804306, + "learning_rate": 7.810644810452945e-05, + "loss": 0.0195, + "step": 20670 + }, + { + "action_loss": 0.005451902747154236, + "epoch": 18.58812949640288, + "step": 20670 + }, + { + "epoch": 18.58812949640288, + "step": 20670, + "torque_loss": 0.1219620481133461 + }, + { + "epoch": 18.58812949640288, + "force_loss": 0.004392911214381456, + "step": 20670 + }, + { + "epoch": 18.597122302158272, + "grad_norm": 0.45334017276763916, + "learning_rate": 7.808365214434417e-05, + "loss": 0.0209, + "step": 20680 + }, + { + "action_loss": 0.0045912074856460094, + "epoch": 18.597122302158272, + "step": 20680 + }, + { + "epoch": 18.597122302158272, + "step": 20680, + "torque_loss": 0.13276012241840363 + }, + { + "epoch": 18.597122302158272, + "force_loss": 0.0067670163698494434, + "step": 20680 + }, + { + "epoch": 18.60611510791367, + "grad_norm": 0.2921150326728821, + "learning_rate": 7.80608476530869e-05, + "loss": 0.0219, + "step": 20690 + }, + { + "action_loss": 0.002647256478667259, + "epoch": 18.60611510791367, + "step": 20690 + }, + { + "epoch": 18.60611510791367, + "step": 20690, + "torque_loss": 0.128000870347023 + }, + { + "epoch": 18.60611510791367, + "force_loss": 0.002313948003575206, + "step": 20690 + }, + { + "epoch": 18.615107913669064, + "grad_norm": 0.2665504813194275, + "learning_rate": 7.8038034637685e-05, + "loss": 0.0197, + "step": 20700 + }, + { + "action_loss": 0.01289258524775505, + "epoch": 18.615107913669064, + "step": 20700 + }, + { + "epoch": 18.615107913669064, + "step": 20700, + "torque_loss": 0.12954920530319214 + }, + { + "epoch": 18.615107913669064, + "force_loss": 0.011452610604465008, + "step": 20700 + }, + { + "epoch": 18.62410071942446, + "grad_norm": 0.37744781374931335, + "learning_rate": 7.801521310506848e-05, + "loss": 0.0193, + "step": 20710 + }, + { + "action_loss": 0.008950660936534405, + "epoch": 18.62410071942446, + "step": 20710 + }, + { + "epoch": 18.62410071942446, + "step": 20710, + "torque_loss": 0.11152449995279312 + }, + { + "epoch": 18.62410071942446, + "force_loss": 0.005761043634265661, + "step": 20710 + }, + { + "epoch": 18.633093525179856, + "grad_norm": 0.4123731851577759, + "learning_rate": 7.799238306216994e-05, + "loss": 0.0188, + "step": 20720 + }, + { + "action_loss": 0.010444770567119122, + "epoch": 18.633093525179856, + "step": 20720 + }, + { + "epoch": 18.633093525179856, + "step": 20720, + "torque_loss": 0.1856756955385208 + }, + { + "epoch": 18.633093525179856, + "force_loss": 0.009819758124649525, + "step": 20720 + }, + { + "epoch": 18.642086330935253, + "grad_norm": 0.2715679407119751, + "learning_rate": 7.796954451592448e-05, + "loss": 0.0251, + "step": 20730 + }, + { + "action_loss": 0.00551832839846611, + "epoch": 18.642086330935253, + "step": 20730 + }, + { + "epoch": 18.642086330935253, + "step": 20730, + "torque_loss": 0.12976853549480438 + }, + { + "epoch": 18.642086330935253, + "force_loss": 0.0028945046942681074, + "step": 20730 + }, + { + "epoch": 18.651079136690647, + "grad_norm": 0.38369905948638916, + "learning_rate": 7.794669747326992e-05, + "loss": 0.0241, + "step": 20740 + }, + { + "action_loss": 0.005488332360982895, + "epoch": 18.651079136690647, + "step": 20740 + }, + { + "epoch": 18.651079136690647, + "step": 20740, + "torque_loss": 0.12438354641199112 + }, + { + "epoch": 18.651079136690647, + "force_loss": 0.007082134485244751, + "step": 20740 + }, + { + "epoch": 18.66007194244604, + "grad_norm": 0.27323734760284424, + "learning_rate": 7.792384194114654e-05, + "loss": 0.0205, + "step": 20750 + }, + { + "action_loss": 0.008285093121230602, + "epoch": 18.66007194244604, + "step": 20750 + }, + { + "epoch": 18.66007194244604, + "step": 20750, + "torque_loss": 0.13652460277080536 + }, + { + "epoch": 18.66007194244604, + "force_loss": 0.010674399323761463, + "step": 20750 + }, + { + "epoch": 18.66906474820144, + "grad_norm": 1.0483168363571167, + "learning_rate": 7.790097792649729e-05, + "loss": 0.0228, + "step": 20760 + }, + { + "action_loss": 0.0167890265583992, + "epoch": 18.66906474820144, + "step": 20760 + }, + { + "epoch": 18.66906474820144, + "step": 20760, + "torque_loss": 0.16304481029510498 + }, + { + "epoch": 18.66906474820144, + "force_loss": 0.018465260043740273, + "step": 20760 + }, + { + "epoch": 18.678057553956833, + "grad_norm": 0.28037571907043457, + "learning_rate": 7.787810543626762e-05, + "loss": 0.0209, + "step": 20770 + }, + { + "action_loss": 0.008035396225750446, + "epoch": 18.678057553956833, + "step": 20770 + }, + { + "epoch": 18.678057553956833, + "step": 20770, + "torque_loss": 0.1455891877412796 + }, + { + "epoch": 18.678057553956833, + "force_loss": 0.006655184086412191, + "step": 20770 + }, + { + "epoch": 18.68705035971223, + "grad_norm": 0.34441569447517395, + "learning_rate": 7.785522447740558e-05, + "loss": 0.0194, + "step": 20780 + }, + { + "action_loss": 0.004798682872205973, + "epoch": 18.68705035971223, + "step": 20780 + }, + { + "epoch": 18.68705035971223, + "step": 20780, + "torque_loss": 0.11174693703651428 + }, + { + "epoch": 18.68705035971223, + "force_loss": 0.0041641718707978725, + "step": 20780 + }, + { + "epoch": 18.696043165467625, + "grad_norm": 0.34278228878974915, + "learning_rate": 7.783233505686182e-05, + "loss": 0.0274, + "step": 20790 + }, + { + "action_loss": 0.0032451439183205366, + "epoch": 18.696043165467625, + "step": 20790 + }, + { + "epoch": 18.696043165467625, + "step": 20790, + "torque_loss": 0.138041689991951 + }, + { + "epoch": 18.696043165467625, + "force_loss": 0.0033103812020272017, + "step": 20790 + }, + { + "epoch": 18.705035971223023, + "grad_norm": 0.2710281014442444, + "learning_rate": 7.780943718158955e-05, + "loss": 0.0189, + "step": 20800 + }, + { + "action_loss": 0.004741458687931299, + "epoch": 18.705035971223023, + "step": 20800 + }, + { + "epoch": 18.705035971223023, + "step": 20800, + "torque_loss": 0.14033029973506927 + }, + { + "epoch": 18.705035971223023, + "force_loss": 0.008525731973350048, + "step": 20800 + }, + { + "epoch": 18.714028776978417, + "grad_norm": 0.4619952142238617, + "learning_rate": 7.778653085854453e-05, + "loss": 0.0226, + "step": 20810 + }, + { + "action_loss": 0.0035277584102004766, + "epoch": 18.714028776978417, + "step": 20810 + }, + { + "epoch": 18.714028776978417, + "step": 20810, + "torque_loss": 0.06143399700522423 + }, + { + "epoch": 18.714028776978417, + "force_loss": 0.005184220615774393, + "step": 20810 + }, + { + "epoch": 18.723021582733814, + "grad_norm": 0.35060372948646545, + "learning_rate": 7.77636160946851e-05, + "loss": 0.0195, + "step": 20820 + }, + { + "action_loss": 0.0045479764230549335, + "epoch": 18.723021582733814, + "step": 20820 + }, + { + "epoch": 18.723021582733814, + "step": 20820, + "torque_loss": 0.12226519733667374 + }, + { + "epoch": 18.723021582733814, + "force_loss": 0.009367171674966812, + "step": 20820 + }, + { + "epoch": 18.73201438848921, + "grad_norm": 0.45519742369651794, + "learning_rate": 7.774069289697215e-05, + "loss": 0.0229, + "step": 20830 + }, + { + "action_loss": 0.005259813740849495, + "epoch": 18.73201438848921, + "step": 20830 + }, + { + "epoch": 18.73201438848921, + "step": 20830, + "torque_loss": 0.13091827929019928 + }, + { + "epoch": 18.73201438848921, + "force_loss": 0.0031828416977077723, + "step": 20830 + }, + { + "epoch": 18.741007194244606, + "grad_norm": 0.2008289098739624, + "learning_rate": 7.771776127236913e-05, + "loss": 0.0208, + "step": 20840 + }, + { + "action_loss": 0.006448762025684118, + "epoch": 18.741007194244606, + "step": 20840 + }, + { + "epoch": 18.741007194244606, + "step": 20840, + "torque_loss": 0.14278720319271088 + }, + { + "epoch": 18.741007194244606, + "force_loss": 0.004897276405245066, + "step": 20840 + }, + { + "epoch": 18.75, + "grad_norm": 0.22035515308380127, + "learning_rate": 7.769482122784212e-05, + "loss": 0.02, + "step": 20850 + }, + { + "action_loss": 0.012750628404319286, + "epoch": 18.75, + "step": 20850 + }, + { + "epoch": 18.75, + "step": 20850, + "torque_loss": 0.17558342218399048 + }, + { + "epoch": 18.75, + "force_loss": 0.01920117251574993, + "step": 20850 + }, + { + "epoch": 18.758992805755394, + "grad_norm": 0.5520920157432556, + "learning_rate": 7.767187277035963e-05, + "loss": 0.0206, + "step": 20860 + }, + { + "action_loss": 0.013377487659454346, + "epoch": 18.758992805755394, + "step": 20860 + }, + { + "epoch": 18.758992805755394, + "step": 20860, + "torque_loss": 0.13168296217918396 + }, + { + "epoch": 18.758992805755394, + "force_loss": 0.01273645181208849, + "step": 20860 + }, + { + "epoch": 18.76798561151079, + "grad_norm": 0.3895902633666992, + "learning_rate": 7.764891590689285e-05, + "loss": 0.0245, + "step": 20870 + }, + { + "action_loss": 0.007989990524947643, + "epoch": 18.76798561151079, + "step": 20870 + }, + { + "epoch": 18.76798561151079, + "step": 20870, + "torque_loss": 0.20341791212558746 + }, + { + "epoch": 18.76798561151079, + "force_loss": 0.006274865940213203, + "step": 20870 + }, + { + "epoch": 18.776978417266186, + "grad_norm": 0.3786901533603668, + "learning_rate": 7.762595064441542e-05, + "loss": 0.0227, + "step": 20880 + }, + { + "action_loss": 0.020759470760822296, + "epoch": 18.776978417266186, + "step": 20880 + }, + { + "epoch": 18.776978417266186, + "step": 20880, + "torque_loss": 0.15657936036586761 + }, + { + "epoch": 18.776978417266186, + "force_loss": 0.022274455055594444, + "step": 20880 + }, + { + "epoch": 18.785971223021583, + "grad_norm": 0.3006162941455841, + "learning_rate": 7.760297698990362e-05, + "loss": 0.0211, + "step": 20890 + }, + { + "action_loss": 0.005678398068994284, + "epoch": 18.785971223021583, + "step": 20890 + }, + { + "epoch": 18.785971223021583, + "step": 20890, + "torque_loss": 0.08947417140007019 + }, + { + "epoch": 18.785971223021583, + "force_loss": 0.00516447564586997, + "step": 20890 + }, + { + "epoch": 18.794964028776977, + "grad_norm": 0.5261251926422119, + "learning_rate": 7.757999495033623e-05, + "loss": 0.0231, + "step": 20900 + }, + { + "action_loss": 0.00572761707007885, + "epoch": 18.794964028776977, + "step": 20900 + }, + { + "epoch": 18.794964028776977, + "step": 20900, + "torque_loss": 0.16962863504886627 + }, + { + "epoch": 18.794964028776977, + "force_loss": 0.004866601899266243, + "step": 20900 + }, + { + "epoch": 18.803956834532375, + "grad_norm": 0.20297500491142273, + "learning_rate": 7.755700453269456e-05, + "loss": 0.019, + "step": 20910 + }, + { + "action_loss": 0.004213298205286264, + "epoch": 18.803956834532375, + "step": 20910 + }, + { + "epoch": 18.803956834532375, + "step": 20910, + "torque_loss": 0.21491968631744385 + }, + { + "epoch": 18.803956834532375, + "force_loss": 0.002340235747396946, + "step": 20910 + }, + { + "epoch": 18.81294964028777, + "grad_norm": 0.6163339614868164, + "learning_rate": 7.753400574396254e-05, + "loss": 0.0237, + "step": 20920 + }, + { + "action_loss": 0.0036244343500584364, + "epoch": 18.81294964028777, + "step": 20920 + }, + { + "epoch": 18.81294964028777, + "step": 20920, + "torque_loss": 0.13420718908309937 + }, + { + "epoch": 18.81294964028777, + "force_loss": 0.002731218934059143, + "step": 20920 + }, + { + "epoch": 18.821942446043167, + "grad_norm": 0.5458612442016602, + "learning_rate": 7.751099859112655e-05, + "loss": 0.025, + "step": 20930 + }, + { + "action_loss": 0.006525054574012756, + "epoch": 18.821942446043167, + "step": 20930 + }, + { + "epoch": 18.821942446043167, + "step": 20930, + "torque_loss": 0.12040508538484573 + }, + { + "epoch": 18.821942446043167, + "force_loss": 0.014152037911117077, + "step": 20930 + }, + { + "epoch": 18.83093525179856, + "grad_norm": 0.2515645921230316, + "learning_rate": 7.748798308117557e-05, + "loss": 0.0196, + "step": 20940 + }, + { + "action_loss": 0.005239488091319799, + "epoch": 18.83093525179856, + "step": 20940 + }, + { + "epoch": 18.83093525179856, + "step": 20940, + "torque_loss": 0.11633515357971191 + }, + { + "epoch": 18.83093525179856, + "force_loss": 0.005713296588510275, + "step": 20940 + }, + { + "epoch": 18.83992805755396, + "grad_norm": 0.46672412753105164, + "learning_rate": 7.746495922110112e-05, + "loss": 0.0223, + "step": 20950 + }, + { + "action_loss": 0.008477890864014626, + "epoch": 18.83992805755396, + "step": 20950 + }, + { + "epoch": 18.83992805755396, + "step": 20950, + "torque_loss": 0.17128349840641022 + }, + { + "epoch": 18.83992805755396, + "force_loss": 0.00535630201920867, + "step": 20950 + }, + { + "epoch": 18.848920863309353, + "grad_norm": 0.14538341760635376, + "learning_rate": 7.744192701789723e-05, + "loss": 0.0201, + "step": 20960 + }, + { + "action_loss": 0.0038794800639152527, + "epoch": 18.848920863309353, + "step": 20960 + }, + { + "epoch": 18.848920863309353, + "step": 20960, + "torque_loss": 0.1321813017129898 + }, + { + "epoch": 18.848920863309353, + "force_loss": 0.002145926933735609, + "step": 20960 + }, + { + "epoch": 18.857913669064747, + "grad_norm": 0.30010467767715454, + "learning_rate": 7.741888647856046e-05, + "loss": 0.0201, + "step": 20970 + }, + { + "action_loss": 0.00981205701828003, + "epoch": 18.857913669064747, + "step": 20970 + }, + { + "epoch": 18.857913669064747, + "step": 20970, + "torque_loss": 0.1619131714105606 + }, + { + "epoch": 18.857913669064747, + "force_loss": 0.00961390696465969, + "step": 20970 + }, + { + "epoch": 18.866906474820144, + "grad_norm": 0.2195420116186142, + "learning_rate": 7.739583761008994e-05, + "loss": 0.022, + "step": 20980 + }, + { + "action_loss": 0.01965527981519699, + "epoch": 18.866906474820144, + "step": 20980 + }, + { + "epoch": 18.866906474820144, + "step": 20980, + "torque_loss": 0.13485606014728546 + }, + { + "epoch": 18.866906474820144, + "force_loss": 0.010004352778196335, + "step": 20980 + }, + { + "epoch": 18.87589928057554, + "grad_norm": 0.580980658531189, + "learning_rate": 7.73727804194873e-05, + "loss": 0.0208, + "step": 20990 + }, + { + "action_loss": 0.005588372703641653, + "epoch": 18.87589928057554, + "step": 20990 + }, + { + "epoch": 18.87589928057554, + "step": 20990, + "torque_loss": 0.18242399394512177 + }, + { + "epoch": 18.87589928057554, + "force_loss": 0.00622784486040473, + "step": 20990 + }, + { + "epoch": 18.884892086330936, + "grad_norm": 0.3247342109680176, + "learning_rate": 7.734971491375671e-05, + "loss": 0.0209, + "step": 21000 + }, + { + "action_loss": 0.002195393666625023, + "epoch": 18.884892086330936, + "step": 21000 + }, + { + "epoch": 18.884892086330936, + "step": 21000, + "torque_loss": 0.08245361596345901 + }, + { + "epoch": 18.884892086330936, + "force_loss": 0.0029244597535580397, + "step": 21000 + }, + { + "epoch": 18.89388489208633, + "grad_norm": 0.2926085293292999, + "learning_rate": 7.732664109990485e-05, + "loss": 0.0208, + "step": 21010 + }, + { + "action_loss": 0.006941869854927063, + "epoch": 18.89388489208633, + "step": 21010 + }, + { + "epoch": 18.89388489208633, + "step": 21010, + "torque_loss": 0.1329679787158966 + }, + { + "epoch": 18.89388489208633, + "force_loss": 0.0030126480851322412, + "step": 21010 + }, + { + "epoch": 18.902877697841728, + "grad_norm": 0.2950783967971802, + "learning_rate": 7.730355898494095e-05, + "loss": 0.0304, + "step": 21020 + }, + { + "action_loss": 0.011579719372093678, + "epoch": 18.902877697841728, + "step": 21020 + }, + { + "epoch": 18.902877697841728, + "step": 21020, + "torque_loss": 0.17243927717208862 + }, + { + "epoch": 18.902877697841728, + "force_loss": 0.009910565800964832, + "step": 21020 + }, + { + "epoch": 18.91187050359712, + "grad_norm": 0.3713012635707855, + "learning_rate": 7.728046857587673e-05, + "loss": 0.0269, + "step": 21030 + }, + { + "action_loss": 0.00507223280146718, + "epoch": 18.91187050359712, + "step": 21030 + }, + { + "epoch": 18.91187050359712, + "step": 21030, + "torque_loss": 0.12365678697824478 + }, + { + "epoch": 18.91187050359712, + "force_loss": 0.008763331919908524, + "step": 21030 + }, + { + "epoch": 18.92086330935252, + "grad_norm": 0.7931478023529053, + "learning_rate": 7.725736987972647e-05, + "loss": 0.0248, + "step": 21040 + }, + { + "action_loss": 0.009880068711936474, + "epoch": 18.92086330935252, + "step": 21040 + }, + { + "epoch": 18.92086330935252, + "step": 21040, + "torque_loss": 0.12279900908470154 + }, + { + "epoch": 18.92086330935252, + "force_loss": 0.009871678426861763, + "step": 21040 + }, + { + "epoch": 18.929856115107913, + "grad_norm": 0.4834195673465729, + "learning_rate": 7.723426290350691e-05, + "loss": 0.0245, + "step": 21050 + }, + { + "action_loss": 0.011661668308079243, + "epoch": 18.929856115107913, + "step": 21050 + }, + { + "epoch": 18.929856115107913, + "step": 21050, + "torque_loss": 0.14733369648456573 + }, + { + "epoch": 18.929856115107913, + "force_loss": 0.012337010353803635, + "step": 21050 + }, + { + "epoch": 18.93884892086331, + "grad_norm": 0.3356728255748749, + "learning_rate": 7.721114765423736e-05, + "loss": 0.0249, + "step": 21060 + }, + { + "action_loss": 0.008570664562284946, + "epoch": 18.93884892086331, + "step": 21060 + }, + { + "epoch": 18.93884892086331, + "step": 21060, + "torque_loss": 0.10762292146682739 + }, + { + "epoch": 18.93884892086331, + "force_loss": 0.009123653173446655, + "step": 21060 + }, + { + "epoch": 18.947841726618705, + "grad_norm": 0.27851220965385437, + "learning_rate": 7.718802413893963e-05, + "loss": 0.0215, + "step": 21070 + }, + { + "action_loss": 0.007585987448692322, + "epoch": 18.947841726618705, + "step": 21070 + }, + { + "epoch": 18.947841726618705, + "step": 21070, + "torque_loss": 0.11948993057012558 + }, + { + "epoch": 18.947841726618705, + "force_loss": 0.0066976421512663364, + "step": 21070 + }, + { + "epoch": 18.9568345323741, + "grad_norm": 0.35844120383262634, + "learning_rate": 7.716489236463802e-05, + "loss": 0.0208, + "step": 21080 + }, + { + "action_loss": 0.007779898587614298, + "epoch": 18.9568345323741, + "step": 21080 + }, + { + "epoch": 18.9568345323741, + "step": 21080, + "torque_loss": 0.12000761181116104 + }, + { + "epoch": 18.9568345323741, + "force_loss": 0.015624753199517727, + "step": 21080 + }, + { + "epoch": 18.965827338129497, + "grad_norm": 0.16129791736602783, + "learning_rate": 7.714175233835936e-05, + "loss": 0.0197, + "step": 21090 + }, + { + "action_loss": 0.0030143007170408964, + "epoch": 18.965827338129497, + "step": 21090 + }, + { + "epoch": 18.965827338129497, + "step": 21090, + "torque_loss": 0.12497544288635254 + }, + { + "epoch": 18.965827338129497, + "force_loss": 0.0027070678770542145, + "step": 21090 + }, + { + "epoch": 18.97482014388489, + "grad_norm": 0.3578173816204071, + "learning_rate": 7.711860406713299e-05, + "loss": 0.0202, + "step": 21100 + }, + { + "action_loss": 0.005643816199153662, + "epoch": 18.97482014388489, + "step": 21100 + }, + { + "epoch": 18.97482014388489, + "step": 21100, + "torque_loss": 0.12769551575183868 + }, + { + "epoch": 18.97482014388489, + "force_loss": 0.0054486957378685474, + "step": 21100 + }, + { + "epoch": 18.98381294964029, + "grad_norm": 0.20371337234973907, + "learning_rate": 7.70954475579907e-05, + "loss": 0.0206, + "step": 21110 + }, + { + "action_loss": 0.004199328366667032, + "epoch": 18.98381294964029, + "step": 21110 + }, + { + "epoch": 18.98381294964029, + "step": 21110, + "torque_loss": 0.13115596771240234 + }, + { + "epoch": 18.98381294964029, + "force_loss": 0.003864569589495659, + "step": 21110 + }, + { + "epoch": 18.992805755395683, + "grad_norm": 0.4181339740753174, + "learning_rate": 7.707228281796688e-05, + "loss": 0.0211, + "step": 21120 + }, + { + "action_loss": 0.004545002710074186, + "epoch": 18.992805755395683, + "step": 21120 + }, + { + "epoch": 18.992805755395683, + "step": 21120, + "torque_loss": 0.14200468361377716 + }, + { + "epoch": 18.992805755395683, + "force_loss": 0.002078678458929062, + "step": 21120 + }, + { + "epoch": 19.00179856115108, + "grad_norm": 0.45750585198402405, + "learning_rate": 7.704910985409833e-05, + "loss": 0.0242, + "step": 21130 + }, + { + "action_loss": 0.01091001182794571, + "epoch": 19.00179856115108, + "step": 21130 + }, + { + "epoch": 19.00179856115108, + "step": 21130, + "torque_loss": 0.15371786057949066 + }, + { + "epoch": 19.00179856115108, + "force_loss": 0.014564561657607555, + "step": 21130 + }, + { + "epoch": 19.010791366906474, + "grad_norm": 0.37320059537887573, + "learning_rate": 7.702592867342439e-05, + "loss": 0.0264, + "step": 21140 + }, + { + "action_loss": 0.009730189107358456, + "epoch": 19.010791366906474, + "step": 21140 + }, + { + "epoch": 19.010791366906474, + "step": 21140, + "torque_loss": 0.18975459039211273 + }, + { + "epoch": 19.010791366906474, + "force_loss": 0.006682038307189941, + "step": 21140 + }, + { + "epoch": 19.019784172661872, + "grad_norm": 0.2717139422893524, + "learning_rate": 7.700273928298691e-05, + "loss": 0.0201, + "step": 21150 + }, + { + "action_loss": 0.006085825618356466, + "epoch": 19.019784172661872, + "step": 21150 + }, + { + "epoch": 19.019784172661872, + "step": 21150, + "torque_loss": 0.12803782522678375 + }, + { + "epoch": 19.019784172661872, + "force_loss": 0.008105587214231491, + "step": 21150 + }, + { + "epoch": 19.028776978417266, + "grad_norm": 0.2965158224105835, + "learning_rate": 7.697954168983021e-05, + "loss": 0.0223, + "step": 21160 + }, + { + "action_loss": 0.005119136068969965, + "epoch": 19.028776978417266, + "step": 21160 + }, + { + "epoch": 19.028776978417266, + "step": 21160, + "torque_loss": 0.0973953828215599 + }, + { + "epoch": 19.028776978417266, + "force_loss": 0.0038145689759403467, + "step": 21160 + }, + { + "epoch": 19.037769784172664, + "grad_norm": 0.30059152841567993, + "learning_rate": 7.695633590100109e-05, + "loss": 0.0232, + "step": 21170 + }, + { + "action_loss": 0.00632014824077487, + "epoch": 19.037769784172664, + "step": 21170 + }, + { + "epoch": 19.037769784172664, + "step": 21170, + "torque_loss": 0.14109350740909576 + }, + { + "epoch": 19.037769784172664, + "force_loss": 0.009804566390812397, + "step": 21170 + }, + { + "epoch": 19.046762589928058, + "grad_norm": 0.41945117712020874, + "learning_rate": 7.693312192354886e-05, + "loss": 0.0224, + "step": 21180 + }, + { + "action_loss": 0.0068938713520765305, + "epoch": 19.046762589928058, + "step": 21180 + }, + { + "epoch": 19.046762589928058, + "step": 21180, + "torque_loss": 0.13942526280879974 + }, + { + "epoch": 19.046762589928058, + "force_loss": 0.01081293448805809, + "step": 21180 + }, + { + "epoch": 19.055755395683452, + "grad_norm": 0.2478008270263672, + "learning_rate": 7.690989976452532e-05, + "loss": 0.0207, + "step": 21190 + }, + { + "action_loss": 0.006615053862333298, + "epoch": 19.055755395683452, + "step": 21190 + }, + { + "epoch": 19.055755395683452, + "step": 21190, + "torque_loss": 0.1412479132413864 + }, + { + "epoch": 19.055755395683452, + "force_loss": 0.005072843749076128, + "step": 21190 + }, + { + "epoch": 19.06474820143885, + "grad_norm": 0.41966503858566284, + "learning_rate": 7.688666943098475e-05, + "loss": 0.0219, + "step": 21200 + }, + { + "action_loss": 0.00664830207824707, + "epoch": 19.06474820143885, + "step": 21200 + }, + { + "epoch": 19.06474820143885, + "step": 21200, + "torque_loss": 0.15645386278629303 + }, + { + "epoch": 19.06474820143885, + "force_loss": 0.00666293129324913, + "step": 21200 + }, + { + "epoch": 19.073741007194243, + "grad_norm": 0.21348387002944946, + "learning_rate": 7.686343092998389e-05, + "loss": 0.019, + "step": 21210 + }, + { + "action_loss": 0.004536744207143784, + "epoch": 19.073741007194243, + "step": 21210 + }, + { + "epoch": 19.073741007194243, + "step": 21210, + "torque_loss": 0.14316974580287933 + }, + { + "epoch": 19.073741007194243, + "force_loss": 0.004935022909194231, + "step": 21210 + }, + { + "epoch": 19.08273381294964, + "grad_norm": 0.31518983840942383, + "learning_rate": 7.684018426858202e-05, + "loss": 0.0254, + "step": 21220 + }, + { + "action_loss": 0.003023432567715645, + "epoch": 19.08273381294964, + "step": 21220 + }, + { + "epoch": 19.08273381294964, + "step": 21220, + "torque_loss": 0.1416092962026596 + }, + { + "epoch": 19.08273381294964, + "force_loss": 0.0018521612510085106, + "step": 21220 + }, + { + "epoch": 19.091726618705035, + "grad_norm": 0.42931514978408813, + "learning_rate": 7.681692945384084e-05, + "loss": 0.0217, + "step": 21230 + }, + { + "action_loss": 0.004434697795659304, + "epoch": 19.091726618705035, + "step": 21230 + }, + { + "epoch": 19.091726618705035, + "step": 21230, + "torque_loss": 0.10643320530653 + }, + { + "epoch": 19.091726618705035, + "force_loss": 0.006954862270504236, + "step": 21230 + }, + { + "epoch": 19.100719424460433, + "grad_norm": 0.18268223106861115, + "learning_rate": 7.679366649282456e-05, + "loss": 0.0185, + "step": 21240 + }, + { + "action_loss": 0.0451851524412632, + "epoch": 19.100719424460433, + "step": 21240 + }, + { + "epoch": 19.100719424460433, + "step": 21240, + "torque_loss": 0.17587248980998993 + }, + { + "epoch": 19.100719424460433, + "force_loss": 0.0445452444255352, + "step": 21240 + }, + { + "epoch": 19.109712230215827, + "grad_norm": 0.23430714011192322, + "learning_rate": 7.677039539259983e-05, + "loss": 0.0237, + "step": 21250 + }, + { + "action_loss": 0.019339272752404213, + "epoch": 19.109712230215827, + "step": 21250 + }, + { + "epoch": 19.109712230215827, + "step": 21250, + "torque_loss": 0.17584173381328583 + }, + { + "epoch": 19.109712230215827, + "force_loss": 0.021408798173069954, + "step": 21250 + }, + { + "epoch": 19.118705035971225, + "grad_norm": 0.36534684896469116, + "learning_rate": 7.674711616023581e-05, + "loss": 0.0243, + "step": 21260 + }, + { + "action_loss": 0.005523163825273514, + "epoch": 19.118705035971225, + "step": 21260 + }, + { + "epoch": 19.118705035971225, + "step": 21260, + "torque_loss": 0.15490221977233887 + }, + { + "epoch": 19.118705035971225, + "force_loss": 0.007282101083546877, + "step": 21260 + }, + { + "epoch": 19.12769784172662, + "grad_norm": 0.44240134954452515, + "learning_rate": 7.672382880280413e-05, + "loss": 0.0203, + "step": 21270 + }, + { + "action_loss": 0.009951199404895306, + "epoch": 19.12769784172662, + "step": 21270 + }, + { + "epoch": 19.12769784172662, + "step": 21270, + "torque_loss": 0.13297080993652344 + }, + { + "epoch": 19.12769784172662, + "force_loss": 0.008881856687366962, + "step": 21270 + }, + { + "epoch": 19.136690647482013, + "grad_norm": 0.36340591311454773, + "learning_rate": 7.670053332737885e-05, + "loss": 0.0217, + "step": 21280 + }, + { + "action_loss": 0.002128706779330969, + "epoch": 19.136690647482013, + "step": 21280 + }, + { + "epoch": 19.136690647482013, + "step": 21280, + "torque_loss": 0.07681339234113693 + }, + { + "epoch": 19.136690647482013, + "force_loss": 0.001706802868284285, + "step": 21280 + }, + { + "epoch": 19.14568345323741, + "grad_norm": 0.4394150674343109, + "learning_rate": 7.667722974103654e-05, + "loss": 0.0172, + "step": 21290 + }, + { + "action_loss": 0.0016647170996293426, + "epoch": 19.14568345323741, + "step": 21290 + }, + { + "epoch": 19.14568345323741, + "step": 21290, + "torque_loss": 0.06562650948762894 + }, + { + "epoch": 19.14568345323741, + "force_loss": 0.0011401548981666565, + "step": 21290 + }, + { + "epoch": 19.154676258992804, + "grad_norm": 1.3382195234298706, + "learning_rate": 7.66539180508562e-05, + "loss": 0.0199, + "step": 21300 + }, + { + "action_loss": 0.009306578896939754, + "epoch": 19.154676258992804, + "step": 21300 + }, + { + "epoch": 19.154676258992804, + "step": 21300, + "torque_loss": 0.17482580244541168 + }, + { + "epoch": 19.154676258992804, + "force_loss": 0.013695008121430874, + "step": 21300 + }, + { + "epoch": 19.163669064748202, + "grad_norm": 0.5320211052894592, + "learning_rate": 7.663059826391932e-05, + "loss": 0.0246, + "step": 21310 + }, + { + "action_loss": 0.004609776195138693, + "epoch": 19.163669064748202, + "step": 21310 + }, + { + "epoch": 19.163669064748202, + "step": 21310, + "torque_loss": 0.16104266047477722 + }, + { + "epoch": 19.163669064748202, + "force_loss": 0.006079719867557287, + "step": 21310 + }, + { + "epoch": 19.172661870503596, + "grad_norm": 0.2695898413658142, + "learning_rate": 7.660727038730981e-05, + "loss": 0.0219, + "step": 21320 + }, + { + "action_loss": 0.022043129429221153, + "epoch": 19.172661870503596, + "step": 21320 + }, + { + "epoch": 19.172661870503596, + "step": 21320, + "torque_loss": 0.15773947536945343 + }, + { + "epoch": 19.172661870503596, + "force_loss": 0.0237929355353117, + "step": 21320 + }, + { + "epoch": 19.181654676258994, + "grad_norm": 0.3807615041732788, + "learning_rate": 7.65839344281141e-05, + "loss": 0.0237, + "step": 21330 + }, + { + "action_loss": 0.005357636604458094, + "epoch": 19.181654676258994, + "step": 21330 + }, + { + "epoch": 19.181654676258994, + "step": 21330, + "torque_loss": 0.13021297752857208 + }, + { + "epoch": 19.181654676258994, + "force_loss": 0.008785915561020374, + "step": 21330 + }, + { + "epoch": 19.190647482014388, + "grad_norm": 0.28463417291641235, + "learning_rate": 7.656059039342101e-05, + "loss": 0.0189, + "step": 21340 + }, + { + "action_loss": 0.009215909987688065, + "epoch": 19.190647482014388, + "step": 21340 + }, + { + "epoch": 19.190647482014388, + "step": 21340, + "torque_loss": 0.11108382791280746 + }, + { + "epoch": 19.190647482014388, + "force_loss": 0.011105313897132874, + "step": 21340 + }, + { + "epoch": 19.199640287769785, + "grad_norm": 0.33906736969947815, + "learning_rate": 7.653723829032187e-05, + "loss": 0.0216, + "step": 21350 + }, + { + "action_loss": 0.00422265287488699, + "epoch": 19.199640287769785, + "step": 21350 + }, + { + "epoch": 19.199640287769785, + "step": 21350, + "torque_loss": 0.09489398449659348 + }, + { + "epoch": 19.199640287769785, + "force_loss": 0.00896951463073492, + "step": 21350 + }, + { + "epoch": 19.20863309352518, + "grad_norm": 0.3440510630607605, + "learning_rate": 7.65138781259104e-05, + "loss": 0.0258, + "step": 21360 + }, + { + "action_loss": 0.003073754021897912, + "epoch": 19.20863309352518, + "step": 21360 + }, + { + "epoch": 19.20863309352518, + "step": 21360, + "torque_loss": 0.1115085706114769 + }, + { + "epoch": 19.20863309352518, + "force_loss": 0.003067217767238617, + "step": 21360 + }, + { + "epoch": 19.217625899280577, + "grad_norm": 0.3840801417827606, + "learning_rate": 7.649050990728279e-05, + "loss": 0.0183, + "step": 21370 + }, + { + "action_loss": 0.009511918760836124, + "epoch": 19.217625899280577, + "step": 21370 + }, + { + "epoch": 19.217625899280577, + "step": 21370, + "torque_loss": 0.1170787587761879 + }, + { + "epoch": 19.217625899280577, + "force_loss": 0.007953859865665436, + "step": 21370 + }, + { + "epoch": 19.22661870503597, + "grad_norm": 0.4349346458911896, + "learning_rate": 7.646713364153774e-05, + "loss": 0.0224, + "step": 21380 + }, + { + "action_loss": 0.00421341834589839, + "epoch": 19.22661870503597, + "step": 21380 + }, + { + "epoch": 19.22661870503597, + "step": 21380, + "torque_loss": 0.1454629749059677 + }, + { + "epoch": 19.22661870503597, + "force_loss": 0.0028723319992423058, + "step": 21380 + }, + { + "epoch": 19.235611510791365, + "grad_norm": 0.29153531789779663, + "learning_rate": 7.64437493357763e-05, + "loss": 0.0216, + "step": 21390 + }, + { + "action_loss": 0.005913767963647842, + "epoch": 19.235611510791365, + "step": 21390 + }, + { + "epoch": 19.235611510791365, + "step": 21390, + "torque_loss": 0.10143619775772095 + }, + { + "epoch": 19.235611510791365, + "force_loss": 0.006469981744885445, + "step": 21390 + }, + { + "epoch": 19.244604316546763, + "grad_norm": 0.21742607653141022, + "learning_rate": 7.642035699710202e-05, + "loss": 0.0218, + "step": 21400 + }, + { + "action_loss": 0.00501607172191143, + "epoch": 19.244604316546763, + "step": 21400 + }, + { + "epoch": 19.244604316546763, + "step": 21400, + "torque_loss": 0.14102411270141602 + }, + { + "epoch": 19.244604316546763, + "force_loss": 0.0037264544516801834, + "step": 21400 + }, + { + "epoch": 19.253597122302157, + "grad_norm": 0.39896100759506226, + "learning_rate": 7.639695663262089e-05, + "loss": 0.0197, + "step": 21410 + }, + { + "action_loss": 0.012978407554328442, + "epoch": 19.253597122302157, + "step": 21410 + }, + { + "epoch": 19.253597122302157, + "step": 21410, + "torque_loss": 0.14753873646259308 + }, + { + "epoch": 19.253597122302157, + "force_loss": 0.01547118928283453, + "step": 21410 + }, + { + "epoch": 19.262589928057555, + "grad_norm": 0.442873477935791, + "learning_rate": 7.637354824944128e-05, + "loss": 0.0293, + "step": 21420 + }, + { + "action_loss": 0.002996989758685231, + "epoch": 19.262589928057555, + "step": 21420 + }, + { + "epoch": 19.262589928057555, + "step": 21420, + "torque_loss": 0.12058514356613159 + }, + { + "epoch": 19.262589928057555, + "force_loss": 0.002383910119533539, + "step": 21420 + }, + { + "epoch": 19.27158273381295, + "grad_norm": 0.4413894712924957, + "learning_rate": 7.635013185467408e-05, + "loss": 0.018, + "step": 21430 + }, + { + "action_loss": 0.005001666955649853, + "epoch": 19.27158273381295, + "step": 21430 + }, + { + "epoch": 19.27158273381295, + "step": 21430, + "torque_loss": 0.12552034854888916 + }, + { + "epoch": 19.27158273381295, + "force_loss": 0.003579702228307724, + "step": 21430 + }, + { + "epoch": 19.280575539568346, + "grad_norm": 0.19401530921459198, + "learning_rate": 7.632670745543256e-05, + "loss": 0.0202, + "step": 21440 + }, + { + "action_loss": 0.0051520224660634995, + "epoch": 19.280575539568346, + "step": 21440 + }, + { + "epoch": 19.280575539568346, + "step": 21440, + "torque_loss": 0.10939887166023254 + }, + { + "epoch": 19.280575539568346, + "force_loss": 0.0027365863788872957, + "step": 21440 + }, + { + "epoch": 19.28956834532374, + "grad_norm": 0.3055570125579834, + "learning_rate": 7.630327505883242e-05, + "loss": 0.0233, + "step": 21450 + }, + { + "action_loss": 0.0033240392804145813, + "epoch": 19.28956834532374, + "step": 21450 + }, + { + "epoch": 19.28956834532374, + "step": 21450, + "torque_loss": 0.13942258059978485 + }, + { + "epoch": 19.28956834532374, + "force_loss": 0.002692107344046235, + "step": 21450 + }, + { + "epoch": 19.298561151079138, + "grad_norm": 0.2008047252893448, + "learning_rate": 7.627983467199182e-05, + "loss": 0.019, + "step": 21460 + }, + { + "action_loss": 0.006855081766843796, + "epoch": 19.298561151079138, + "step": 21460 + }, + { + "epoch": 19.298561151079138, + "step": 21460, + "torque_loss": 0.21656499803066254 + }, + { + "epoch": 19.298561151079138, + "force_loss": 0.007732841651886702, + "step": 21460 + }, + { + "epoch": 19.307553956834532, + "grad_norm": 1.2336971759796143, + "learning_rate": 7.625638630203132e-05, + "loss": 0.0261, + "step": 21470 + }, + { + "action_loss": 0.005216373596340418, + "epoch": 19.307553956834532, + "step": 21470 + }, + { + "epoch": 19.307553956834532, + "step": 21470, + "torque_loss": 0.1514403074979782 + }, + { + "epoch": 19.307553956834532, + "force_loss": 0.0029113886412233114, + "step": 21470 + }, + { + "epoch": 19.31654676258993, + "grad_norm": 0.30147963762283325, + "learning_rate": 7.623292995607394e-05, + "loss": 0.021, + "step": 21480 + }, + { + "action_loss": 0.0085029611364007, + "epoch": 19.31654676258993, + "step": 21480 + }, + { + "epoch": 19.31654676258993, + "step": 21480, + "torque_loss": 0.17489902675151825 + }, + { + "epoch": 19.31654676258993, + "force_loss": 0.006882034242153168, + "step": 21480 + }, + { + "epoch": 19.325539568345324, + "grad_norm": 0.4513334035873413, + "learning_rate": 7.620946564124507e-05, + "loss": 0.0217, + "step": 21490 + }, + { + "action_loss": 0.012447998858988285, + "epoch": 19.325539568345324, + "step": 21490 + }, + { + "epoch": 19.325539568345324, + "step": 21490, + "torque_loss": 0.14656271040439606 + }, + { + "epoch": 19.325539568345324, + "force_loss": 0.013763678260147572, + "step": 21490 + }, + { + "epoch": 19.334532374100718, + "grad_norm": 0.3689354956150055, + "learning_rate": 7.618599336467256e-05, + "loss": 0.0218, + "step": 21500 + }, + { + "action_loss": 0.005984982941299677, + "epoch": 19.334532374100718, + "step": 21500 + }, + { + "epoch": 19.334532374100718, + "step": 21500, + "torque_loss": 0.11968144029378891 + }, + { + "epoch": 19.334532374100718, + "force_loss": 0.007563177030533552, + "step": 21500 + }, + { + "epoch": 19.343525179856115, + "grad_norm": 0.33535292744636536, + "learning_rate": 7.616251313348666e-05, + "loss": 0.0194, + "step": 21510 + }, + { + "action_loss": 0.004599094856530428, + "epoch": 19.343525179856115, + "step": 21510 + }, + { + "epoch": 19.343525179856115, + "step": 21510, + "torque_loss": 0.14619536697864532 + }, + { + "epoch": 19.343525179856115, + "force_loss": 0.009815558791160583, + "step": 21510 + }, + { + "epoch": 19.35251798561151, + "grad_norm": 0.7966991066932678, + "learning_rate": 7.613902495482005e-05, + "loss": 0.025, + "step": 21520 + }, + { + "action_loss": 0.00572822755202651, + "epoch": 19.35251798561151, + "step": 21520 + }, + { + "epoch": 19.35251798561151, + "step": 21520, + "torque_loss": 0.12078839540481567 + }, + { + "epoch": 19.35251798561151, + "force_loss": 0.0027878370601683855, + "step": 21520 + }, + { + "epoch": 19.361510791366907, + "grad_norm": 0.34956786036491394, + "learning_rate": 7.611552883580784e-05, + "loss": 0.0193, + "step": 21530 + }, + { + "action_loss": 0.009181947447359562, + "epoch": 19.361510791366907, + "step": 21530 + }, + { + "epoch": 19.361510791366907, + "step": 21530, + "torque_loss": 0.15982826054096222 + }, + { + "epoch": 19.361510791366907, + "force_loss": 0.00671034911647439, + "step": 21530 + }, + { + "epoch": 19.3705035971223, + "grad_norm": 0.2988636791706085, + "learning_rate": 7.609202478358748e-05, + "loss": 0.022, + "step": 21540 + }, + { + "action_loss": 0.009565108455717564, + "epoch": 19.3705035971223, + "step": 21540 + }, + { + "epoch": 19.3705035971223, + "step": 21540, + "torque_loss": 0.16315078735351562 + }, + { + "epoch": 19.3705035971223, + "force_loss": 0.008048557676374912, + "step": 21540 + }, + { + "epoch": 19.3794964028777, + "grad_norm": 0.22613811492919922, + "learning_rate": 7.606851280529895e-05, + "loss": 0.0221, + "step": 21550 + }, + { + "action_loss": 0.006762331817299128, + "epoch": 19.3794964028777, + "step": 21550 + }, + { + "epoch": 19.3794964028777, + "step": 21550, + "torque_loss": 0.13161323964595795 + }, + { + "epoch": 19.3794964028777, + "force_loss": 0.006115918513387442, + "step": 21550 + }, + { + "epoch": 19.388489208633093, + "grad_norm": 0.24901631474494934, + "learning_rate": 7.604499290808449e-05, + "loss": 0.0217, + "step": 21560 + }, + { + "action_loss": 0.011465758085250854, + "epoch": 19.388489208633093, + "step": 21560 + }, + { + "epoch": 19.388489208633093, + "step": 21560, + "torque_loss": 0.10021913796663284 + }, + { + "epoch": 19.388489208633093, + "force_loss": 0.005063768941909075, + "step": 21560 + }, + { + "epoch": 19.39748201438849, + "grad_norm": 0.32080399990081787, + "learning_rate": 7.602146509908888e-05, + "loss": 0.0195, + "step": 21570 + }, + { + "action_loss": 0.0021028409246355295, + "epoch": 19.39748201438849, + "step": 21570 + }, + { + "epoch": 19.39748201438849, + "step": 21570, + "torque_loss": 0.08726945519447327 + }, + { + "epoch": 19.39748201438849, + "force_loss": 0.001828385517001152, + "step": 21570 + }, + { + "epoch": 19.406474820143885, + "grad_norm": 0.5371273159980774, + "learning_rate": 7.599792938545921e-05, + "loss": 0.027, + "step": 21580 + }, + { + "action_loss": 0.007085882127285004, + "epoch": 19.406474820143885, + "step": 21580 + }, + { + "epoch": 19.406474820143885, + "step": 21580, + "torque_loss": 0.09476127475500107 + }, + { + "epoch": 19.406474820143885, + "force_loss": 0.004616372287273407, + "step": 21580 + }, + { + "epoch": 19.415467625899282, + "grad_norm": 0.33448728919029236, + "learning_rate": 7.597438577434506e-05, + "loss": 0.0226, + "step": 21590 + }, + { + "action_loss": 0.0068463473580777645, + "epoch": 19.415467625899282, + "step": 21590 + }, + { + "epoch": 19.415467625899282, + "step": 21590, + "torque_loss": 0.15716011822223663 + }, + { + "epoch": 19.415467625899282, + "force_loss": 0.008225476369261742, + "step": 21590 + }, + { + "epoch": 19.424460431654676, + "grad_norm": 0.36186304688453674, + "learning_rate": 7.595083427289831e-05, + "loss": 0.0212, + "step": 21600 + }, + { + "action_loss": 0.0033750608563423157, + "epoch": 19.424460431654676, + "step": 21600 + }, + { + "epoch": 19.424460431654676, + "step": 21600, + "torque_loss": 0.1819905787706375 + }, + { + "epoch": 19.424460431654676, + "force_loss": 0.004477418027818203, + "step": 21600 + }, + { + "epoch": 19.43345323741007, + "grad_norm": 0.19142687320709229, + "learning_rate": 7.59272748882733e-05, + "loss": 0.0191, + "step": 21610 + }, + { + "action_loss": 0.022279134020209312, + "epoch": 19.43345323741007, + "step": 21610 + }, + { + "epoch": 19.43345323741007, + "step": 21610, + "torque_loss": 0.18531103432178497 + }, + { + "epoch": 19.43345323741007, + "force_loss": 0.025092462077736855, + "step": 21610 + }, + { + "epoch": 19.442446043165468, + "grad_norm": 0.20963843166828156, + "learning_rate": 7.590370762762675e-05, + "loss": 0.0218, + "step": 21620 + }, + { + "action_loss": 0.003140711458399892, + "epoch": 19.442446043165468, + "step": 21620 + }, + { + "epoch": 19.442446043165468, + "step": 21620, + "torque_loss": 0.1628819704055786 + }, + { + "epoch": 19.442446043165468, + "force_loss": 0.002686604857444763, + "step": 21620 + }, + { + "epoch": 19.451438848920862, + "grad_norm": 0.5249615907669067, + "learning_rate": 7.588013249811777e-05, + "loss": 0.0273, + "step": 21630 + }, + { + "action_loss": 0.006182766053825617, + "epoch": 19.451438848920862, + "step": 21630 + }, + { + "epoch": 19.451438848920862, + "step": 21630, + "torque_loss": 0.13689465820789337 + }, + { + "epoch": 19.451438848920862, + "force_loss": 0.004190466832369566, + "step": 21630 + }, + { + "epoch": 19.46043165467626, + "grad_norm": 0.2777141034603119, + "learning_rate": 7.585654950690786e-05, + "loss": 0.0217, + "step": 21640 + }, + { + "action_loss": 0.006458205636590719, + "epoch": 19.46043165467626, + "step": 21640 + }, + { + "epoch": 19.46043165467626, + "step": 21640, + "torque_loss": 0.13719987869262695 + }, + { + "epoch": 19.46043165467626, + "force_loss": 0.00828318577259779, + "step": 21640 + }, + { + "epoch": 19.469424460431654, + "grad_norm": 0.45955348014831543, + "learning_rate": 7.583295866116091e-05, + "loss": 0.0236, + "step": 21650 + }, + { + "action_loss": 0.007151912897825241, + "epoch": 19.469424460431654, + "step": 21650 + }, + { + "epoch": 19.469424460431654, + "step": 21650, + "torque_loss": 0.1097402349114418 + }, + { + "epoch": 19.469424460431654, + "force_loss": 0.004806819837540388, + "step": 21650 + }, + { + "epoch": 19.47841726618705, + "grad_norm": 0.2692463994026184, + "learning_rate": 7.580935996804321e-05, + "loss": 0.0233, + "step": 21660 + }, + { + "action_loss": 0.018180709332227707, + "epoch": 19.47841726618705, + "step": 21660 + }, + { + "epoch": 19.47841726618705, + "step": 21660, + "torque_loss": 0.14763160049915314 + }, + { + "epoch": 19.47841726618705, + "force_loss": 0.014515109360218048, + "step": 21660 + }, + { + "epoch": 19.487410071942445, + "grad_norm": 0.22884929180145264, + "learning_rate": 7.57857534347234e-05, + "loss": 0.0232, + "step": 21670 + }, + { + "action_loss": 0.004157568793743849, + "epoch": 19.487410071942445, + "step": 21670 + }, + { + "epoch": 19.487410071942445, + "step": 21670, + "torque_loss": 0.12420075386762619 + }, + { + "epoch": 19.487410071942445, + "force_loss": 0.0033161770552396774, + "step": 21670 + }, + { + "epoch": 19.496402877697843, + "grad_norm": 0.4245760142803192, + "learning_rate": 7.576213906837254e-05, + "loss": 0.0211, + "step": 21680 + }, + { + "action_loss": 0.020072678104043007, + "epoch": 19.496402877697843, + "step": 21680 + }, + { + "epoch": 19.496402877697843, + "step": 21680, + "torque_loss": 0.18354733288288116 + }, + { + "epoch": 19.496402877697843, + "force_loss": 0.019310100004076958, + "step": 21680 + }, + { + "epoch": 19.505395683453237, + "grad_norm": 0.2859075665473938, + "learning_rate": 7.573851687616403e-05, + "loss": 0.0231, + "step": 21690 + }, + { + "action_loss": 0.008961736224591732, + "epoch": 19.505395683453237, + "step": 21690 + }, + { + "epoch": 19.505395683453237, + "step": 21690, + "torque_loss": 0.14467954635620117 + }, + { + "epoch": 19.505395683453237, + "force_loss": 0.014233012683689594, + "step": 21690 + }, + { + "epoch": 19.514388489208635, + "grad_norm": 0.22410668432712555, + "learning_rate": 7.571488686527368e-05, + "loss": 0.019, + "step": 21700 + }, + { + "action_loss": 0.005172547418624163, + "epoch": 19.514388489208635, + "step": 21700 + }, + { + "epoch": 19.514388489208635, + "step": 21700, + "torque_loss": 0.13156788051128387 + }, + { + "epoch": 19.514388489208635, + "force_loss": 0.009319930337369442, + "step": 21700 + }, + { + "epoch": 19.52338129496403, + "grad_norm": 0.37554094195365906, + "learning_rate": 7.569124904287968e-05, + "loss": 0.0195, + "step": 21710 + }, + { + "action_loss": 0.009185795672237873, + "epoch": 19.52338129496403, + "step": 21710 + }, + { + "epoch": 19.52338129496403, + "step": 21710, + "torque_loss": 0.15854012966156006 + }, + { + "epoch": 19.52338129496403, + "force_loss": 0.009237014688551426, + "step": 21710 + }, + { + "epoch": 19.532374100719423, + "grad_norm": 0.4086345136165619, + "learning_rate": 7.566760341616254e-05, + "loss": 0.0225, + "step": 21720 + }, + { + "action_loss": 0.0032329161185771227, + "epoch": 19.532374100719423, + "step": 21720 + }, + { + "epoch": 19.532374100719423, + "step": 21720, + "torque_loss": 0.12092572450637817 + }, + { + "epoch": 19.532374100719423, + "force_loss": 0.002039354993030429, + "step": 21720 + }, + { + "epoch": 19.54136690647482, + "grad_norm": 0.3729719817638397, + "learning_rate": 7.564394999230519e-05, + "loss": 0.0246, + "step": 21730 + }, + { + "action_loss": 0.0032205404713749886, + "epoch": 19.54136690647482, + "step": 21730 + }, + { + "epoch": 19.54136690647482, + "step": 21730, + "torque_loss": 0.11641553789377213 + }, + { + "epoch": 19.54136690647482, + "force_loss": 0.002129241591319442, + "step": 21730 + }, + { + "epoch": 19.550359712230215, + "grad_norm": 0.2299564927816391, + "learning_rate": 7.562028877849294e-05, + "loss": 0.0183, + "step": 21740 + }, + { + "action_loss": 0.013398644514381886, + "epoch": 19.550359712230215, + "step": 21740 + }, + { + "epoch": 19.550359712230215, + "step": 21740, + "torque_loss": 0.1278691589832306 + }, + { + "epoch": 19.550359712230215, + "force_loss": 0.011579960584640503, + "step": 21740 + }, + { + "epoch": 19.559352517985612, + "grad_norm": 0.3825627267360687, + "learning_rate": 7.559661978191341e-05, + "loss": 0.0215, + "step": 21750 + }, + { + "action_loss": 0.011062725447118282, + "epoch": 19.559352517985612, + "step": 21750 + }, + { + "epoch": 19.559352517985612, + "step": 21750, + "torque_loss": 0.09550317376852036 + }, + { + "epoch": 19.559352517985612, + "force_loss": 0.007611427456140518, + "step": 21750 + }, + { + "epoch": 19.568345323741006, + "grad_norm": 0.3207350969314575, + "learning_rate": 7.557294300975664e-05, + "loss": 0.0206, + "step": 21760 + }, + { + "action_loss": 0.003980598878115416, + "epoch": 19.568345323741006, + "step": 21760 + }, + { + "epoch": 19.568345323741006, + "step": 21760, + "torque_loss": 0.11463906615972519 + }, + { + "epoch": 19.568345323741006, + "force_loss": 0.0025576932821422815, + "step": 21760 + }, + { + "epoch": 19.577338129496404, + "grad_norm": 0.4431993067264557, + "learning_rate": 7.554925846921499e-05, + "loss": 0.0205, + "step": 21770 + }, + { + "action_loss": 0.0061083766631782055, + "epoch": 19.577338129496404, + "step": 21770 + }, + { + "epoch": 19.577338129496404, + "step": 21770, + "torque_loss": 0.12348593026399612 + }, + { + "epoch": 19.577338129496404, + "force_loss": 0.003276041941717267, + "step": 21770 + }, + { + "epoch": 19.586330935251798, + "grad_norm": 0.4156457781791687, + "learning_rate": 7.552556616748321e-05, + "loss": 0.0208, + "step": 21780 + }, + { + "action_loss": 0.008796994574368, + "epoch": 19.586330935251798, + "step": 21780 + }, + { + "epoch": 19.586330935251798, + "step": 21780, + "torque_loss": 0.13290178775787354 + }, + { + "epoch": 19.586330935251798, + "force_loss": 0.012139275670051575, + "step": 21780 + }, + { + "epoch": 19.595323741007196, + "grad_norm": 0.28027454018592834, + "learning_rate": 7.550186611175838e-05, + "loss": 0.0236, + "step": 21790 + }, + { + "action_loss": 0.003140410641208291, + "epoch": 19.595323741007196, + "step": 21790 + }, + { + "epoch": 19.595323741007196, + "step": 21790, + "torque_loss": 0.12866981327533722 + }, + { + "epoch": 19.595323741007196, + "force_loss": 0.0019691220950335264, + "step": 21790 + }, + { + "epoch": 19.60431654676259, + "grad_norm": 0.3398789167404175, + "learning_rate": 7.547815830923998e-05, + "loss": 0.0187, + "step": 21800 + }, + { + "action_loss": 0.008160281926393509, + "epoch": 19.60431654676259, + "step": 21800 + }, + { + "epoch": 19.60431654676259, + "step": 21800, + "torque_loss": 0.13390092551708221 + }, + { + "epoch": 19.60431654676259, + "force_loss": 0.004792905878275633, + "step": 21800 + }, + { + "epoch": 19.613309352517987, + "grad_norm": 0.43377819657325745, + "learning_rate": 7.54544427671298e-05, + "loss": 0.0229, + "step": 21810 + }, + { + "action_loss": 0.011805598624050617, + "epoch": 19.613309352517987, + "step": 21810 + }, + { + "epoch": 19.613309352517987, + "step": 21810, + "torque_loss": 0.14656482636928558 + }, + { + "epoch": 19.613309352517987, + "force_loss": 0.013147692196071148, + "step": 21810 + }, + { + "epoch": 19.62230215827338, + "grad_norm": 0.4121027886867523, + "learning_rate": 7.543071949263198e-05, + "loss": 0.02, + "step": 21820 + }, + { + "action_loss": 0.0031295297667384148, + "epoch": 19.62230215827338, + "step": 21820 + }, + { + "epoch": 19.62230215827338, + "step": 21820, + "torque_loss": 0.11127686500549316 + }, + { + "epoch": 19.62230215827338, + "force_loss": 0.002735495800152421, + "step": 21820 + }, + { + "epoch": 19.631294964028775, + "grad_norm": 0.331376850605011, + "learning_rate": 7.540698849295305e-05, + "loss": 0.0178, + "step": 21830 + }, + { + "action_loss": 0.006875521969050169, + "epoch": 19.631294964028775, + "step": 21830 + }, + { + "epoch": 19.631294964028775, + "step": 21830, + "torque_loss": 0.1267344206571579 + }, + { + "epoch": 19.631294964028775, + "force_loss": 0.007150923367589712, + "step": 21830 + }, + { + "epoch": 19.640287769784173, + "grad_norm": 0.20037895441055298, + "learning_rate": 7.538324977530183e-05, + "loss": 0.0203, + "step": 21840 + }, + { + "action_loss": 0.021925918757915497, + "epoch": 19.640287769784173, + "step": 21840 + }, + { + "epoch": 19.640287769784173, + "step": 21840, + "torque_loss": 0.20452392101287842 + }, + { + "epoch": 19.640287769784173, + "force_loss": 0.03243473544716835, + "step": 21840 + }, + { + "epoch": 19.649280575539567, + "grad_norm": 0.3448210656642914, + "learning_rate": 7.535950334688955e-05, + "loss": 0.0243, + "step": 21850 + }, + { + "action_loss": 0.00136956840287894, + "epoch": 19.649280575539567, + "step": 21850 + }, + { + "epoch": 19.649280575539567, + "step": 21850, + "torque_loss": 0.07476025819778442 + }, + { + "epoch": 19.649280575539567, + "force_loss": 0.001339944894425571, + "step": 21850 + }, + { + "epoch": 19.658273381294965, + "grad_norm": 0.18949294090270996, + "learning_rate": 7.533574921492972e-05, + "loss": 0.0214, + "step": 21860 + }, + { + "action_loss": 0.0027416015509516, + "epoch": 19.658273381294965, + "step": 21860 + }, + { + "epoch": 19.658273381294965, + "step": 21860, + "torque_loss": 0.11827247589826584 + }, + { + "epoch": 19.658273381294965, + "force_loss": 0.002873457269743085, + "step": 21860 + }, + { + "epoch": 19.66726618705036, + "grad_norm": 0.20143093168735504, + "learning_rate": 7.531198738663824e-05, + "loss": 0.0191, + "step": 21870 + }, + { + "action_loss": 0.005855960305780172, + "epoch": 19.66726618705036, + "step": 21870 + }, + { + "epoch": 19.66726618705036, + "step": 21870, + "torque_loss": 0.17841953039169312 + }, + { + "epoch": 19.66726618705036, + "force_loss": 0.006000491324812174, + "step": 21870 + }, + { + "epoch": 19.676258992805757, + "grad_norm": 0.20106497406959534, + "learning_rate": 7.528821786923333e-05, + "loss": 0.0204, + "step": 21880 + }, + { + "action_loss": 0.008288371376693249, + "epoch": 19.676258992805757, + "step": 21880 + }, + { + "epoch": 19.676258992805757, + "step": 21880, + "torque_loss": 0.13893398642539978 + }, + { + "epoch": 19.676258992805757, + "force_loss": 0.004750822205096483, + "step": 21880 + }, + { + "epoch": 19.68525179856115, + "grad_norm": 0.3063576817512512, + "learning_rate": 7.52644406699355e-05, + "loss": 0.0227, + "step": 21890 + }, + { + "action_loss": 0.005023397039622068, + "epoch": 19.68525179856115, + "step": 21890 + }, + { + "epoch": 19.68525179856115, + "step": 21890, + "torque_loss": 0.15836508572101593 + }, + { + "epoch": 19.68525179856115, + "force_loss": 0.0050356886349618435, + "step": 21890 + }, + { + "epoch": 19.694244604316548, + "grad_norm": 0.20005039870738983, + "learning_rate": 7.524065579596766e-05, + "loss": 0.0205, + "step": 21900 + }, + { + "action_loss": 0.003932314459234476, + "epoch": 19.694244604316548, + "step": 21900 + }, + { + "epoch": 19.694244604316548, + "step": 21900, + "torque_loss": 0.12649227678775787 + }, + { + "epoch": 19.694244604316548, + "force_loss": 0.0038639113772660494, + "step": 21900 + }, + { + "epoch": 19.703237410071942, + "grad_norm": 0.20406463742256165, + "learning_rate": 7.521686325455506e-05, + "loss": 0.0191, + "step": 21910 + }, + { + "action_loss": 0.007570803165435791, + "epoch": 19.703237410071942, + "step": 21910 + }, + { + "epoch": 19.703237410071942, + "step": 21910, + "torque_loss": 0.16327345371246338 + }, + { + "epoch": 19.703237410071942, + "force_loss": 0.004229263868182898, + "step": 21910 + }, + { + "epoch": 19.71223021582734, + "grad_norm": 0.5840118527412415, + "learning_rate": 7.51930630529252e-05, + "loss": 0.0217, + "step": 21920 + }, + { + "action_loss": 0.005566589534282684, + "epoch": 19.71223021582734, + "step": 21920 + }, + { + "epoch": 19.71223021582734, + "step": 21920, + "torque_loss": 0.15318508446216583 + }, + { + "epoch": 19.71223021582734, + "force_loss": 0.008140143938362598, + "step": 21920 + }, + { + "epoch": 19.721223021582734, + "grad_norm": 0.30791956186294556, + "learning_rate": 7.516925519830797e-05, + "loss": 0.0196, + "step": 21930 + }, + { + "action_loss": 0.010097134858369827, + "epoch": 19.721223021582734, + "step": 21930 + }, + { + "epoch": 19.721223021582734, + "step": 21930, + "torque_loss": 0.16301332414150238 + }, + { + "epoch": 19.721223021582734, + "force_loss": 0.011774941347539425, + "step": 21930 + }, + { + "epoch": 19.730215827338128, + "grad_norm": 0.29583054780960083, + "learning_rate": 7.514543969793557e-05, + "loss": 0.0213, + "step": 21940 + }, + { + "action_loss": 0.0032733362168073654, + "epoch": 19.730215827338128, + "step": 21940 + }, + { + "epoch": 19.730215827338128, + "step": 21940, + "torque_loss": 0.11166312545537949 + }, + { + "epoch": 19.730215827338128, + "force_loss": 0.0047770277597010136, + "step": 21940 + }, + { + "epoch": 19.739208633093526, + "grad_norm": 0.6144985556602478, + "learning_rate": 7.512161655904251e-05, + "loss": 0.0221, + "step": 21950 + }, + { + "action_loss": 0.005275297444313765, + "epoch": 19.739208633093526, + "step": 21950 + }, + { + "epoch": 19.739208633093526, + "step": 21950, + "torque_loss": 0.13560757040977478 + }, + { + "epoch": 19.739208633093526, + "force_loss": 0.004693909082561731, + "step": 21950 + }, + { + "epoch": 19.74820143884892, + "grad_norm": 0.3211604356765747, + "learning_rate": 7.509778578886563e-05, + "loss": 0.0218, + "step": 21960 + }, + { + "action_loss": 0.003604740835726261, + "epoch": 19.74820143884892, + "step": 21960 + }, + { + "epoch": 19.74820143884892, + "step": 21960, + "torque_loss": 0.11745858192443848 + }, + { + "epoch": 19.74820143884892, + "force_loss": 0.006334047764539719, + "step": 21960 + }, + { + "epoch": 19.757194244604317, + "grad_norm": 0.71286940574646, + "learning_rate": 7.507394739464412e-05, + "loss": 0.0221, + "step": 21970 + }, + { + "action_loss": 0.004232177510857582, + "epoch": 19.757194244604317, + "step": 21970 + }, + { + "epoch": 19.757194244604317, + "step": 21970, + "torque_loss": 0.09373342990875244 + }, + { + "epoch": 19.757194244604317, + "force_loss": 0.006249606609344482, + "step": 21970 + }, + { + "epoch": 19.76618705035971, + "grad_norm": 0.7615202069282532, + "learning_rate": 7.50501013836194e-05, + "loss": 0.0242, + "step": 21980 + }, + { + "action_loss": 0.004300723783671856, + "epoch": 19.76618705035971, + "step": 21980 + }, + { + "epoch": 19.76618705035971, + "step": 21980, + "torque_loss": 0.1120564416050911 + }, + { + "epoch": 19.76618705035971, + "force_loss": 0.004500760231167078, + "step": 21980 + }, + { + "epoch": 19.77517985611511, + "grad_norm": 0.5227411389350891, + "learning_rate": 7.50262477630353e-05, + "loss": 0.025, + "step": 21990 + }, + { + "action_loss": 0.01617811806499958, + "epoch": 19.77517985611511, + "step": 21990 + }, + { + "epoch": 19.77517985611511, + "step": 21990, + "torque_loss": 0.11352749913930893 + }, + { + "epoch": 19.77517985611511, + "force_loss": 0.009329590015113354, + "step": 21990 + }, + { + "epoch": 19.784172661870503, + "grad_norm": 0.35520029067993164, + "learning_rate": 7.500238654013794e-05, + "loss": 0.0248, + "step": 22000 + }, + { + "action_loss": 0.0038139354437589645, + "epoch": 19.784172661870503, + "step": 22000 + }, + { + "epoch": 19.784172661870503, + "step": 22000, + "torque_loss": 0.12948231399059296 + }, + { + "epoch": 19.784172661870503, + "force_loss": 0.0033348749857395887, + "step": 22000 + }, + { + "epoch": 19.7931654676259, + "grad_norm": 0.1954745501279831, + "learning_rate": 7.497851772217566e-05, + "loss": 0.0203, + "step": 22010 + }, + { + "action_loss": 0.005455614533275366, + "epoch": 19.7931654676259, + "step": 22010 + }, + { + "epoch": 19.7931654676259, + "step": 22010, + "torque_loss": 0.15897557139396667 + }, + { + "epoch": 19.7931654676259, + "force_loss": 0.003561886725947261, + "step": 22010 + }, + { + "epoch": 19.802158273381295, + "grad_norm": 0.2572297751903534, + "learning_rate": 7.495464131639924e-05, + "loss": 0.0195, + "step": 22020 + }, + { + "action_loss": 0.007881480269134045, + "epoch": 19.802158273381295, + "step": 22020 + }, + { + "epoch": 19.802158273381295, + "step": 22020, + "torque_loss": 0.0848357081413269 + }, + { + "epoch": 19.802158273381295, + "force_loss": 0.005228835623711348, + "step": 22020 + }, + { + "epoch": 19.81115107913669, + "grad_norm": 0.5305858850479126, + "learning_rate": 7.493075733006166e-05, + "loss": 0.0214, + "step": 22030 + }, + { + "action_loss": 0.0029701683670282364, + "epoch": 19.81115107913669, + "step": 22030 + }, + { + "epoch": 19.81115107913669, + "step": 22030, + "torque_loss": 0.1005556508898735 + }, + { + "epoch": 19.81115107913669, + "force_loss": 0.006433894857764244, + "step": 22030 + }, + { + "epoch": 19.820143884892087, + "grad_norm": 0.33326831459999084, + "learning_rate": 7.490686577041828e-05, + "loss": 0.0205, + "step": 22040 + }, + { + "action_loss": 0.005682650487869978, + "epoch": 19.820143884892087, + "step": 22040 + }, + { + "epoch": 19.820143884892087, + "step": 22040, + "torque_loss": 0.17791688442230225 + }, + { + "epoch": 19.820143884892087, + "force_loss": 0.003731469390913844, + "step": 22040 + }, + { + "epoch": 19.82913669064748, + "grad_norm": 0.33978524804115295, + "learning_rate": 7.488296664472668e-05, + "loss": 0.0282, + "step": 22050 + }, + { + "action_loss": 0.007012306246906519, + "epoch": 19.82913669064748, + "step": 22050 + }, + { + "epoch": 19.82913669064748, + "step": 22050, + "torque_loss": 0.09265895932912827 + }, + { + "epoch": 19.82913669064748, + "force_loss": 0.00769916782155633, + "step": 22050 + }, + { + "epoch": 19.83812949640288, + "grad_norm": 0.5150094032287598, + "learning_rate": 7.485905996024682e-05, + "loss": 0.0228, + "step": 22060 + }, + { + "action_loss": 0.006248244550079107, + "epoch": 19.83812949640288, + "step": 22060 + }, + { + "epoch": 19.83812949640288, + "step": 22060, + "torque_loss": 0.19567523896694183 + }, + { + "epoch": 19.83812949640288, + "force_loss": 0.004354776348918676, + "step": 22060 + }, + { + "epoch": 19.847122302158272, + "grad_norm": 0.5206131339073181, + "learning_rate": 7.483514572424093e-05, + "loss": 0.0199, + "step": 22070 + }, + { + "action_loss": 0.010819584131240845, + "epoch": 19.847122302158272, + "step": 22070 + }, + { + "epoch": 19.847122302158272, + "step": 22070, + "torque_loss": 0.08632185310125351 + }, + { + "epoch": 19.847122302158272, + "force_loss": 0.013305111788213253, + "step": 22070 + }, + { + "epoch": 19.85611510791367, + "grad_norm": 0.27759236097335815, + "learning_rate": 7.481122394397349e-05, + "loss": 0.0197, + "step": 22080 + }, + { + "action_loss": 0.004525866359472275, + "epoch": 19.85611510791367, + "step": 22080 + }, + { + "epoch": 19.85611510791367, + "step": 22080, + "torque_loss": 0.14517642557621002 + }, + { + "epoch": 19.85611510791367, + "force_loss": 0.0021787364967167377, + "step": 22080 + }, + { + "epoch": 19.865107913669064, + "grad_norm": 0.1399390548467636, + "learning_rate": 7.478729462671131e-05, + "loss": 0.0193, + "step": 22090 + }, + { + "action_loss": 0.010387607850134373, + "epoch": 19.865107913669064, + "step": 22090 + }, + { + "epoch": 19.865107913669064, + "step": 22090, + "torque_loss": 0.16290795803070068 + }, + { + "epoch": 19.865107913669064, + "force_loss": 0.011659813113510609, + "step": 22090 + }, + { + "epoch": 19.87410071942446, + "grad_norm": 0.36216187477111816, + "learning_rate": 7.47633577797235e-05, + "loss": 0.0264, + "step": 22100 + }, + { + "action_loss": 0.003574792528524995, + "epoch": 19.87410071942446, + "step": 22100 + }, + { + "epoch": 19.87410071942446, + "step": 22100, + "torque_loss": 0.12696026265621185 + }, + { + "epoch": 19.87410071942446, + "force_loss": 0.003652961691841483, + "step": 22100 + }, + { + "epoch": 19.883093525179856, + "grad_norm": 0.46038782596588135, + "learning_rate": 7.473941341028144e-05, + "loss": 0.0198, + "step": 22110 + }, + { + "action_loss": 0.0029956044163554907, + "epoch": 19.883093525179856, + "step": 22110 + }, + { + "epoch": 19.883093525179856, + "step": 22110, + "torque_loss": 0.14681954681873322 + }, + { + "epoch": 19.883093525179856, + "force_loss": 0.0023136690724641085, + "step": 22110 + }, + { + "epoch": 19.892086330935253, + "grad_norm": 0.23139101266860962, + "learning_rate": 7.471546152565879e-05, + "loss": 0.0189, + "step": 22120 + }, + { + "action_loss": 0.020397908985614777, + "epoch": 19.892086330935253, + "step": 22120 + }, + { + "epoch": 19.892086330935253, + "step": 22120, + "torque_loss": 0.1635989099740982 + }, + { + "epoch": 19.892086330935253, + "force_loss": 0.03053855150938034, + "step": 22120 + }, + { + "epoch": 19.901079136690647, + "grad_norm": 0.22430942952632904, + "learning_rate": 7.46915021331315e-05, + "loss": 0.0258, + "step": 22130 + }, + { + "action_loss": 0.0238180011510849, + "epoch": 19.901079136690647, + "step": 22130 + }, + { + "epoch": 19.901079136690647, + "step": 22130, + "torque_loss": 0.13356070220470428 + }, + { + "epoch": 19.901079136690647, + "force_loss": 0.04048687219619751, + "step": 22130 + }, + { + "epoch": 19.91007194244604, + "grad_norm": 0.2783410847187042, + "learning_rate": 7.466753523997778e-05, + "loss": 0.0242, + "step": 22140 + }, + { + "action_loss": 0.008494014851748943, + "epoch": 19.91007194244604, + "step": 22140 + }, + { + "epoch": 19.91007194244604, + "step": 22140, + "torque_loss": 0.13085371255874634 + }, + { + "epoch": 19.91007194244604, + "force_loss": 0.0026112135965377092, + "step": 22140 + }, + { + "epoch": 19.91906474820144, + "grad_norm": 0.4878132939338684, + "learning_rate": 7.464356085347819e-05, + "loss": 0.0251, + "step": 22150 + }, + { + "action_loss": 0.004474810790270567, + "epoch": 19.91906474820144, + "step": 22150 + }, + { + "epoch": 19.91906474820144, + "step": 22150, + "torque_loss": 0.13291238248348236 + }, + { + "epoch": 19.91906474820144, + "force_loss": 0.005239900201559067, + "step": 22150 + }, + { + "epoch": 19.928057553956833, + "grad_norm": 0.3961816430091858, + "learning_rate": 7.461957898091548e-05, + "loss": 0.0224, + "step": 22160 + }, + { + "action_loss": 0.0038818896282464266, + "epoch": 19.928057553956833, + "step": 22160 + }, + { + "epoch": 19.928057553956833, + "step": 22160, + "torque_loss": 0.12886212766170502 + }, + { + "epoch": 19.928057553956833, + "force_loss": 0.0030095858965069056, + "step": 22160 + }, + { + "epoch": 19.93705035971223, + "grad_norm": 0.21624428033828735, + "learning_rate": 7.459558962957473e-05, + "loss": 0.0223, + "step": 22170 + }, + { + "action_loss": 0.0031913446728140116, + "epoch": 19.93705035971223, + "step": 22170 + }, + { + "epoch": 19.93705035971223, + "step": 22170, + "torque_loss": 0.1169213280081749 + }, + { + "epoch": 19.93705035971223, + "force_loss": 0.0029216452967375517, + "step": 22170 + }, + { + "epoch": 19.946043165467625, + "grad_norm": 0.4567829668521881, + "learning_rate": 7.457159280674326e-05, + "loss": 0.0214, + "step": 22180 + }, + { + "action_loss": 0.006738933268934488, + "epoch": 19.946043165467625, + "step": 22180 + }, + { + "epoch": 19.946043165467625, + "step": 22180, + "torque_loss": 0.0989663302898407 + }, + { + "epoch": 19.946043165467625, + "force_loss": 0.007388899568468332, + "step": 22180 + }, + { + "epoch": 19.955035971223023, + "grad_norm": 0.4367087781429291, + "learning_rate": 7.454758851971066e-05, + "loss": 0.0237, + "step": 22190 + }, + { + "action_loss": 0.004280680324882269, + "epoch": 19.955035971223023, + "step": 22190 + }, + { + "epoch": 19.955035971223023, + "step": 22190, + "torque_loss": 0.1810615062713623 + }, + { + "epoch": 19.955035971223023, + "force_loss": 0.004259243607521057, + "step": 22190 + }, + { + "epoch": 19.964028776978417, + "grad_norm": 0.21038062870502472, + "learning_rate": 7.45235767757688e-05, + "loss": 0.0225, + "step": 22200 + }, + { + "action_loss": 0.0524432547390461, + "epoch": 19.964028776978417, + "step": 22200 + }, + { + "epoch": 19.964028776978417, + "step": 22200, + "torque_loss": 0.20673634111881256 + }, + { + "epoch": 19.964028776978417, + "force_loss": 0.051325708627700806, + "step": 22200 + }, + { + "epoch": 19.973021582733814, + "grad_norm": 0.6872680187225342, + "learning_rate": 7.449955758221183e-05, + "loss": 0.0296, + "step": 22210 + }, + { + "action_loss": 0.010176043026149273, + "epoch": 19.973021582733814, + "step": 22210 + }, + { + "epoch": 19.973021582733814, + "step": 22210, + "torque_loss": 0.16881322860717773 + }, + { + "epoch": 19.973021582733814, + "force_loss": 0.011119500733911991, + "step": 22210 + }, + { + "epoch": 19.98201438848921, + "grad_norm": 0.2892797291278839, + "learning_rate": 7.447553094633615e-05, + "loss": 0.0207, + "step": 22220 + }, + { + "action_loss": 0.010465911589562893, + "epoch": 19.98201438848921, + "step": 22220 + }, + { + "epoch": 19.98201438848921, + "step": 22220, + "torque_loss": 0.11822440475225449 + }, + { + "epoch": 19.98201438848921, + "force_loss": 0.010354467667639256, + "step": 22220 + }, + { + "epoch": 19.991007194244606, + "grad_norm": 0.5477696061134338, + "learning_rate": 7.445149687544039e-05, + "loss": 0.0216, + "step": 22230 + }, + { + "action_loss": 0.009246482513844967, + "epoch": 19.991007194244606, + "step": 22230 + }, + { + "epoch": 19.991007194244606, + "step": 22230, + "torque_loss": 0.1304655373096466 + }, + { + "epoch": 19.991007194244606, + "force_loss": 0.006503938231617212, + "step": 22230 + }, + { + "epoch": 20.0, + "grad_norm": 0.24211806058883667, + "learning_rate": 7.44274553768255e-05, + "loss": 0.0224, + "step": 22240 + }, + { + "action_loss": 0.015477842651307583, + "epoch": 20.0, + "step": 22240 + }, + { + "epoch": 20.0, + "step": 22240, + "torque_loss": 0.13669709861278534 + }, + { + "epoch": 20.0, + "force_loss": 0.019504958763718605, + "step": 22240 + }, + { + "epoch": 20.008992805755394, + "grad_norm": 0.35836857557296753, + "learning_rate": 7.440340645779464e-05, + "loss": 0.0225, + "step": 22250 + }, + { + "action_loss": 0.008046984672546387, + "epoch": 20.008992805755394, + "step": 22250 + }, + { + "epoch": 20.008992805755394, + "step": 22250, + "torque_loss": 0.12169583886861801 + }, + { + "epoch": 20.008992805755394, + "force_loss": 0.004948172718286514, + "step": 22250 + }, + { + "epoch": 20.01798561151079, + "grad_norm": 0.23541094362735748, + "learning_rate": 7.437935012565322e-05, + "loss": 0.0219, + "step": 22260 + }, + { + "action_loss": 0.003909599035978317, + "epoch": 20.01798561151079, + "step": 22260 + }, + { + "epoch": 20.01798561151079, + "step": 22260, + "torque_loss": 0.08229057490825653 + }, + { + "epoch": 20.01798561151079, + "force_loss": 0.005453682038933039, + "step": 22260 + }, + { + "epoch": 20.026978417266186, + "grad_norm": 0.23424990475177765, + "learning_rate": 7.435528638770893e-05, + "loss": 0.0234, + "step": 22270 + }, + { + "action_loss": 0.007413262501358986, + "epoch": 20.026978417266186, + "step": 22270 + }, + { + "epoch": 20.026978417266186, + "step": 22270, + "torque_loss": 0.1534394770860672 + }, + { + "epoch": 20.026978417266186, + "force_loss": 0.003463149070739746, + "step": 22270 + }, + { + "epoch": 20.035971223021583, + "grad_norm": 0.3123205602169037, + "learning_rate": 7.433121525127171e-05, + "loss": 0.0284, + "step": 22280 + }, + { + "action_loss": 0.011486530303955078, + "epoch": 20.035971223021583, + "step": 22280 + }, + { + "epoch": 20.035971223021583, + "step": 22280, + "torque_loss": 0.14047278463840485 + }, + { + "epoch": 20.035971223021583, + "force_loss": 0.00781856756657362, + "step": 22280 + }, + { + "epoch": 20.044964028776977, + "grad_norm": 0.29927730560302734, + "learning_rate": 7.430713672365371e-05, + "loss": 0.0341, + "step": 22290 + }, + { + "action_loss": 0.0031803464516997337, + "epoch": 20.044964028776977, + "step": 22290 + }, + { + "epoch": 20.044964028776977, + "step": 22290, + "torque_loss": 0.12818396091461182 + }, + { + "epoch": 20.044964028776977, + "force_loss": 0.003122147871181369, + "step": 22290 + }, + { + "epoch": 20.053956834532375, + "grad_norm": 0.2114167958498001, + "learning_rate": 7.428305081216938e-05, + "loss": 0.023, + "step": 22300 + }, + { + "action_loss": 0.00613408163189888, + "epoch": 20.053956834532375, + "step": 22300 + }, + { + "epoch": 20.053956834532375, + "step": 22300, + "torque_loss": 0.14041538536548615 + }, + { + "epoch": 20.053956834532375, + "force_loss": 0.005299183074384928, + "step": 22300 + }, + { + "epoch": 20.06294964028777, + "grad_norm": 0.38932931423187256, + "learning_rate": 7.425895752413536e-05, + "loss": 0.0236, + "step": 22310 + }, + { + "action_loss": 0.017816336825489998, + "epoch": 20.06294964028777, + "step": 22310 + }, + { + "epoch": 20.06294964028777, + "step": 22310, + "torque_loss": 0.10648417472839355 + }, + { + "epoch": 20.06294964028777, + "force_loss": 0.03322720527648926, + "step": 22310 + }, + { + "epoch": 20.071942446043167, + "grad_norm": 0.2532728910446167, + "learning_rate": 7.423485686687057e-05, + "loss": 0.0274, + "step": 22320 + }, + { + "action_loss": 0.007788743823766708, + "epoch": 20.071942446043167, + "step": 22320 + }, + { + "epoch": 20.071942446043167, + "step": 22320, + "torque_loss": 0.13962529599666595 + }, + { + "epoch": 20.071942446043167, + "force_loss": 0.009766372852027416, + "step": 22320 + }, + { + "epoch": 20.08093525179856, + "grad_norm": 0.3897517919540405, + "learning_rate": 7.421074884769616e-05, + "loss": 0.0235, + "step": 22330 + }, + { + "action_loss": 0.014374022372066975, + "epoch": 20.08093525179856, + "step": 22330 + }, + { + "epoch": 20.08093525179856, + "step": 22330, + "torque_loss": 0.13326890766620636 + }, + { + "epoch": 20.08093525179856, + "force_loss": 0.014441430568695068, + "step": 22330 + }, + { + "epoch": 20.08992805755396, + "grad_norm": 0.3673367202281952, + "learning_rate": 7.418663347393548e-05, + "loss": 0.0238, + "step": 22340 + }, + { + "action_loss": 0.006223693490028381, + "epoch": 20.08992805755396, + "step": 22340 + }, + { + "epoch": 20.08992805755396, + "step": 22340, + "torque_loss": 0.14779575169086456 + }, + { + "epoch": 20.08992805755396, + "force_loss": 0.010368873365223408, + "step": 22340 + }, + { + "epoch": 20.098920863309353, + "grad_norm": 0.29293516278266907, + "learning_rate": 7.416251075291418e-05, + "loss": 0.0222, + "step": 22350 + }, + { + "action_loss": 0.004675194155424833, + "epoch": 20.098920863309353, + "step": 22350 + }, + { + "epoch": 20.098920863309353, + "step": 22350, + "torque_loss": 0.11690375953912735 + }, + { + "epoch": 20.098920863309353, + "force_loss": 0.0035475061740726233, + "step": 22350 + }, + { + "epoch": 20.107913669064747, + "grad_norm": 0.16194823384284973, + "learning_rate": 7.413838069196007e-05, + "loss": 0.0172, + "step": 22360 + }, + { + "action_loss": 0.004984290339052677, + "epoch": 20.107913669064747, + "step": 22360 + }, + { + "epoch": 20.107913669064747, + "step": 22360, + "torque_loss": 0.10476479679346085 + }, + { + "epoch": 20.107913669064747, + "force_loss": 0.006919895764440298, + "step": 22360 + }, + { + "epoch": 20.116906474820144, + "grad_norm": 0.18410949409008026, + "learning_rate": 7.411424329840324e-05, + "loss": 0.0232, + "step": 22370 + }, + { + "action_loss": 0.006483596283942461, + "epoch": 20.116906474820144, + "step": 22370 + }, + { + "epoch": 20.116906474820144, + "step": 22370, + "torque_loss": 0.12645767629146576 + }, + { + "epoch": 20.116906474820144, + "force_loss": 0.004071300849318504, + "step": 22370 + }, + { + "epoch": 20.12589928057554, + "grad_norm": 0.2255152463912964, + "learning_rate": 7.409009857957601e-05, + "loss": 0.0286, + "step": 22380 + }, + { + "action_loss": 0.01818394847214222, + "epoch": 20.12589928057554, + "step": 22380 + }, + { + "epoch": 20.12589928057554, + "step": 22380, + "torque_loss": 0.1535508930683136 + }, + { + "epoch": 20.12589928057554, + "force_loss": 0.01527877151966095, + "step": 22380 + }, + { + "epoch": 20.134892086330936, + "grad_norm": 0.23035389184951782, + "learning_rate": 7.40659465428129e-05, + "loss": 0.0273, + "step": 22390 + }, + { + "action_loss": 0.05318012833595276, + "epoch": 20.134892086330936, + "step": 22390 + }, + { + "epoch": 20.134892086330936, + "step": 22390, + "torque_loss": 0.18275542557239532 + }, + { + "epoch": 20.134892086330936, + "force_loss": 0.04396986588835716, + "step": 22390 + }, + { + "epoch": 20.14388489208633, + "grad_norm": 0.243258997797966, + "learning_rate": 7.404178719545063e-05, + "loss": 0.025, + "step": 22400 + }, + { + "action_loss": 0.010891060344874859, + "epoch": 20.14388489208633, + "step": 22400 + }, + { + "epoch": 20.14388489208633, + "step": 22400, + "torque_loss": 0.1615249067544937 + }, + { + "epoch": 20.14388489208633, + "force_loss": 0.007259754464030266, + "step": 22400 + }, + { + "epoch": 20.152877697841728, + "grad_norm": 0.33244723081588745, + "learning_rate": 7.401762054482822e-05, + "loss": 0.0276, + "step": 22410 + }, + { + "action_loss": 0.00869003590196371, + "epoch": 20.152877697841728, + "step": 22410 + }, + { + "epoch": 20.152877697841728, + "step": 22410, + "torque_loss": 0.11507581919431686 + }, + { + "epoch": 20.152877697841728, + "force_loss": 0.008545215241611004, + "step": 22410 + }, + { + "epoch": 20.16187050359712, + "grad_norm": 1.2588152885437012, + "learning_rate": 7.39934465982868e-05, + "loss": 0.0235, + "step": 22420 + }, + { + "action_loss": 0.005899036768823862, + "epoch": 20.16187050359712, + "step": 22420 + }, + { + "epoch": 20.16187050359712, + "step": 22420, + "torque_loss": 0.16736231744289398 + }, + { + "epoch": 20.16187050359712, + "force_loss": 0.004097914323210716, + "step": 22420 + }, + { + "epoch": 20.17086330935252, + "grad_norm": 0.44440752267837524, + "learning_rate": 7.396926536316984e-05, + "loss": 0.0212, + "step": 22430 + }, + { + "action_loss": 0.006229629274457693, + "epoch": 20.17086330935252, + "step": 22430 + }, + { + "epoch": 20.17086330935252, + "step": 22430, + "torque_loss": 0.13538970053195953 + }, + { + "epoch": 20.17086330935252, + "force_loss": 0.006261768285185099, + "step": 22430 + }, + { + "epoch": 20.179856115107913, + "grad_norm": 0.46360284090042114, + "learning_rate": 7.394507684682293e-05, + "loss": 0.0249, + "step": 22440 + }, + { + "action_loss": 0.0048889233730733395, + "epoch": 20.179856115107913, + "step": 22440 + }, + { + "epoch": 20.179856115107913, + "step": 22440, + "torque_loss": 0.07896500080823898 + }, + { + "epoch": 20.179856115107913, + "force_loss": 0.003936445340514183, + "step": 22440 + }, + { + "epoch": 20.18884892086331, + "grad_norm": 0.36999088525772095, + "learning_rate": 7.392088105659393e-05, + "loss": 0.022, + "step": 22450 + }, + { + "action_loss": 0.006878270301967859, + "epoch": 20.18884892086331, + "step": 22450 + }, + { + "epoch": 20.18884892086331, + "step": 22450, + "torque_loss": 0.11815337091684341 + }, + { + "epoch": 20.18884892086331, + "force_loss": 0.0078524649143219, + "step": 22450 + }, + { + "epoch": 20.197841726618705, + "grad_norm": 0.2953590154647827, + "learning_rate": 7.389667799983284e-05, + "loss": 0.0265, + "step": 22460 + }, + { + "action_loss": 0.004856071434915066, + "epoch": 20.197841726618705, + "step": 22460 + }, + { + "epoch": 20.197841726618705, + "step": 22460, + "torque_loss": 0.10968545824289322 + }, + { + "epoch": 20.197841726618705, + "force_loss": 0.0033237107563763857, + "step": 22460 + }, + { + "epoch": 20.2068345323741, + "grad_norm": 0.3318917453289032, + "learning_rate": 7.387246768389193e-05, + "loss": 0.0208, + "step": 22470 + }, + { + "action_loss": 0.017688913270831108, + "epoch": 20.2068345323741, + "step": 22470 + }, + { + "epoch": 20.2068345323741, + "step": 22470, + "torque_loss": 0.1457376927137375 + }, + { + "epoch": 20.2068345323741, + "force_loss": 0.020751332864165306, + "step": 22470 + }, + { + "epoch": 20.215827338129497, + "grad_norm": 0.25988060235977173, + "learning_rate": 7.384825011612563e-05, + "loss": 0.0201, + "step": 22480 + }, + { + "action_loss": 0.006920827552676201, + "epoch": 20.215827338129497, + "step": 22480 + }, + { + "epoch": 20.215827338129497, + "step": 22480, + "torque_loss": 0.1720820814371109 + }, + { + "epoch": 20.215827338129497, + "force_loss": 0.006652161478996277, + "step": 22480 + }, + { + "epoch": 20.22482014388489, + "grad_norm": 0.5164228081703186, + "learning_rate": 7.382402530389066e-05, + "loss": 0.0288, + "step": 22490 + }, + { + "action_loss": 0.004785399418324232, + "epoch": 20.22482014388489, + "step": 22490 + }, + { + "epoch": 20.22482014388489, + "step": 22490, + "torque_loss": 0.13154055178165436 + }, + { + "epoch": 20.22482014388489, + "force_loss": 0.0029237607959657907, + "step": 22490 + }, + { + "epoch": 20.23381294964029, + "grad_norm": 0.4319625198841095, + "learning_rate": 7.379979325454582e-05, + "loss": 0.0244, + "step": 22500 + }, + { + "action_loss": 0.0043952311389148235, + "epoch": 20.23381294964029, + "step": 22500 + }, + { + "epoch": 20.23381294964029, + "step": 22500, + "torque_loss": 0.12781020998954773 + }, + { + "epoch": 20.23381294964029, + "force_loss": 0.0027401261031627655, + "step": 22500 + }, + { + "epoch": 20.242805755395683, + "grad_norm": 0.3425253927707672, + "learning_rate": 7.37755539754522e-05, + "loss": 0.0228, + "step": 22510 + }, + { + "action_loss": 0.009167276322841644, + "epoch": 20.242805755395683, + "step": 22510 + }, + { + "epoch": 20.242805755395683, + "step": 22510, + "torque_loss": 0.1435106247663498 + }, + { + "epoch": 20.242805755395683, + "force_loss": 0.00964704342186451, + "step": 22510 + }, + { + "epoch": 20.25179856115108, + "grad_norm": 0.24721738696098328, + "learning_rate": 7.375130747397302e-05, + "loss": 0.02, + "step": 22520 + }, + { + "action_loss": 0.0033436038065701723, + "epoch": 20.25179856115108, + "step": 22520 + }, + { + "epoch": 20.25179856115108, + "step": 22520, + "torque_loss": 0.11359447240829468 + }, + { + "epoch": 20.25179856115108, + "force_loss": 0.0030099814757704735, + "step": 22520 + }, + { + "epoch": 20.260791366906474, + "grad_norm": 0.4744258224964142, + "learning_rate": 7.372705375747377e-05, + "loss": 0.0209, + "step": 22530 + }, + { + "action_loss": 0.006460311356931925, + "epoch": 20.260791366906474, + "step": 22530 + }, + { + "epoch": 20.260791366906474, + "step": 22530, + "torque_loss": 0.1257258802652359 + }, + { + "epoch": 20.260791366906474, + "force_loss": 0.008514966815710068, + "step": 22530 + }, + { + "epoch": 20.269784172661872, + "grad_norm": 0.5593452453613281, + "learning_rate": 7.370279283332205e-05, + "loss": 0.0201, + "step": 22540 + }, + { + "action_loss": 0.01024629082530737, + "epoch": 20.269784172661872, + "step": 22540 + }, + { + "epoch": 20.269784172661872, + "step": 22540, + "torque_loss": 0.1480618119239807 + }, + { + "epoch": 20.269784172661872, + "force_loss": 0.012057830579578876, + "step": 22540 + }, + { + "epoch": 20.278776978417266, + "grad_norm": 0.35618850588798523, + "learning_rate": 7.36785247088877e-05, + "loss": 0.0203, + "step": 22550 + }, + { + "action_loss": 0.007292577531188726, + "epoch": 20.278776978417266, + "step": 22550 + }, + { + "epoch": 20.278776978417266, + "step": 22550, + "torque_loss": 0.1357903629541397 + }, + { + "epoch": 20.278776978417266, + "force_loss": 0.004702517297118902, + "step": 22550 + }, + { + "epoch": 20.28776978417266, + "grad_norm": 0.21865610778331757, + "learning_rate": 7.365424939154275e-05, + "loss": 0.0226, + "step": 22560 + }, + { + "action_loss": 0.013641615398228168, + "epoch": 20.28776978417266, + "step": 22560 + }, + { + "epoch": 20.28776978417266, + "step": 22560, + "torque_loss": 0.12795692682266235 + }, + { + "epoch": 20.28776978417266, + "force_loss": 0.013013812713325024, + "step": 22560 + }, + { + "epoch": 20.296762589928058, + "grad_norm": 0.22523823380470276, + "learning_rate": 7.362996688866138e-05, + "loss": 0.0214, + "step": 22570 + }, + { + "action_loss": 0.0032046933192759752, + "epoch": 20.296762589928058, + "step": 22570 + }, + { + "epoch": 20.296762589928058, + "step": 22570, + "torque_loss": 0.11089793592691422 + }, + { + "epoch": 20.296762589928058, + "force_loss": 0.002070546383038163, + "step": 22570 + }, + { + "epoch": 20.305755395683452, + "grad_norm": 0.5252678394317627, + "learning_rate": 7.360567720761999e-05, + "loss": 0.0239, + "step": 22580 + }, + { + "action_loss": 0.008866540156304836, + "epoch": 20.305755395683452, + "step": 22580 + }, + { + "epoch": 20.305755395683452, + "step": 22580, + "torque_loss": 0.1712295413017273 + }, + { + "epoch": 20.305755395683452, + "force_loss": 0.009010610170662403, + "step": 22580 + }, + { + "epoch": 20.31474820143885, + "grad_norm": 0.22528356313705444, + "learning_rate": 7.358138035579711e-05, + "loss": 0.0225, + "step": 22590 + }, + { + "action_loss": 0.003371112747117877, + "epoch": 20.31474820143885, + "step": 22590 + }, + { + "epoch": 20.31474820143885, + "step": 22590, + "torque_loss": 0.1017804741859436 + }, + { + "epoch": 20.31474820143885, + "force_loss": 0.0027128432411700487, + "step": 22590 + }, + { + "epoch": 20.323741007194243, + "grad_norm": 0.21611282229423523, + "learning_rate": 7.355707634057354e-05, + "loss": 0.0214, + "step": 22600 + }, + { + "action_loss": 0.005914825946092606, + "epoch": 20.323741007194243, + "step": 22600 + }, + { + "epoch": 20.323741007194243, + "step": 22600, + "torque_loss": 0.14993257820606232 + }, + { + "epoch": 20.323741007194243, + "force_loss": 0.004669476766139269, + "step": 22600 + }, + { + "epoch": 20.33273381294964, + "grad_norm": 0.287702739238739, + "learning_rate": 7.353276516933215e-05, + "loss": 0.0172, + "step": 22610 + }, + { + "action_loss": 0.014122466556727886, + "epoch": 20.33273381294964, + "step": 22610 + }, + { + "epoch": 20.33273381294964, + "step": 22610, + "torque_loss": 0.15321184694766998 + }, + { + "epoch": 20.33273381294964, + "force_loss": 0.02719024010002613, + "step": 22610 + }, + { + "epoch": 20.341726618705035, + "grad_norm": 0.1807129979133606, + "learning_rate": 7.350844684945806e-05, + "loss": 0.0228, + "step": 22620 + }, + { + "action_loss": 0.008383541367948055, + "epoch": 20.341726618705035, + "step": 22620 + }, + { + "epoch": 20.341726618705035, + "step": 22620, + "torque_loss": 0.17359912395477295 + }, + { + "epoch": 20.341726618705035, + "force_loss": 0.014319824986159801, + "step": 22620 + }, + { + "epoch": 20.350719424460433, + "grad_norm": 0.26065948605537415, + "learning_rate": 7.348412138833851e-05, + "loss": 0.0194, + "step": 22630 + }, + { + "action_loss": 0.005191102158278227, + "epoch": 20.350719424460433, + "step": 22630 + }, + { + "epoch": 20.350719424460433, + "step": 22630, + "torque_loss": 0.10668083280324936 + }, + { + "epoch": 20.350719424460433, + "force_loss": 0.005676930770277977, + "step": 22630 + }, + { + "epoch": 20.359712230215827, + "grad_norm": 0.29040876030921936, + "learning_rate": 7.345978879336295e-05, + "loss": 0.0194, + "step": 22640 + }, + { + "action_loss": 0.004312486387789249, + "epoch": 20.359712230215827, + "step": 22640 + }, + { + "epoch": 20.359712230215827, + "step": 22640, + "torque_loss": 0.11470244079828262 + }, + { + "epoch": 20.359712230215827, + "force_loss": 0.006611905992031097, + "step": 22640 + }, + { + "epoch": 20.368705035971225, + "grad_norm": 0.25477704405784607, + "learning_rate": 7.343544907192296e-05, + "loss": 0.0251, + "step": 22650 + }, + { + "action_loss": 0.007612638175487518, + "epoch": 20.368705035971225, + "step": 22650 + }, + { + "epoch": 20.368705035971225, + "step": 22650, + "torque_loss": 0.19843995571136475 + }, + { + "epoch": 20.368705035971225, + "force_loss": 0.005092856008559465, + "step": 22650 + }, + { + "epoch": 20.37769784172662, + "grad_norm": 0.263653427362442, + "learning_rate": 7.341110223141235e-05, + "loss": 0.0223, + "step": 22660 + }, + { + "action_loss": 0.004654020071029663, + "epoch": 20.37769784172662, + "step": 22660 + }, + { + "epoch": 20.37769784172662, + "step": 22660, + "torque_loss": 0.1299055814743042 + }, + { + "epoch": 20.37769784172662, + "force_loss": 0.0033027261961251497, + "step": 22660 + }, + { + "epoch": 20.386690647482013, + "grad_norm": 0.6859336495399475, + "learning_rate": 7.3386748279227e-05, + "loss": 0.0252, + "step": 22670 + }, + { + "action_loss": 0.004958470351994038, + "epoch": 20.386690647482013, + "step": 22670 + }, + { + "epoch": 20.386690647482013, + "step": 22670, + "torque_loss": 0.12926582992076874 + }, + { + "epoch": 20.386690647482013, + "force_loss": 0.0032022378873080015, + "step": 22670 + }, + { + "epoch": 20.39568345323741, + "grad_norm": 0.8659705519676208, + "learning_rate": 7.336238722276501e-05, + "loss": 0.0267, + "step": 22680 + }, + { + "action_loss": 0.011049383319914341, + "epoch": 20.39568345323741, + "step": 22680 + }, + { + "epoch": 20.39568345323741, + "step": 22680, + "torque_loss": 0.12452337890863419 + }, + { + "epoch": 20.39568345323741, + "force_loss": 0.005410314071923494, + "step": 22680 + }, + { + "epoch": 20.404676258992804, + "grad_norm": 0.4141238331794739, + "learning_rate": 7.333801906942663e-05, + "loss": 0.0203, + "step": 22690 + }, + { + "action_loss": 0.010135426186025143, + "epoch": 20.404676258992804, + "step": 22690 + }, + { + "epoch": 20.404676258992804, + "step": 22690, + "torque_loss": 0.12890994548797607 + }, + { + "epoch": 20.404676258992804, + "force_loss": 0.01397536601871252, + "step": 22690 + }, + { + "epoch": 20.413669064748202, + "grad_norm": 0.21891717612743378, + "learning_rate": 7.331364382661428e-05, + "loss": 0.0201, + "step": 22700 + }, + { + "action_loss": 0.004239857662469149, + "epoch": 20.413669064748202, + "step": 22700 + }, + { + "epoch": 20.413669064748202, + "step": 22700, + "torque_loss": 0.14295683801174164 + }, + { + "epoch": 20.413669064748202, + "force_loss": 0.006196148693561554, + "step": 22700 + }, + { + "epoch": 20.422661870503596, + "grad_norm": 0.2961306869983673, + "learning_rate": 7.328926150173248e-05, + "loss": 0.0211, + "step": 22710 + }, + { + "action_loss": 0.005986800882965326, + "epoch": 20.422661870503596, + "step": 22710 + }, + { + "epoch": 20.422661870503596, + "step": 22710, + "torque_loss": 0.09392008930444717 + }, + { + "epoch": 20.422661870503596, + "force_loss": 0.0027146136853843927, + "step": 22710 + }, + { + "epoch": 20.431654676258994, + "grad_norm": 0.4203747510910034, + "learning_rate": 7.326487210218795e-05, + "loss": 0.02, + "step": 22720 + }, + { + "action_loss": 0.008271696977317333, + "epoch": 20.431654676258994, + "step": 22720 + }, + { + "epoch": 20.431654676258994, + "step": 22720, + "torque_loss": 0.12983034551143646 + }, + { + "epoch": 20.431654676258994, + "force_loss": 0.004994562827050686, + "step": 22720 + }, + { + "epoch": 20.440647482014388, + "grad_norm": 0.31495434045791626, + "learning_rate": 7.324047563538955e-05, + "loss": 0.026, + "step": 22730 + }, + { + "action_loss": 0.00360013241879642, + "epoch": 20.440647482014388, + "step": 22730 + }, + { + "epoch": 20.440647482014388, + "step": 22730, + "torque_loss": 0.15073582530021667 + }, + { + "epoch": 20.440647482014388, + "force_loss": 0.0022492518182843924, + "step": 22730 + }, + { + "epoch": 20.449640287769785, + "grad_norm": 0.2842729091644287, + "learning_rate": 7.321607210874828e-05, + "loss": 0.0221, + "step": 22740 + }, + { + "action_loss": 0.011417836882174015, + "epoch": 20.449640287769785, + "step": 22740 + }, + { + "epoch": 20.449640287769785, + "step": 22740, + "torque_loss": 0.15755224227905273 + }, + { + "epoch": 20.449640287769785, + "force_loss": 0.019844690337777138, + "step": 22740 + }, + { + "epoch": 20.45863309352518, + "grad_norm": 0.2664801776409149, + "learning_rate": 7.31916615296773e-05, + "loss": 0.0208, + "step": 22750 + }, + { + "action_loss": 0.008020293898880482, + "epoch": 20.45863309352518, + "step": 22750 + }, + { + "epoch": 20.45863309352518, + "step": 22750, + "torque_loss": 0.14187000691890717 + }, + { + "epoch": 20.45863309352518, + "force_loss": 0.005518303718417883, + "step": 22750 + }, + { + "epoch": 20.467625899280577, + "grad_norm": 0.20545458793640137, + "learning_rate": 7.316724390559188e-05, + "loss": 0.0193, + "step": 22760 + }, + { + "action_loss": 0.0034446020144969225, + "epoch": 20.467625899280577, + "step": 22760 + }, + { + "epoch": 20.467625899280577, + "step": 22760, + "torque_loss": 0.10390549153089523 + }, + { + "epoch": 20.467625899280577, + "force_loss": 0.0020792176946997643, + "step": 22760 + }, + { + "epoch": 20.47661870503597, + "grad_norm": 0.2803266942501068, + "learning_rate": 7.314281924390946e-05, + "loss": 0.0187, + "step": 22770 + }, + { + "action_loss": 0.025641782209277153, + "epoch": 20.47661870503597, + "step": 22770 + }, + { + "epoch": 20.47661870503597, + "step": 22770, + "torque_loss": 0.14045365154743195 + }, + { + "epoch": 20.47661870503597, + "force_loss": 0.024263186380267143, + "step": 22770 + }, + { + "epoch": 20.485611510791365, + "grad_norm": 0.3588990271091461, + "learning_rate": 7.311838755204959e-05, + "loss": 0.0224, + "step": 22780 + }, + { + "action_loss": 0.004766867961734533, + "epoch": 20.485611510791365, + "step": 22780 + }, + { + "epoch": 20.485611510791365, + "step": 22780, + "torque_loss": 0.10751763731241226 + }, + { + "epoch": 20.485611510791365, + "force_loss": 0.0041995481587946415, + "step": 22780 + }, + { + "epoch": 20.494604316546763, + "grad_norm": 0.2514796853065491, + "learning_rate": 7.3093948837434e-05, + "loss": 0.0189, + "step": 22790 + }, + { + "action_loss": 0.0024767823051661253, + "epoch": 20.494604316546763, + "step": 22790 + }, + { + "epoch": 20.494604316546763, + "step": 22790, + "torque_loss": 0.08609968423843384 + }, + { + "epoch": 20.494604316546763, + "force_loss": 0.00434294156730175, + "step": 22790 + }, + { + "epoch": 20.503597122302157, + "grad_norm": 0.3953184187412262, + "learning_rate": 7.306950310748651e-05, + "loss": 0.0181, + "step": 22800 + }, + { + "action_loss": 0.00486801890656352, + "epoch": 20.503597122302157, + "step": 22800 + }, + { + "epoch": 20.503597122302157, + "step": 22800, + "torque_loss": 0.12692274153232574 + }, + { + "epoch": 20.503597122302157, + "force_loss": 0.003190352814272046, + "step": 22800 + }, + { + "epoch": 20.512589928057555, + "grad_norm": 0.9914472699165344, + "learning_rate": 7.304505036963311e-05, + "loss": 0.026, + "step": 22810 + }, + { + "action_loss": 0.008738495409488678, + "epoch": 20.512589928057555, + "step": 22810 + }, + { + "epoch": 20.512589928057555, + "step": 22810, + "torque_loss": 0.12337524443864822 + }, + { + "epoch": 20.512589928057555, + "force_loss": 0.015178129076957703, + "step": 22810 + }, + { + "epoch": 20.52158273381295, + "grad_norm": 0.38276538252830505, + "learning_rate": 7.302059063130186e-05, + "loss": 0.0215, + "step": 22820 + }, + { + "action_loss": 0.005425394978374243, + "epoch": 20.52158273381295, + "step": 22820 + }, + { + "epoch": 20.52158273381295, + "step": 22820, + "torque_loss": 0.13263416290283203 + }, + { + "epoch": 20.52158273381295, + "force_loss": 0.007371599320322275, + "step": 22820 + }, + { + "epoch": 20.530575539568346, + "grad_norm": 0.40876755118370056, + "learning_rate": 7.2996123899923e-05, + "loss": 0.0204, + "step": 22830 + }, + { + "action_loss": 0.0035579076502472162, + "epoch": 20.530575539568346, + "step": 22830 + }, + { + "epoch": 20.530575539568346, + "step": 22830, + "torque_loss": 0.16174590587615967 + }, + { + "epoch": 20.530575539568346, + "force_loss": 0.0021231574937701225, + "step": 22830 + }, + { + "epoch": 20.53956834532374, + "grad_norm": 0.3351193070411682, + "learning_rate": 7.297165018292886e-05, + "loss": 0.0237, + "step": 22840 + }, + { + "action_loss": 0.004874266218394041, + "epoch": 20.53956834532374, + "step": 22840 + }, + { + "epoch": 20.53956834532374, + "step": 22840, + "torque_loss": 0.1806125044822693 + }, + { + "epoch": 20.53956834532374, + "force_loss": 0.011274074204266071, + "step": 22840 + }, + { + "epoch": 20.548561151079138, + "grad_norm": 0.28446999192237854, + "learning_rate": 7.294716948775396e-05, + "loss": 0.0205, + "step": 22850 + }, + { + "action_loss": 0.009164727292954922, + "epoch": 20.548561151079138, + "step": 22850 + }, + { + "epoch": 20.548561151079138, + "step": 22850, + "torque_loss": 0.16843517124652863 + }, + { + "epoch": 20.548561151079138, + "force_loss": 0.008355320431292057, + "step": 22850 + }, + { + "epoch": 20.557553956834532, + "grad_norm": 0.26358136534690857, + "learning_rate": 7.292268182183484e-05, + "loss": 0.0206, + "step": 22860 + }, + { + "action_loss": 0.003183215158060193, + "epoch": 20.557553956834532, + "step": 22860 + }, + { + "epoch": 20.557553956834532, + "step": 22860, + "torque_loss": 0.1107417568564415 + }, + { + "epoch": 20.557553956834532, + "force_loss": 0.003787040011957288, + "step": 22860 + }, + { + "epoch": 20.56654676258993, + "grad_norm": 0.39613863825798035, + "learning_rate": 7.28981871926102e-05, + "loss": 0.0196, + "step": 22870 + }, + { + "action_loss": 0.009816057980060577, + "epoch": 20.56654676258993, + "step": 22870 + }, + { + "epoch": 20.56654676258993, + "step": 22870, + "torque_loss": 0.13265351951122284 + }, + { + "epoch": 20.56654676258993, + "force_loss": 0.005459487903863192, + "step": 22870 + }, + { + "epoch": 20.575539568345324, + "grad_norm": 0.4228397309780121, + "learning_rate": 7.28736856075209e-05, + "loss": 0.023, + "step": 22880 + }, + { + "action_loss": 0.004934034775942564, + "epoch": 20.575539568345324, + "step": 22880 + }, + { + "epoch": 20.575539568345324, + "step": 22880, + "torque_loss": 0.10597292333841324 + }, + { + "epoch": 20.575539568345324, + "force_loss": 0.00351532269269228, + "step": 22880 + }, + { + "epoch": 20.584532374100718, + "grad_norm": 0.10540630668401718, + "learning_rate": 7.284917707400985e-05, + "loss": 0.0198, + "step": 22890 + }, + { + "action_loss": 0.025004873052239418, + "epoch": 20.584532374100718, + "step": 22890 + }, + { + "epoch": 20.584532374100718, + "step": 22890, + "torque_loss": 0.18900613486766815 + }, + { + "epoch": 20.584532374100718, + "force_loss": 0.02924586832523346, + "step": 22890 + }, + { + "epoch": 20.593525179856115, + "grad_norm": 0.298160195350647, + "learning_rate": 7.282466159952212e-05, + "loss": 0.0244, + "step": 22900 + }, + { + "action_loss": 0.006574613507837057, + "epoch": 20.593525179856115, + "step": 22900 + }, + { + "epoch": 20.593525179856115, + "step": 22900, + "torque_loss": 0.1357024759054184 + }, + { + "epoch": 20.593525179856115, + "force_loss": 0.004133729729801416, + "step": 22900 + }, + { + "epoch": 20.60251798561151, + "grad_norm": 0.1741696447134018, + "learning_rate": 7.280013919150483e-05, + "loss": 0.0224, + "step": 22910 + }, + { + "action_loss": 0.028599942103028297, + "epoch": 20.60251798561151, + "step": 22910 + }, + { + "epoch": 20.60251798561151, + "step": 22910, + "torque_loss": 0.24114887416362762 + }, + { + "epoch": 20.60251798561151, + "force_loss": 0.036634355783462524, + "step": 22910 + }, + { + "epoch": 20.611510791366907, + "grad_norm": 0.37376120686531067, + "learning_rate": 7.277560985740728e-05, + "loss": 0.0316, + "step": 22920 + }, + { + "action_loss": 0.010220159776508808, + "epoch": 20.611510791366907, + "step": 22920 + }, + { + "epoch": 20.611510791366907, + "step": 22920, + "torque_loss": 0.13908237218856812 + }, + { + "epoch": 20.611510791366907, + "force_loss": 0.024738037958741188, + "step": 22920 + }, + { + "epoch": 20.6205035971223, + "grad_norm": 0.24346213042736053, + "learning_rate": 7.275107360468079e-05, + "loss": 0.0242, + "step": 22930 + }, + { + "action_loss": 0.006254466716200113, + "epoch": 20.6205035971223, + "step": 22930 + }, + { + "epoch": 20.6205035971223, + "step": 22930, + "torque_loss": 0.1192498728632927 + }, + { + "epoch": 20.6205035971223, + "force_loss": 0.006305792834609747, + "step": 22930 + }, + { + "epoch": 20.6294964028777, + "grad_norm": 0.25157687067985535, + "learning_rate": 7.272653044077885e-05, + "loss": 0.0218, + "step": 22940 + }, + { + "action_loss": 0.02662225253880024, + "epoch": 20.6294964028777, + "step": 22940 + }, + { + "epoch": 20.6294964028777, + "step": 22940, + "torque_loss": 0.2028772085905075 + }, + { + "epoch": 20.6294964028777, + "force_loss": 0.028705840930342674, + "step": 22940 + }, + { + "epoch": 20.638489208633093, + "grad_norm": 0.4135701060295105, + "learning_rate": 7.270198037315703e-05, + "loss": 0.0277, + "step": 22950 + }, + { + "action_loss": 0.006027355790138245, + "epoch": 20.638489208633093, + "step": 22950 + }, + { + "epoch": 20.638489208633093, + "step": 22950, + "torque_loss": 0.14523166418075562 + }, + { + "epoch": 20.638489208633093, + "force_loss": 0.003223589388653636, + "step": 22950 + }, + { + "epoch": 20.64748201438849, + "grad_norm": 0.1827203780412674, + "learning_rate": 7.267742340927297e-05, + "loss": 0.0191, + "step": 22960 + }, + { + "action_loss": 0.008993715979158878, + "epoch": 20.64748201438849, + "step": 22960 + }, + { + "epoch": 20.64748201438849, + "step": 22960, + "torque_loss": 0.1408950835466385 + }, + { + "epoch": 20.64748201438849, + "force_loss": 0.007974785752594471, + "step": 22960 + }, + { + "epoch": 20.656474820143885, + "grad_norm": 0.1610017716884613, + "learning_rate": 7.265285955658645e-05, + "loss": 0.0186, + "step": 22970 + }, + { + "action_loss": 0.012898233719170094, + "epoch": 20.656474820143885, + "step": 22970 + }, + { + "epoch": 20.656474820143885, + "step": 22970, + "torque_loss": 0.14384199678897858 + }, + { + "epoch": 20.656474820143885, + "force_loss": 0.02432405948638916, + "step": 22970 + }, + { + "epoch": 20.665467625899282, + "grad_norm": 0.2561543583869934, + "learning_rate": 7.26282888225593e-05, + "loss": 0.021, + "step": 22980 + }, + { + "action_loss": 0.005474910605698824, + "epoch": 20.665467625899282, + "step": 22980 + }, + { + "epoch": 20.665467625899282, + "step": 22980, + "torque_loss": 0.10223334282636642 + }, + { + "epoch": 20.665467625899282, + "force_loss": 0.009249657392501831, + "step": 22980 + }, + { + "epoch": 20.674460431654676, + "grad_norm": 0.2637881636619568, + "learning_rate": 7.260371121465548e-05, + "loss": 0.0205, + "step": 22990 + }, + { + "action_loss": 0.004197610542178154, + "epoch": 20.674460431654676, + "step": 22990 + }, + { + "epoch": 20.674460431654676, + "step": 22990, + "torque_loss": 0.09877309948205948 + }, + { + "epoch": 20.674460431654676, + "force_loss": 0.0035306450445204973, + "step": 22990 + }, + { + "epoch": 20.68345323741007, + "grad_norm": 0.21508187055587769, + "learning_rate": 7.2579126740341e-05, + "loss": 0.024, + "step": 23000 + }, + { + "action_loss": 0.015079661272466183, + "epoch": 20.68345323741007, + "step": 23000 + }, + { + "epoch": 20.68345323741007, + "step": 23000, + "torque_loss": 0.15902179479599 + }, + { + "epoch": 20.68345323741007, + "force_loss": 0.01978220045566559, + "step": 23000 + }, + { + "epoch": 20.692446043165468, + "grad_norm": 0.15836142003536224, + "learning_rate": 7.2554535407084e-05, + "loss": 0.0189, + "step": 23010 + }, + { + "action_loss": 0.002242977963760495, + "epoch": 20.692446043165468, + "step": 23010 + }, + { + "epoch": 20.692446043165468, + "step": 23010, + "torque_loss": 0.1343315690755844 + }, + { + "epoch": 20.692446043165468, + "force_loss": 0.00287189451046288, + "step": 23010 + }, + { + "epoch": 20.701438848920862, + "grad_norm": 0.14945478737354279, + "learning_rate": 7.252993722235464e-05, + "loss": 0.0216, + "step": 23020 + }, + { + "action_loss": 0.00448503065854311, + "epoch": 20.701438848920862, + "step": 23020 + }, + { + "epoch": 20.701438848920862, + "step": 23020, + "torque_loss": 0.10860016196966171 + }, + { + "epoch": 20.701438848920862, + "force_loss": 0.004214582499116659, + "step": 23020 + }, + { + "epoch": 20.71043165467626, + "grad_norm": 0.24460574984550476, + "learning_rate": 7.250533219362523e-05, + "loss": 0.0188, + "step": 23030 + }, + { + "action_loss": 0.007515007629990578, + "epoch": 20.71043165467626, + "step": 23030 + }, + { + "epoch": 20.71043165467626, + "step": 23030, + "torque_loss": 0.15320223569869995 + }, + { + "epoch": 20.71043165467626, + "force_loss": 0.008584956638514996, + "step": 23030 + }, + { + "epoch": 20.719424460431654, + "grad_norm": 0.33232659101486206, + "learning_rate": 7.248072032837012e-05, + "loss": 0.024, + "step": 23040 + }, + { + "action_loss": 0.018211496993899345, + "epoch": 20.719424460431654, + "step": 23040 + }, + { + "epoch": 20.719424460431654, + "step": 23040, + "torque_loss": 0.12002400308847427 + }, + { + "epoch": 20.719424460431654, + "force_loss": 0.012630761601030827, + "step": 23040 + }, + { + "epoch": 20.72841726618705, + "grad_norm": 0.4287187159061432, + "learning_rate": 7.245610163406575e-05, + "loss": 0.0221, + "step": 23050 + }, + { + "action_loss": 0.009795115329325199, + "epoch": 20.72841726618705, + "step": 23050 + }, + { + "epoch": 20.72841726618705, + "step": 23050, + "torque_loss": 0.10163215547800064 + }, + { + "epoch": 20.72841726618705, + "force_loss": 0.009978638030588627, + "step": 23050 + }, + { + "epoch": 20.737410071942445, + "grad_norm": 0.3301442861557007, + "learning_rate": 7.243147611819061e-05, + "loss": 0.0226, + "step": 23060 + }, + { + "action_loss": 0.0025839603040367365, + "epoch": 20.737410071942445, + "step": 23060 + }, + { + "epoch": 20.737410071942445, + "step": 23060, + "torque_loss": 0.13534292578697205 + }, + { + "epoch": 20.737410071942445, + "force_loss": 0.002022884087637067, + "step": 23060 + }, + { + "epoch": 20.746402877697843, + "grad_norm": 0.3564178943634033, + "learning_rate": 7.240684378822531e-05, + "loss": 0.0201, + "step": 23070 + }, + { + "action_loss": 0.0025712875649333, + "epoch": 20.746402877697843, + "step": 23070 + }, + { + "epoch": 20.746402877697843, + "step": 23070, + "torque_loss": 0.13158206641674042 + }, + { + "epoch": 20.746402877697843, + "force_loss": 0.0022691027261316776, + "step": 23070 + }, + { + "epoch": 20.755395683453237, + "grad_norm": 0.4516822099685669, + "learning_rate": 7.238220465165248e-05, + "loss": 0.0194, + "step": 23080 + }, + { + "action_loss": 0.004054612014442682, + "epoch": 20.755395683453237, + "step": 23080 + }, + { + "epoch": 20.755395683453237, + "step": 23080, + "torque_loss": 0.14073596894741058 + }, + { + "epoch": 20.755395683453237, + "force_loss": 0.0026126548182219267, + "step": 23080 + }, + { + "epoch": 20.764388489208635, + "grad_norm": 0.4884900748729706, + "learning_rate": 7.235755871595684e-05, + "loss": 0.0248, + "step": 23090 + }, + { + "action_loss": 0.005712583661079407, + "epoch": 20.764388489208635, + "step": 23090 + }, + { + "epoch": 20.764388489208635, + "step": 23090, + "torque_loss": 0.09353253990411758 + }, + { + "epoch": 20.764388489208635, + "force_loss": 0.006905198562890291, + "step": 23090 + }, + { + "epoch": 20.77338129496403, + "grad_norm": 0.24786247313022614, + "learning_rate": 7.233290598862517e-05, + "loss": 0.0216, + "step": 23100 + }, + { + "action_loss": 0.00905452948063612, + "epoch": 20.77338129496403, + "step": 23100 + }, + { + "epoch": 20.77338129496403, + "step": 23100, + "torque_loss": 0.13807465136051178 + }, + { + "epoch": 20.77338129496403, + "force_loss": 0.005232261493802071, + "step": 23100 + }, + { + "epoch": 20.782374100719423, + "grad_norm": 0.39007019996643066, + "learning_rate": 7.230824647714635e-05, + "loss": 0.0211, + "step": 23110 + }, + { + "action_loss": 0.00991443544626236, + "epoch": 20.782374100719423, + "step": 23110 + }, + { + "epoch": 20.782374100719423, + "step": 23110, + "torque_loss": 0.13694830238819122 + }, + { + "epoch": 20.782374100719423, + "force_loss": 0.007330181542783976, + "step": 23110 + }, + { + "epoch": 20.79136690647482, + "grad_norm": 0.3686298131942749, + "learning_rate": 7.228358018901124e-05, + "loss": 0.0213, + "step": 23120 + }, + { + "action_loss": 0.005644665565341711, + "epoch": 20.79136690647482, + "step": 23120 + }, + { + "epoch": 20.79136690647482, + "step": 23120, + "torque_loss": 0.12125851958990097 + }, + { + "epoch": 20.79136690647482, + "force_loss": 0.0046434178948402405, + "step": 23120 + }, + { + "epoch": 20.800359712230215, + "grad_norm": 0.4194779396057129, + "learning_rate": 7.225890713171286e-05, + "loss": 0.0233, + "step": 23130 + }, + { + "action_loss": 0.010150275193154812, + "epoch": 20.800359712230215, + "step": 23130 + }, + { + "epoch": 20.800359712230215, + "step": 23130, + "torque_loss": 0.12016569823026657 + }, + { + "epoch": 20.800359712230215, + "force_loss": 0.008466050960123539, + "step": 23130 + }, + { + "epoch": 20.809352517985612, + "grad_norm": 0.25126224756240845, + "learning_rate": 7.223422731274618e-05, + "loss": 0.0202, + "step": 23140 + }, + { + "action_loss": 0.004344898741692305, + "epoch": 20.809352517985612, + "step": 23140 + }, + { + "epoch": 20.809352517985612, + "step": 23140, + "torque_loss": 0.11202933639287949 + }, + { + "epoch": 20.809352517985612, + "force_loss": 0.0033856763038784266, + "step": 23140 + }, + { + "epoch": 20.818345323741006, + "grad_norm": 0.5618451833724976, + "learning_rate": 7.220954073960832e-05, + "loss": 0.0193, + "step": 23150 + }, + { + "action_loss": 0.00615106662735343, + "epoch": 20.818345323741006, + "step": 23150 + }, + { + "epoch": 20.818345323741006, + "step": 23150, + "torque_loss": 0.12374911457300186 + }, + { + "epoch": 20.818345323741006, + "force_loss": 0.012422936968505383, + "step": 23150 + }, + { + "epoch": 20.827338129496404, + "grad_norm": 0.3195774555206299, + "learning_rate": 7.218484741979838e-05, + "loss": 0.0202, + "step": 23160 + }, + { + "action_loss": 0.004305599257349968, + "epoch": 20.827338129496404, + "step": 23160 + }, + { + "epoch": 20.827338129496404, + "step": 23160, + "torque_loss": 0.12096554040908813 + }, + { + "epoch": 20.827338129496404, + "force_loss": 0.004232310224324465, + "step": 23160 + }, + { + "epoch": 20.836330935251798, + "grad_norm": 0.36266034841537476, + "learning_rate": 7.216014736081756e-05, + "loss": 0.0246, + "step": 23170 + }, + { + "action_loss": 0.014980564825236797, + "epoch": 20.836330935251798, + "step": 23170 + }, + { + "epoch": 20.836330935251798, + "step": 23170, + "torque_loss": 0.16716547310352325 + }, + { + "epoch": 20.836330935251798, + "force_loss": 0.023115748539566994, + "step": 23170 + }, + { + "epoch": 20.845323741007196, + "grad_norm": 0.22026123106479645, + "learning_rate": 7.213544057016906e-05, + "loss": 0.0225, + "step": 23180 + }, + { + "action_loss": 0.0049982825294137, + "epoch": 20.845323741007196, + "step": 23180 + }, + { + "epoch": 20.845323741007196, + "step": 23180, + "torque_loss": 0.13764846324920654 + }, + { + "epoch": 20.845323741007196, + "force_loss": 0.0037465591449290514, + "step": 23180 + }, + { + "epoch": 20.85431654676259, + "grad_norm": 0.24484923481941223, + "learning_rate": 7.211072705535819e-05, + "loss": 0.0214, + "step": 23190 + }, + { + "action_loss": 0.006374166812747717, + "epoch": 20.85431654676259, + "step": 23190 + }, + { + "epoch": 20.85431654676259, + "step": 23190, + "torque_loss": 0.1526750773191452 + }, + { + "epoch": 20.85431654676259, + "force_loss": 0.004419608507305384, + "step": 23190 + }, + { + "epoch": 20.863309352517987, + "grad_norm": 0.2755548655986786, + "learning_rate": 7.208600682389224e-05, + "loss": 0.0211, + "step": 23200 + }, + { + "action_loss": 0.002987870015203953, + "epoch": 20.863309352517987, + "step": 23200 + }, + { + "epoch": 20.863309352517987, + "step": 23200, + "torque_loss": 0.1289345771074295 + }, + { + "epoch": 20.863309352517987, + "force_loss": 0.0045778024941682816, + "step": 23200 + }, + { + "epoch": 20.87230215827338, + "grad_norm": 0.6204583048820496, + "learning_rate": 7.206127988328055e-05, + "loss": 0.0225, + "step": 23210 + }, + { + "action_loss": 0.01421859860420227, + "epoch": 20.87230215827338, + "step": 23210 + }, + { + "epoch": 20.87230215827338, + "step": 23210, + "torque_loss": 0.124875508248806 + }, + { + "epoch": 20.87230215827338, + "force_loss": 0.02199861966073513, + "step": 23210 + }, + { + "epoch": 20.881294964028775, + "grad_norm": 0.24115519225597382, + "learning_rate": 7.203654624103453e-05, + "loss": 0.0212, + "step": 23220 + }, + { + "action_loss": 0.007234225515276194, + "epoch": 20.881294964028775, + "step": 23220 + }, + { + "epoch": 20.881294964028775, + "step": 23220, + "torque_loss": 0.15499652922153473 + }, + { + "epoch": 20.881294964028775, + "force_loss": 0.008450089953839779, + "step": 23220 + }, + { + "epoch": 20.890287769784173, + "grad_norm": 0.2984203100204468, + "learning_rate": 7.201180590466761e-05, + "loss": 0.0219, + "step": 23230 + }, + { + "action_loss": 0.003522929735481739, + "epoch": 20.890287769784173, + "step": 23230 + }, + { + "epoch": 20.890287769784173, + "step": 23230, + "torque_loss": 0.10293682664632797 + }, + { + "epoch": 20.890287769784173, + "force_loss": 0.004324816633015871, + "step": 23230 + }, + { + "epoch": 20.899280575539567, + "grad_norm": 0.18120406568050385, + "learning_rate": 7.198705888169523e-05, + "loss": 0.0254, + "step": 23240 + }, + { + "action_loss": 0.012180055491626263, + "epoch": 20.899280575539567, + "step": 23240 + }, + { + "epoch": 20.899280575539567, + "step": 23240, + "torque_loss": 0.16695387661457062 + }, + { + "epoch": 20.899280575539567, + "force_loss": 0.014077156782150269, + "step": 23240 + }, + { + "epoch": 20.908273381294965, + "grad_norm": 0.4418695569038391, + "learning_rate": 7.196230517963491e-05, + "loss": 0.0204, + "step": 23250 + }, + { + "action_loss": 0.031053727492690086, + "epoch": 20.908273381294965, + "step": 23250 + }, + { + "epoch": 20.908273381294965, + "step": 23250, + "torque_loss": 0.17252886295318604 + }, + { + "epoch": 20.908273381294965, + "force_loss": 0.021286115050315857, + "step": 23250 + }, + { + "epoch": 20.91726618705036, + "grad_norm": 0.2474447786808014, + "learning_rate": 7.193754480600615e-05, + "loss": 0.0203, + "step": 23260 + }, + { + "action_loss": 0.0039863549172878265, + "epoch": 20.91726618705036, + "step": 23260 + }, + { + "epoch": 20.91726618705036, + "step": 23260, + "torque_loss": 0.11533962935209274 + }, + { + "epoch": 20.91726618705036, + "force_loss": 0.0024454023223370314, + "step": 23260 + }, + { + "epoch": 20.926258992805757, + "grad_norm": 0.31433430314064026, + "learning_rate": 7.19127777683305e-05, + "loss": 0.0303, + "step": 23270 + }, + { + "action_loss": 0.002052891766652465, + "epoch": 20.926258992805757, + "step": 23270 + }, + { + "epoch": 20.926258992805757, + "step": 23270, + "torque_loss": 0.06688390672206879 + }, + { + "epoch": 20.926258992805757, + "force_loss": 0.0032323088962584734, + "step": 23270 + }, + { + "epoch": 20.93525179856115, + "grad_norm": 0.16507881879806519, + "learning_rate": 7.188800407413156e-05, + "loss": 0.0169, + "step": 23280 + }, + { + "action_loss": 0.002124940976500511, + "epoch": 20.93525179856115, + "step": 23280 + }, + { + "epoch": 20.93525179856115, + "step": 23280, + "torque_loss": 0.11510109901428223 + }, + { + "epoch": 20.93525179856115, + "force_loss": 0.0015078651485964656, + "step": 23280 + }, + { + "epoch": 20.944244604316548, + "grad_norm": 0.1809292882680893, + "learning_rate": 7.186322373093489e-05, + "loss": 0.0214, + "step": 23290 + }, + { + "action_loss": 0.002750733867287636, + "epoch": 20.944244604316548, + "step": 23290 + }, + { + "epoch": 20.944244604316548, + "step": 23290, + "torque_loss": 0.1321621686220169 + }, + { + "epoch": 20.944244604316548, + "force_loss": 0.003203033236786723, + "step": 23290 + }, + { + "epoch": 20.953237410071942, + "grad_norm": 0.420011967420578, + "learning_rate": 7.18384367462681e-05, + "loss": 0.0183, + "step": 23300 + }, + { + "action_loss": 0.011628490872681141, + "epoch": 20.953237410071942, + "step": 23300 + }, + { + "epoch": 20.953237410071942, + "step": 23300, + "torque_loss": 0.1385202556848526 + }, + { + "epoch": 20.953237410071942, + "force_loss": 0.009227244183421135, + "step": 23300 + }, + { + "epoch": 20.96223021582734, + "grad_norm": 0.31038522720336914, + "learning_rate": 7.181364312766085e-05, + "loss": 0.0212, + "step": 23310 + }, + { + "action_loss": 0.006483044475317001, + "epoch": 20.96223021582734, + "step": 23310 + }, + { + "epoch": 20.96223021582734, + "step": 23310, + "torque_loss": 0.20367340743541718 + }, + { + "epoch": 20.96223021582734, + "force_loss": 0.0034793762024492025, + "step": 23310 + }, + { + "epoch": 20.971223021582734, + "grad_norm": 0.2559487521648407, + "learning_rate": 7.178884288264477e-05, + "loss": 0.0183, + "step": 23320 + }, + { + "action_loss": 0.011040893383324146, + "epoch": 20.971223021582734, + "step": 23320 + }, + { + "epoch": 20.971223021582734, + "step": 23320, + "torque_loss": 0.156141459941864 + }, + { + "epoch": 20.971223021582734, + "force_loss": 0.011167776770889759, + "step": 23320 + }, + { + "epoch": 20.980215827338128, + "grad_norm": 0.4533368945121765, + "learning_rate": 7.176403601875353e-05, + "loss": 0.0226, + "step": 23330 + }, + { + "action_loss": 0.003421274945139885, + "epoch": 20.980215827338128, + "step": 23330 + }, + { + "epoch": 20.980215827338128, + "step": 23330, + "torque_loss": 0.1442335546016693 + }, + { + "epoch": 20.980215827338128, + "force_loss": 0.0024091105442494154, + "step": 23330 + }, + { + "epoch": 20.989208633093526, + "grad_norm": 0.3069468140602112, + "learning_rate": 7.173922254352279e-05, + "loss": 0.0251, + "step": 23340 + }, + { + "action_loss": 0.00260400609113276, + "epoch": 20.989208633093526, + "step": 23340 + }, + { + "epoch": 20.989208633093526, + "step": 23340, + "torque_loss": 0.13419514894485474 + }, + { + "epoch": 20.989208633093526, + "force_loss": 0.0029714603442698717, + "step": 23340 + }, + { + "epoch": 20.99820143884892, + "grad_norm": 0.2399994432926178, + "learning_rate": 7.171440246449024e-05, + "loss": 0.0196, + "step": 23350 + }, + { + "action_loss": 0.005962350871413946, + "epoch": 20.99820143884892, + "step": 23350 + }, + { + "epoch": 20.99820143884892, + "step": 23350, + "torque_loss": 0.13464462757110596 + }, + { + "epoch": 20.99820143884892, + "force_loss": 0.0050678919069468975, + "step": 23350 + }, + { + "epoch": 21.007194244604317, + "grad_norm": 0.19311892986297607, + "learning_rate": 7.168957578919555e-05, + "loss": 0.0176, + "step": 23360 + }, + { + "action_loss": 0.003959774039685726, + "epoch": 21.007194244604317, + "step": 23360 + }, + { + "epoch": 21.007194244604317, + "step": 23360, + "torque_loss": 0.1621791124343872 + }, + { + "epoch": 21.007194244604317, + "force_loss": 0.004346047528088093, + "step": 23360 + }, + { + "epoch": 21.01618705035971, + "grad_norm": 0.24874347448349, + "learning_rate": 7.16647425251804e-05, + "loss": 0.0214, + "step": 23370 + }, + { + "action_loss": 0.00828962866216898, + "epoch": 21.01618705035971, + "step": 23370 + }, + { + "epoch": 21.01618705035971, + "step": 23370, + "torque_loss": 0.12503741681575775 + }, + { + "epoch": 21.01618705035971, + "force_loss": 0.0070351199246943, + "step": 23370 + }, + { + "epoch": 21.02517985611511, + "grad_norm": 0.35728171467781067, + "learning_rate": 7.163990267998852e-05, + "loss": 0.023, + "step": 23380 + }, + { + "action_loss": 0.00807653833180666, + "epoch": 21.02517985611511, + "step": 23380 + }, + { + "epoch": 21.02517985611511, + "step": 23380, + "torque_loss": 0.1355939358472824 + }, + { + "epoch": 21.02517985611511, + "force_loss": 0.004546436946839094, + "step": 23380 + }, + { + "epoch": 21.034172661870503, + "grad_norm": 0.19132237136363983, + "learning_rate": 7.161505626116556e-05, + "loss": 0.0229, + "step": 23390 + }, + { + "action_loss": 0.003996682818979025, + "epoch": 21.034172661870503, + "step": 23390 + }, + { + "epoch": 21.034172661870503, + "step": 23390, + "torque_loss": 0.13249383866786957 + }, + { + "epoch": 21.034172661870503, + "force_loss": 0.002313007367774844, + "step": 23390 + }, + { + "epoch": 21.0431654676259, + "grad_norm": 0.63074791431427, + "learning_rate": 7.159020327625923e-05, + "loss": 0.0217, + "step": 23400 + }, + { + "action_loss": 0.013399988412857056, + "epoch": 21.0431654676259, + "step": 23400 + }, + { + "epoch": 21.0431654676259, + "step": 23400, + "torque_loss": 0.1996813416481018 + }, + { + "epoch": 21.0431654676259, + "force_loss": 0.017626948654651642, + "step": 23400 + }, + { + "epoch": 21.052158273381295, + "grad_norm": 0.4147021472454071, + "learning_rate": 7.15653437328192e-05, + "loss": 0.0298, + "step": 23410 + }, + { + "action_loss": 0.004978652112185955, + "epoch": 21.052158273381295, + "step": 23410 + }, + { + "epoch": 21.052158273381295, + "step": 23410, + "torque_loss": 0.13333041965961456 + }, + { + "epoch": 21.052158273381295, + "force_loss": 0.003872217610478401, + "step": 23410 + }, + { + "epoch": 21.06115107913669, + "grad_norm": 0.2319970726966858, + "learning_rate": 7.154047763839713e-05, + "loss": 0.0217, + "step": 23420 + }, + { + "action_loss": 0.002744711237028241, + "epoch": 21.06115107913669, + "step": 23420 + }, + { + "epoch": 21.06115107913669, + "step": 23420, + "torque_loss": 0.10988708585500717 + }, + { + "epoch": 21.06115107913669, + "force_loss": 0.0029605224262923002, + "step": 23420 + }, + { + "epoch": 21.070143884892087, + "grad_norm": 0.26481521129608154, + "learning_rate": 7.15156050005467e-05, + "loss": 0.0183, + "step": 23430 + }, + { + "action_loss": 0.009172564372420311, + "epoch": 21.070143884892087, + "step": 23430 + }, + { + "epoch": 21.070143884892087, + "step": 23430, + "torque_loss": 0.11302395910024643 + }, + { + "epoch": 21.070143884892087, + "force_loss": 0.015311741270124912, + "step": 23430 + }, + { + "epoch": 21.07913669064748, + "grad_norm": 0.360005646944046, + "learning_rate": 7.149072582682357e-05, + "loss": 0.0201, + "step": 23440 + }, + { + "action_loss": 0.013230375945568085, + "epoch": 21.07913669064748, + "step": 23440 + }, + { + "epoch": 21.07913669064748, + "step": 23440, + "torque_loss": 0.09906294196844101 + }, + { + "epoch": 21.07913669064748, + "force_loss": 0.013757321052253246, + "step": 23440 + }, + { + "epoch": 21.08812949640288, + "grad_norm": 0.1836540400981903, + "learning_rate": 7.146584012478535e-05, + "loss": 0.0181, + "step": 23450 + }, + { + "action_loss": 0.01820623129606247, + "epoch": 21.08812949640288, + "step": 23450 + }, + { + "epoch": 21.08812949640288, + "step": 23450, + "torque_loss": 0.10784520953893661 + }, + { + "epoch": 21.08812949640288, + "force_loss": 0.025196099653840065, + "step": 23450 + }, + { + "epoch": 21.097122302158272, + "grad_norm": 0.2669019401073456, + "learning_rate": 7.144094790199169e-05, + "loss": 0.021, + "step": 23460 + }, + { + "action_loss": 0.007781012449413538, + "epoch": 21.097122302158272, + "step": 23460 + }, + { + "epoch": 21.097122302158272, + "step": 23460, + "torque_loss": 0.11886411905288696 + }, + { + "epoch": 21.097122302158272, + "force_loss": 0.012984029948711395, + "step": 23460 + }, + { + "epoch": 21.10611510791367, + "grad_norm": 0.2791656255722046, + "learning_rate": 7.141604916600415e-05, + "loss": 0.02, + "step": 23470 + }, + { + "action_loss": 0.008043314330279827, + "epoch": 21.10611510791367, + "step": 23470 + }, + { + "epoch": 21.10611510791367, + "step": 23470, + "torque_loss": 0.20208413898944855 + }, + { + "epoch": 21.10611510791367, + "force_loss": 0.007641212549060583, + "step": 23470 + }, + { + "epoch": 21.115107913669064, + "grad_norm": 0.27927857637405396, + "learning_rate": 7.139114392438635e-05, + "loss": 0.0193, + "step": 23480 + }, + { + "action_loss": 0.026578575372695923, + "epoch": 21.115107913669064, + "step": 23480 + }, + { + "epoch": 21.115107913669064, + "step": 23480, + "torque_loss": 0.11552394181489944 + }, + { + "epoch": 21.115107913669064, + "force_loss": 0.02971476875245571, + "step": 23480 + }, + { + "epoch": 21.12410071942446, + "grad_norm": 0.23018686473369598, + "learning_rate": 7.136623218470382e-05, + "loss": 0.0218, + "step": 23490 + }, + { + "action_loss": 0.003139466280117631, + "epoch": 21.12410071942446, + "step": 23490 + }, + { + "epoch": 21.12410071942446, + "step": 23490, + "torque_loss": 0.14302611351013184 + }, + { + "epoch": 21.12410071942446, + "force_loss": 0.0020197771955281496, + "step": 23490 + }, + { + "epoch": 21.133093525179856, + "grad_norm": 0.23918592929840088, + "learning_rate": 7.13413139545241e-05, + "loss": 0.0201, + "step": 23500 + }, + { + "action_loss": 0.013414248824119568, + "epoch": 21.133093525179856, + "step": 23500 + }, + { + "epoch": 21.133093525179856, + "step": 23500, + "torque_loss": 0.15465418994426727 + }, + { + "epoch": 21.133093525179856, + "force_loss": 0.007328353356570005, + "step": 23500 + }, + { + "epoch": 21.142086330935253, + "grad_norm": 0.19082936644554138, + "learning_rate": 7.131638924141668e-05, + "loss": 0.024, + "step": 23510 + }, + { + "action_loss": 0.0034979190677404404, + "epoch": 21.142086330935253, + "step": 23510 + }, + { + "epoch": 21.142086330935253, + "step": 23510, + "torque_loss": 0.07275998592376709 + }, + { + "epoch": 21.142086330935253, + "force_loss": 0.006297590211033821, + "step": 23510 + }, + { + "epoch": 21.151079136690647, + "grad_norm": 0.5941745638847351, + "learning_rate": 7.129145805295304e-05, + "loss": 0.0221, + "step": 23520 + }, + { + "action_loss": 0.002326295478269458, + "epoch": 21.151079136690647, + "step": 23520 + }, + { + "epoch": 21.151079136690647, + "step": 23520, + "torque_loss": 0.10932417958974838 + }, + { + "epoch": 21.151079136690647, + "force_loss": 0.0025577836204320192, + "step": 23520 + }, + { + "epoch": 21.16007194244604, + "grad_norm": 0.10179499536752701, + "learning_rate": 7.126652039670661e-05, + "loss": 0.0155, + "step": 23530 + }, + { + "action_loss": 0.004577077459543943, + "epoch": 21.16007194244604, + "step": 23530 + }, + { + "epoch": 21.16007194244604, + "step": 23530, + "torque_loss": 0.12532149255275726 + }, + { + "epoch": 21.16007194244604, + "force_loss": 0.0070156678557395935, + "step": 23530 + }, + { + "epoch": 21.16906474820144, + "grad_norm": 0.256354957818985, + "learning_rate": 7.124157628025278e-05, + "loss": 0.02, + "step": 23540 + }, + { + "action_loss": 0.006174854468554258, + "epoch": 21.16906474820144, + "step": 23540 + }, + { + "epoch": 21.16906474820144, + "step": 23540, + "torque_loss": 0.11213638633489609 + }, + { + "epoch": 21.16906474820144, + "force_loss": 0.008580025285482407, + "step": 23540 + }, + { + "epoch": 21.178057553956833, + "grad_norm": 0.3622494041919708, + "learning_rate": 7.121662571116894e-05, + "loss": 0.0206, + "step": 23550 + }, + { + "action_loss": 0.00479300320148468, + "epoch": 21.178057553956833, + "step": 23550 + }, + { + "epoch": 21.178057553956833, + "step": 23550, + "torque_loss": 0.11562427133321762 + }, + { + "epoch": 21.178057553956833, + "force_loss": 0.002547664800658822, + "step": 23550 + }, + { + "epoch": 21.18705035971223, + "grad_norm": 0.44891542196273804, + "learning_rate": 7.119166869703441e-05, + "loss": 0.0166, + "step": 23560 + }, + { + "action_loss": 0.0022924719378352165, + "epoch": 21.18705035971223, + "step": 23560 + }, + { + "epoch": 21.18705035971223, + "step": 23560, + "torque_loss": 0.10936244577169418 + }, + { + "epoch": 21.18705035971223, + "force_loss": 0.004271793644875288, + "step": 23560 + }, + { + "epoch": 21.196043165467625, + "grad_norm": 0.26346316933631897, + "learning_rate": 7.116670524543044e-05, + "loss": 0.0229, + "step": 23570 + }, + { + "action_loss": 0.006651930045336485, + "epoch": 21.196043165467625, + "step": 23570 + }, + { + "epoch": 21.196043165467625, + "step": 23570, + "torque_loss": 0.1137520894408226 + }, + { + "epoch": 21.196043165467625, + "force_loss": 0.008779712952673435, + "step": 23570 + }, + { + "epoch": 21.205035971223023, + "grad_norm": 0.5915934443473816, + "learning_rate": 7.114173536394032e-05, + "loss": 0.0218, + "step": 23580 + }, + { + "action_loss": 0.005308545660227537, + "epoch": 21.205035971223023, + "step": 23580 + }, + { + "epoch": 21.205035971223023, + "step": 23580, + "torque_loss": 0.11905542761087418 + }, + { + "epoch": 21.205035971223023, + "force_loss": 0.00404251366853714, + "step": 23580 + }, + { + "epoch": 21.214028776978417, + "grad_norm": 0.23304638266563416, + "learning_rate": 7.111675906014917e-05, + "loss": 0.0228, + "step": 23590 + }, + { + "action_loss": 0.010965664871037006, + "epoch": 21.214028776978417, + "step": 23590 + }, + { + "epoch": 21.214028776978417, + "step": 23590, + "torque_loss": 0.1191180869936943 + }, + { + "epoch": 21.214028776978417, + "force_loss": 0.012466054409742355, + "step": 23590 + }, + { + "epoch": 21.223021582733814, + "grad_norm": 0.23632240295410156, + "learning_rate": 7.109177634164421e-05, + "loss": 0.0191, + "step": 23600 + }, + { + "action_loss": 0.002272161887958646, + "epoch": 21.223021582733814, + "step": 23600 + }, + { + "epoch": 21.223021582733814, + "step": 23600, + "torque_loss": 0.08585027605295181 + }, + { + "epoch": 21.223021582733814, + "force_loss": 0.0030193133279681206, + "step": 23600 + }, + { + "epoch": 21.23201438848921, + "grad_norm": 0.2457566112279892, + "learning_rate": 7.106678721601449e-05, + "loss": 0.0184, + "step": 23610 + }, + { + "action_loss": 0.009018865413963795, + "epoch": 21.23201438848921, + "step": 23610 + }, + { + "epoch": 21.23201438848921, + "step": 23610, + "torque_loss": 0.11562643200159073 + }, + { + "epoch": 21.23201438848921, + "force_loss": 0.009257269091904163, + "step": 23610 + }, + { + "epoch": 21.241007194244606, + "grad_norm": 0.34829962253570557, + "learning_rate": 7.104179169085103e-05, + "loss": 0.0218, + "step": 23620 + }, + { + "action_loss": 0.00873131025582552, + "epoch": 21.241007194244606, + "step": 23620 + }, + { + "epoch": 21.241007194244606, + "step": 23620, + "torque_loss": 0.11321505904197693 + }, + { + "epoch": 21.241007194244606, + "force_loss": 0.010424568317830563, + "step": 23620 + }, + { + "epoch": 21.25, + "grad_norm": 0.3373963534832001, + "learning_rate": 7.101678977374683e-05, + "loss": 0.0265, + "step": 23630 + }, + { + "action_loss": 0.003254042938351631, + "epoch": 21.25, + "step": 23630 + }, + { + "epoch": 21.25, + "step": 23630, + "torque_loss": 0.126604363322258 + }, + { + "epoch": 21.25, + "force_loss": 0.0015665879473090172, + "step": 23630 + }, + { + "epoch": 21.258992805755394, + "grad_norm": 0.31189942359924316, + "learning_rate": 7.099178147229685e-05, + "loss": 0.0205, + "step": 23640 + }, + { + "action_loss": 0.011472395621240139, + "epoch": 21.258992805755394, + "step": 23640 + }, + { + "epoch": 21.258992805755394, + "step": 23640, + "torque_loss": 0.13908208906650543 + }, + { + "epoch": 21.258992805755394, + "force_loss": 0.008833193220198154, + "step": 23640 + }, + { + "epoch": 21.26798561151079, + "grad_norm": 0.1859862357378006, + "learning_rate": 7.096676679409789e-05, + "loss": 0.022, + "step": 23650 + }, + { + "action_loss": 0.015845075249671936, + "epoch": 21.26798561151079, + "step": 23650 + }, + { + "epoch": 21.26798561151079, + "step": 23650, + "torque_loss": 0.13396631181240082 + }, + { + "epoch": 21.26798561151079, + "force_loss": 0.021496951580047607, + "step": 23650 + }, + { + "epoch": 21.276978417266186, + "grad_norm": 0.36624208092689514, + "learning_rate": 7.094174574674877e-05, + "loss": 0.0214, + "step": 23660 + }, + { + "action_loss": 0.005504108965396881, + "epoch": 21.276978417266186, + "step": 23660 + }, + { + "epoch": 21.276978417266186, + "step": 23660, + "torque_loss": 0.14271610975265503 + }, + { + "epoch": 21.276978417266186, + "force_loss": 0.004780036862939596, + "step": 23660 + }, + { + "epoch": 21.285971223021583, + "grad_norm": 0.44943949580192566, + "learning_rate": 7.091671833785025e-05, + "loss": 0.018, + "step": 23670 + }, + { + "action_loss": 0.0040962230414152145, + "epoch": 21.285971223021583, + "step": 23670 + }, + { + "epoch": 21.285971223021583, + "step": 23670, + "torque_loss": 0.11015243083238602 + }, + { + "epoch": 21.285971223021583, + "force_loss": 0.004503512289375067, + "step": 23670 + }, + { + "epoch": 21.294964028776977, + "grad_norm": 0.4552011489868164, + "learning_rate": 7.089168457500493e-05, + "loss": 0.0211, + "step": 23680 + }, + { + "action_loss": 0.006273546721786261, + "epoch": 21.294964028776977, + "step": 23680 + }, + { + "epoch": 21.294964028776977, + "step": 23680, + "torque_loss": 0.12740683555603027 + }, + { + "epoch": 21.294964028776977, + "force_loss": 0.00599891459569335, + "step": 23680 + }, + { + "epoch": 21.303956834532375, + "grad_norm": 0.15325917303562164, + "learning_rate": 7.086664446581747e-05, + "loss": 0.0256, + "step": 23690 + }, + { + "action_loss": 0.004658068064600229, + "epoch": 21.303956834532375, + "step": 23690 + }, + { + "epoch": 21.303956834532375, + "step": 23690, + "torque_loss": 0.18447761237621307 + }, + { + "epoch": 21.303956834532375, + "force_loss": 0.008792830631136894, + "step": 23690 + }, + { + "epoch": 21.31294964028777, + "grad_norm": 0.21590489149093628, + "learning_rate": 7.084159801789438e-05, + "loss": 0.0201, + "step": 23700 + }, + { + "action_loss": 0.0029024232644587755, + "epoch": 21.31294964028777, + "step": 23700 + }, + { + "epoch": 21.31294964028777, + "step": 23700, + "torque_loss": 0.11336324363946915 + }, + { + "epoch": 21.31294964028777, + "force_loss": 0.001504159881733358, + "step": 23700 + }, + { + "epoch": 21.321942446043167, + "grad_norm": 0.22023874521255493, + "learning_rate": 7.081654523884411e-05, + "loss": 0.0222, + "step": 23710 + }, + { + "action_loss": 0.003117077983915806, + "epoch": 21.321942446043167, + "step": 23710 + }, + { + "epoch": 21.321942446043167, + "step": 23710, + "torque_loss": 0.11545255780220032 + }, + { + "epoch": 21.321942446043167, + "force_loss": 0.0031235376372933388, + "step": 23710 + }, + { + "epoch": 21.33093525179856, + "grad_norm": 0.7098838686943054, + "learning_rate": 7.0791486136277e-05, + "loss": 0.0235, + "step": 23720 + }, + { + "action_loss": 0.004025460220873356, + "epoch": 21.33093525179856, + "step": 23720 + }, + { + "epoch": 21.33093525179856, + "step": 23720, + "torque_loss": 0.1383565217256546 + }, + { + "epoch": 21.33093525179856, + "force_loss": 0.003235556883737445, + "step": 23720 + }, + { + "epoch": 21.33992805755396, + "grad_norm": 0.29444053769111633, + "learning_rate": 7.07664207178054e-05, + "loss": 0.0237, + "step": 23730 + }, + { + "action_loss": 0.003561244113370776, + "epoch": 21.33992805755396, + "step": 23730 + }, + { + "epoch": 21.33992805755396, + "step": 23730, + "torque_loss": 0.14339353144168854 + }, + { + "epoch": 21.33992805755396, + "force_loss": 0.005450025200843811, + "step": 23730 + }, + { + "epoch": 21.348920863309353, + "grad_norm": 0.42307940125465393, + "learning_rate": 7.074134899104345e-05, + "loss": 0.0229, + "step": 23740 + }, + { + "action_loss": 0.003302047261968255, + "epoch": 21.348920863309353, + "step": 23740 + }, + { + "epoch": 21.348920863309353, + "step": 23740, + "torque_loss": 0.15255039930343628 + }, + { + "epoch": 21.348920863309353, + "force_loss": 0.0042688664980232716, + "step": 23740 + }, + { + "epoch": 21.357913669064747, + "grad_norm": 0.19251471757888794, + "learning_rate": 7.071627096360735e-05, + "loss": 0.019, + "step": 23750 + }, + { + "action_loss": 0.003922722768038511, + "epoch": 21.357913669064747, + "step": 23750 + }, + { + "epoch": 21.357913669064747, + "step": 23750, + "torque_loss": 0.17961324751377106 + }, + { + "epoch": 21.357913669064747, + "force_loss": 0.008094881661236286, + "step": 23750 + }, + { + "epoch": 21.366906474820144, + "grad_norm": 0.1816745102405548, + "learning_rate": 7.069118664311511e-05, + "loss": 0.0202, + "step": 23760 + }, + { + "action_loss": 0.006597679108381271, + "epoch": 21.366906474820144, + "step": 23760 + }, + { + "epoch": 21.366906474820144, + "step": 23760, + "torque_loss": 0.09441333264112473 + }, + { + "epoch": 21.366906474820144, + "force_loss": 0.002418153453618288, + "step": 23760 + }, + { + "epoch": 21.37589928057554, + "grad_norm": 0.29194214940071106, + "learning_rate": 7.06660960371867e-05, + "loss": 0.0191, + "step": 23770 + }, + { + "action_loss": 0.008289594203233719, + "epoch": 21.37589928057554, + "step": 23770 + }, + { + "epoch": 21.37589928057554, + "step": 23770, + "torque_loss": 0.13637441396713257 + }, + { + "epoch": 21.37589928057554, + "force_loss": 0.008279447443783283, + "step": 23770 + }, + { + "epoch": 21.384892086330936, + "grad_norm": 0.4021297097206116, + "learning_rate": 7.064099915344396e-05, + "loss": 0.025, + "step": 23780 + }, + { + "action_loss": 0.006939854472875595, + "epoch": 21.384892086330936, + "step": 23780 + }, + { + "epoch": 21.384892086330936, + "step": 23780, + "torque_loss": 0.12058931589126587 + }, + { + "epoch": 21.384892086330936, + "force_loss": 0.003337736474350095, + "step": 23780 + }, + { + "epoch": 21.39388489208633, + "grad_norm": 0.415184885263443, + "learning_rate": 7.061589599951066e-05, + "loss": 0.0225, + "step": 23790 + }, + { + "action_loss": 0.003426339477300644, + "epoch": 21.39388489208633, + "step": 23790 + }, + { + "epoch": 21.39388489208633, + "step": 23790, + "torque_loss": 0.1108291745185852 + }, + { + "epoch": 21.39388489208633, + "force_loss": 0.004369981121271849, + "step": 23790 + }, + { + "epoch": 21.402877697841728, + "grad_norm": 0.2257554829120636, + "learning_rate": 7.05907865830125e-05, + "loss": 0.0271, + "step": 23800 + }, + { + "action_loss": 0.00312385824508965, + "epoch": 21.402877697841728, + "step": 23800 + }, + { + "epoch": 21.402877697841728, + "step": 23800, + "torque_loss": 0.1279628425836563 + }, + { + "epoch": 21.402877697841728, + "force_loss": 0.004388665780425072, + "step": 23800 + }, + { + "epoch": 21.41187050359712, + "grad_norm": 0.389212429523468, + "learning_rate": 7.056567091157703e-05, + "loss": 0.0192, + "step": 23810 + }, + { + "action_loss": 0.010184607468545437, + "epoch": 21.41187050359712, + "step": 23810 + }, + { + "epoch": 21.41187050359712, + "step": 23810, + "torque_loss": 0.12354090064764023 + }, + { + "epoch": 21.41187050359712, + "force_loss": 0.0047220769338309765, + "step": 23810 + }, + { + "epoch": 21.42086330935252, + "grad_norm": 0.23627988994121552, + "learning_rate": 7.054054899283375e-05, + "loss": 0.0226, + "step": 23820 + }, + { + "action_loss": 0.0036707476247102022, + "epoch": 21.42086330935252, + "step": 23820 + }, + { + "epoch": 21.42086330935252, + "step": 23820, + "torque_loss": 0.12478425353765488 + }, + { + "epoch": 21.42086330935252, + "force_loss": 0.004485785495489836, + "step": 23820 + }, + { + "epoch": 21.429856115107913, + "grad_norm": 0.16367127001285553, + "learning_rate": 7.051542083441403e-05, + "loss": 0.0197, + "step": 23830 + }, + { + "action_loss": 0.0046365465968847275, + "epoch": 21.429856115107913, + "step": 23830 + }, + { + "epoch": 21.429856115107913, + "step": 23830, + "torque_loss": 0.11964243650436401 + }, + { + "epoch": 21.429856115107913, + "force_loss": 0.003397740190848708, + "step": 23830 + }, + { + "epoch": 21.43884892086331, + "grad_norm": 0.4269733428955078, + "learning_rate": 7.049028644395113e-05, + "loss": 0.0211, + "step": 23840 + }, + { + "action_loss": 0.002511439146474004, + "epoch": 21.43884892086331, + "step": 23840 + }, + { + "epoch": 21.43884892086331, + "step": 23840, + "torque_loss": 0.12440451234579086 + }, + { + "epoch": 21.43884892086331, + "force_loss": 0.005141694564372301, + "step": 23840 + }, + { + "epoch": 21.447841726618705, + "grad_norm": 0.18345722556114197, + "learning_rate": 7.046514582908024e-05, + "loss": 0.0174, + "step": 23850 + }, + { + "action_loss": 0.004589290823787451, + "epoch": 21.447841726618705, + "step": 23850 + }, + { + "epoch": 21.447841726618705, + "step": 23850, + "torque_loss": 0.12272452563047409 + }, + { + "epoch": 21.447841726618705, + "force_loss": 0.004211971070617437, + "step": 23850 + }, + { + "epoch": 21.4568345323741, + "grad_norm": 0.3202456533908844, + "learning_rate": 7.043999899743838e-05, + "loss": 0.0198, + "step": 23860 + }, + { + "action_loss": 0.010315182618796825, + "epoch": 21.4568345323741, + "step": 23860 + }, + { + "epoch": 21.4568345323741, + "step": 23860, + "torque_loss": 0.14328861236572266 + }, + { + "epoch": 21.4568345323741, + "force_loss": 0.01128691527992487, + "step": 23860 + }, + { + "epoch": 21.465827338129497, + "grad_norm": 0.18338774144649506, + "learning_rate": 7.041484595666451e-05, + "loss": 0.0243, + "step": 23870 + }, + { + "action_loss": 0.005853708367794752, + "epoch": 21.465827338129497, + "step": 23870 + }, + { + "epoch": 21.465827338129497, + "step": 23870, + "torque_loss": 0.14396049082279205 + }, + { + "epoch": 21.465827338129497, + "force_loss": 0.00822159182280302, + "step": 23870 + }, + { + "epoch": 21.47482014388489, + "grad_norm": 0.31921061873435974, + "learning_rate": 7.038968671439948e-05, + "loss": 0.0222, + "step": 23880 + }, + { + "action_loss": 0.008314737118780613, + "epoch": 21.47482014388489, + "step": 23880 + }, + { + "epoch": 21.47482014388489, + "step": 23880, + "torque_loss": 0.12615780532360077 + }, + { + "epoch": 21.47482014388489, + "force_loss": 0.010880435816943645, + "step": 23880 + }, + { + "epoch": 21.48381294964029, + "grad_norm": 0.18777592480182648, + "learning_rate": 7.036452127828596e-05, + "loss": 0.0225, + "step": 23890 + }, + { + "action_loss": 0.0036884669680148363, + "epoch": 21.48381294964029, + "step": 23890 + }, + { + "epoch": 21.48381294964029, + "step": 23890, + "torque_loss": 0.12386516481637955 + }, + { + "epoch": 21.48381294964029, + "force_loss": 0.0038495806511491537, + "step": 23890 + }, + { + "epoch": 21.492805755395683, + "grad_norm": 0.3598543703556061, + "learning_rate": 7.033934965596859e-05, + "loss": 0.0177, + "step": 23900 + }, + { + "action_loss": 0.010872217826545238, + "epoch": 21.492805755395683, + "step": 23900 + }, + { + "epoch": 21.492805755395683, + "step": 23900, + "torque_loss": 0.1956467181444168 + }, + { + "epoch": 21.492805755395683, + "force_loss": 0.010237191803753376, + "step": 23900 + }, + { + "epoch": 21.50179856115108, + "grad_norm": 0.19021499156951904, + "learning_rate": 7.031417185509381e-05, + "loss": 0.0211, + "step": 23910 + }, + { + "action_loss": 0.006348499562591314, + "epoch": 21.50179856115108, + "step": 23910 + }, + { + "epoch": 21.50179856115108, + "step": 23910, + "torque_loss": 0.12852859497070312 + }, + { + "epoch": 21.50179856115108, + "force_loss": 0.0033777987118810415, + "step": 23910 + }, + { + "epoch": 21.510791366906474, + "grad_norm": 0.22508279979228973, + "learning_rate": 7.028898788331e-05, + "loss": 0.0199, + "step": 23920 + }, + { + "action_loss": 0.05363455042243004, + "epoch": 21.510791366906474, + "step": 23920 + }, + { + "epoch": 21.510791366906474, + "step": 23920, + "torque_loss": 0.18643148243427277 + }, + { + "epoch": 21.510791366906474, + "force_loss": 0.06643527001142502, + "step": 23920 + }, + { + "epoch": 21.519784172661872, + "grad_norm": 0.13674558699131012, + "learning_rate": 7.026379774826736e-05, + "loss": 0.0272, + "step": 23930 + }, + { + "action_loss": 0.007402696181088686, + "epoch": 21.519784172661872, + "step": 23930 + }, + { + "epoch": 21.519784172661872, + "step": 23930, + "torque_loss": 0.16087555885314941 + }, + { + "epoch": 21.519784172661872, + "force_loss": 0.016275329515337944, + "step": 23930 + }, + { + "epoch": 21.528776978417266, + "grad_norm": 0.3391278088092804, + "learning_rate": 7.0238601457618e-05, + "loss": 0.0177, + "step": 23940 + }, + { + "action_loss": 0.014212819747626781, + "epoch": 21.528776978417266, + "step": 23940 + }, + { + "epoch": 21.528776978417266, + "step": 23940, + "torque_loss": 0.1407519429922104 + }, + { + "epoch": 21.528776978417266, + "force_loss": 0.008294091559946537, + "step": 23940 + }, + { + "epoch": 21.53776978417266, + "grad_norm": 0.24297048151493073, + "learning_rate": 7.02133990190159e-05, + "loss": 0.0201, + "step": 23950 + }, + { + "action_loss": 0.0034887150395661592, + "epoch": 21.53776978417266, + "step": 23950 + }, + { + "epoch": 21.53776978417266, + "step": 23950, + "torque_loss": 0.13968603312969208 + }, + { + "epoch": 21.53776978417266, + "force_loss": 0.002215509070083499, + "step": 23950 + }, + { + "epoch": 21.546762589928058, + "grad_norm": 0.3339236378669739, + "learning_rate": 7.018819044011687e-05, + "loss": 0.021, + "step": 23960 + }, + { + "action_loss": 0.005108047276735306, + "epoch": 21.546762589928058, + "step": 23960 + }, + { + "epoch": 21.546762589928058, + "step": 23960, + "torque_loss": 0.1285800188779831 + }, + { + "epoch": 21.546762589928058, + "force_loss": 0.00446784682571888, + "step": 23960 + }, + { + "epoch": 21.555755395683452, + "grad_norm": 0.3731296956539154, + "learning_rate": 7.016297572857863e-05, + "loss": 0.0188, + "step": 23970 + }, + { + "action_loss": 0.0029835563618689775, + "epoch": 21.555755395683452, + "step": 23970 + }, + { + "epoch": 21.555755395683452, + "step": 23970, + "torque_loss": 0.11705994606018066 + }, + { + "epoch": 21.555755395683452, + "force_loss": 0.0021975317504256964, + "step": 23970 + }, + { + "epoch": 21.56474820143885, + "grad_norm": 0.212856262922287, + "learning_rate": 7.013775489206072e-05, + "loss": 0.0208, + "step": 23980 + }, + { + "action_loss": 0.010328707285225391, + "epoch": 21.56474820143885, + "step": 23980 + }, + { + "epoch": 21.56474820143885, + "step": 23980, + "torque_loss": 0.11490169167518616 + }, + { + "epoch": 21.56474820143885, + "force_loss": 0.012079688720405102, + "step": 23980 + }, + { + "epoch": 21.573741007194243, + "grad_norm": 0.29255232214927673, + "learning_rate": 7.01125279382246e-05, + "loss": 0.0175, + "step": 23990 + }, + { + "action_loss": 0.013218914158642292, + "epoch": 21.573741007194243, + "step": 23990 + }, + { + "epoch": 21.573741007194243, + "step": 23990, + "torque_loss": 0.16297321021556854 + }, + { + "epoch": 21.573741007194243, + "force_loss": 0.013694614171981812, + "step": 23990 + }, + { + "epoch": 21.58273381294964, + "grad_norm": 0.39558789134025574, + "learning_rate": 7.008729487473351e-05, + "loss": 0.0211, + "step": 24000 + }, + { + "action_loss": 0.005202788393944502, + "epoch": 21.58273381294964, + "step": 24000 + }, + { + "epoch": 21.58273381294964, + "step": 24000, + "torque_loss": 0.12038284540176392 + }, + { + "epoch": 21.58273381294964, + "force_loss": 0.008069656789302826, + "step": 24000 + }, + { + "epoch": 21.591726618705035, + "grad_norm": 0.3813171684741974, + "learning_rate": 7.006205570925263e-05, + "loss": 0.0233, + "step": 24010 + }, + { + "action_loss": 0.00404118187725544, + "epoch": 21.591726618705035, + "step": 24010 + }, + { + "epoch": 21.591726618705035, + "step": 24010, + "torque_loss": 0.15226443111896515 + }, + { + "epoch": 21.591726618705035, + "force_loss": 0.0024941328447312117, + "step": 24010 + }, + { + "epoch": 21.600719424460433, + "grad_norm": 0.2981605529785156, + "learning_rate": 7.003681044944892e-05, + "loss": 0.0205, + "step": 24020 + }, + { + "action_loss": 0.0040172552689909935, + "epoch": 21.600719424460433, + "step": 24020 + }, + { + "epoch": 21.600719424460433, + "step": 24020, + "torque_loss": 0.11741326004266739 + }, + { + "epoch": 21.600719424460433, + "force_loss": 0.0046316650696098804, + "step": 24020 + }, + { + "epoch": 21.609712230215827, + "grad_norm": 0.4226076304912567, + "learning_rate": 7.001155910299126e-05, + "loss": 0.0216, + "step": 24030 + }, + { + "action_loss": 0.017753953114151955, + "epoch": 21.609712230215827, + "step": 24030 + }, + { + "epoch": 21.609712230215827, + "step": 24030, + "torque_loss": 0.1803961545228958 + }, + { + "epoch": 21.609712230215827, + "force_loss": 0.02056589350104332, + "step": 24030 + }, + { + "epoch": 21.618705035971225, + "grad_norm": 0.2300824373960495, + "learning_rate": 6.99863016775503e-05, + "loss": 0.0261, + "step": 24040 + }, + { + "action_loss": 0.006227040663361549, + "epoch": 21.618705035971225, + "step": 24040 + }, + { + "epoch": 21.618705035971225, + "step": 24040, + "torque_loss": 0.11970778554677963 + }, + { + "epoch": 21.618705035971225, + "force_loss": 0.009214621037244797, + "step": 24040 + }, + { + "epoch": 21.62769784172662, + "grad_norm": 0.40676695108413696, + "learning_rate": 6.996103818079859e-05, + "loss": 0.0196, + "step": 24050 + }, + { + "action_loss": 0.008792642503976822, + "epoch": 21.62769784172662, + "step": 24050 + }, + { + "epoch": 21.62769784172662, + "step": 24050, + "torque_loss": 0.10972053557634354 + }, + { + "epoch": 21.62769784172662, + "force_loss": 0.005656917113810778, + "step": 24050 + }, + { + "epoch": 21.636690647482013, + "grad_norm": 0.506324291229248, + "learning_rate": 6.993576862041054e-05, + "loss": 0.0218, + "step": 24060 + }, + { + "action_loss": 0.0061348178423941135, + "epoch": 21.636690647482013, + "step": 24060 + }, + { + "epoch": 21.636690647482013, + "step": 24060, + "torque_loss": 0.13718056678771973 + }, + { + "epoch": 21.636690647482013, + "force_loss": 0.00756865506991744, + "step": 24060 + }, + { + "epoch": 21.64568345323741, + "grad_norm": 0.4522571563720703, + "learning_rate": 6.991049300406235e-05, + "loss": 0.0179, + "step": 24070 + }, + { + "action_loss": 0.005923125892877579, + "epoch": 21.64568345323741, + "step": 24070 + }, + { + "epoch": 21.64568345323741, + "step": 24070, + "torque_loss": 0.14005731046199799 + }, + { + "epoch": 21.64568345323741, + "force_loss": 0.0036965280305594206, + "step": 24070 + }, + { + "epoch": 21.654676258992804, + "grad_norm": 0.21875688433647156, + "learning_rate": 6.988521133943209e-05, + "loss": 0.0202, + "step": 24080 + }, + { + "action_loss": 0.017261147499084473, + "epoch": 21.654676258992804, + "step": 24080 + }, + { + "epoch": 21.654676258992804, + "step": 24080, + "torque_loss": 0.1414811760187149 + }, + { + "epoch": 21.654676258992804, + "force_loss": 0.01806695945560932, + "step": 24080 + }, + { + "epoch": 21.663669064748202, + "grad_norm": 0.5208433866500854, + "learning_rate": 6.985992363419966e-05, + "loss": 0.0245, + "step": 24090 + }, + { + "action_loss": 0.0029069986194372177, + "epoch": 21.663669064748202, + "step": 24090 + }, + { + "epoch": 21.663669064748202, + "step": 24090, + "torque_loss": 0.09179780632257462 + }, + { + "epoch": 21.663669064748202, + "force_loss": 0.001528251334093511, + "step": 24090 + }, + { + "epoch": 21.672661870503596, + "grad_norm": 0.3127578794956207, + "learning_rate": 6.983462989604682e-05, + "loss": 0.021, + "step": 24100 + }, + { + "action_loss": 0.0032168340403586626, + "epoch": 21.672661870503596, + "step": 24100 + }, + { + "epoch": 21.672661870503596, + "step": 24100, + "torque_loss": 0.12450555711984634 + }, + { + "epoch": 21.672661870503596, + "force_loss": 0.002190846251323819, + "step": 24100 + }, + { + "epoch": 21.681654676258994, + "grad_norm": 0.3800390660762787, + "learning_rate": 6.980933013265709e-05, + "loss": 0.0177, + "step": 24110 + }, + { + "action_loss": 0.006540060043334961, + "epoch": 21.681654676258994, + "step": 24110 + }, + { + "epoch": 21.681654676258994, + "step": 24110, + "torque_loss": 0.13025234639644623 + }, + { + "epoch": 21.681654676258994, + "force_loss": 0.009451337158679962, + "step": 24110 + }, + { + "epoch": 21.690647482014388, + "grad_norm": 0.4404729902744293, + "learning_rate": 6.978402435171592e-05, + "loss": 0.0303, + "step": 24120 + }, + { + "action_loss": 0.007622348610311747, + "epoch": 21.690647482014388, + "step": 24120 + }, + { + "epoch": 21.690647482014388, + "step": 24120, + "torque_loss": 0.19697785377502441 + }, + { + "epoch": 21.690647482014388, + "force_loss": 0.01301037147641182, + "step": 24120 + }, + { + "epoch": 21.699640287769785, + "grad_norm": 0.25689589977264404, + "learning_rate": 6.975871256091052e-05, + "loss": 0.0232, + "step": 24130 + }, + { + "action_loss": 0.0025068081449717283, + "epoch": 21.699640287769785, + "step": 24130 + }, + { + "epoch": 21.699640287769785, + "step": 24130, + "torque_loss": 0.12582151591777802 + }, + { + "epoch": 21.699640287769785, + "force_loss": 0.002489696955308318, + "step": 24130 + }, + { + "epoch": 21.70863309352518, + "grad_norm": 0.24892547726631165, + "learning_rate": 6.973339476792995e-05, + "loss": 0.0201, + "step": 24140 + }, + { + "action_loss": 0.005375228356570005, + "epoch": 21.70863309352518, + "step": 24140 + }, + { + "epoch": 21.70863309352518, + "step": 24140, + "torque_loss": 0.18812493979930878 + }, + { + "epoch": 21.70863309352518, + "force_loss": 0.004086604341864586, + "step": 24140 + }, + { + "epoch": 21.717625899280577, + "grad_norm": 0.28377944231033325, + "learning_rate": 6.970807098046505e-05, + "loss": 0.0191, + "step": 24150 + }, + { + "action_loss": 0.0025983082596212626, + "epoch": 21.717625899280577, + "step": 24150 + }, + { + "epoch": 21.717625899280577, + "step": 24150, + "torque_loss": 0.12086061388254166 + }, + { + "epoch": 21.717625899280577, + "force_loss": 0.0032244424801319838, + "step": 24150 + }, + { + "epoch": 21.72661870503597, + "grad_norm": 0.4414898157119751, + "learning_rate": 6.968274120620858e-05, + "loss": 0.0214, + "step": 24160 + }, + { + "action_loss": 0.002598340855911374, + "epoch": 21.72661870503597, + "step": 24160 + }, + { + "epoch": 21.72661870503597, + "step": 24160, + "torque_loss": 0.15716147422790527 + }, + { + "epoch": 21.72661870503597, + "force_loss": 0.0021320506930351257, + "step": 24160 + }, + { + "epoch": 21.735611510791365, + "grad_norm": 0.23562584817409515, + "learning_rate": 6.965740545285499e-05, + "loss": 0.0184, + "step": 24170 + }, + { + "action_loss": 0.0031706802546977997, + "epoch": 21.735611510791365, + "step": 24170 + }, + { + "epoch": 21.735611510791365, + "step": 24170, + "torque_loss": 0.1128600463271141 + }, + { + "epoch": 21.735611510791365, + "force_loss": 0.004506824538111687, + "step": 24170 + }, + { + "epoch": 21.744604316546763, + "grad_norm": 0.16314929723739624, + "learning_rate": 6.963206372810068e-05, + "loss": 0.0187, + "step": 24180 + }, + { + "action_loss": 0.008436471223831177, + "epoch": 21.744604316546763, + "step": 24180 + }, + { + "epoch": 21.744604316546763, + "step": 24180, + "torque_loss": 0.11788768321275711 + }, + { + "epoch": 21.744604316546763, + "force_loss": 0.004664561245590448, + "step": 24180 + }, + { + "epoch": 21.753597122302157, + "grad_norm": 0.2003936767578125, + "learning_rate": 6.960671603964375e-05, + "loss": 0.023, + "step": 24190 + }, + { + "action_loss": 0.03077048249542713, + "epoch": 21.753597122302157, + "step": 24190 + }, + { + "epoch": 21.753597122302157, + "step": 24190, + "torque_loss": 0.13748450577259064 + }, + { + "epoch": 21.753597122302157, + "force_loss": 0.04616479203104973, + "step": 24190 + }, + { + "epoch": 21.762589928057555, + "grad_norm": 0.257639080286026, + "learning_rate": 6.958136239518418e-05, + "loss": 0.0212, + "step": 24200 + }, + { + "action_loss": 0.0019250352634117007, + "epoch": 21.762589928057555, + "step": 24200 + }, + { + "epoch": 21.762589928057555, + "step": 24200, + "torque_loss": 0.09396030753850937 + }, + { + "epoch": 21.762589928057555, + "force_loss": 0.002093286719173193, + "step": 24200 + }, + { + "epoch": 21.77158273381295, + "grad_norm": 0.23291504383087158, + "learning_rate": 6.955600280242371e-05, + "loss": 0.0197, + "step": 24210 + }, + { + "action_loss": 0.005376171320676804, + "epoch": 21.77158273381295, + "step": 24210 + }, + { + "epoch": 21.77158273381295, + "step": 24210, + "torque_loss": 0.1337880641222 + }, + { + "epoch": 21.77158273381295, + "force_loss": 0.00854805763810873, + "step": 24210 + }, + { + "epoch": 21.780575539568346, + "grad_norm": 0.3987133204936981, + "learning_rate": 6.953063726906596e-05, + "loss": 0.0204, + "step": 24220 + }, + { + "action_loss": 0.00652548810467124, + "epoch": 21.780575539568346, + "step": 24220 + }, + { + "epoch": 21.780575539568346, + "step": 24220, + "torque_loss": 0.15214596688747406 + }, + { + "epoch": 21.780575539568346, + "force_loss": 0.011250283569097519, + "step": 24220 + }, + { + "epoch": 21.78956834532374, + "grad_norm": 0.31795546412467957, + "learning_rate": 6.950526580281626e-05, + "loss": 0.0202, + "step": 24230 + }, + { + "action_loss": 0.0022861079778522253, + "epoch": 21.78956834532374, + "step": 24230 + }, + { + "epoch": 21.78956834532374, + "step": 24230, + "torque_loss": 0.1178806722164154 + }, + { + "epoch": 21.78956834532374, + "force_loss": 0.003101978451013565, + "step": 24230 + }, + { + "epoch": 21.798561151079138, + "grad_norm": 0.2745036780834198, + "learning_rate": 6.947988841138184e-05, + "loss": 0.022, + "step": 24240 + }, + { + "action_loss": 0.0025741548743098974, + "epoch": 21.798561151079138, + "step": 24240 + }, + { + "epoch": 21.798561151079138, + "step": 24240, + "torque_loss": 0.10255517810583115 + }, + { + "epoch": 21.798561151079138, + "force_loss": 0.002620341954752803, + "step": 24240 + }, + { + "epoch": 21.807553956834532, + "grad_norm": 0.3730088174343109, + "learning_rate": 6.945450510247165e-05, + "loss": 0.0207, + "step": 24250 + }, + { + "action_loss": 0.0075866542756557465, + "epoch": 21.807553956834532, + "step": 24250 + }, + { + "epoch": 21.807553956834532, + "step": 24250, + "torque_loss": 0.1519446223974228 + }, + { + "epoch": 21.807553956834532, + "force_loss": 0.004284977447241545, + "step": 24250 + }, + { + "epoch": 21.81654676258993, + "grad_norm": 0.3975403606891632, + "learning_rate": 6.942911588379647e-05, + "loss": 0.0207, + "step": 24260 + }, + { + "action_loss": 0.004833179991692305, + "epoch": 21.81654676258993, + "step": 24260 + }, + { + "epoch": 21.81654676258993, + "step": 24260, + "torque_loss": 0.12231109291315079 + }, + { + "epoch": 21.81654676258993, + "force_loss": 0.007674244698137045, + "step": 24260 + }, + { + "epoch": 21.825539568345324, + "grad_norm": 0.2681105434894562, + "learning_rate": 6.940372076306888e-05, + "loss": 0.0201, + "step": 24270 + }, + { + "action_loss": 0.005627890583127737, + "epoch": 21.825539568345324, + "step": 24270 + }, + { + "epoch": 21.825539568345324, + "step": 24270, + "torque_loss": 0.18219177424907684 + }, + { + "epoch": 21.825539568345324, + "force_loss": 0.0068586901761591434, + "step": 24270 + }, + { + "epoch": 21.834532374100718, + "grad_norm": 0.14424137771129608, + "learning_rate": 6.937831974800326e-05, + "loss": 0.0202, + "step": 24280 + }, + { + "action_loss": 0.004893868695944548, + "epoch": 21.834532374100718, + "step": 24280 + }, + { + "epoch": 21.834532374100718, + "step": 24280, + "torque_loss": 0.10660874843597412 + }, + { + "epoch": 21.834532374100718, + "force_loss": 0.004262237343937159, + "step": 24280 + }, + { + "epoch": 21.843525179856115, + "grad_norm": 0.7608571648597717, + "learning_rate": 6.935291284631574e-05, + "loss": 0.0238, + "step": 24290 + }, + { + "action_loss": 0.015205886214971542, + "epoch": 21.843525179856115, + "step": 24290 + }, + { + "epoch": 21.843525179856115, + "step": 24290, + "torque_loss": 0.11672701686620712 + }, + { + "epoch": 21.843525179856115, + "force_loss": 0.017855575308203697, + "step": 24290 + }, + { + "epoch": 21.85251798561151, + "grad_norm": 0.6793885231018066, + "learning_rate": 6.932750006572428e-05, + "loss": 0.019, + "step": 24300 + }, + { + "action_loss": 0.005090448074042797, + "epoch": 21.85251798561151, + "step": 24300 + }, + { + "epoch": 21.85251798561151, + "step": 24300, + "torque_loss": 0.1350153088569641 + }, + { + "epoch": 21.85251798561151, + "force_loss": 0.010580133646726608, + "step": 24300 + }, + { + "epoch": 21.861510791366907, + "grad_norm": 0.25309425592422485, + "learning_rate": 6.930208141394863e-05, + "loss": 0.0367, + "step": 24310 + }, + { + "action_loss": 0.007332662586122751, + "epoch": 21.861510791366907, + "step": 24310 + }, + { + "epoch": 21.861510791366907, + "step": 24310, + "torque_loss": 0.15832073986530304 + }, + { + "epoch": 21.861510791366907, + "force_loss": 0.013494604267179966, + "step": 24310 + }, + { + "epoch": 21.8705035971223, + "grad_norm": 0.42558208107948303, + "learning_rate": 6.927665689871026e-05, + "loss": 0.0202, + "step": 24320 + }, + { + "action_loss": 0.012143700383603573, + "epoch": 21.8705035971223, + "step": 24320 + }, + { + "epoch": 21.8705035971223, + "step": 24320, + "torque_loss": 0.14808912575244904 + }, + { + "epoch": 21.8705035971223, + "force_loss": 0.008264814503490925, + "step": 24320 + }, + { + "epoch": 21.8794964028777, + "grad_norm": 0.3452666103839874, + "learning_rate": 6.925122652773253e-05, + "loss": 0.0216, + "step": 24330 + }, + { + "action_loss": 0.0038850188720971346, + "epoch": 21.8794964028777, + "step": 24330 + }, + { + "epoch": 21.8794964028777, + "step": 24330, + "torque_loss": 0.14941705763339996 + }, + { + "epoch": 21.8794964028777, + "force_loss": 0.004443696234375238, + "step": 24330 + }, + { + "epoch": 21.888489208633093, + "grad_norm": 0.20415908098220825, + "learning_rate": 6.922579030874046e-05, + "loss": 0.0212, + "step": 24340 + }, + { + "action_loss": 0.011489741504192352, + "epoch": 21.888489208633093, + "step": 24340 + }, + { + "epoch": 21.888489208633093, + "step": 24340, + "torque_loss": 0.19305045902729034 + }, + { + "epoch": 21.888489208633093, + "force_loss": 0.02003069408237934, + "step": 24340 + }, + { + "epoch": 21.89748201438849, + "grad_norm": 0.29833388328552246, + "learning_rate": 6.920034824946093e-05, + "loss": 0.025, + "step": 24350 + }, + { + "action_loss": 0.005486502777785063, + "epoch": 21.89748201438849, + "step": 24350 + }, + { + "epoch": 21.89748201438849, + "step": 24350, + "torque_loss": 0.16189566254615784 + }, + { + "epoch": 21.89748201438849, + "force_loss": 0.003585736034438014, + "step": 24350 + }, + { + "epoch": 21.906474820143885, + "grad_norm": 0.23319552838802338, + "learning_rate": 6.917490035762255e-05, + "loss": 0.023, + "step": 24360 + }, + { + "action_loss": 0.024321740493178368, + "epoch": 21.906474820143885, + "step": 24360 + }, + { + "epoch": 21.906474820143885, + "step": 24360, + "torque_loss": 0.20250184834003448 + }, + { + "epoch": 21.906474820143885, + "force_loss": 0.04470481351017952, + "step": 24360 + }, + { + "epoch": 21.915467625899282, + "grad_norm": 0.2829186022281647, + "learning_rate": 6.914944664095573e-05, + "loss": 0.0281, + "step": 24370 + }, + { + "action_loss": 0.004660788923501968, + "epoch": 21.915467625899282, + "step": 24370 + }, + { + "epoch": 21.915467625899282, + "step": 24370, + "torque_loss": 0.14200960099697113 + }, + { + "epoch": 21.915467625899282, + "force_loss": 0.004201762843877077, + "step": 24370 + }, + { + "epoch": 21.924460431654676, + "grad_norm": 0.5323184132575989, + "learning_rate": 6.912398710719264e-05, + "loss": 0.0226, + "step": 24380 + }, + { + "action_loss": 0.0049565755762159824, + "epoch": 21.924460431654676, + "step": 24380 + }, + { + "epoch": 21.924460431654676, + "step": 24380, + "torque_loss": 0.15860824286937714 + }, + { + "epoch": 21.924460431654676, + "force_loss": 0.006551635917276144, + "step": 24380 + }, + { + "epoch": 21.93345323741007, + "grad_norm": 0.26677796244621277, + "learning_rate": 6.90985217640672e-05, + "loss": 0.0189, + "step": 24390 + }, + { + "action_loss": 0.004493407439440489, + "epoch": 21.93345323741007, + "step": 24390 + }, + { + "epoch": 21.93345323741007, + "step": 24390, + "torque_loss": 0.10732502490282059 + }, + { + "epoch": 21.93345323741007, + "force_loss": 0.002163970610126853, + "step": 24390 + }, + { + "epoch": 21.942446043165468, + "grad_norm": 0.6263008117675781, + "learning_rate": 6.90730506193151e-05, + "loss": 0.0201, + "step": 24400 + }, + { + "action_loss": 0.005693149287253618, + "epoch": 21.942446043165468, + "step": 24400 + }, + { + "epoch": 21.942446043165468, + "step": 24400, + "torque_loss": 0.12757854163646698 + }, + { + "epoch": 21.942446043165468, + "force_loss": 0.0025026879739016294, + "step": 24400 + }, + { + "epoch": 21.951438848920862, + "grad_norm": 0.3909143805503845, + "learning_rate": 6.904757368067384e-05, + "loss": 0.023, + "step": 24410 + }, + { + "action_loss": 0.0076774596236646175, + "epoch": 21.951438848920862, + "step": 24410 + }, + { + "epoch": 21.951438848920862, + "step": 24410, + "torque_loss": 0.09612607955932617 + }, + { + "epoch": 21.951438848920862, + "force_loss": 0.01226059254258871, + "step": 24410 + }, + { + "epoch": 21.96043165467626, + "grad_norm": 0.26202166080474854, + "learning_rate": 6.90220909558826e-05, + "loss": 0.0251, + "step": 24420 + }, + { + "action_loss": 0.010712314397096634, + "epoch": 21.96043165467626, + "step": 24420 + }, + { + "epoch": 21.96043165467626, + "step": 24420, + "torque_loss": 0.12895852327346802 + }, + { + "epoch": 21.96043165467626, + "force_loss": 0.00807793065905571, + "step": 24420 + }, + { + "epoch": 21.969424460431654, + "grad_norm": 0.3353864252567291, + "learning_rate": 6.899660245268237e-05, + "loss": 0.0213, + "step": 24430 + }, + { + "action_loss": 0.0037603459786623716, + "epoch": 21.969424460431654, + "step": 24430 + }, + { + "epoch": 21.969424460431654, + "step": 24430, + "torque_loss": 0.1198287382721901 + }, + { + "epoch": 21.969424460431654, + "force_loss": 0.004271873738616705, + "step": 24430 + }, + { + "epoch": 21.97841726618705, + "grad_norm": 0.11434774845838547, + "learning_rate": 6.897110817881592e-05, + "loss": 0.0214, + "step": 24440 + }, + { + "action_loss": 0.006766705308109522, + "epoch": 21.97841726618705, + "step": 24440 + }, + { + "epoch": 21.97841726618705, + "step": 24440, + "torque_loss": 0.1438988894224167 + }, + { + "epoch": 21.97841726618705, + "force_loss": 0.006048687268048525, + "step": 24440 + }, + { + "epoch": 21.987410071942445, + "grad_norm": 0.6172817945480347, + "learning_rate": 6.894560814202769e-05, + "loss": 0.0229, + "step": 24450 + }, + { + "action_loss": 0.0024692120496183634, + "epoch": 21.987410071942445, + "step": 24450 + }, + { + "epoch": 21.987410071942445, + "step": 24450, + "torque_loss": 0.11306580156087875 + }, + { + "epoch": 21.987410071942445, + "force_loss": 0.0031304650474339724, + "step": 24450 + }, + { + "epoch": 21.996402877697843, + "grad_norm": 0.4427724778652191, + "learning_rate": 6.892010235006394e-05, + "loss": 0.0268, + "step": 24460 + }, + { + "action_loss": 0.003611071268096566, + "epoch": 21.996402877697843, + "step": 24460 + }, + { + "epoch": 21.996402877697843, + "step": 24460, + "torque_loss": 0.1440209299325943 + }, + { + "epoch": 21.996402877697843, + "force_loss": 0.003615929977968335, + "step": 24460 + }, + { + "epoch": 22.005395683453237, + "grad_norm": 0.21930928528308868, + "learning_rate": 6.889459081067264e-05, + "loss": 0.0204, + "step": 24470 + }, + { + "action_loss": 0.0037955709267407656, + "epoch": 22.005395683453237, + "step": 24470 + }, + { + "epoch": 22.005395683453237, + "step": 24470, + "torque_loss": 0.14295008778572083 + }, + { + "epoch": 22.005395683453237, + "force_loss": 0.004967763088643551, + "step": 24470 + }, + { + "epoch": 22.014388489208635, + "grad_norm": 0.43003565073013306, + "learning_rate": 6.886907353160356e-05, + "loss": 0.0202, + "step": 24480 + }, + { + "action_loss": 0.007930673658847809, + "epoch": 22.014388489208635, + "step": 24480 + }, + { + "epoch": 22.014388489208635, + "step": 24480, + "torque_loss": 0.14471480250358582 + }, + { + "epoch": 22.014388489208635, + "force_loss": 0.00403394503518939, + "step": 24480 + }, + { + "epoch": 22.02338129496403, + "grad_norm": 0.31343305110931396, + "learning_rate": 6.884355052060814e-05, + "loss": 0.0187, + "step": 24490 + }, + { + "action_loss": 0.00326588936150074, + "epoch": 22.02338129496403, + "step": 24490 + }, + { + "epoch": 22.02338129496403, + "step": 24490, + "torque_loss": 0.09171837568283081 + }, + { + "epoch": 22.02338129496403, + "force_loss": 0.005965802818536758, + "step": 24490 + }, + { + "epoch": 22.032374100719423, + "grad_norm": 0.2788020074367523, + "learning_rate": 6.88180217854396e-05, + "loss": 0.0197, + "step": 24500 + }, + { + "action_loss": 0.0050150505267083645, + "epoch": 22.032374100719423, + "step": 24500 + }, + { + "epoch": 22.032374100719423, + "step": 24500, + "torque_loss": 0.14749211072921753 + }, + { + "epoch": 22.032374100719423, + "force_loss": 0.002626863308250904, + "step": 24500 + }, + { + "epoch": 22.04136690647482, + "grad_norm": 0.32771310210227966, + "learning_rate": 6.87924873338529e-05, + "loss": 0.0231, + "step": 24510 + }, + { + "action_loss": 0.006207337602972984, + "epoch": 22.04136690647482, + "step": 24510 + }, + { + "epoch": 22.04136690647482, + "step": 24510, + "torque_loss": 0.11077922582626343 + }, + { + "epoch": 22.04136690647482, + "force_loss": 0.0047624483704566956, + "step": 24510 + }, + { + "epoch": 22.050359712230215, + "grad_norm": 0.4554392099380493, + "learning_rate": 6.876694717360475e-05, + "loss": 0.0217, + "step": 24520 + }, + { + "action_loss": 0.02835608273744583, + "epoch": 22.050359712230215, + "step": 24520 + }, + { + "epoch": 22.050359712230215, + "step": 24520, + "torque_loss": 0.12670385837554932 + }, + { + "epoch": 22.050359712230215, + "force_loss": 0.015614225529134274, + "step": 24520 + }, + { + "epoch": 22.059352517985612, + "grad_norm": 0.5012367963790894, + "learning_rate": 6.874140131245355e-05, + "loss": 0.0224, + "step": 24530 + }, + { + "action_loss": 0.012275311164557934, + "epoch": 22.059352517985612, + "step": 24530 + }, + { + "epoch": 22.059352517985612, + "step": 24530, + "torque_loss": 0.15115399658679962 + }, + { + "epoch": 22.059352517985612, + "force_loss": 0.01023236196488142, + "step": 24530 + }, + { + "epoch": 22.068345323741006, + "grad_norm": 0.2496970146894455, + "learning_rate": 6.871584975815948e-05, + "loss": 0.0217, + "step": 24540 + }, + { + "action_loss": 0.0030946796759963036, + "epoch": 22.068345323741006, + "step": 24540 + }, + { + "epoch": 22.068345323741006, + "step": 24540, + "torque_loss": 0.10134420543909073 + }, + { + "epoch": 22.068345323741006, + "force_loss": 0.0022605503909289837, + "step": 24540 + }, + { + "epoch": 22.077338129496404, + "grad_norm": 0.14811429381370544, + "learning_rate": 6.86902925184844e-05, + "loss": 0.0193, + "step": 24550 + }, + { + "action_loss": 0.01476606447249651, + "epoch": 22.077338129496404, + "step": 24550 + }, + { + "epoch": 22.077338129496404, + "step": 24550, + "torque_loss": 0.1842147558927536 + }, + { + "epoch": 22.077338129496404, + "force_loss": 0.013570576906204224, + "step": 24550 + }, + { + "epoch": 22.086330935251798, + "grad_norm": 0.3305526375770569, + "learning_rate": 6.866472960119195e-05, + "loss": 0.0234, + "step": 24560 + }, + { + "action_loss": 0.005050330422818661, + "epoch": 22.086330935251798, + "step": 24560 + }, + { + "epoch": 22.086330935251798, + "step": 24560, + "torque_loss": 0.14061351120471954 + }, + { + "epoch": 22.086330935251798, + "force_loss": 0.00714228255674243, + "step": 24560 + }, + { + "epoch": 22.095323741007196, + "grad_norm": 0.1732715666294098, + "learning_rate": 6.863916101404748e-05, + "loss": 0.0195, + "step": 24570 + }, + { + "action_loss": 0.004619399551302195, + "epoch": 22.095323741007196, + "step": 24570 + }, + { + "epoch": 22.095323741007196, + "step": 24570, + "torque_loss": 0.09905781596899033 + }, + { + "epoch": 22.095323741007196, + "force_loss": 0.004102995153516531, + "step": 24570 + }, + { + "epoch": 22.10431654676259, + "grad_norm": 0.28794586658477783, + "learning_rate": 6.8613586764818e-05, + "loss": 0.0178, + "step": 24580 + }, + { + "action_loss": 0.003019990399479866, + "epoch": 22.10431654676259, + "step": 24580 + }, + { + "epoch": 22.10431654676259, + "step": 24580, + "torque_loss": 0.16483174264431 + }, + { + "epoch": 22.10431654676259, + "force_loss": 0.0052350894547998905, + "step": 24580 + }, + { + "epoch": 22.113309352517987, + "grad_norm": 0.3921899199485779, + "learning_rate": 6.858800686127233e-05, + "loss": 0.0198, + "step": 24590 + }, + { + "action_loss": 0.0059818304143846035, + "epoch": 22.113309352517987, + "step": 24590 + }, + { + "epoch": 22.113309352517987, + "step": 24590, + "torque_loss": 0.16413426399230957 + }, + { + "epoch": 22.113309352517987, + "force_loss": 0.006394246127456427, + "step": 24590 + }, + { + "epoch": 22.12230215827338, + "grad_norm": 0.3071562349796295, + "learning_rate": 6.856242131118097e-05, + "loss": 0.0193, + "step": 24600 + }, + { + "action_loss": 0.007390322629362345, + "epoch": 22.12230215827338, + "step": 24600 + }, + { + "epoch": 22.12230215827338, + "step": 24600, + "torque_loss": 0.08253031224012375 + }, + { + "epoch": 22.12230215827338, + "force_loss": 0.008102931082248688, + "step": 24600 + }, + { + "epoch": 22.131294964028775, + "grad_norm": 0.432756245136261, + "learning_rate": 6.853683012231614e-05, + "loss": 0.0186, + "step": 24610 + }, + { + "action_loss": 0.0032958232332021, + "epoch": 22.131294964028775, + "step": 24610 + }, + { + "epoch": 22.131294964028775, + "step": 24610, + "torque_loss": 0.19886575639247894 + }, + { + "epoch": 22.131294964028775, + "force_loss": 0.0029224285390228033, + "step": 24610 + }, + { + "epoch": 22.140287769784173, + "grad_norm": 0.48232972621917725, + "learning_rate": 6.851123330245173e-05, + "loss": 0.0213, + "step": 24620 + }, + { + "action_loss": 0.011466942727565765, + "epoch": 22.140287769784173, + "step": 24620 + }, + { + "epoch": 22.140287769784173, + "step": 24620, + "torque_loss": 0.1438477784395218 + }, + { + "epoch": 22.140287769784173, + "force_loss": 0.018884124234318733, + "step": 24620 + }, + { + "epoch": 22.149280575539567, + "grad_norm": 0.39638736844062805, + "learning_rate": 6.848563085936343e-05, + "loss": 0.0181, + "step": 24630 + }, + { + "action_loss": 0.0053353155963122845, + "epoch": 22.149280575539567, + "step": 24630 + }, + { + "epoch": 22.149280575539567, + "step": 24630, + "torque_loss": 0.1025819256901741 + }, + { + "epoch": 22.149280575539567, + "force_loss": 0.0041658105328679085, + "step": 24630 + }, + { + "epoch": 22.158273381294965, + "grad_norm": 0.319553941488266, + "learning_rate": 6.846002280082853e-05, + "loss": 0.0195, + "step": 24640 + }, + { + "action_loss": 0.006258409004658461, + "epoch": 22.158273381294965, + "step": 24640 + }, + { + "epoch": 22.158273381294965, + "step": 24640, + "torque_loss": 0.10401511192321777 + }, + { + "epoch": 22.158273381294965, + "force_loss": 0.003696775995194912, + "step": 24640 + }, + { + "epoch": 22.16726618705036, + "grad_norm": 0.23440581560134888, + "learning_rate": 6.843440913462614e-05, + "loss": 0.0181, + "step": 24650 + }, + { + "action_loss": 0.0025956497993320227, + "epoch": 22.16726618705036, + "step": 24650 + }, + { + "epoch": 22.16726618705036, + "step": 24650, + "torque_loss": 0.0990554690361023 + }, + { + "epoch": 22.16726618705036, + "force_loss": 0.003344378201290965, + "step": 24650 + }, + { + "epoch": 22.176258992805757, + "grad_norm": 0.28230926394462585, + "learning_rate": 6.840878986853698e-05, + "loss": 0.02, + "step": 24660 + }, + { + "action_loss": 0.0030577369034290314, + "epoch": 22.176258992805757, + "step": 24660 + }, + { + "epoch": 22.176258992805757, + "step": 24660, + "torque_loss": 0.13943682610988617 + }, + { + "epoch": 22.176258992805757, + "force_loss": 0.003802162827923894, + "step": 24660 + }, + { + "epoch": 22.18525179856115, + "grad_norm": 0.2650807499885559, + "learning_rate": 6.838316501034352e-05, + "loss": 0.0199, + "step": 24670 + }, + { + "action_loss": 0.0055350023321807384, + "epoch": 22.18525179856115, + "step": 24670 + }, + { + "epoch": 22.18525179856115, + "step": 24670, + "torque_loss": 0.09273897856473923 + }, + { + "epoch": 22.18525179856115, + "force_loss": 0.00897206086665392, + "step": 24670 + }, + { + "epoch": 22.194244604316548, + "grad_norm": 0.15893565118312836, + "learning_rate": 6.83575345678299e-05, + "loss": 0.0187, + "step": 24680 + }, + { + "action_loss": 0.011666147969663143, + "epoch": 22.194244604316548, + "step": 24680 + }, + { + "epoch": 22.194244604316548, + "step": 24680, + "torque_loss": 0.1414196938276291 + }, + { + "epoch": 22.194244604316548, + "force_loss": 0.014527139253914356, + "step": 24680 + }, + { + "epoch": 22.203237410071942, + "grad_norm": 0.3142419159412384, + "learning_rate": 6.833189854878196e-05, + "loss": 0.0215, + "step": 24690 + }, + { + "action_loss": 0.0065027461387217045, + "epoch": 22.203237410071942, + "step": 24690 + }, + { + "epoch": 22.203237410071942, + "step": 24690, + "torque_loss": 0.11355862766504288 + }, + { + "epoch": 22.203237410071942, + "force_loss": 0.009570063091814518, + "step": 24690 + }, + { + "epoch": 22.21223021582734, + "grad_norm": 0.1881752759218216, + "learning_rate": 6.83062569609873e-05, + "loss": 0.0206, + "step": 24700 + }, + { + "action_loss": 0.006756490562111139, + "epoch": 22.21223021582734, + "step": 24700 + }, + { + "epoch": 22.21223021582734, + "step": 24700, + "torque_loss": 0.14468832314014435 + }, + { + "epoch": 22.21223021582734, + "force_loss": 0.003405606606975198, + "step": 24700 + }, + { + "epoch": 22.221223021582734, + "grad_norm": 0.2073754072189331, + "learning_rate": 6.828060981223512e-05, + "loss": 0.0195, + "step": 24710 + }, + { + "action_loss": 0.004220070783048868, + "epoch": 22.221223021582734, + "step": 24710 + }, + { + "epoch": 22.221223021582734, + "step": 24710, + "torque_loss": 0.1627507209777832 + }, + { + "epoch": 22.221223021582734, + "force_loss": 0.008783784694969654, + "step": 24710 + }, + { + "epoch": 22.230215827338128, + "grad_norm": 0.28720879554748535, + "learning_rate": 6.825495711031634e-05, + "loss": 0.0197, + "step": 24720 + }, + { + "action_loss": 0.005291081964969635, + "epoch": 22.230215827338128, + "step": 24720 + }, + { + "epoch": 22.230215827338128, + "step": 24720, + "torque_loss": 0.1927880048751831 + }, + { + "epoch": 22.230215827338128, + "force_loss": 0.006160981487482786, + "step": 24720 + }, + { + "epoch": 22.239208633093526, + "grad_norm": 0.4726666510105133, + "learning_rate": 6.822929886302359e-05, + "loss": 0.0174, + "step": 24730 + }, + { + "action_loss": 0.005073511507362127, + "epoch": 22.239208633093526, + "step": 24730 + }, + { + "epoch": 22.239208633093526, + "step": 24730, + "torque_loss": 0.07566569000482559 + }, + { + "epoch": 22.239208633093526, + "force_loss": 0.002500078408047557, + "step": 24730 + }, + { + "epoch": 22.24820143884892, + "grad_norm": 0.2170042246580124, + "learning_rate": 6.820363507815116e-05, + "loss": 0.0183, + "step": 24740 + }, + { + "action_loss": 0.011065724305808544, + "epoch": 22.24820143884892, + "step": 24740 + }, + { + "epoch": 22.24820143884892, + "step": 24740, + "torque_loss": 0.10500673204660416 + }, + { + "epoch": 22.24820143884892, + "force_loss": 0.014746363274753094, + "step": 24740 + }, + { + "epoch": 22.257194244604317, + "grad_norm": 0.29405534267425537, + "learning_rate": 6.817796576349501e-05, + "loss": 0.0217, + "step": 24750 + }, + { + "action_loss": 0.005858331453055143, + "epoch": 22.257194244604317, + "step": 24750 + }, + { + "epoch": 22.257194244604317, + "step": 24750, + "torque_loss": 0.15028120577335358 + }, + { + "epoch": 22.257194244604317, + "force_loss": 0.0070252916775643826, + "step": 24750 + }, + { + "epoch": 22.26618705035971, + "grad_norm": 0.3350282609462738, + "learning_rate": 6.815229092685285e-05, + "loss": 0.0235, + "step": 24760 + }, + { + "action_loss": 0.005081116687506437, + "epoch": 22.26618705035971, + "step": 24760 + }, + { + "epoch": 22.26618705035971, + "step": 24760, + "torque_loss": 0.0663282573223114 + }, + { + "epoch": 22.26618705035971, + "force_loss": 0.004096261691302061, + "step": 24760 + }, + { + "epoch": 22.27517985611511, + "grad_norm": 0.2524561285972595, + "learning_rate": 6.812661057602399e-05, + "loss": 0.0188, + "step": 24770 + }, + { + "action_loss": 0.003354328451678157, + "epoch": 22.27517985611511, + "step": 24770 + }, + { + "epoch": 22.27517985611511, + "step": 24770, + "torque_loss": 0.08592530339956284 + }, + { + "epoch": 22.27517985611511, + "force_loss": 0.002191887004300952, + "step": 24770 + }, + { + "epoch": 22.284172661870503, + "grad_norm": 0.2644980251789093, + "learning_rate": 6.810092471880943e-05, + "loss": 0.0217, + "step": 24780 + }, + { + "action_loss": 0.0054429336450994015, + "epoch": 22.284172661870503, + "step": 24780 + }, + { + "epoch": 22.284172661870503, + "step": 24780, + "torque_loss": 0.1347578763961792 + }, + { + "epoch": 22.284172661870503, + "force_loss": 0.006393214222043753, + "step": 24780 + }, + { + "epoch": 22.2931654676259, + "grad_norm": 0.7584131360054016, + "learning_rate": 6.807523336301187e-05, + "loss": 0.023, + "step": 24790 + }, + { + "action_loss": 0.015225163660943508, + "epoch": 22.2931654676259, + "step": 24790 + }, + { + "epoch": 22.2931654676259, + "step": 24790, + "torque_loss": 0.17529775202274323 + }, + { + "epoch": 22.2931654676259, + "force_loss": 0.022977866232395172, + "step": 24790 + }, + { + "epoch": 22.302158273381295, + "grad_norm": 0.49336618185043335, + "learning_rate": 6.804953651643566e-05, + "loss": 0.0253, + "step": 24800 + }, + { + "action_loss": 0.008526027202606201, + "epoch": 22.302158273381295, + "step": 24800 + }, + { + "epoch": 22.302158273381295, + "step": 24800, + "torque_loss": 0.11459165811538696 + }, + { + "epoch": 22.302158273381295, + "force_loss": 0.0073268115520477295, + "step": 24800 + }, + { + "epoch": 22.31115107913669, + "grad_norm": 0.2615133821964264, + "learning_rate": 6.802383418688685e-05, + "loss": 0.023, + "step": 24810 + }, + { + "action_loss": 0.0180727019906044, + "epoch": 22.31115107913669, + "step": 24810 + }, + { + "epoch": 22.31115107913669, + "step": 24810, + "torque_loss": 0.1488514542579651 + }, + { + "epoch": 22.31115107913669, + "force_loss": 0.02162788063287735, + "step": 24810 + }, + { + "epoch": 22.320143884892087, + "grad_norm": 0.3022231459617615, + "learning_rate": 6.799812638217309e-05, + "loss": 0.0246, + "step": 24820 + }, + { + "action_loss": 0.014785663224756718, + "epoch": 22.320143884892087, + "step": 24820 + }, + { + "epoch": 22.320143884892087, + "step": 24820, + "torque_loss": 0.16015289723873138 + }, + { + "epoch": 22.320143884892087, + "force_loss": 0.014687980525195599, + "step": 24820 + }, + { + "epoch": 22.32913669064748, + "grad_norm": 0.43572723865509033, + "learning_rate": 6.797241311010373e-05, + "loss": 0.023, + "step": 24830 + }, + { + "action_loss": 0.008995863609015942, + "epoch": 22.32913669064748, + "step": 24830 + }, + { + "epoch": 22.32913669064748, + "step": 24830, + "torque_loss": 0.13701839745044708 + }, + { + "epoch": 22.32913669064748, + "force_loss": 0.005755970720201731, + "step": 24830 + }, + { + "epoch": 22.33812949640288, + "grad_norm": 0.8672119975090027, + "learning_rate": 6.794669437848982e-05, + "loss": 0.0231, + "step": 24840 + }, + { + "action_loss": 0.004539123270660639, + "epoch": 22.33812949640288, + "step": 24840 + }, + { + "epoch": 22.33812949640288, + "step": 24840, + "torque_loss": 0.13663187623023987 + }, + { + "epoch": 22.33812949640288, + "force_loss": 0.004633542615920305, + "step": 24840 + }, + { + "epoch": 22.347122302158272, + "grad_norm": 0.3586977422237396, + "learning_rate": 6.792097019514402e-05, + "loss": 0.0213, + "step": 24850 + }, + { + "action_loss": 0.002659488469362259, + "epoch": 22.347122302158272, + "step": 24850 + }, + { + "epoch": 22.347122302158272, + "step": 24850, + "torque_loss": 0.09668034315109253 + }, + { + "epoch": 22.347122302158272, + "force_loss": 0.0020048124715685844, + "step": 24850 + }, + { + "epoch": 22.35611510791367, + "grad_norm": 0.4001941978931427, + "learning_rate": 6.789524056788064e-05, + "loss": 0.0224, + "step": 24860 + }, + { + "action_loss": 0.003226390341296792, + "epoch": 22.35611510791367, + "step": 24860 + }, + { + "epoch": 22.35611510791367, + "step": 24860, + "torque_loss": 0.10833647847175598 + }, + { + "epoch": 22.35611510791367, + "force_loss": 0.0017195852706208825, + "step": 24860 + }, + { + "epoch": 22.365107913669064, + "grad_norm": 0.39897388219833374, + "learning_rate": 6.786950550451567e-05, + "loss": 0.0181, + "step": 24870 + }, + { + "action_loss": 0.014251780696213245, + "epoch": 22.365107913669064, + "step": 24870 + }, + { + "epoch": 22.365107913669064, + "step": 24870, + "torque_loss": 0.17052705585956573 + }, + { + "epoch": 22.365107913669064, + "force_loss": 0.01718895509839058, + "step": 24870 + }, + { + "epoch": 22.37410071942446, + "grad_norm": 0.21309426426887512, + "learning_rate": 6.784376501286676e-05, + "loss": 0.0216, + "step": 24880 + }, + { + "action_loss": 0.006995067000389099, + "epoch": 22.37410071942446, + "step": 24880 + }, + { + "epoch": 22.37410071942446, + "step": 24880, + "torque_loss": 0.14151662588119507 + }, + { + "epoch": 22.37410071942446, + "force_loss": 0.004014255944639444, + "step": 24880 + }, + { + "epoch": 22.383093525179856, + "grad_norm": 0.14126957952976227, + "learning_rate": 6.781801910075316e-05, + "loss": 0.0192, + "step": 24890 + }, + { + "action_loss": 0.007174244616180658, + "epoch": 22.383093525179856, + "step": 24890 + }, + { + "epoch": 22.383093525179856, + "step": 24890, + "torque_loss": 0.19454972445964813 + }, + { + "epoch": 22.383093525179856, + "force_loss": 0.008282207883894444, + "step": 24890 + }, + { + "epoch": 22.392086330935253, + "grad_norm": 0.24397610127925873, + "learning_rate": 6.779226777599581e-05, + "loss": 0.0192, + "step": 24900 + }, + { + "action_loss": 0.013853867538273335, + "epoch": 22.392086330935253, + "step": 24900 + }, + { + "epoch": 22.392086330935253, + "step": 24900, + "torque_loss": 0.14847691357135773 + }, + { + "epoch": 22.392086330935253, + "force_loss": 0.012605573982000351, + "step": 24900 + }, + { + "epoch": 22.401079136690647, + "grad_norm": 0.2652047574520111, + "learning_rate": 6.776651104641729e-05, + "loss": 0.0218, + "step": 24910 + }, + { + "action_loss": 0.009297415614128113, + "epoch": 22.401079136690647, + "step": 24910 + }, + { + "epoch": 22.401079136690647, + "step": 24910, + "torque_loss": 0.1373676210641861 + }, + { + "epoch": 22.401079136690647, + "force_loss": 0.0061741359531879425, + "step": 24910 + }, + { + "epoch": 22.41007194244604, + "grad_norm": 0.2956402599811554, + "learning_rate": 6.774074891984183e-05, + "loss": 0.021, + "step": 24920 + }, + { + "action_loss": 0.010844677686691284, + "epoch": 22.41007194244604, + "step": 24920 + }, + { + "epoch": 22.41007194244604, + "step": 24920, + "torque_loss": 0.08772342652082443 + }, + { + "epoch": 22.41007194244604, + "force_loss": 0.005964852403849363, + "step": 24920 + }, + { + "epoch": 22.41906474820144, + "grad_norm": 0.31046637892723083, + "learning_rate": 6.771498140409526e-05, + "loss": 0.0197, + "step": 24930 + }, + { + "action_loss": 0.004734450485557318, + "epoch": 22.41906474820144, + "step": 24930 + }, + { + "epoch": 22.41906474820144, + "step": 24930, + "torque_loss": 0.20352749526500702 + }, + { + "epoch": 22.41906474820144, + "force_loss": 0.0023477189242839813, + "step": 24930 + }, + { + "epoch": 22.428057553956833, + "grad_norm": 0.43902724981307983, + "learning_rate": 6.768920850700506e-05, + "loss": 0.02, + "step": 24940 + }, + { + "action_loss": 0.03939766809344292, + "epoch": 22.428057553956833, + "step": 24940 + }, + { + "epoch": 22.428057553956833, + "step": 24940, + "torque_loss": 0.18034398555755615 + }, + { + "epoch": 22.428057553956833, + "force_loss": 0.06515728682279587, + "step": 24940 + }, + { + "epoch": 22.43705035971223, + "grad_norm": 0.20046652853488922, + "learning_rate": 6.766343023640039e-05, + "loss": 0.0259, + "step": 24950 + }, + { + "action_loss": 0.0051938616670668125, + "epoch": 22.43705035971223, + "step": 24950 + }, + { + "epoch": 22.43705035971223, + "step": 24950, + "torque_loss": 0.10448554158210754 + }, + { + "epoch": 22.43705035971223, + "force_loss": 0.008075357414782047, + "step": 24950 + }, + { + "epoch": 22.446043165467625, + "grad_norm": 0.2738473117351532, + "learning_rate": 6.763764660011198e-05, + "loss": 0.0191, + "step": 24960 + }, + { + "action_loss": 0.003480554325506091, + "epoch": 22.446043165467625, + "step": 24960 + }, + { + "epoch": 22.446043165467625, + "step": 24960, + "torque_loss": 0.20879770815372467 + }, + { + "epoch": 22.446043165467625, + "force_loss": 0.003547949017956853, + "step": 24960 + }, + { + "epoch": 22.455035971223023, + "grad_norm": 0.234994575381279, + "learning_rate": 6.761185760597223e-05, + "loss": 0.0188, + "step": 24970 + }, + { + "action_loss": 0.01314420998096466, + "epoch": 22.455035971223023, + "step": 24970 + }, + { + "epoch": 22.455035971223023, + "step": 24970, + "torque_loss": 0.16362716257572174 + }, + { + "epoch": 22.455035971223023, + "force_loss": 0.015762537717819214, + "step": 24970 + }, + { + "epoch": 22.464028776978417, + "grad_norm": 0.2934909164905548, + "learning_rate": 6.758606326181515e-05, + "loss": 0.0222, + "step": 24980 + }, + { + "action_loss": 0.005077384412288666, + "epoch": 22.464028776978417, + "step": 24980 + }, + { + "epoch": 22.464028776978417, + "step": 24980, + "torque_loss": 0.14764131605625153 + }, + { + "epoch": 22.464028776978417, + "force_loss": 0.003645877121016383, + "step": 24980 + }, + { + "epoch": 22.473021582733814, + "grad_norm": 0.26081541180610657, + "learning_rate": 6.75602635754764e-05, + "loss": 0.0202, + "step": 24990 + }, + { + "action_loss": 0.008632371202111244, + "epoch": 22.473021582733814, + "step": 24990 + }, + { + "epoch": 22.473021582733814, + "step": 24990, + "torque_loss": 0.12616242468357086 + }, + { + "epoch": 22.473021582733814, + "force_loss": 0.018579445779323578, + "step": 24990 + }, + { + "epoch": 22.48201438848921, + "grad_norm": 0.7656939029693604, + "learning_rate": 6.75344585547932e-05, + "loss": 0.0232, + "step": 25000 + }, + { + "action_loss": 0.0028978276532143354, + "epoch": 22.48201438848921, + "step": 25000 + }, + { + "epoch": 22.48201438848921, + "step": 25000, + "torque_loss": 0.09270501881837845 + }, + { + "epoch": 22.48201438848921, + "force_loss": 0.0019726885948330164, + "step": 25000 + }, + { + "epoch": 22.491007194244606, + "grad_norm": 0.44807904958724976, + "learning_rate": 6.750864820760449e-05, + "loss": 0.0182, + "step": 25010 + }, + { + "action_loss": 0.0037822090089321136, + "epoch": 22.491007194244606, + "step": 25010 + }, + { + "epoch": 22.491007194244606, + "step": 25010, + "torque_loss": 0.11277223378419876 + }, + { + "epoch": 22.491007194244606, + "force_loss": 0.0042450325563549995, + "step": 25010 + }, + { + "epoch": 22.5, + "grad_norm": 0.6256704330444336, + "learning_rate": 6.748283254175072e-05, + "loss": 0.021, + "step": 25020 + }, + { + "action_loss": 0.0028590282890945673, + "epoch": 22.5, + "step": 25020 + }, + { + "epoch": 22.5, + "step": 25020, + "torque_loss": 0.13656139373779297 + }, + { + "epoch": 22.5, + "force_loss": 0.00206926092505455, + "step": 25020 + }, + { + "epoch": 22.508992805755394, + "grad_norm": 0.21707117557525635, + "learning_rate": 6.745701156507404e-05, + "loss": 0.0185, + "step": 25030 + }, + { + "action_loss": 0.0020222412422299385, + "epoch": 22.508992805755394, + "step": 25030 + }, + { + "epoch": 22.508992805755394, + "step": 25030, + "torque_loss": 0.09703171253204346 + }, + { + "epoch": 22.508992805755394, + "force_loss": 0.0013680089032277465, + "step": 25030 + }, + { + "epoch": 22.51798561151079, + "grad_norm": 0.16367389261722565, + "learning_rate": 6.743118528541818e-05, + "loss": 0.0191, + "step": 25040 + }, + { + "action_loss": 0.0050452034920454025, + "epoch": 22.51798561151079, + "step": 25040 + }, + { + "epoch": 22.51798561151079, + "step": 25040, + "torque_loss": 0.13396082818508148 + }, + { + "epoch": 22.51798561151079, + "force_loss": 0.006993439514189959, + "step": 25040 + }, + { + "epoch": 22.526978417266186, + "grad_norm": 0.40440526604652405, + "learning_rate": 6.740535371062846e-05, + "loss": 0.0197, + "step": 25050 + }, + { + "action_loss": 0.008025317452847958, + "epoch": 22.526978417266186, + "step": 25050 + }, + { + "epoch": 22.526978417266186, + "step": 25050, + "torque_loss": 0.13185955584049225 + }, + { + "epoch": 22.526978417266186, + "force_loss": 0.010429500602185726, + "step": 25050 + }, + { + "epoch": 22.535971223021583, + "grad_norm": 0.14783775806427002, + "learning_rate": 6.737951684855185e-05, + "loss": 0.0188, + "step": 25060 + }, + { + "action_loss": 0.011450252495706081, + "epoch": 22.535971223021583, + "step": 25060 + }, + { + "epoch": 22.535971223021583, + "step": 25060, + "torque_loss": 0.15720783174037933 + }, + { + "epoch": 22.535971223021583, + "force_loss": 0.00878047477453947, + "step": 25060 + }, + { + "epoch": 22.544964028776977, + "grad_norm": 0.3013652265071869, + "learning_rate": 6.735367470703691e-05, + "loss": 0.02, + "step": 25070 + }, + { + "action_loss": 0.0033889685291796923, + "epoch": 22.544964028776977, + "step": 25070 + }, + { + "epoch": 22.544964028776977, + "step": 25070, + "torque_loss": 0.0825701579451561 + }, + { + "epoch": 22.544964028776977, + "force_loss": 0.0018574000569060445, + "step": 25070 + }, + { + "epoch": 22.553956834532375, + "grad_norm": 0.3465050756931305, + "learning_rate": 6.732782729393379e-05, + "loss": 0.0173, + "step": 25080 + }, + { + "action_loss": 0.00617151940241456, + "epoch": 22.553956834532375, + "step": 25080 + }, + { + "epoch": 22.553956834532375, + "step": 25080, + "torque_loss": 0.15709702670574188 + }, + { + "epoch": 22.553956834532375, + "force_loss": 0.010902347974479198, + "step": 25080 + }, + { + "epoch": 22.56294964028777, + "grad_norm": 0.3559282720088959, + "learning_rate": 6.730197461709425e-05, + "loss": 0.0188, + "step": 25090 + }, + { + "action_loss": 0.006413564085960388, + "epoch": 22.56294964028777, + "step": 25090 + }, + { + "epoch": 22.56294964028777, + "step": 25090, + "torque_loss": 0.12822692096233368 + }, + { + "epoch": 22.56294964028777, + "force_loss": 0.004938144702464342, + "step": 25090 + }, + { + "epoch": 22.571942446043167, + "grad_norm": 0.18942776322364807, + "learning_rate": 6.727611668437164e-05, + "loss": 0.0244, + "step": 25100 + }, + { + "action_loss": 0.008657535538077354, + "epoch": 22.571942446043167, + "step": 25100 + }, + { + "epoch": 22.571942446043167, + "step": 25100, + "torque_loss": 0.19749164581298828 + }, + { + "epoch": 22.571942446043167, + "force_loss": 0.006594389211386442, + "step": 25100 + }, + { + "epoch": 22.58093525179856, + "grad_norm": 0.348819762468338, + "learning_rate": 6.725025350362094e-05, + "loss": 0.0184, + "step": 25110 + }, + { + "action_loss": 0.0034841757733374834, + "epoch": 22.58093525179856, + "step": 25110 + }, + { + "epoch": 22.58093525179856, + "step": 25110, + "torque_loss": 0.13012182712554932 + }, + { + "epoch": 22.58093525179856, + "force_loss": 0.003398830071091652, + "step": 25110 + }, + { + "epoch": 22.58992805755396, + "grad_norm": 0.26889199018478394, + "learning_rate": 6.72243850826987e-05, + "loss": 0.0171, + "step": 25120 + }, + { + "action_loss": 0.003253486705943942, + "epoch": 22.58992805755396, + "step": 25120 + }, + { + "epoch": 22.58992805755396, + "step": 25120, + "torque_loss": 0.10711225122213364 + }, + { + "epoch": 22.58992805755396, + "force_loss": 0.004229988437145948, + "step": 25120 + }, + { + "epoch": 22.598920863309353, + "grad_norm": 0.5290204882621765, + "learning_rate": 6.719851142946305e-05, + "loss": 0.0176, + "step": 25130 + }, + { + "action_loss": 0.002730436623096466, + "epoch": 22.598920863309353, + "step": 25130 + }, + { + "epoch": 22.598920863309353, + "step": 25130, + "torque_loss": 0.09934938699007034 + }, + { + "epoch": 22.598920863309353, + "force_loss": 0.0018018983537331223, + "step": 25130 + }, + { + "epoch": 22.607913669064747, + "grad_norm": 0.27351611852645874, + "learning_rate": 6.717263255177372e-05, + "loss": 0.0252, + "step": 25140 + }, + { + "action_loss": 0.005032676737755537, + "epoch": 22.607913669064747, + "step": 25140 + }, + { + "epoch": 22.607913669064747, + "step": 25140, + "torque_loss": 0.17156310379505157 + }, + { + "epoch": 22.607913669064747, + "force_loss": 0.004266476724296808, + "step": 25140 + }, + { + "epoch": 22.616906474820144, + "grad_norm": 0.12947946786880493, + "learning_rate": 6.714674845749205e-05, + "loss": 0.0205, + "step": 25150 + }, + { + "action_loss": 0.008485876955091953, + "epoch": 22.616906474820144, + "step": 25150 + }, + { + "epoch": 22.616906474820144, + "step": 25150, + "torque_loss": 0.13253921270370483 + }, + { + "epoch": 22.616906474820144, + "force_loss": 0.011147302575409412, + "step": 25150 + }, + { + "epoch": 22.62589928057554, + "grad_norm": 0.17825235426425934, + "learning_rate": 6.712085915448092e-05, + "loss": 0.0208, + "step": 25160 + }, + { + "action_loss": 0.004266685806214809, + "epoch": 22.62589928057554, + "step": 25160 + }, + { + "epoch": 22.62589928057554, + "step": 25160, + "torque_loss": 0.11097616702318192 + }, + { + "epoch": 22.62589928057554, + "force_loss": 0.008491885848343372, + "step": 25160 + }, + { + "epoch": 22.634892086330936, + "grad_norm": 0.3936159908771515, + "learning_rate": 6.709496465060486e-05, + "loss": 0.0214, + "step": 25170 + }, + { + "action_loss": 0.00317780370824039, + "epoch": 22.634892086330936, + "step": 25170 + }, + { + "epoch": 22.634892086330936, + "step": 25170, + "torque_loss": 0.1275627166032791 + }, + { + "epoch": 22.634892086330936, + "force_loss": 0.005806193221360445, + "step": 25170 + }, + { + "epoch": 22.64388489208633, + "grad_norm": 0.29340144991874695, + "learning_rate": 6.706906495372987e-05, + "loss": 0.0187, + "step": 25180 + }, + { + "action_loss": 0.006735945586115122, + "epoch": 22.64388489208633, + "step": 25180 + }, + { + "epoch": 22.64388489208633, + "step": 25180, + "torque_loss": 0.16585038602352142 + }, + { + "epoch": 22.64388489208633, + "force_loss": 0.006408578250557184, + "step": 25180 + }, + { + "epoch": 22.652877697841728, + "grad_norm": 0.5214519500732422, + "learning_rate": 6.704316007172365e-05, + "loss": 0.0201, + "step": 25190 + }, + { + "action_loss": 0.005629087332636118, + "epoch": 22.652877697841728, + "step": 25190 + }, + { + "epoch": 22.652877697841728, + "step": 25190, + "torque_loss": 0.17105501890182495 + }, + { + "epoch": 22.652877697841728, + "force_loss": 0.0044324579648673534, + "step": 25190 + }, + { + "epoch": 22.66187050359712, + "grad_norm": 0.327261745929718, + "learning_rate": 6.701725001245539e-05, + "loss": 0.026, + "step": 25200 + }, + { + "action_loss": 0.005990903824567795, + "epoch": 22.66187050359712, + "step": 25200 + }, + { + "epoch": 22.66187050359712, + "step": 25200, + "torque_loss": 0.10485409945249557 + }, + { + "epoch": 22.66187050359712, + "force_loss": 0.005060315132141113, + "step": 25200 + }, + { + "epoch": 22.67086330935252, + "grad_norm": 0.3446037173271179, + "learning_rate": 6.699133478379588e-05, + "loss": 0.0179, + "step": 25210 + }, + { + "action_loss": 0.0031948660034686327, + "epoch": 22.67086330935252, + "step": 25210 + }, + { + "epoch": 22.67086330935252, + "step": 25210, + "torque_loss": 0.08439884334802628 + }, + { + "epoch": 22.67086330935252, + "force_loss": 0.002503957599401474, + "step": 25210 + }, + { + "epoch": 22.679856115107913, + "grad_norm": 0.2616611123085022, + "learning_rate": 6.69654143936175e-05, + "loss": 0.0187, + "step": 25220 + }, + { + "action_loss": 0.003216393291950226, + "epoch": 22.679856115107913, + "step": 25220 + }, + { + "epoch": 22.679856115107913, + "step": 25220, + "torque_loss": 0.12741748988628387 + }, + { + "epoch": 22.679856115107913, + "force_loss": 0.004136684816330671, + "step": 25220 + }, + { + "epoch": 22.68884892086331, + "grad_norm": 0.20833612978458405, + "learning_rate": 6.693948884979419e-05, + "loss": 0.0215, + "step": 25230 + }, + { + "action_loss": 0.0038545706775039434, + "epoch": 22.68884892086331, + "step": 25230 + }, + { + "epoch": 22.68884892086331, + "step": 25230, + "torque_loss": 0.14403019845485687 + }, + { + "epoch": 22.68884892086331, + "force_loss": 0.002605097135528922, + "step": 25230 + }, + { + "epoch": 22.697841726618705, + "grad_norm": 0.2755392789840698, + "learning_rate": 6.691355816020142e-05, + "loss": 0.0192, + "step": 25240 + }, + { + "action_loss": 0.004064594861119986, + "epoch": 22.697841726618705, + "step": 25240 + }, + { + "epoch": 22.697841726618705, + "step": 25240, + "torque_loss": 0.13240979611873627 + }, + { + "epoch": 22.697841726618705, + "force_loss": 0.005109542980790138, + "step": 25240 + }, + { + "epoch": 22.7068345323741, + "grad_norm": 0.37402114272117615, + "learning_rate": 6.688762233271624e-05, + "loss": 0.0198, + "step": 25250 + }, + { + "action_loss": 0.0055160499177873135, + "epoch": 22.7068345323741, + "step": 25250 + }, + { + "epoch": 22.7068345323741, + "step": 25250, + "torque_loss": 0.12401332706212997 + }, + { + "epoch": 22.7068345323741, + "force_loss": 0.008280246518552303, + "step": 25250 + }, + { + "epoch": 22.715827338129497, + "grad_norm": 0.12323077023029327, + "learning_rate": 6.68616813752173e-05, + "loss": 0.0225, + "step": 25260 + }, + { + "action_loss": 0.004778133239597082, + "epoch": 22.715827338129497, + "step": 25260 + }, + { + "epoch": 22.715827338129497, + "step": 25260, + "torque_loss": 0.11605324596166611 + }, + { + "epoch": 22.715827338129497, + "force_loss": 0.004602286498993635, + "step": 25260 + }, + { + "epoch": 22.72482014388489, + "grad_norm": 0.18610790371894836, + "learning_rate": 6.683573529558477e-05, + "loss": 0.021, + "step": 25270 + }, + { + "action_loss": 0.0023338731843978167, + "epoch": 22.72482014388489, + "step": 25270 + }, + { + "epoch": 22.72482014388489, + "step": 25270, + "torque_loss": 0.10729467868804932 + }, + { + "epoch": 22.72482014388489, + "force_loss": 0.002586699789389968, + "step": 25270 + }, + { + "epoch": 22.73381294964029, + "grad_norm": 0.20692098140716553, + "learning_rate": 6.680978410170037e-05, + "loss": 0.0198, + "step": 25280 + }, + { + "action_loss": 0.0026255929842591286, + "epoch": 22.73381294964029, + "step": 25280 + }, + { + "epoch": 22.73381294964029, + "step": 25280, + "torque_loss": 0.15501603484153748 + }, + { + "epoch": 22.73381294964029, + "force_loss": 0.00565456785261631, + "step": 25280 + }, + { + "epoch": 22.742805755395683, + "grad_norm": 0.41962599754333496, + "learning_rate": 6.678382780144741e-05, + "loss": 0.0222, + "step": 25290 + }, + { + "action_loss": 0.009218104183673859, + "epoch": 22.742805755395683, + "step": 25290 + }, + { + "epoch": 22.742805755395683, + "step": 25290, + "torque_loss": 0.1374831348657608 + }, + { + "epoch": 22.742805755395683, + "force_loss": 0.007170087192207575, + "step": 25290 + }, + { + "epoch": 22.75179856115108, + "grad_norm": 0.15407513082027435, + "learning_rate": 6.675786640271071e-05, + "loss": 0.0186, + "step": 25300 + }, + { + "action_loss": 0.002584812231361866, + "epoch": 22.75179856115108, + "step": 25300 + }, + { + "epoch": 22.75179856115108, + "step": 25300, + "torque_loss": 0.09250873327255249 + }, + { + "epoch": 22.75179856115108, + "force_loss": 0.003979390021413565, + "step": 25300 + }, + { + "epoch": 22.760791366906474, + "grad_norm": 0.16511009633541107, + "learning_rate": 6.673189991337665e-05, + "loss": 0.0193, + "step": 25310 + }, + { + "action_loss": 0.005039873067289591, + "epoch": 22.760791366906474, + "step": 25310 + }, + { + "epoch": 22.760791366906474, + "step": 25310, + "torque_loss": 0.17084509134292603 + }, + { + "epoch": 22.760791366906474, + "force_loss": 0.005266187712550163, + "step": 25310 + }, + { + "epoch": 22.769784172661872, + "grad_norm": 0.18709422647953033, + "learning_rate": 6.670592834133317e-05, + "loss": 0.0241, + "step": 25320 + }, + { + "action_loss": 0.00991649180650711, + "epoch": 22.769784172661872, + "step": 25320 + }, + { + "epoch": 22.769784172661872, + "step": 25320, + "torque_loss": 0.16399723291397095 + }, + { + "epoch": 22.769784172661872, + "force_loss": 0.011603073216974735, + "step": 25320 + }, + { + "epoch": 22.778776978417266, + "grad_norm": 0.5876155495643616, + "learning_rate": 6.667995169446979e-05, + "loss": 0.0211, + "step": 25330 + }, + { + "action_loss": 0.00250917742960155, + "epoch": 22.778776978417266, + "step": 25330 + }, + { + "epoch": 22.778776978417266, + "step": 25330, + "torque_loss": 0.11301350593566895 + }, + { + "epoch": 22.778776978417266, + "force_loss": 0.0021972907707095146, + "step": 25330 + }, + { + "epoch": 22.78776978417266, + "grad_norm": 0.20074112713336945, + "learning_rate": 6.665396998067747e-05, + "loss": 0.0239, + "step": 25340 + }, + { + "action_loss": 0.006846604403108358, + "epoch": 22.78776978417266, + "step": 25340 + }, + { + "epoch": 22.78776978417266, + "step": 25340, + "torque_loss": 0.1528770476579666 + }, + { + "epoch": 22.78776978417266, + "force_loss": 0.006580863147974014, + "step": 25340 + }, + { + "epoch": 22.796762589928058, + "grad_norm": 0.2842159867286682, + "learning_rate": 6.66279832078488e-05, + "loss": 0.0195, + "step": 25350 + }, + { + "action_loss": 0.004146437626332045, + "epoch": 22.796762589928058, + "step": 25350 + }, + { + "epoch": 22.796762589928058, + "step": 25350, + "torque_loss": 0.08629711717367172 + }, + { + "epoch": 22.796762589928058, + "force_loss": 0.0037164564710110426, + "step": 25350 + }, + { + "epoch": 22.805755395683452, + "grad_norm": 0.2562117874622345, + "learning_rate": 6.660199138387786e-05, + "loss": 0.0198, + "step": 25360 + }, + { + "action_loss": 0.009775112383067608, + "epoch": 22.805755395683452, + "step": 25360 + }, + { + "epoch": 22.805755395683452, + "step": 25360, + "torque_loss": 0.11194447427988052 + }, + { + "epoch": 22.805755395683452, + "force_loss": 0.010112606920301914, + "step": 25360 + }, + { + "epoch": 22.81474820143885, + "grad_norm": 0.2811155915260315, + "learning_rate": 6.65759945166603e-05, + "loss": 0.0224, + "step": 25370 + }, + { + "action_loss": 0.004484861623495817, + "epoch": 22.81474820143885, + "step": 25370 + }, + { + "epoch": 22.81474820143885, + "step": 25370, + "torque_loss": 0.1898781657218933 + }, + { + "epoch": 22.81474820143885, + "force_loss": 0.0037356652319431305, + "step": 25370 + }, + { + "epoch": 22.823741007194243, + "grad_norm": 0.44249197840690613, + "learning_rate": 6.654999261409326e-05, + "loss": 0.0282, + "step": 25380 + }, + { + "action_loss": 0.01969350315630436, + "epoch": 22.823741007194243, + "step": 25380 + }, + { + "epoch": 22.823741007194243, + "step": 25380, + "torque_loss": 0.1598086953163147 + }, + { + "epoch": 22.823741007194243, + "force_loss": 0.023778853937983513, + "step": 25380 + }, + { + "epoch": 22.83273381294964, + "grad_norm": 0.6640731692314148, + "learning_rate": 6.652398568407544e-05, + "loss": 0.0256, + "step": 25390 + }, + { + "action_loss": 0.0049319821409881115, + "epoch": 22.83273381294964, + "step": 25390 + }, + { + "epoch": 22.83273381294964, + "step": 25390, + "torque_loss": 0.10348508507013321 + }, + { + "epoch": 22.83273381294964, + "force_loss": 0.0038687579799443483, + "step": 25390 + }, + { + "epoch": 22.841726618705035, + "grad_norm": 0.29850515723228455, + "learning_rate": 6.649797373450707e-05, + "loss": 0.023, + "step": 25400 + }, + { + "action_loss": 0.007974536158144474, + "epoch": 22.841726618705035, + "step": 25400 + }, + { + "epoch": 22.841726618705035, + "step": 25400, + "torque_loss": 0.12296003848314285 + }, + { + "epoch": 22.841726618705035, + "force_loss": 0.005034271627664566, + "step": 25400 + }, + { + "epoch": 22.850719424460433, + "grad_norm": 0.19731557369232178, + "learning_rate": 6.647195677328988e-05, + "loss": 0.0181, + "step": 25410 + }, + { + "action_loss": 0.009311657398939133, + "epoch": 22.850719424460433, + "step": 25410 + }, + { + "epoch": 22.850719424460433, + "step": 25410, + "torque_loss": 0.13293598592281342 + }, + { + "epoch": 22.850719424460433, + "force_loss": 0.011000546626746655, + "step": 25410 + }, + { + "epoch": 22.859712230215827, + "grad_norm": 0.1860690414905548, + "learning_rate": 6.644593480832712e-05, + "loss": 0.018, + "step": 25420 + }, + { + "action_loss": 0.005338681396096945, + "epoch": 22.859712230215827, + "step": 25420 + }, + { + "epoch": 22.859712230215827, + "step": 25420, + "torque_loss": 0.1254522204399109 + }, + { + "epoch": 22.859712230215827, + "force_loss": 0.004750473890453577, + "step": 25420 + }, + { + "epoch": 22.868705035971225, + "grad_norm": 0.2702329754829407, + "learning_rate": 6.641990784752363e-05, + "loss": 0.0206, + "step": 25430 + }, + { + "action_loss": 0.003232441609725356, + "epoch": 22.868705035971225, + "step": 25430 + }, + { + "epoch": 22.868705035971225, + "step": 25430, + "torque_loss": 0.09547365456819534 + }, + { + "epoch": 22.868705035971225, + "force_loss": 0.004664943087846041, + "step": 25430 + }, + { + "epoch": 22.87769784172662, + "grad_norm": 0.7561193108558655, + "learning_rate": 6.639387589878566e-05, + "loss": 0.0222, + "step": 25440 + }, + { + "action_loss": 0.003841092111542821, + "epoch": 22.87769784172662, + "step": 25440 + }, + { + "epoch": 22.87769784172662, + "step": 25440, + "torque_loss": 0.13480597734451294 + }, + { + "epoch": 22.87769784172662, + "force_loss": 0.002297803992405534, + "step": 25440 + }, + { + "epoch": 22.886690647482013, + "grad_norm": 0.3329511880874634, + "learning_rate": 6.636783897002103e-05, + "loss": 0.0208, + "step": 25450 + }, + { + "action_loss": 0.0023310650140047073, + "epoch": 22.886690647482013, + "step": 25450 + }, + { + "epoch": 22.886690647482013, + "step": 25450, + "torque_loss": 0.15812350809574127 + }, + { + "epoch": 22.886690647482013, + "force_loss": 0.0025320190470665693, + "step": 25450 + }, + { + "epoch": 22.89568345323741, + "grad_norm": 0.19684022665023804, + "learning_rate": 6.63417970691391e-05, + "loss": 0.0213, + "step": 25460 + }, + { + "action_loss": 0.0045658499002456665, + "epoch": 22.89568345323741, + "step": 25460 + }, + { + "epoch": 22.89568345323741, + "step": 25460, + "torque_loss": 0.13276435434818268 + }, + { + "epoch": 22.89568345323741, + "force_loss": 0.006904436741024256, + "step": 25460 + }, + { + "epoch": 22.904676258992804, + "grad_norm": 0.4892916977405548, + "learning_rate": 6.63157502040507e-05, + "loss": 0.0182, + "step": 25470 + }, + { + "action_loss": 0.02397470735013485, + "epoch": 22.904676258992804, + "step": 25470 + }, + { + "epoch": 22.904676258992804, + "step": 25470, + "torque_loss": 0.17074398696422577 + }, + { + "epoch": 22.904676258992804, + "force_loss": 0.02185717783868313, + "step": 25470 + }, + { + "epoch": 22.913669064748202, + "grad_norm": 0.2766764760017395, + "learning_rate": 6.628969838266819e-05, + "loss": 0.023, + "step": 25480 + }, + { + "action_loss": 0.008708766661584377, + "epoch": 22.913669064748202, + "step": 25480 + }, + { + "epoch": 22.913669064748202, + "step": 25480, + "torque_loss": 0.12777329981327057 + }, + { + "epoch": 22.913669064748202, + "force_loss": 0.007226257119327784, + "step": 25480 + }, + { + "epoch": 22.922661870503596, + "grad_norm": 0.229297935962677, + "learning_rate": 6.626364161290541e-05, + "loss": 0.0197, + "step": 25490 + }, + { + "action_loss": 0.0076741487719118595, + "epoch": 22.922661870503596, + "step": 25490 + }, + { + "epoch": 22.922661870503596, + "step": 25490, + "torque_loss": 0.13525649905204773 + }, + { + "epoch": 22.922661870503596, + "force_loss": 0.0064540766179561615, + "step": 25490 + }, + { + "epoch": 22.931654676258994, + "grad_norm": 0.40219736099243164, + "learning_rate": 6.623757990267774e-05, + "loss": 0.0221, + "step": 25500 + }, + { + "action_loss": 0.0067048631608486176, + "epoch": 22.931654676258994, + "step": 25500 + }, + { + "epoch": 22.931654676258994, + "step": 25500, + "torque_loss": 0.13198016583919525 + }, + { + "epoch": 22.931654676258994, + "force_loss": 0.004208454862236977, + "step": 25500 + }, + { + "epoch": 22.940647482014388, + "grad_norm": 0.4207010567188263, + "learning_rate": 6.621151325990201e-05, + "loss": 0.0203, + "step": 25510 + }, + { + "action_loss": 0.014719352126121521, + "epoch": 22.940647482014388, + "step": 25510 + }, + { + "epoch": 22.940647482014388, + "step": 25510, + "torque_loss": 0.13498015701770782 + }, + { + "epoch": 22.940647482014388, + "force_loss": 0.008890347555279732, + "step": 25510 + }, + { + "epoch": 22.949640287769785, + "grad_norm": 0.18830212950706482, + "learning_rate": 6.618544169249657e-05, + "loss": 0.0213, + "step": 25520 + }, + { + "action_loss": 0.0021579600870609283, + "epoch": 22.949640287769785, + "step": 25520 + }, + { + "epoch": 22.949640287769785, + "step": 25520, + "torque_loss": 0.10413229465484619 + }, + { + "epoch": 22.949640287769785, + "force_loss": 0.0014954224461689591, + "step": 25520 + }, + { + "epoch": 22.95863309352518, + "grad_norm": 0.330130010843277, + "learning_rate": 6.615936520838133e-05, + "loss": 0.0219, + "step": 25530 + }, + { + "action_loss": 0.0031004927586764097, + "epoch": 22.95863309352518, + "step": 25530 + }, + { + "epoch": 22.95863309352518, + "step": 25530, + "torque_loss": 0.12198827415704727 + }, + { + "epoch": 22.95863309352518, + "force_loss": 0.002654009498655796, + "step": 25530 + }, + { + "epoch": 22.967625899280577, + "grad_norm": 0.5394603610038757, + "learning_rate": 6.613328381547759e-05, + "loss": 0.0179, + "step": 25540 + }, + { + "action_loss": 0.0065713063813745975, + "epoch": 22.967625899280577, + "step": 25540 + }, + { + "epoch": 22.967625899280577, + "step": 25540, + "torque_loss": 0.18813152611255646 + }, + { + "epoch": 22.967625899280577, + "force_loss": 0.00476634269580245, + "step": 25540 + }, + { + "epoch": 22.97661870503597, + "grad_norm": 0.27255162596702576, + "learning_rate": 6.610719752170821e-05, + "loss": 0.0313, + "step": 25550 + }, + { + "action_loss": 0.002748814644291997, + "epoch": 22.97661870503597, + "step": 25550 + }, + { + "epoch": 22.97661870503597, + "step": 25550, + "torque_loss": 0.13115152716636658 + }, + { + "epoch": 22.97661870503597, + "force_loss": 0.0018982872134074569, + "step": 25550 + }, + { + "epoch": 22.985611510791365, + "grad_norm": 0.3695186376571655, + "learning_rate": 6.60811063349975e-05, + "loss": 0.02, + "step": 25560 + }, + { + "action_loss": 0.0027263539377599955, + "epoch": 22.985611510791365, + "step": 25560 + }, + { + "epoch": 22.985611510791365, + "step": 25560, + "torque_loss": 0.08767839521169662 + }, + { + "epoch": 22.985611510791365, + "force_loss": 0.0022533198352903128, + "step": 25560 + }, + { + "epoch": 22.994604316546763, + "grad_norm": 0.24433258175849915, + "learning_rate": 6.605501026327127e-05, + "loss": 0.0206, + "step": 25570 + }, + { + "action_loss": 0.00752577418461442, + "epoch": 22.994604316546763, + "step": 25570 + }, + { + "epoch": 22.994604316546763, + "step": 25570, + "torque_loss": 0.11854282766580582 + }, + { + "epoch": 22.994604316546763, + "force_loss": 0.003949462901800871, + "step": 25570 + }, + { + "epoch": 23.003597122302157, + "grad_norm": 0.588006854057312, + "learning_rate": 6.602890931445685e-05, + "loss": 0.0257, + "step": 25580 + }, + { + "action_loss": 0.008192433975636959, + "epoch": 23.003597122302157, + "step": 25580 + }, + { + "epoch": 23.003597122302157, + "step": 25580, + "torque_loss": 0.14182865619659424 + }, + { + "epoch": 23.003597122302157, + "force_loss": 0.008779712952673435, + "step": 25580 + }, + { + "epoch": 23.012589928057555, + "grad_norm": 0.3446831703186035, + "learning_rate": 6.6002803496483e-05, + "loss": 0.0223, + "step": 25590 + }, + { + "action_loss": 0.0035741832107305527, + "epoch": 23.012589928057555, + "step": 25590 + }, + { + "epoch": 23.012589928057555, + "step": 25590, + "torque_loss": 0.13330493867397308 + }, + { + "epoch": 23.012589928057555, + "force_loss": 0.004079489968717098, + "step": 25590 + }, + { + "epoch": 23.02158273381295, + "grad_norm": 0.19388410449028015, + "learning_rate": 6.597669281727997e-05, + "loss": 0.0178, + "step": 25600 + }, + { + "action_loss": 0.009083190932869911, + "epoch": 23.02158273381295, + "step": 25600 + }, + { + "epoch": 23.02158273381295, + "step": 25600, + "torque_loss": 0.10139637440443039 + }, + { + "epoch": 23.02158273381295, + "force_loss": 0.00901173148304224, + "step": 25600 + }, + { + "epoch": 23.030575539568346, + "grad_norm": 0.22701629996299744, + "learning_rate": 6.595057728477949e-05, + "loss": 0.0201, + "step": 25610 + }, + { + "action_loss": 0.0063840169459581375, + "epoch": 23.030575539568346, + "step": 25610 + }, + { + "epoch": 23.030575539568346, + "step": 25610, + "torque_loss": 0.11656978726387024 + }, + { + "epoch": 23.030575539568346, + "force_loss": 0.011731003411114216, + "step": 25610 + }, + { + "epoch": 23.03956834532374, + "grad_norm": 0.33431360125541687, + "learning_rate": 6.59244569069148e-05, + "loss": 0.0234, + "step": 25620 + }, + { + "action_loss": 0.0028157334309071302, + "epoch": 23.03956834532374, + "step": 25620 + }, + { + "epoch": 23.03956834532374, + "step": 25620, + "torque_loss": 0.14123941957950592 + }, + { + "epoch": 23.03956834532374, + "force_loss": 0.0038219671696424484, + "step": 25620 + }, + { + "epoch": 23.048561151079138, + "grad_norm": 0.3215562105178833, + "learning_rate": 6.589833169162054e-05, + "loss": 0.0186, + "step": 25630 + }, + { + "action_loss": 0.005002176854759455, + "epoch": 23.048561151079138, + "step": 25630 + }, + { + "epoch": 23.048561151079138, + "step": 25630, + "torque_loss": 0.15622670948505402 + }, + { + "epoch": 23.048561151079138, + "force_loss": 0.006401598919183016, + "step": 25630 + }, + { + "epoch": 23.057553956834532, + "grad_norm": 0.5653586983680725, + "learning_rate": 6.587220164683291e-05, + "loss": 0.0231, + "step": 25640 + }, + { + "action_loss": 0.0023761941120028496, + "epoch": 23.057553956834532, + "step": 25640 + }, + { + "epoch": 23.057553956834532, + "step": 25640, + "torque_loss": 0.1033112183213234 + }, + { + "epoch": 23.057553956834532, + "force_loss": 0.0028601244557648897, + "step": 25640 + }, + { + "epoch": 23.06654676258993, + "grad_norm": 0.5096305012702942, + "learning_rate": 6.58460667804895e-05, + "loss": 0.0201, + "step": 25650 + }, + { + "action_loss": 0.007477613165974617, + "epoch": 23.06654676258993, + "step": 25650 + }, + { + "epoch": 23.06654676258993, + "step": 25650, + "torque_loss": 0.09020773321390152 + }, + { + "epoch": 23.06654676258993, + "force_loss": 0.005367389414459467, + "step": 25650 + }, + { + "epoch": 23.075539568345324, + "grad_norm": 0.30646347999572754, + "learning_rate": 6.581992710052938e-05, + "loss": 0.0236, + "step": 25660 + }, + { + "action_loss": 0.005354841705411673, + "epoch": 23.075539568345324, + "step": 25660 + }, + { + "epoch": 23.075539568345324, + "step": 25660, + "torque_loss": 0.14324022829532623 + }, + { + "epoch": 23.075539568345324, + "force_loss": 0.005249425303190947, + "step": 25660 + }, + { + "epoch": 23.084532374100718, + "grad_norm": 0.6165043115615845, + "learning_rate": 6.579378261489311e-05, + "loss": 0.0238, + "step": 25670 + }, + { + "action_loss": 0.020665811374783516, + "epoch": 23.084532374100718, + "step": 25670 + }, + { + "epoch": 23.084532374100718, + "step": 25670, + "torque_loss": 0.11552337557077408 + }, + { + "epoch": 23.084532374100718, + "force_loss": 0.024774039164185524, + "step": 25670 + }, + { + "epoch": 23.093525179856115, + "grad_norm": 0.23046307265758514, + "learning_rate": 6.576763333152268e-05, + "loss": 0.0213, + "step": 25680 + }, + { + "action_loss": 0.004617140628397465, + "epoch": 23.093525179856115, + "step": 25680 + }, + { + "epoch": 23.093525179856115, + "step": 25680, + "torque_loss": 0.19429004192352295 + }, + { + "epoch": 23.093525179856115, + "force_loss": 0.003150787902995944, + "step": 25680 + }, + { + "epoch": 23.10251798561151, + "grad_norm": 0.29696786403656006, + "learning_rate": 6.574147925836159e-05, + "loss": 0.0184, + "step": 25690 + }, + { + "action_loss": 0.01016028132289648, + "epoch": 23.10251798561151, + "step": 25690 + }, + { + "epoch": 23.10251798561151, + "step": 25690, + "torque_loss": 0.1103476956486702 + }, + { + "epoch": 23.10251798561151, + "force_loss": 0.006418560165911913, + "step": 25690 + }, + { + "epoch": 23.111510791366907, + "grad_norm": 0.22070881724357605, + "learning_rate": 6.571532040335472e-05, + "loss": 0.0206, + "step": 25700 + }, + { + "action_loss": 0.002738197334110737, + "epoch": 23.111510791366907, + "step": 25700 + }, + { + "epoch": 23.111510791366907, + "step": 25700, + "torque_loss": 0.1095813736319542 + }, + { + "epoch": 23.111510791366907, + "force_loss": 0.0031854708213359118, + "step": 25700 + }, + { + "epoch": 23.1205035971223, + "grad_norm": 0.494768351316452, + "learning_rate": 6.568915677444845e-05, + "loss": 0.0202, + "step": 25710 + }, + { + "action_loss": 0.011660532094538212, + "epoch": 23.1205035971223, + "step": 25710 + }, + { + "epoch": 23.1205035971223, + "step": 25710, + "torque_loss": 0.12994426488876343 + }, + { + "epoch": 23.1205035971223, + "force_loss": 0.023325903341174126, + "step": 25710 + }, + { + "epoch": 23.1294964028777, + "grad_norm": 0.32806041836738586, + "learning_rate": 6.56629883795906e-05, + "loss": 0.0211, + "step": 25720 + }, + { + "action_loss": 0.00436545442789793, + "epoch": 23.1294964028777, + "step": 25720 + }, + { + "epoch": 23.1294964028777, + "step": 25720, + "torque_loss": 0.1534643918275833 + }, + { + "epoch": 23.1294964028777, + "force_loss": 0.002581397071480751, + "step": 25720 + }, + { + "epoch": 23.138489208633093, + "grad_norm": 0.26697176694869995, + "learning_rate": 6.563681522673043e-05, + "loss": 0.0215, + "step": 25730 + }, + { + "action_loss": 0.004084462765604258, + "epoch": 23.138489208633093, + "step": 25730 + }, + { + "epoch": 23.138489208633093, + "step": 25730, + "torque_loss": 0.1061796173453331 + }, + { + "epoch": 23.138489208633093, + "force_loss": 0.008010889403522015, + "step": 25730 + }, + { + "epoch": 23.14748201438849, + "grad_norm": 0.32571542263031006, + "learning_rate": 6.561063732381867e-05, + "loss": 0.0236, + "step": 25740 + }, + { + "action_loss": 0.007778527680784464, + "epoch": 23.14748201438849, + "step": 25740 + }, + { + "epoch": 23.14748201438849, + "step": 25740, + "torque_loss": 0.14481233060359955 + }, + { + "epoch": 23.14748201438849, + "force_loss": 0.004333335906267166, + "step": 25740 + }, + { + "epoch": 23.156474820143885, + "grad_norm": 0.18872971832752228, + "learning_rate": 6.558445467880745e-05, + "loss": 0.02, + "step": 25750 + }, + { + "action_loss": 0.0035236794501543045, + "epoch": 23.156474820143885, + "step": 25750 + }, + { + "epoch": 23.156474820143885, + "step": 25750, + "torque_loss": 0.12785522639751434 + }, + { + "epoch": 23.156474820143885, + "force_loss": 0.0025902960915118456, + "step": 25750 + }, + { + "epoch": 23.165467625899282, + "grad_norm": 0.18403694033622742, + "learning_rate": 6.55582672996504e-05, + "loss": 0.0212, + "step": 25760 + }, + { + "action_loss": 0.00574125349521637, + "epoch": 23.165467625899282, + "step": 25760 + }, + { + "epoch": 23.165467625899282, + "step": 25760, + "torque_loss": 0.12348950654268265 + }, + { + "epoch": 23.165467625899282, + "force_loss": 0.005121069028973579, + "step": 25760 + }, + { + "epoch": 23.174460431654676, + "grad_norm": 0.35492053627967834, + "learning_rate": 6.553207519430253e-05, + "loss": 0.0182, + "step": 25770 + }, + { + "action_loss": 0.009262331761419773, + "epoch": 23.174460431654676, + "step": 25770 + }, + { + "epoch": 23.174460431654676, + "step": 25770, + "torque_loss": 0.1523372381925583 + }, + { + "epoch": 23.174460431654676, + "force_loss": 0.012263941578567028, + "step": 25770 + }, + { + "epoch": 23.18345323741007, + "grad_norm": 0.16774387657642365, + "learning_rate": 6.550587837072032e-05, + "loss": 0.0207, + "step": 25780 + }, + { + "action_loss": 0.01723303459584713, + "epoch": 23.18345323741007, + "step": 25780 + }, + { + "epoch": 23.18345323741007, + "step": 25780, + "torque_loss": 0.12551383674144745 + }, + { + "epoch": 23.18345323741007, + "force_loss": 0.01829545572400093, + "step": 25780 + }, + { + "epoch": 23.192446043165468, + "grad_norm": 0.19067732989788055, + "learning_rate": 6.547967683686166e-05, + "loss": 0.0228, + "step": 25790 + }, + { + "action_loss": 0.006243444979190826, + "epoch": 23.192446043165468, + "step": 25790 + }, + { + "epoch": 23.192446043165468, + "step": 25790, + "torque_loss": 0.14092488586902618 + }, + { + "epoch": 23.192446043165468, + "force_loss": 0.007283204700797796, + "step": 25790 + }, + { + "epoch": 23.201438848920862, + "grad_norm": 0.1651083081960678, + "learning_rate": 6.545347060068591e-05, + "loss": 0.0181, + "step": 25800 + }, + { + "action_loss": 0.0152888810262084, + "epoch": 23.201438848920862, + "step": 25800 + }, + { + "epoch": 23.201438848920862, + "step": 25800, + "torque_loss": 0.1812572032213211 + }, + { + "epoch": 23.201438848920862, + "force_loss": 0.024338068440556526, + "step": 25800 + }, + { + "epoch": 23.21043165467626, + "grad_norm": 0.42186781764030457, + "learning_rate": 6.542725967015382e-05, + "loss": 0.0212, + "step": 25810 + }, + { + "action_loss": 0.006021048873662949, + "epoch": 23.21043165467626, + "step": 25810 + }, + { + "epoch": 23.21043165467626, + "step": 25810, + "torque_loss": 0.14184804260730743 + }, + { + "epoch": 23.21043165467626, + "force_loss": 0.0029388871043920517, + "step": 25810 + }, + { + "epoch": 23.219424460431654, + "grad_norm": 0.3582594692707062, + "learning_rate": 6.540104405322757e-05, + "loss": 0.0199, + "step": 25820 + }, + { + "action_loss": 0.002735581947490573, + "epoch": 23.219424460431654, + "step": 25820 + }, + { + "epoch": 23.219424460431654, + "step": 25820, + "torque_loss": 0.12251565605401993 + }, + { + "epoch": 23.219424460431654, + "force_loss": 0.0027741764206439257, + "step": 25820 + }, + { + "epoch": 23.22841726618705, + "grad_norm": 0.3141603469848633, + "learning_rate": 6.537482375787077e-05, + "loss": 0.0228, + "step": 25830 + }, + { + "action_loss": 0.003992381971329451, + "epoch": 23.22841726618705, + "step": 25830 + }, + { + "epoch": 23.22841726618705, + "step": 25830, + "torque_loss": 0.0965239629149437 + }, + { + "epoch": 23.22841726618705, + "force_loss": 0.0037076727021485567, + "step": 25830 + }, + { + "epoch": 23.237410071942445, + "grad_norm": 0.4681563377380371, + "learning_rate": 6.534859879204845e-05, + "loss": 0.0226, + "step": 25840 + }, + { + "action_loss": 0.015236998908221722, + "epoch": 23.237410071942445, + "step": 25840 + }, + { + "epoch": 23.237410071942445, + "step": 25840, + "torque_loss": 0.14281809329986572 + }, + { + "epoch": 23.237410071942445, + "force_loss": 0.013179329223930836, + "step": 25840 + }, + { + "epoch": 23.246402877697843, + "grad_norm": 0.22874422371387482, + "learning_rate": 6.532236916372709e-05, + "loss": 0.0202, + "step": 25850 + }, + { + "action_loss": 0.017200618982315063, + "epoch": 23.246402877697843, + "step": 25850 + }, + { + "epoch": 23.246402877697843, + "step": 25850, + "torque_loss": 0.12735037505626678 + }, + { + "epoch": 23.246402877697843, + "force_loss": 0.01724884659051895, + "step": 25850 + }, + { + "epoch": 23.255395683453237, + "grad_norm": 0.46542343497276306, + "learning_rate": 6.529613488087454e-05, + "loss": 0.0218, + "step": 25860 + }, + { + "action_loss": 0.0030846295412629843, + "epoch": 23.255395683453237, + "step": 25860 + }, + { + "epoch": 23.255395683453237, + "step": 25860, + "torque_loss": 0.10783765465021133 + }, + { + "epoch": 23.255395683453237, + "force_loss": 0.0020216742996126413, + "step": 25860 + }, + { + "epoch": 23.264388489208635, + "grad_norm": 0.3403064012527466, + "learning_rate": 6.526989595146009e-05, + "loss": 0.0179, + "step": 25870 + }, + { + "action_loss": 0.010160609148442745, + "epoch": 23.264388489208635, + "step": 25870 + }, + { + "epoch": 23.264388489208635, + "step": 25870, + "torque_loss": 0.08791982382535934 + }, + { + "epoch": 23.264388489208635, + "force_loss": 0.004336020443588495, + "step": 25870 + }, + { + "epoch": 23.27338129496403, + "grad_norm": 0.31536662578582764, + "learning_rate": 6.524365238345441e-05, + "loss": 0.0188, + "step": 25880 + }, + { + "action_loss": 0.007521332707256079, + "epoch": 23.27338129496403, + "step": 25880 + }, + { + "epoch": 23.27338129496403, + "step": 25880, + "torque_loss": 0.1705770492553711 + }, + { + "epoch": 23.27338129496403, + "force_loss": 0.008855112828314304, + "step": 25880 + }, + { + "epoch": 23.282374100719423, + "grad_norm": 0.4632514417171478, + "learning_rate": 6.521740418482964e-05, + "loss": 0.0221, + "step": 25890 + }, + { + "action_loss": 0.0031656965147703886, + "epoch": 23.282374100719423, + "step": 25890 + }, + { + "epoch": 23.282374100719423, + "step": 25890, + "torque_loss": 0.13370315730571747 + }, + { + "epoch": 23.282374100719423, + "force_loss": 0.0024058662820607424, + "step": 25890 + }, + { + "epoch": 23.29136690647482, + "grad_norm": 0.1726468801498413, + "learning_rate": 6.519115136355925e-05, + "loss": 0.0197, + "step": 25900 + }, + { + "action_loss": 0.009261681698262691, + "epoch": 23.29136690647482, + "step": 25900 + }, + { + "epoch": 23.29136690647482, + "step": 25900, + "torque_loss": 0.114000104367733 + }, + { + "epoch": 23.29136690647482, + "force_loss": 0.009563510306179523, + "step": 25900 + }, + { + "epoch": 23.300359712230215, + "grad_norm": 0.49877873063087463, + "learning_rate": 6.51648939276182e-05, + "loss": 0.0213, + "step": 25910 + }, + { + "action_loss": 0.007213458884507418, + "epoch": 23.300359712230215, + "step": 25910 + }, + { + "epoch": 23.300359712230215, + "step": 25910, + "torque_loss": 0.1358034461736679 + }, + { + "epoch": 23.300359712230215, + "force_loss": 0.011465176939964294, + "step": 25910 + }, + { + "epoch": 23.309352517985612, + "grad_norm": 0.20892885327339172, + "learning_rate": 6.513863188498277e-05, + "loss": 0.0211, + "step": 25920 + }, + { + "action_loss": 0.004518096800893545, + "epoch": 23.309352517985612, + "step": 25920 + }, + { + "epoch": 23.309352517985612, + "step": 25920, + "torque_loss": 0.15882180631160736 + }, + { + "epoch": 23.309352517985612, + "force_loss": 0.002603568835183978, + "step": 25920 + }, + { + "epoch": 23.318345323741006, + "grad_norm": 0.19917403161525726, + "learning_rate": 6.511236524363068e-05, + "loss": 0.0212, + "step": 25930 + }, + { + "action_loss": 0.006028077099472284, + "epoch": 23.318345323741006, + "step": 25930 + }, + { + "epoch": 23.318345323741006, + "step": 25930, + "torque_loss": 0.09007533639669418 + }, + { + "epoch": 23.318345323741006, + "force_loss": 0.007944750599563122, + "step": 25930 + }, + { + "epoch": 23.327338129496404, + "grad_norm": 0.24674886465072632, + "learning_rate": 6.508609401154104e-05, + "loss": 0.0243, + "step": 25940 + }, + { + "action_loss": 0.002575724618509412, + "epoch": 23.327338129496404, + "step": 25940 + }, + { + "epoch": 23.327338129496404, + "step": 25940, + "torque_loss": 0.07920623570680618 + }, + { + "epoch": 23.327338129496404, + "force_loss": 0.001899032387882471, + "step": 25940 + }, + { + "epoch": 23.336330935251798, + "grad_norm": 0.23422738909721375, + "learning_rate": 6.505981819669439e-05, + "loss": 0.0228, + "step": 25950 + }, + { + "action_loss": 0.004276049789041281, + "epoch": 23.336330935251798, + "step": 25950 + }, + { + "epoch": 23.336330935251798, + "step": 25950, + "torque_loss": 0.07456272840499878 + }, + { + "epoch": 23.336330935251798, + "force_loss": 0.002772013656795025, + "step": 25950 + }, + { + "epoch": 23.345323741007196, + "grad_norm": 0.3791617751121521, + "learning_rate": 6.503353780707258e-05, + "loss": 0.02, + "step": 25960 + }, + { + "action_loss": 0.004444082733243704, + "epoch": 23.345323741007196, + "step": 25960 + }, + { + "epoch": 23.345323741007196, + "step": 25960, + "torque_loss": 0.09586221724748611 + }, + { + "epoch": 23.345323741007196, + "force_loss": 0.004566045943647623, + "step": 25960 + }, + { + "epoch": 23.35431654676259, + "grad_norm": 0.16332946717739105, + "learning_rate": 6.500725285065895e-05, + "loss": 0.0204, + "step": 25970 + }, + { + "action_loss": 0.00948419887572527, + "epoch": 23.35431654676259, + "step": 25970 + }, + { + "epoch": 23.35431654676259, + "step": 25970, + "torque_loss": 0.14972086250782013 + }, + { + "epoch": 23.35431654676259, + "force_loss": 0.007919912226498127, + "step": 25970 + }, + { + "epoch": 23.363309352517987, + "grad_norm": 0.3652385473251343, + "learning_rate": 6.498096333543813e-05, + "loss": 0.0217, + "step": 25980 + }, + { + "action_loss": 0.007189897820353508, + "epoch": 23.363309352517987, + "step": 25980 + }, + { + "epoch": 23.363309352517987, + "step": 25980, + "torque_loss": 0.10546346753835678 + }, + { + "epoch": 23.363309352517987, + "force_loss": 0.009700865484774113, + "step": 25980 + }, + { + "epoch": 23.37230215827338, + "grad_norm": 0.331389844417572, + "learning_rate": 6.49546692693962e-05, + "loss": 0.0187, + "step": 25990 + }, + { + "action_loss": 0.011981263756752014, + "epoch": 23.37230215827338, + "step": 25990 + }, + { + "epoch": 23.37230215827338, + "step": 25990, + "torque_loss": 0.09773851186037064 + }, + { + "epoch": 23.37230215827338, + "force_loss": 0.011358651332557201, + "step": 25990 + }, + { + "epoch": 23.381294964028775, + "grad_norm": 0.42301568388938904, + "learning_rate": 6.492837066052059e-05, + "loss": 0.0236, + "step": 26000 + }, + { + "action_loss": 0.004145585000514984, + "epoch": 23.381294964028775, + "step": 26000 + }, + { + "epoch": 23.381294964028775, + "step": 26000, + "torque_loss": 0.14076244831085205 + }, + { + "epoch": 23.381294964028775, + "force_loss": 0.00511195370927453, + "step": 26000 + }, + { + "epoch": 23.390287769784173, + "grad_norm": 0.33904626965522766, + "learning_rate": 6.490206751680014e-05, + "loss": 0.0233, + "step": 26010 + }, + { + "action_loss": 0.0027019595727324486, + "epoch": 23.390287769784173, + "step": 26010 + }, + { + "epoch": 23.390287769784173, + "step": 26010, + "torque_loss": 0.11031558364629745 + }, + { + "epoch": 23.390287769784173, + "force_loss": 0.002878331346437335, + "step": 26010 + }, + { + "epoch": 23.399280575539567, + "grad_norm": 0.21259473264217377, + "learning_rate": 6.487575984622505e-05, + "loss": 0.0223, + "step": 26020 + }, + { + "action_loss": 0.008811467327177525, + "epoch": 23.399280575539567, + "step": 26020 + }, + { + "epoch": 23.399280575539567, + "step": 26020, + "torque_loss": 0.12438362091779709 + }, + { + "epoch": 23.399280575539567, + "force_loss": 0.013224628753960133, + "step": 26020 + }, + { + "epoch": 23.408273381294965, + "grad_norm": 0.17997276782989502, + "learning_rate": 6.484944765678689e-05, + "loss": 0.0205, + "step": 26030 + }, + { + "action_loss": 0.003967522643506527, + "epoch": 23.408273381294965, + "step": 26030 + }, + { + "epoch": 23.408273381294965, + "step": 26030, + "torque_loss": 0.11782846599817276 + }, + { + "epoch": 23.408273381294965, + "force_loss": 0.0046944585628807545, + "step": 26030 + }, + { + "epoch": 23.41726618705036, + "grad_norm": 0.38092041015625, + "learning_rate": 6.482313095647861e-05, + "loss": 0.0201, + "step": 26040 + }, + { + "action_loss": 0.0027728062123060226, + "epoch": 23.41726618705036, + "step": 26040 + }, + { + "epoch": 23.41726618705036, + "step": 26040, + "torque_loss": 0.09379864484071732 + }, + { + "epoch": 23.41726618705036, + "force_loss": 0.0022747565526515245, + "step": 26040 + }, + { + "epoch": 23.426258992805757, + "grad_norm": 0.2724270522594452, + "learning_rate": 6.479680975329451e-05, + "loss": 0.0223, + "step": 26050 + }, + { + "action_loss": 0.005345206707715988, + "epoch": 23.426258992805757, + "step": 26050 + }, + { + "epoch": 23.426258992805757, + "step": 26050, + "torque_loss": 0.16053888201713562 + }, + { + "epoch": 23.426258992805757, + "force_loss": 0.003982897847890854, + "step": 26050 + }, + { + "epoch": 23.43525179856115, + "grad_norm": 0.17012503743171692, + "learning_rate": 6.477048405523031e-05, + "loss": 0.0194, + "step": 26060 + }, + { + "action_loss": 0.0031624275725334883, + "epoch": 23.43525179856115, + "step": 26060 + }, + { + "epoch": 23.43525179856115, + "step": 26060, + "torque_loss": 0.1522766798734665 + }, + { + "epoch": 23.43525179856115, + "force_loss": 0.0062133558094501495, + "step": 26060 + }, + { + "epoch": 23.444244604316548, + "grad_norm": 0.28893211483955383, + "learning_rate": 6.474415387028304e-05, + "loss": 0.023, + "step": 26070 + }, + { + "action_loss": 0.0028869889210909605, + "epoch": 23.444244604316548, + "step": 26070 + }, + { + "epoch": 23.444244604316548, + "step": 26070, + "torque_loss": 0.12058921903371811 + }, + { + "epoch": 23.444244604316548, + "force_loss": 0.004029951523989439, + "step": 26070 + }, + { + "epoch": 23.453237410071942, + "grad_norm": 0.18101710081100464, + "learning_rate": 6.471781920645114e-05, + "loss": 0.0183, + "step": 26080 + }, + { + "action_loss": 0.008361860178411007, + "epoch": 23.453237410071942, + "step": 26080 + }, + { + "epoch": 23.453237410071942, + "step": 26080, + "torque_loss": 0.08174052089452744 + }, + { + "epoch": 23.453237410071942, + "force_loss": 0.0051781609654426575, + "step": 26080 + }, + { + "epoch": 23.46223021582734, + "grad_norm": 0.6979445815086365, + "learning_rate": 6.469148007173434e-05, + "loss": 0.0198, + "step": 26090 + }, + { + "action_loss": 0.004804322961717844, + "epoch": 23.46223021582734, + "step": 26090 + }, + { + "epoch": 23.46223021582734, + "step": 26090, + "torque_loss": 0.2135559767484665 + }, + { + "epoch": 23.46223021582734, + "force_loss": 0.0033818751107901335, + "step": 26090 + }, + { + "epoch": 23.471223021582734, + "grad_norm": 0.32751765847206116, + "learning_rate": 6.466513647413381e-05, + "loss": 0.0221, + "step": 26100 + }, + { + "action_loss": 0.003584170015528798, + "epoch": 23.471223021582734, + "step": 26100 + }, + { + "epoch": 23.471223021582734, + "step": 26100, + "torque_loss": 0.13833053410053253 + }, + { + "epoch": 23.471223021582734, + "force_loss": 0.005440997425466776, + "step": 26100 + }, + { + "epoch": 23.480215827338128, + "grad_norm": 0.3299177587032318, + "learning_rate": 6.463878842165203e-05, + "loss": 0.0196, + "step": 26110 + }, + { + "action_loss": 0.0022978943306952715, + "epoch": 23.480215827338128, + "step": 26110 + }, + { + "epoch": 23.480215827338128, + "step": 26110, + "torque_loss": 0.1417805403470993 + }, + { + "epoch": 23.480215827338128, + "force_loss": 0.001809374545700848, + "step": 26110 + }, + { + "epoch": 23.489208633093526, + "grad_norm": 0.23897022008895874, + "learning_rate": 6.461243592229286e-05, + "loss": 0.0218, + "step": 26120 + }, + { + "action_loss": 0.0033195570576936007, + "epoch": 23.489208633093526, + "step": 26120 + }, + { + "epoch": 23.489208633093526, + "step": 26120, + "torque_loss": 0.11360173672437668 + }, + { + "epoch": 23.489208633093526, + "force_loss": 0.0024100469890981913, + "step": 26120 + }, + { + "epoch": 23.49820143884892, + "grad_norm": 0.23758238554000854, + "learning_rate": 6.458607898406146e-05, + "loss": 0.0162, + "step": 26130 + }, + { + "action_loss": 0.0023425498511642218, + "epoch": 23.49820143884892, + "step": 26130 + }, + { + "epoch": 23.49820143884892, + "step": 26130, + "torque_loss": 0.15933813154697418 + }, + { + "epoch": 23.49820143884892, + "force_loss": 0.002432143082842231, + "step": 26130 + }, + { + "epoch": 23.507194244604317, + "grad_norm": 0.1860436201095581, + "learning_rate": 6.455971761496439e-05, + "loss": 0.0162, + "step": 26140 + }, + { + "action_loss": 0.008270727470517159, + "epoch": 23.507194244604317, + "step": 26140 + }, + { + "epoch": 23.507194244604317, + "step": 26140, + "torque_loss": 0.13363142311573029 + }, + { + "epoch": 23.507194244604317, + "force_loss": 0.01450568437576294, + "step": 26140 + }, + { + "epoch": 23.51618705035971, + "grad_norm": 0.19848312437534332, + "learning_rate": 6.453335182300953e-05, + "loss": 0.019, + "step": 26150 + }, + { + "action_loss": 0.00577367702499032, + "epoch": 23.51618705035971, + "step": 26150 + }, + { + "epoch": 23.51618705035971, + "step": 26150, + "torque_loss": 0.14000141620635986 + }, + { + "epoch": 23.51618705035971, + "force_loss": 0.009334307163953781, + "step": 26150 + }, + { + "epoch": 23.52517985611511, + "grad_norm": 0.41124746203422546, + "learning_rate": 6.450698161620612e-05, + "loss": 0.0205, + "step": 26160 + }, + { + "action_loss": 0.0034524749498814344, + "epoch": 23.52517985611511, + "step": 26160 + }, + { + "epoch": 23.52517985611511, + "step": 26160, + "torque_loss": 0.1307174116373062 + }, + { + "epoch": 23.52517985611511, + "force_loss": 0.0021633952856063843, + "step": 26160 + }, + { + "epoch": 23.534172661870503, + "grad_norm": 0.40090763568878174, + "learning_rate": 6.448060700256473e-05, + "loss": 0.0236, + "step": 26170 + }, + { + "action_loss": 0.00533563457429409, + "epoch": 23.534172661870503, + "step": 26170 + }, + { + "epoch": 23.534172661870503, + "step": 26170, + "torque_loss": 0.20002329349517822 + }, + { + "epoch": 23.534172661870503, + "force_loss": 0.004222980234771967, + "step": 26170 + }, + { + "epoch": 23.5431654676259, + "grad_norm": 0.2789079248905182, + "learning_rate": 6.445422799009726e-05, + "loss": 0.0186, + "step": 26180 + }, + { + "action_loss": 0.004568005446344614, + "epoch": 23.5431654676259, + "step": 26180 + }, + { + "epoch": 23.5431654676259, + "step": 26180, + "torque_loss": 0.10053543001413345 + }, + { + "epoch": 23.5431654676259, + "force_loss": 0.0037038931623101234, + "step": 26180 + }, + { + "epoch": 23.552158273381295, + "grad_norm": 0.585283637046814, + "learning_rate": 6.442784458681699e-05, + "loss": 0.0211, + "step": 26190 + }, + { + "action_loss": 0.005224842578172684, + "epoch": 23.552158273381295, + "step": 26190 + }, + { + "epoch": 23.552158273381295, + "step": 26190, + "torque_loss": 0.11199595779180527 + }, + { + "epoch": 23.552158273381295, + "force_loss": 0.0018089317018166184, + "step": 26190 + }, + { + "epoch": 23.56115107913669, + "grad_norm": 0.44279634952545166, + "learning_rate": 6.440145680073847e-05, + "loss": 0.0206, + "step": 26200 + }, + { + "action_loss": 0.003384426236152649, + "epoch": 23.56115107913669, + "step": 26200 + }, + { + "epoch": 23.56115107913669, + "step": 26200, + "torque_loss": 0.12964686751365662 + }, + { + "epoch": 23.56115107913669, + "force_loss": 0.00263400562107563, + "step": 26200 + }, + { + "epoch": 23.570143884892087, + "grad_norm": 0.41551002860069275, + "learning_rate": 6.437506463987762e-05, + "loss": 0.0226, + "step": 26210 + }, + { + "action_loss": 0.006423190236091614, + "epoch": 23.570143884892087, + "step": 26210 + }, + { + "epoch": 23.570143884892087, + "step": 26210, + "torque_loss": 0.14234891533851624 + }, + { + "epoch": 23.570143884892087, + "force_loss": 0.0073415786027908325, + "step": 26210 + }, + { + "epoch": 23.57913669064748, + "grad_norm": 0.15321233868598938, + "learning_rate": 6.434866811225168e-05, + "loss": 0.0176, + "step": 26220 + }, + { + "action_loss": 0.00684645539149642, + "epoch": 23.57913669064748, + "step": 26220 + }, + { + "epoch": 23.57913669064748, + "step": 26220, + "torque_loss": 0.07743039727210999 + }, + { + "epoch": 23.57913669064748, + "force_loss": 0.009907073341310024, + "step": 26220 + }, + { + "epoch": 23.58812949640288, + "grad_norm": 0.1923304796218872, + "learning_rate": 6.432226722587923e-05, + "loss": 0.0227, + "step": 26230 + }, + { + "action_loss": 0.005282173398882151, + "epoch": 23.58812949640288, + "step": 26230 + }, + { + "epoch": 23.58812949640288, + "step": 26230, + "torque_loss": 0.14174269139766693 + }, + { + "epoch": 23.58812949640288, + "force_loss": 0.00299402535893023, + "step": 26230 + }, + { + "epoch": 23.597122302158272, + "grad_norm": 0.29661592841148376, + "learning_rate": 6.429586198878015e-05, + "loss": 0.0201, + "step": 26240 + }, + { + "action_loss": 0.06906166672706604, + "epoch": 23.597122302158272, + "step": 26240 + }, + { + "epoch": 23.597122302158272, + "step": 26240, + "torque_loss": 0.18621863424777985 + }, + { + "epoch": 23.597122302158272, + "force_loss": 0.08686783164739609, + "step": 26240 + }, + { + "epoch": 23.60611510791367, + "grad_norm": 0.1875166893005371, + "learning_rate": 6.426945240897566e-05, + "loss": 0.0264, + "step": 26250 + }, + { + "action_loss": 0.007652702275663614, + "epoch": 23.60611510791367, + "step": 26250 + }, + { + "epoch": 23.60611510791367, + "step": 26250, + "torque_loss": 0.19763286411762238 + }, + { + "epoch": 23.60611510791367, + "force_loss": 0.006358183454722166, + "step": 26250 + }, + { + "epoch": 23.615107913669064, + "grad_norm": 0.1927485167980194, + "learning_rate": 6.424303849448829e-05, + "loss": 0.0203, + "step": 26260 + }, + { + "action_loss": 0.001317686983384192, + "epoch": 23.615107913669064, + "step": 26260 + }, + { + "epoch": 23.615107913669064, + "step": 26260, + "torque_loss": 0.1169653907418251 + }, + { + "epoch": 23.615107913669064, + "force_loss": 0.001565055805258453, + "step": 26260 + }, + { + "epoch": 23.62410071942446, + "grad_norm": 0.34047549962997437, + "learning_rate": 6.42166202533419e-05, + "loss": 0.0194, + "step": 26270 + }, + { + "action_loss": 0.004502443131059408, + "epoch": 23.62410071942446, + "step": 26270 + }, + { + "epoch": 23.62410071942446, + "step": 26270, + "torque_loss": 0.157049298286438 + }, + { + "epoch": 23.62410071942446, + "force_loss": 0.0043264105916023254, + "step": 26270 + }, + { + "epoch": 23.633093525179856, + "grad_norm": 0.23518070578575134, + "learning_rate": 6.419019769356164e-05, + "loss": 0.0192, + "step": 26280 + }, + { + "action_loss": 0.005145172122865915, + "epoch": 23.633093525179856, + "step": 26280 + }, + { + "epoch": 23.633093525179856, + "step": 26280, + "torque_loss": 0.14803844690322876 + }, + { + "epoch": 23.633093525179856, + "force_loss": 0.006488631013780832, + "step": 26280 + }, + { + "epoch": 23.642086330935253, + "grad_norm": 0.4682842493057251, + "learning_rate": 6.416377082317398e-05, + "loss": 0.0196, + "step": 26290 + }, + { + "action_loss": 0.0036925256717950106, + "epoch": 23.642086330935253, + "step": 26290 + }, + { + "epoch": 23.642086330935253, + "step": 26290, + "torque_loss": 0.12495657056570053 + }, + { + "epoch": 23.642086330935253, + "force_loss": 0.0037626430857926607, + "step": 26290 + }, + { + "epoch": 23.651079136690647, + "grad_norm": 0.31905946135520935, + "learning_rate": 6.413733965020674e-05, + "loss": 0.017, + "step": 26300 + }, + { + "action_loss": 0.008717426098883152, + "epoch": 23.651079136690647, + "step": 26300 + }, + { + "epoch": 23.651079136690647, + "step": 26300, + "torque_loss": 0.09252175688743591 + }, + { + "epoch": 23.651079136690647, + "force_loss": 0.006333283614367247, + "step": 26300 + }, + { + "epoch": 23.66007194244604, + "grad_norm": 0.29095759987831116, + "learning_rate": 6.411090418268896e-05, + "loss": 0.0179, + "step": 26310 + }, + { + "action_loss": 0.005678208079189062, + "epoch": 23.66007194244604, + "step": 26310 + }, + { + "epoch": 23.66007194244604, + "step": 26310, + "torque_loss": 0.12558908760547638 + }, + { + "epoch": 23.66007194244604, + "force_loss": 0.013269971124827862, + "step": 26310 + }, + { + "epoch": 23.66906474820144, + "grad_norm": 0.1718796044588089, + "learning_rate": 6.408446442865109e-05, + "loss": 0.0169, + "step": 26320 + }, + { + "action_loss": 0.0051812659949064255, + "epoch": 23.66906474820144, + "step": 26320 + }, + { + "epoch": 23.66906474820144, + "step": 26320, + "torque_loss": 0.08716332167387009 + }, + { + "epoch": 23.66906474820144, + "force_loss": 0.004113268107175827, + "step": 26320 + }, + { + "epoch": 23.678057553956833, + "grad_norm": 0.24299782514572144, + "learning_rate": 6.405802039612479e-05, + "loss": 0.0231, + "step": 26330 + }, + { + "action_loss": 0.003167175455018878, + "epoch": 23.678057553956833, + "step": 26330 + }, + { + "epoch": 23.678057553956833, + "step": 26330, + "torque_loss": 0.098402239382267 + }, + { + "epoch": 23.678057553956833, + "force_loss": 0.001812900067307055, + "step": 26330 + }, + { + "epoch": 23.68705035971223, + "grad_norm": 0.7098739743232727, + "learning_rate": 6.403157209314308e-05, + "loss": 0.0216, + "step": 26340 + }, + { + "action_loss": 0.00874798372387886, + "epoch": 23.68705035971223, + "step": 26340 + }, + { + "epoch": 23.68705035971223, + "step": 26340, + "torque_loss": 0.10877111554145813 + }, + { + "epoch": 23.68705035971223, + "force_loss": 0.005317417439073324, + "step": 26340 + }, + { + "epoch": 23.696043165467625, + "grad_norm": 0.5119842290878296, + "learning_rate": 6.400511952774024e-05, + "loss": 0.0212, + "step": 26350 + }, + { + "action_loss": 0.003754744306206703, + "epoch": 23.696043165467625, + "step": 26350 + }, + { + "epoch": 23.696043165467625, + "step": 26350, + "torque_loss": 0.14863841235637665 + }, + { + "epoch": 23.696043165467625, + "force_loss": 0.0018192650750279427, + "step": 26350 + }, + { + "epoch": 23.705035971223023, + "grad_norm": 0.3002479076385498, + "learning_rate": 6.397866270795187e-05, + "loss": 0.0191, + "step": 26360 + }, + { + "action_loss": 0.0065006990917027, + "epoch": 23.705035971223023, + "step": 26360 + }, + { + "epoch": 23.705035971223023, + "step": 26360, + "torque_loss": 0.12278381735086441 + }, + { + "epoch": 23.705035971223023, + "force_loss": 0.009819511324167252, + "step": 26360 + }, + { + "epoch": 23.714028776978417, + "grad_norm": 0.33609211444854736, + "learning_rate": 6.395220164181489e-05, + "loss": 0.0188, + "step": 26370 + }, + { + "action_loss": 0.009771792218089104, + "epoch": 23.714028776978417, + "step": 26370 + }, + { + "epoch": 23.714028776978417, + "step": 26370, + "torque_loss": 0.11551538109779358 + }, + { + "epoch": 23.714028776978417, + "force_loss": 0.01165012177079916, + "step": 26370 + }, + { + "epoch": 23.723021582733814, + "grad_norm": 0.27792075276374817, + "learning_rate": 6.39257363373674e-05, + "loss": 0.0177, + "step": 26380 + }, + { + "action_loss": 0.009556450881063938, + "epoch": 23.723021582733814, + "step": 26380 + }, + { + "epoch": 23.723021582733814, + "step": 26380, + "torque_loss": 0.13581602275371552 + }, + { + "epoch": 23.723021582733814, + "force_loss": 0.020313812419772148, + "step": 26380 + }, + { + "epoch": 23.73201438848921, + "grad_norm": 0.22145555913448334, + "learning_rate": 6.389926680264892e-05, + "loss": 0.022, + "step": 26390 + }, + { + "action_loss": 0.0036732947919517756, + "epoch": 23.73201438848921, + "step": 26390 + }, + { + "epoch": 23.73201438848921, + "step": 26390, + "torque_loss": 0.1463232785463333 + }, + { + "epoch": 23.73201438848921, + "force_loss": 0.003843870246782899, + "step": 26390 + }, + { + "epoch": 23.741007194244606, + "grad_norm": 0.43495306372642517, + "learning_rate": 6.387279304570017e-05, + "loss": 0.0264, + "step": 26400 + }, + { + "action_loss": 0.0023827191907912493, + "epoch": 23.741007194244606, + "step": 26400 + }, + { + "epoch": 23.741007194244606, + "step": 26400, + "torque_loss": 0.11775105446577072 + }, + { + "epoch": 23.741007194244606, + "force_loss": 0.0022756941616535187, + "step": 26400 + }, + { + "epoch": 23.75, + "grad_norm": 0.17927949130535126, + "learning_rate": 6.384631507456319e-05, + "loss": 0.0204, + "step": 26410 + }, + { + "action_loss": 0.006513234693557024, + "epoch": 23.75, + "step": 26410 + }, + { + "epoch": 23.75, + "step": 26410, + "torque_loss": 0.14344631135463715 + }, + { + "epoch": 23.75, + "force_loss": 0.005243296269327402, + "step": 26410 + }, + { + "epoch": 23.758992805755394, + "grad_norm": 0.35449185967445374, + "learning_rate": 6.381983289728126e-05, + "loss": 0.0199, + "step": 26420 + }, + { + "action_loss": 0.0040943678468465805, + "epoch": 23.758992805755394, + "step": 26420 + }, + { + "epoch": 23.758992805755394, + "step": 26420, + "torque_loss": 0.13349874317646027 + }, + { + "epoch": 23.758992805755394, + "force_loss": 0.00539036700502038, + "step": 26420 + }, + { + "epoch": 23.76798561151079, + "grad_norm": 0.28454479575157166, + "learning_rate": 6.3793346521899e-05, + "loss": 0.0213, + "step": 26430 + }, + { + "action_loss": 0.010007210075855255, + "epoch": 23.76798561151079, + "step": 26430 + }, + { + "epoch": 23.76798561151079, + "step": 26430, + "torque_loss": 0.19674928486347198 + }, + { + "epoch": 23.76798561151079, + "force_loss": 0.006232852581888437, + "step": 26430 + }, + { + "epoch": 23.776978417266186, + "grad_norm": 0.21880799531936646, + "learning_rate": 6.376685595646226e-05, + "loss": 0.0218, + "step": 26440 + }, + { + "action_loss": 0.008988688699901104, + "epoch": 23.776978417266186, + "step": 26440 + }, + { + "epoch": 23.776978417266186, + "step": 26440, + "torque_loss": 0.16790910065174103 + }, + { + "epoch": 23.776978417266186, + "force_loss": 0.013626438565552235, + "step": 26440 + }, + { + "epoch": 23.785971223021583, + "grad_norm": 0.20643463730812073, + "learning_rate": 6.374036120901816e-05, + "loss": 0.0216, + "step": 26450 + }, + { + "action_loss": 0.007177682593464851, + "epoch": 23.785971223021583, + "step": 26450 + }, + { + "epoch": 23.785971223021583, + "step": 26450, + "torque_loss": 0.1461370885372162 + }, + { + "epoch": 23.785971223021583, + "force_loss": 0.010485065169632435, + "step": 26450 + }, + { + "epoch": 23.794964028776977, + "grad_norm": 0.27559787034988403, + "learning_rate": 6.371386228761514e-05, + "loss": 0.0195, + "step": 26460 + }, + { + "action_loss": 0.008658151142299175, + "epoch": 23.794964028776977, + "step": 26460 + }, + { + "epoch": 23.794964028776977, + "step": 26460, + "torque_loss": 0.1518176943063736 + }, + { + "epoch": 23.794964028776977, + "force_loss": 0.002371664857491851, + "step": 26460 + }, + { + "epoch": 23.803956834532375, + "grad_norm": 0.23754039406776428, + "learning_rate": 6.368735920030283e-05, + "loss": 0.0263, + "step": 26470 + }, + { + "action_loss": 0.0035001190844923258, + "epoch": 23.803956834532375, + "step": 26470 + }, + { + "epoch": 23.803956834532375, + "step": 26470, + "torque_loss": 0.15118958055973053 + }, + { + "epoch": 23.803956834532375, + "force_loss": 0.005252012982964516, + "step": 26470 + }, + { + "epoch": 23.81294964028777, + "grad_norm": 0.17233656346797943, + "learning_rate": 6.366085195513218e-05, + "loss": 0.0206, + "step": 26480 + }, + { + "action_loss": 0.0035643859300762415, + "epoch": 23.81294964028777, + "step": 26480 + }, + { + "epoch": 23.81294964028777, + "step": 26480, + "torque_loss": 0.18411237001419067 + }, + { + "epoch": 23.81294964028777, + "force_loss": 0.0026406904216855764, + "step": 26480 + }, + { + "epoch": 23.821942446043167, + "grad_norm": 0.23081789910793304, + "learning_rate": 6.363434056015543e-05, + "loss": 0.0252, + "step": 26490 + }, + { + "action_loss": 0.007509656250476837, + "epoch": 23.821942446043167, + "step": 26490 + }, + { + "epoch": 23.821942446043167, + "step": 26490, + "torque_loss": 0.13051988184452057 + }, + { + "epoch": 23.821942446043167, + "force_loss": 0.007185162510722876, + "step": 26490 + }, + { + "epoch": 23.83093525179856, + "grad_norm": 0.29633012413978577, + "learning_rate": 6.360782502342599e-05, + "loss": 0.0192, + "step": 26500 + }, + { + "action_loss": 0.004706589970737696, + "epoch": 23.83093525179856, + "step": 26500 + }, + { + "epoch": 23.83093525179856, + "step": 26500, + "torque_loss": 0.11669912189245224 + }, + { + "epoch": 23.83093525179856, + "force_loss": 0.004882893059402704, + "step": 26500 + }, + { + "epoch": 23.83992805755396, + "grad_norm": 0.26792123913764954, + "learning_rate": 6.358130535299862e-05, + "loss": 0.0249, + "step": 26510 + }, + { + "action_loss": 0.002868209732696414, + "epoch": 23.83992805755396, + "step": 26510 + }, + { + "epoch": 23.83992805755396, + "step": 26510, + "torque_loss": 0.10993697494268417 + }, + { + "epoch": 23.83992805755396, + "force_loss": 0.0021622648928314447, + "step": 26510 + }, + { + "epoch": 23.848920863309353, + "grad_norm": 0.3968540132045746, + "learning_rate": 6.355478155692926e-05, + "loss": 0.0227, + "step": 26520 + }, + { + "action_loss": 0.007956896908581257, + "epoch": 23.848920863309353, + "step": 26520 + }, + { + "epoch": 23.848920863309353, + "step": 26520, + "torque_loss": 0.12510977685451508 + }, + { + "epoch": 23.848920863309353, + "force_loss": 0.0060476139187812805, + "step": 26520 + }, + { + "epoch": 23.857913669064747, + "grad_norm": 0.45184972882270813, + "learning_rate": 6.352825364327517e-05, + "loss": 0.0209, + "step": 26530 + }, + { + "action_loss": 0.015073730610311031, + "epoch": 23.857913669064747, + "step": 26530 + }, + { + "epoch": 23.857913669064747, + "step": 26530, + "torque_loss": 0.12922672927379608 + }, + { + "epoch": 23.857913669064747, + "force_loss": 0.01225163321942091, + "step": 26530 + }, + { + "epoch": 23.866906474820144, + "grad_norm": 0.41845831274986267, + "learning_rate": 6.350172162009482e-05, + "loss": 0.0268, + "step": 26540 + }, + { + "action_loss": 0.014782925136387348, + "epoch": 23.866906474820144, + "step": 26540 + }, + { + "epoch": 23.866906474820144, + "step": 26540, + "torque_loss": 0.13937172293663025 + }, + { + "epoch": 23.866906474820144, + "force_loss": 0.02035324089229107, + "step": 26540 + }, + { + "epoch": 23.87589928057554, + "grad_norm": 0.334474116563797, + "learning_rate": 6.347518549544793e-05, + "loss": 0.0221, + "step": 26550 + }, + { + "action_loss": 0.003326263278722763, + "epoch": 23.87589928057554, + "step": 26550 + }, + { + "epoch": 23.87589928057554, + "step": 26550, + "torque_loss": 0.1415240317583084 + }, + { + "epoch": 23.87589928057554, + "force_loss": 0.0048951092176139355, + "step": 26550 + }, + { + "epoch": 23.884892086330936, + "grad_norm": 0.22813700139522552, + "learning_rate": 6.344864527739547e-05, + "loss": 0.0199, + "step": 26560 + }, + { + "action_loss": 0.004978131968528032, + "epoch": 23.884892086330936, + "step": 26560 + }, + { + "epoch": 23.884892086330936, + "step": 26560, + "torque_loss": 0.11734786629676819 + }, + { + "epoch": 23.884892086330936, + "force_loss": 0.005578598473221064, + "step": 26560 + }, + { + "epoch": 23.89388489208633, + "grad_norm": 0.19286710023880005, + "learning_rate": 6.342210097399966e-05, + "loss": 0.0204, + "step": 26570 + }, + { + "action_loss": 0.008035891689360142, + "epoch": 23.89388489208633, + "step": 26570 + }, + { + "epoch": 23.89388489208633, + "step": 26570, + "torque_loss": 0.17052531242370605 + }, + { + "epoch": 23.89388489208633, + "force_loss": 0.004830732475966215, + "step": 26570 + }, + { + "epoch": 23.902877697841728, + "grad_norm": 0.2038847953081131, + "learning_rate": 6.339555259332398e-05, + "loss": 0.0274, + "step": 26580 + }, + { + "action_loss": 0.005652909632772207, + "epoch": 23.902877697841728, + "step": 26580 + }, + { + "epoch": 23.902877697841728, + "step": 26580, + "torque_loss": 0.1375824362039566 + }, + { + "epoch": 23.902877697841728, + "force_loss": 0.005405073519796133, + "step": 26580 + }, + { + "epoch": 23.91187050359712, + "grad_norm": 0.4427371621131897, + "learning_rate": 6.33690001434331e-05, + "loss": 0.0246, + "step": 26590 + }, + { + "action_loss": 0.018171582370996475, + "epoch": 23.91187050359712, + "step": 26590 + }, + { + "epoch": 23.91187050359712, + "step": 26590, + "torque_loss": 0.16622121632099152 + }, + { + "epoch": 23.91187050359712, + "force_loss": 0.029015885666012764, + "step": 26590 + }, + { + "epoch": 23.92086330935252, + "grad_norm": 0.358405202627182, + "learning_rate": 6.334244363239296e-05, + "loss": 0.023, + "step": 26600 + }, + { + "action_loss": 0.003429584437981248, + "epoch": 23.92086330935252, + "step": 26600 + }, + { + "epoch": 23.92086330935252, + "step": 26600, + "torque_loss": 0.08238402009010315 + }, + { + "epoch": 23.92086330935252, + "force_loss": 0.002532379003241658, + "step": 26600 + }, + { + "epoch": 23.929856115107913, + "grad_norm": 0.13266392052173615, + "learning_rate": 6.331588306827073e-05, + "loss": 0.018, + "step": 26610 + }, + { + "action_loss": 0.0064096166752278805, + "epoch": 23.929856115107913, + "step": 26610 + }, + { + "epoch": 23.929856115107913, + "step": 26610, + "torque_loss": 0.15423421561717987 + }, + { + "epoch": 23.929856115107913, + "force_loss": 0.009012925438582897, + "step": 26610 + }, + { + "epoch": 23.93884892086331, + "grad_norm": 0.1737116128206253, + "learning_rate": 6.328931845913483e-05, + "loss": 0.0171, + "step": 26620 + }, + { + "action_loss": 0.003073088126257062, + "epoch": 23.93884892086331, + "step": 26620 + }, + { + "epoch": 23.93884892086331, + "step": 26620, + "torque_loss": 0.14280617237091064 + }, + { + "epoch": 23.93884892086331, + "force_loss": 0.0023988857865333557, + "step": 26620 + }, + { + "epoch": 23.947841726618705, + "grad_norm": 0.2824406921863556, + "learning_rate": 6.326274981305484e-05, + "loss": 0.0189, + "step": 26630 + }, + { + "action_loss": 0.007629834581166506, + "epoch": 23.947841726618705, + "step": 26630 + }, + { + "epoch": 23.947841726618705, + "step": 26630, + "torque_loss": 0.12465822696685791 + }, + { + "epoch": 23.947841726618705, + "force_loss": 0.010988034307956696, + "step": 26630 + }, + { + "epoch": 23.9568345323741, + "grad_norm": 0.18834997713565826, + "learning_rate": 6.323617713810166e-05, + "loss": 0.0214, + "step": 26640 + }, + { + "action_loss": 0.003116500796750188, + "epoch": 23.9568345323741, + "step": 26640 + }, + { + "epoch": 23.9568345323741, + "step": 26640, + "torque_loss": 0.07612895220518112 + }, + { + "epoch": 23.9568345323741, + "force_loss": 0.003354916349053383, + "step": 26640 + }, + { + "epoch": 23.965827338129497, + "grad_norm": 0.11095096915960312, + "learning_rate": 6.320960044234734e-05, + "loss": 0.0198, + "step": 26650 + }, + { + "action_loss": 0.010923963040113449, + "epoch": 23.965827338129497, + "step": 26650 + }, + { + "epoch": 23.965827338129497, + "step": 26650, + "torque_loss": 0.1912527084350586 + }, + { + "epoch": 23.965827338129497, + "force_loss": 0.01209546159952879, + "step": 26650 + }, + { + "epoch": 23.97482014388489, + "grad_norm": 0.27918171882629395, + "learning_rate": 6.318301973386518e-05, + "loss": 0.0276, + "step": 26660 + }, + { + "action_loss": 0.006705548148602247, + "epoch": 23.97482014388489, + "step": 26660 + }, + { + "epoch": 23.97482014388489, + "step": 26660, + "torque_loss": 0.16599969565868378 + }, + { + "epoch": 23.97482014388489, + "force_loss": 0.005112719256430864, + "step": 26660 + }, + { + "epoch": 23.98381294964029, + "grad_norm": 0.41221168637275696, + "learning_rate": 6.315643502072971e-05, + "loss": 0.021, + "step": 26670 + }, + { + "action_loss": 0.0027291413862258196, + "epoch": 23.98381294964029, + "step": 26670 + }, + { + "epoch": 23.98381294964029, + "step": 26670, + "torque_loss": 0.1066742017865181 + }, + { + "epoch": 23.98381294964029, + "force_loss": 0.0014603679301217198, + "step": 26670 + }, + { + "epoch": 23.992805755395683, + "grad_norm": 0.2750396132469177, + "learning_rate": 6.312984631101667e-05, + "loss": 0.018, + "step": 26680 + }, + { + "action_loss": 0.004309473093599081, + "epoch": 23.992805755395683, + "step": 26680 + }, + { + "epoch": 23.992805755395683, + "step": 26680, + "torque_loss": 0.12925082445144653 + }, + { + "epoch": 23.992805755395683, + "force_loss": 0.0049682422541081905, + "step": 26680 + }, + { + "epoch": 24.00179856115108, + "grad_norm": 0.14965416491031647, + "learning_rate": 6.310325361280297e-05, + "loss": 0.0195, + "step": 26690 + }, + { + "action_loss": 0.0023170060012489557, + "epoch": 24.00179856115108, + "step": 26690 + }, + { + "epoch": 24.00179856115108, + "step": 26690, + "torque_loss": 0.14102952182292938 + }, + { + "epoch": 24.00179856115108, + "force_loss": 0.002769235521554947, + "step": 26690 + }, + { + "epoch": 24.010791366906474, + "grad_norm": 0.5179844498634338, + "learning_rate": 6.30766569341668e-05, + "loss": 0.0172, + "step": 26700 + }, + { + "action_loss": 0.004210879560559988, + "epoch": 24.010791366906474, + "step": 26700 + }, + { + "epoch": 24.010791366906474, + "step": 26700, + "torque_loss": 0.13975654542446136 + }, + { + "epoch": 24.010791366906474, + "force_loss": 0.013249740935862064, + "step": 26700 + }, + { + "epoch": 24.019784172661872, + "grad_norm": 0.27187013626098633, + "learning_rate": 6.305005628318753e-05, + "loss": 0.0224, + "step": 26710 + }, + { + "action_loss": 0.0035000566858798265, + "epoch": 24.019784172661872, + "step": 26710 + }, + { + "epoch": 24.019784172661872, + "step": 26710, + "torque_loss": 0.12807540595531464 + }, + { + "epoch": 24.019784172661872, + "force_loss": 0.013426161371171474, + "step": 26710 + }, + { + "epoch": 24.028776978417266, + "grad_norm": 0.21601654589176178, + "learning_rate": 6.302345166794572e-05, + "loss": 0.0189, + "step": 26720 + }, + { + "action_loss": 0.013133309781551361, + "epoch": 24.028776978417266, + "step": 26720 + }, + { + "epoch": 24.028776978417266, + "step": 26720, + "torque_loss": 0.09281337261199951 + }, + { + "epoch": 24.028776978417266, + "force_loss": 0.015573568642139435, + "step": 26720 + }, + { + "epoch": 24.037769784172664, + "grad_norm": 0.34611037373542786, + "learning_rate": 6.299684309652316e-05, + "loss": 0.021, + "step": 26730 + }, + { + "action_loss": 0.0021687422413378954, + "epoch": 24.037769784172664, + "step": 26730 + }, + { + "epoch": 24.037769784172664, + "step": 26730, + "torque_loss": 0.08817531913518906 + }, + { + "epoch": 24.037769784172664, + "force_loss": 0.002704254351556301, + "step": 26730 + }, + { + "epoch": 24.046762589928058, + "grad_norm": 0.5585370659828186, + "learning_rate": 6.297023057700283e-05, + "loss": 0.019, + "step": 26740 + }, + { + "action_loss": 0.010020464658737183, + "epoch": 24.046762589928058, + "step": 26740 + }, + { + "epoch": 24.046762589928058, + "step": 26740, + "torque_loss": 0.10950187593698502 + }, + { + "epoch": 24.046762589928058, + "force_loss": 0.010822725482285023, + "step": 26740 + }, + { + "epoch": 24.055755395683452, + "grad_norm": 0.3086368143558502, + "learning_rate": 6.294361411746891e-05, + "loss": 0.0206, + "step": 26750 + }, + { + "action_loss": 0.0038403042126446962, + "epoch": 24.055755395683452, + "step": 26750 + }, + { + "epoch": 24.055755395683452, + "step": 26750, + "torque_loss": 0.14038939774036407 + }, + { + "epoch": 24.055755395683452, + "force_loss": 0.0019277144456282258, + "step": 26750 + }, + { + "epoch": 24.06474820143885, + "grad_norm": 0.11394848674535751, + "learning_rate": 6.291699372600677e-05, + "loss": 0.019, + "step": 26760 + }, + { + "action_loss": 0.005254644900560379, + "epoch": 24.06474820143885, + "step": 26760 + }, + { + "epoch": 24.06474820143885, + "step": 26760, + "torque_loss": 0.07805917412042618 + }, + { + "epoch": 24.06474820143885, + "force_loss": 0.01563364453613758, + "step": 26760 + }, + { + "epoch": 24.073741007194243, + "grad_norm": 0.39980843663215637, + "learning_rate": 6.2890369410703e-05, + "loss": 0.0239, + "step": 26770 + }, + { + "action_loss": 0.0024700958747416735, + "epoch": 24.073741007194243, + "step": 26770 + }, + { + "epoch": 24.073741007194243, + "step": 26770, + "torque_loss": 0.1352996975183487 + }, + { + "epoch": 24.073741007194243, + "force_loss": 0.0021215060260146856, + "step": 26770 + }, + { + "epoch": 24.08273381294964, + "grad_norm": 0.1333743929862976, + "learning_rate": 6.286374117964534e-05, + "loss": 0.0206, + "step": 26780 + }, + { + "action_loss": 0.002684928709641099, + "epoch": 24.08273381294964, + "step": 26780 + }, + { + "epoch": 24.08273381294964, + "step": 26780, + "torque_loss": 0.1584099978208542 + }, + { + "epoch": 24.08273381294964, + "force_loss": 0.0018392599886283278, + "step": 26780 + }, + { + "epoch": 24.091726618705035, + "grad_norm": 0.148902028799057, + "learning_rate": 6.283710904092277e-05, + "loss": 0.0171, + "step": 26790 + }, + { + "action_loss": 0.005456050392240286, + "epoch": 24.091726618705035, + "step": 26790 + }, + { + "epoch": 24.091726618705035, + "step": 26790, + "torque_loss": 0.14530979096889496 + }, + { + "epoch": 24.091726618705035, + "force_loss": 0.006891926284879446, + "step": 26790 + }, + { + "epoch": 24.100719424460433, + "grad_norm": 0.34021157026290894, + "learning_rate": 6.281047300262542e-05, + "loss": 0.0183, + "step": 26800 + }, + { + "action_loss": 0.009326084516942501, + "epoch": 24.100719424460433, + "step": 26800 + }, + { + "epoch": 24.100719424460433, + "step": 26800, + "torque_loss": 0.09304801374673843 + }, + { + "epoch": 24.100719424460433, + "force_loss": 0.00395691953599453, + "step": 26800 + }, + { + "epoch": 24.109712230215827, + "grad_norm": 0.4762473404407501, + "learning_rate": 6.278383307284461e-05, + "loss": 0.0205, + "step": 26810 + }, + { + "action_loss": 0.0042495871894061565, + "epoch": 24.109712230215827, + "step": 26810 + }, + { + "epoch": 24.109712230215827, + "step": 26810, + "torque_loss": 0.14461617171764374 + }, + { + "epoch": 24.109712230215827, + "force_loss": 0.00489812670275569, + "step": 26810 + }, + { + "epoch": 24.118705035971225, + "grad_norm": 0.32843729853630066, + "learning_rate": 6.275718925967284e-05, + "loss": 0.0212, + "step": 26820 + }, + { + "action_loss": 0.007712294813245535, + "epoch": 24.118705035971225, + "step": 26820 + }, + { + "epoch": 24.118705035971225, + "step": 26820, + "torque_loss": 0.14035049080848694 + }, + { + "epoch": 24.118705035971225, + "force_loss": 0.017633654177188873, + "step": 26820 + }, + { + "epoch": 24.12769784172662, + "grad_norm": 0.4606230556964874, + "learning_rate": 6.273054157120382e-05, + "loss": 0.0213, + "step": 26830 + }, + { + "action_loss": 0.008606442250311375, + "epoch": 24.12769784172662, + "step": 26830 + }, + { + "epoch": 24.12769784172662, + "step": 26830, + "torque_loss": 0.14684230089187622 + }, + { + "epoch": 24.12769784172662, + "force_loss": 0.01178403664380312, + "step": 26830 + }, + { + "epoch": 24.136690647482013, + "grad_norm": 0.23865234851837158, + "learning_rate": 6.270389001553238e-05, + "loss": 0.0223, + "step": 26840 + }, + { + "action_loss": 0.005808299873024225, + "epoch": 24.136690647482013, + "step": 26840 + }, + { + "epoch": 24.136690647482013, + "step": 26840, + "torque_loss": 0.10328838229179382 + }, + { + "epoch": 24.136690647482013, + "force_loss": 0.005072253290563822, + "step": 26840 + }, + { + "epoch": 24.14568345323741, + "grad_norm": 0.3093968629837036, + "learning_rate": 6.26772346007546e-05, + "loss": 0.0238, + "step": 26850 + }, + { + "action_loss": 0.004079446662217379, + "epoch": 24.14568345323741, + "step": 26850 + }, + { + "epoch": 24.14568345323741, + "step": 26850, + "torque_loss": 0.10266079753637314 + }, + { + "epoch": 24.14568345323741, + "force_loss": 0.0023946769069880247, + "step": 26850 + }, + { + "epoch": 24.154676258992804, + "grad_norm": 0.48012447357177734, + "learning_rate": 6.265057533496767e-05, + "loss": 0.0206, + "step": 26860 + }, + { + "action_loss": 0.0037369942292571068, + "epoch": 24.154676258992804, + "step": 26860 + }, + { + "epoch": 24.154676258992804, + "step": 26860, + "torque_loss": 0.16007035970687866 + }, + { + "epoch": 24.154676258992804, + "force_loss": 0.0044674742966890335, + "step": 26860 + }, + { + "epoch": 24.163669064748202, + "grad_norm": 0.37928563356399536, + "learning_rate": 6.262391222626997e-05, + "loss": 0.0233, + "step": 26870 + }, + { + "action_loss": 0.002601821906864643, + "epoch": 24.163669064748202, + "step": 26870 + }, + { + "epoch": 24.163669064748202, + "step": 26870, + "torque_loss": 0.15268342196941376 + }, + { + "epoch": 24.163669064748202, + "force_loss": 0.0025890101678669453, + "step": 26870 + }, + { + "epoch": 24.172661870503596, + "grad_norm": 0.23401878774166107, + "learning_rate": 6.259724528276106e-05, + "loss": 0.0253, + "step": 26880 + }, + { + "action_loss": 0.002314871409907937, + "epoch": 24.172661870503596, + "step": 26880 + }, + { + "epoch": 24.172661870503596, + "step": 26880, + "torque_loss": 0.11141131073236465 + }, + { + "epoch": 24.172661870503596, + "force_loss": 0.0026015767361968756, + "step": 26880 + }, + { + "epoch": 24.181654676258994, + "grad_norm": 0.6529308557510376, + "learning_rate": 6.257057451254162e-05, + "loss": 0.0188, + "step": 26890 + }, + { + "action_loss": 0.003148353425785899, + "epoch": 24.181654676258994, + "step": 26890 + }, + { + "epoch": 24.181654676258994, + "step": 26890, + "torque_loss": 0.1261998862028122 + }, + { + "epoch": 24.181654676258994, + "force_loss": 0.010966651141643524, + "step": 26890 + }, + { + "epoch": 24.190647482014388, + "grad_norm": 0.21692568063735962, + "learning_rate": 6.254389992371357e-05, + "loss": 0.0201, + "step": 26900 + }, + { + "action_loss": 0.011829155497252941, + "epoch": 24.190647482014388, + "step": 26900 + }, + { + "epoch": 24.190647482014388, + "step": 26900, + "torque_loss": 0.11395353823900223 + }, + { + "epoch": 24.190647482014388, + "force_loss": 0.012464761734008789, + "step": 26900 + }, + { + "epoch": 24.199640287769785, + "grad_norm": 0.5146127939224243, + "learning_rate": 6.25172215243799e-05, + "loss": 0.0215, + "step": 26910 + }, + { + "action_loss": 0.003134808735921979, + "epoch": 24.199640287769785, + "step": 26910 + }, + { + "epoch": 24.199640287769785, + "step": 26910, + "torque_loss": 0.10611215233802795 + }, + { + "epoch": 24.199640287769785, + "force_loss": 0.008303600363433361, + "step": 26910 + }, + { + "epoch": 24.20863309352518, + "grad_norm": 0.16701440513134003, + "learning_rate": 6.249053932264486e-05, + "loss": 0.02, + "step": 26920 + }, + { + "action_loss": 0.022442683577537537, + "epoch": 24.20863309352518, + "step": 26920 + }, + { + "epoch": 24.20863309352518, + "step": 26920, + "torque_loss": 0.1562359780073166 + }, + { + "epoch": 24.20863309352518, + "force_loss": 0.031283047050237656, + "step": 26920 + }, + { + "epoch": 24.217625899280577, + "grad_norm": 0.28590187430381775, + "learning_rate": 6.246385332661376e-05, + "loss": 0.0215, + "step": 26930 + }, + { + "action_loss": 0.006464321166276932, + "epoch": 24.217625899280577, + "step": 26930 + }, + { + "epoch": 24.217625899280577, + "step": 26930, + "torque_loss": 0.14696358144283295 + }, + { + "epoch": 24.217625899280577, + "force_loss": 0.008115951903164387, + "step": 26930 + }, + { + "epoch": 24.22661870503597, + "grad_norm": 0.3090987801551819, + "learning_rate": 6.24371635443931e-05, + "loss": 0.0202, + "step": 26940 + }, + { + "action_loss": 0.0022081492934376, + "epoch": 24.22661870503597, + "step": 26940 + }, + { + "epoch": 24.22661870503597, + "step": 26940, + "torque_loss": 0.17350207269191742 + }, + { + "epoch": 24.22661870503597, + "force_loss": 0.0024397282395511866, + "step": 26940 + }, + { + "epoch": 24.235611510791365, + "grad_norm": 0.21155443787574768, + "learning_rate": 6.241046998409054e-05, + "loss": 0.0156, + "step": 26950 + }, + { + "action_loss": 0.0036125865299254656, + "epoch": 24.235611510791365, + "step": 26950 + }, + { + "epoch": 24.235611510791365, + "step": 26950, + "torque_loss": 0.07176991552114487 + }, + { + "epoch": 24.235611510791365, + "force_loss": 0.002854272723197937, + "step": 26950 + }, + { + "epoch": 24.244604316546763, + "grad_norm": 0.43226367235183716, + "learning_rate": 6.238377265381489e-05, + "loss": 0.0207, + "step": 26960 + }, + { + "action_loss": 0.005308577325195074, + "epoch": 24.244604316546763, + "step": 26960 + }, + { + "epoch": 24.244604316546763, + "step": 26960, + "torque_loss": 0.13910140097141266 + }, + { + "epoch": 24.244604316546763, + "force_loss": 0.009794993326067924, + "step": 26960 + }, + { + "epoch": 24.253597122302157, + "grad_norm": 0.3633691370487213, + "learning_rate": 6.235707156167607e-05, + "loss": 0.0263, + "step": 26970 + }, + { + "action_loss": 0.003366742981597781, + "epoch": 24.253597122302157, + "step": 26970 + }, + { + "epoch": 24.253597122302157, + "step": 26970, + "torque_loss": 0.11560416221618652 + }, + { + "epoch": 24.253597122302157, + "force_loss": 0.0039924634620547295, + "step": 26970 + }, + { + "epoch": 24.262589928057555, + "grad_norm": 0.2185497283935547, + "learning_rate": 6.233036671578519e-05, + "loss": 0.0265, + "step": 26980 + }, + { + "action_loss": 0.027073144912719727, + "epoch": 24.262589928057555, + "step": 26980 + }, + { + "epoch": 24.262589928057555, + "step": 26980, + "torque_loss": 0.11673716455698013 + }, + { + "epoch": 24.262589928057555, + "force_loss": 0.026011614128947258, + "step": 26980 + }, + { + "epoch": 24.27158273381295, + "grad_norm": 0.5467181205749512, + "learning_rate": 6.230365812425445e-05, + "loss": 0.0227, + "step": 26990 + }, + { + "action_loss": 0.003463304601609707, + "epoch": 24.27158273381295, + "step": 26990 + }, + { + "epoch": 24.27158273381295, + "step": 26990, + "torque_loss": 0.13536104559898376 + }, + { + "epoch": 24.27158273381295, + "force_loss": 0.0035492137540131807, + "step": 26990 + }, + { + "epoch": 24.280575539568346, + "grad_norm": 0.30241721868515015, + "learning_rate": 6.227694579519724e-05, + "loss": 0.0227, + "step": 27000 + }, + { + "action_loss": 0.013331294059753418, + "epoch": 24.280575539568346, + "step": 27000 + }, + { + "epoch": 24.280575539568346, + "step": 27000, + "torque_loss": 0.1690327674150467 + }, + { + "epoch": 24.280575539568346, + "force_loss": 0.021961594000458717, + "step": 27000 + }, + { + "epoch": 24.28956834532374, + "grad_norm": 0.19231468439102173, + "learning_rate": 6.225022973672805e-05, + "loss": 0.0204, + "step": 27010 + }, + { + "action_loss": 0.021512677893042564, + "epoch": 24.28956834532374, + "step": 27010 + }, + { + "epoch": 24.28956834532374, + "step": 27010, + "torque_loss": 0.16950565576553345 + }, + { + "epoch": 24.28956834532374, + "force_loss": 0.026604605838656425, + "step": 27010 + }, + { + "epoch": 24.298561151079138, + "grad_norm": 0.2754470407962799, + "learning_rate": 6.222350995696253e-05, + "loss": 0.0257, + "step": 27020 + }, + { + "action_loss": 0.010160579346120358, + "epoch": 24.298561151079138, + "step": 27020 + }, + { + "epoch": 24.298561151079138, + "step": 27020, + "torque_loss": 0.11716454476118088 + }, + { + "epoch": 24.298561151079138, + "force_loss": 0.017418472096323967, + "step": 27020 + }, + { + "epoch": 24.307553956834532, + "grad_norm": 0.45914801955223083, + "learning_rate": 6.21967864640174e-05, + "loss": 0.0199, + "step": 27030 + }, + { + "action_loss": 0.0032418230548501015, + "epoch": 24.307553956834532, + "step": 27030 + }, + { + "epoch": 24.307553956834532, + "step": 27030, + "torque_loss": 0.12885306775569916 + }, + { + "epoch": 24.307553956834532, + "force_loss": 0.0039643216878175735, + "step": 27030 + }, + { + "epoch": 24.31654676258993, + "grad_norm": 0.3468298017978668, + "learning_rate": 6.217005926601059e-05, + "loss": 0.0174, + "step": 27040 + }, + { + "action_loss": 0.013095776550471783, + "epoch": 24.31654676258993, + "step": 27040 + }, + { + "epoch": 24.31654676258993, + "step": 27040, + "torque_loss": 0.12410322576761246 + }, + { + "epoch": 24.31654676258993, + "force_loss": 0.013539652340114117, + "step": 27040 + }, + { + "epoch": 24.325539568345324, + "grad_norm": 0.578674852848053, + "learning_rate": 6.214332837106111e-05, + "loss": 0.02, + "step": 27050 + }, + { + "action_loss": 0.0065324814058840275, + "epoch": 24.325539568345324, + "step": 27050 + }, + { + "epoch": 24.325539568345324, + "step": 27050, + "torque_loss": 0.09553492814302444 + }, + { + "epoch": 24.325539568345324, + "force_loss": 0.009036104194819927, + "step": 27050 + }, + { + "epoch": 24.334532374100718, + "grad_norm": 0.4983639121055603, + "learning_rate": 6.21165937872891e-05, + "loss": 0.0197, + "step": 27060 + }, + { + "action_loss": 0.030352450907230377, + "epoch": 24.334532374100718, + "step": 27060 + }, + { + "epoch": 24.334532374100718, + "step": 27060, + "torque_loss": 0.1310928910970688 + }, + { + "epoch": 24.334532374100718, + "force_loss": 0.02637692727148533, + "step": 27060 + }, + { + "epoch": 24.343525179856115, + "grad_norm": 0.5432457327842712, + "learning_rate": 6.208985552281582e-05, + "loss": 0.0218, + "step": 27070 + }, + { + "action_loss": 0.026522735133767128, + "epoch": 24.343525179856115, + "step": 27070 + }, + { + "epoch": 24.343525179856115, + "step": 27070, + "torque_loss": 0.1456541270017624 + }, + { + "epoch": 24.343525179856115, + "force_loss": 0.031462885439395905, + "step": 27070 + }, + { + "epoch": 24.35251798561151, + "grad_norm": 0.3561514616012573, + "learning_rate": 6.206311358576364e-05, + "loss": 0.0252, + "step": 27080 + }, + { + "action_loss": 0.006840948015451431, + "epoch": 24.35251798561151, + "step": 27080 + }, + { + "epoch": 24.35251798561151, + "step": 27080, + "torque_loss": 0.18298260867595673 + }, + { + "epoch": 24.35251798561151, + "force_loss": 0.004947637673467398, + "step": 27080 + }, + { + "epoch": 24.361510791366907, + "grad_norm": 0.27993276715278625, + "learning_rate": 6.203636798425608e-05, + "loss": 0.0199, + "step": 27090 + }, + { + "action_loss": 0.018681637942790985, + "epoch": 24.361510791366907, + "step": 27090 + }, + { + "epoch": 24.361510791366907, + "step": 27090, + "torque_loss": 0.18137353658676147 + }, + { + "epoch": 24.361510791366907, + "force_loss": 0.011561229825019836, + "step": 27090 + }, + { + "epoch": 24.3705035971223, + "grad_norm": 0.2753047049045563, + "learning_rate": 6.20096187264177e-05, + "loss": 0.0229, + "step": 27100 + }, + { + "action_loss": 0.009925643913447857, + "epoch": 24.3705035971223, + "step": 27100 + }, + { + "epoch": 24.3705035971223, + "step": 27100, + "torque_loss": 0.11520782858133316 + }, + { + "epoch": 24.3705035971223, + "force_loss": 0.011404546909034252, + "step": 27100 + }, + { + "epoch": 24.3794964028777, + "grad_norm": 0.33238694071769714, + "learning_rate": 6.198286582037425e-05, + "loss": 0.022, + "step": 27110 + }, + { + "action_loss": 0.00781126506626606, + "epoch": 24.3794964028777, + "step": 27110 + }, + { + "epoch": 24.3794964028777, + "step": 27110, + "torque_loss": 0.13570380210876465 + }, + { + "epoch": 24.3794964028777, + "force_loss": 0.004932140465825796, + "step": 27110 + }, + { + "epoch": 24.388489208633093, + "grad_norm": 0.17936363816261292, + "learning_rate": 6.195610927425256e-05, + "loss": 0.0229, + "step": 27120 + }, + { + "action_loss": 0.0032112086191773415, + "epoch": 24.388489208633093, + "step": 27120 + }, + { + "epoch": 24.388489208633093, + "step": 27120, + "torque_loss": 0.1424873024225235 + }, + { + "epoch": 24.388489208633093, + "force_loss": 0.0028595321346074343, + "step": 27120 + }, + { + "epoch": 24.39748201438849, + "grad_norm": 0.12826332449913025, + "learning_rate": 6.192934909618056e-05, + "loss": 0.0164, + "step": 27130 + }, + { + "action_loss": 0.0033648523967713118, + "epoch": 24.39748201438849, + "step": 27130 + }, + { + "epoch": 24.39748201438849, + "step": 27130, + "torque_loss": 0.12207072973251343 + }, + { + "epoch": 24.39748201438849, + "force_loss": 0.002059903694316745, + "step": 27130 + }, + { + "epoch": 24.406474820143885, + "grad_norm": 0.12039807438850403, + "learning_rate": 6.190258529428728e-05, + "loss": 0.0175, + "step": 27140 + }, + { + "action_loss": 0.06430386751890182, + "epoch": 24.406474820143885, + "step": 27140 + }, + { + "epoch": 24.406474820143885, + "step": 27140, + "torque_loss": 0.1655750274658203 + }, + { + "epoch": 24.406474820143885, + "force_loss": 0.07303141802549362, + "step": 27140 + }, + { + "epoch": 24.415467625899282, + "grad_norm": 0.2760085165500641, + "learning_rate": 6.187581787670285e-05, + "loss": 0.0263, + "step": 27150 + }, + { + "action_loss": 0.0025449099484831095, + "epoch": 24.415467625899282, + "step": 27150 + }, + { + "epoch": 24.415467625899282, + "step": 27150, + "torque_loss": 0.12741833925247192 + }, + { + "epoch": 24.415467625899282, + "force_loss": 0.001235464820638299, + "step": 27150 + }, + { + "epoch": 24.424460431654676, + "grad_norm": 0.23651844263076782, + "learning_rate": 6.184904685155852e-05, + "loss": 0.0204, + "step": 27160 + }, + { + "action_loss": 0.023961732164025307, + "epoch": 24.424460431654676, + "step": 27160 + }, + { + "epoch": 24.424460431654676, + "step": 27160, + "torque_loss": 0.17916782200336456 + }, + { + "epoch": 24.424460431654676, + "force_loss": 0.023182248696684837, + "step": 27160 + }, + { + "epoch": 24.43345323741007, + "grad_norm": 0.4052882492542267, + "learning_rate": 6.18222722269866e-05, + "loss": 0.0231, + "step": 27170 + }, + { + "action_loss": 0.003978236578404903, + "epoch": 24.43345323741007, + "step": 27170 + }, + { + "epoch": 24.43345323741007, + "step": 27170, + "torque_loss": 0.1772550791501999 + }, + { + "epoch": 24.43345323741007, + "force_loss": 0.003809179412201047, + "step": 27170 + }, + { + "epoch": 24.442446043165468, + "grad_norm": 0.16913823783397675, + "learning_rate": 6.179549401112053e-05, + "loss": 0.0187, + "step": 27180 + }, + { + "action_loss": 0.005313266068696976, + "epoch": 24.442446043165468, + "step": 27180 + }, + { + "epoch": 24.442446043165468, + "step": 27180, + "torque_loss": 0.15760749578475952 + }, + { + "epoch": 24.442446043165468, + "force_loss": 0.005538178142160177, + "step": 27180 + }, + { + "epoch": 24.451438848920862, + "grad_norm": 0.2454654723405838, + "learning_rate": 6.176871221209482e-05, + "loss": 0.0221, + "step": 27190 + }, + { + "action_loss": 0.00831401627510786, + "epoch": 24.451438848920862, + "step": 27190 + }, + { + "epoch": 24.451438848920862, + "step": 27190, + "torque_loss": 0.13839416205883026 + }, + { + "epoch": 24.451438848920862, + "force_loss": 0.00549171632155776, + "step": 27190 + }, + { + "epoch": 24.46043165467626, + "grad_norm": 0.19690600037574768, + "learning_rate": 6.174192683804508e-05, + "loss": 0.0196, + "step": 27200 + }, + { + "action_loss": 0.0033114359248429537, + "epoch": 24.46043165467626, + "step": 27200 + }, + { + "epoch": 24.46043165467626, + "step": 27200, + "torque_loss": 0.22195708751678467 + }, + { + "epoch": 24.46043165467626, + "force_loss": 0.002759550465270877, + "step": 27200 + }, + { + "epoch": 24.469424460431654, + "grad_norm": 0.17955347895622253, + "learning_rate": 6.1715137897108e-05, + "loss": 0.0193, + "step": 27210 + }, + { + "action_loss": 0.0019358963472768664, + "epoch": 24.469424460431654, + "step": 27210 + }, + { + "epoch": 24.469424460431654, + "step": 27210, + "torque_loss": 0.10629644244909286 + }, + { + "epoch": 24.469424460431654, + "force_loss": 0.0025763895828276873, + "step": 27210 + }, + { + "epoch": 24.47841726618705, + "grad_norm": 0.17782427370548248, + "learning_rate": 6.168834539742134e-05, + "loss": 0.0212, + "step": 27220 + }, + { + "action_loss": 0.005482169333845377, + "epoch": 24.47841726618705, + "step": 27220 + }, + { + "epoch": 24.47841726618705, + "step": 27220, + "torque_loss": 0.2010514736175537 + }, + { + "epoch": 24.47841726618705, + "force_loss": 0.005316395312547684, + "step": 27220 + }, + { + "epoch": 24.487410071942445, + "grad_norm": 0.32597222924232483, + "learning_rate": 6.166154934712397e-05, + "loss": 0.0213, + "step": 27230 + }, + { + "action_loss": 0.0035998879466205835, + "epoch": 24.487410071942445, + "step": 27230 + }, + { + "epoch": 24.487410071942445, + "step": 27230, + "torque_loss": 0.1276254504919052 + }, + { + "epoch": 24.487410071942445, + "force_loss": 0.0035867150872945786, + "step": 27230 + }, + { + "epoch": 24.496402877697843, + "grad_norm": 0.3753611743450165, + "learning_rate": 6.163474975435581e-05, + "loss": 0.0172, + "step": 27240 + }, + { + "action_loss": 0.011537007987499237, + "epoch": 24.496402877697843, + "step": 27240 + }, + { + "epoch": 24.496402877697843, + "step": 27240, + "torque_loss": 0.1387958526611328 + }, + { + "epoch": 24.496402877697843, + "force_loss": 0.008628747425973415, + "step": 27240 + }, + { + "epoch": 24.505395683453237, + "grad_norm": 0.295878142118454, + "learning_rate": 6.160794662725787e-05, + "loss": 0.0234, + "step": 27250 + }, + { + "action_loss": 0.002969893626868725, + "epoch": 24.505395683453237, + "step": 27250 + }, + { + "epoch": 24.505395683453237, + "step": 27250, + "torque_loss": 0.13085995614528656 + }, + { + "epoch": 24.505395683453237, + "force_loss": 0.0024577591102570295, + "step": 27250 + }, + { + "epoch": 24.514388489208635, + "grad_norm": 0.36321496963500977, + "learning_rate": 6.158113997397222e-05, + "loss": 0.0205, + "step": 27260 + }, + { + "action_loss": 0.003001876873895526, + "epoch": 24.514388489208635, + "step": 27260 + }, + { + "epoch": 24.514388489208635, + "step": 27260, + "torque_loss": 0.13007371127605438 + }, + { + "epoch": 24.514388489208635, + "force_loss": 0.005574739072471857, + "step": 27260 + }, + { + "epoch": 24.52338129496403, + "grad_norm": 0.2102063000202179, + "learning_rate": 6.155432980264205e-05, + "loss": 0.0212, + "step": 27270 + }, + { + "action_loss": 0.004698061849921942, + "epoch": 24.52338129496403, + "step": 27270 + }, + { + "epoch": 24.52338129496403, + "step": 27270, + "torque_loss": 0.1412065476179123 + }, + { + "epoch": 24.52338129496403, + "force_loss": 0.002502820221707225, + "step": 27270 + }, + { + "epoch": 24.532374100719423, + "grad_norm": 0.161998450756073, + "learning_rate": 6.152751612141156e-05, + "loss": 0.0234, + "step": 27280 + }, + { + "action_loss": 0.0026472285389900208, + "epoch": 24.532374100719423, + "step": 27280 + }, + { + "epoch": 24.532374100719423, + "step": 27280, + "torque_loss": 0.10982010513544083 + }, + { + "epoch": 24.532374100719423, + "force_loss": 0.00370503100566566, + "step": 27280 + }, + { + "epoch": 24.54136690647482, + "grad_norm": 0.20840491354465485, + "learning_rate": 6.150069893842602e-05, + "loss": 0.0206, + "step": 27290 + }, + { + "action_loss": 0.0054594953544437885, + "epoch": 24.54136690647482, + "step": 27290 + }, + { + "epoch": 24.54136690647482, + "step": 27290, + "torque_loss": 0.10830133408308029 + }, + { + "epoch": 24.54136690647482, + "force_loss": 0.003318127943202853, + "step": 27290 + }, + { + "epoch": 24.550359712230215, + "grad_norm": 0.5064330697059631, + "learning_rate": 6.147387826183182e-05, + "loss": 0.0199, + "step": 27300 + }, + { + "action_loss": 0.011984252370893955, + "epoch": 24.550359712230215, + "step": 27300 + }, + { + "epoch": 24.550359712230215, + "step": 27300, + "torque_loss": 0.14927031099796295 + }, + { + "epoch": 24.550359712230215, + "force_loss": 0.013501625508069992, + "step": 27300 + }, + { + "epoch": 24.559352517985612, + "grad_norm": 0.4219507575035095, + "learning_rate": 6.144705409977635e-05, + "loss": 0.0242, + "step": 27310 + }, + { + "action_loss": 0.004247523378580809, + "epoch": 24.559352517985612, + "step": 27310 + }, + { + "epoch": 24.559352517985612, + "step": 27310, + "torque_loss": 0.11038357019424438 + }, + { + "epoch": 24.559352517985612, + "force_loss": 0.004107773769646883, + "step": 27310 + }, + { + "epoch": 24.568345323741006, + "grad_norm": 0.2655084729194641, + "learning_rate": 6.142022646040808e-05, + "loss": 0.0176, + "step": 27320 + }, + { + "action_loss": 0.00814251322299242, + "epoch": 24.568345323741006, + "step": 27320 + }, + { + "epoch": 24.568345323741006, + "step": 27320, + "torque_loss": 0.15498368442058563 + }, + { + "epoch": 24.568345323741006, + "force_loss": 0.007861743681132793, + "step": 27320 + }, + { + "epoch": 24.577338129496404, + "grad_norm": 0.14792513847351074, + "learning_rate": 6.139339535187653e-05, + "loss": 0.0192, + "step": 27330 + }, + { + "action_loss": 0.00445583974942565, + "epoch": 24.577338129496404, + "step": 27330 + }, + { + "epoch": 24.577338129496404, + "step": 27330, + "torque_loss": 0.15024316310882568 + }, + { + "epoch": 24.577338129496404, + "force_loss": 0.007579391356557608, + "step": 27330 + }, + { + "epoch": 24.586330935251798, + "grad_norm": 0.270242840051651, + "learning_rate": 6.136656078233232e-05, + "loss": 0.017, + "step": 27340 + }, + { + "action_loss": 0.0024240335915237665, + "epoch": 24.586330935251798, + "step": 27340 + }, + { + "epoch": 24.586330935251798, + "step": 27340, + "torque_loss": 0.13511238992214203 + }, + { + "epoch": 24.586330935251798, + "force_loss": 0.004678442608565092, + "step": 27340 + }, + { + "epoch": 24.595323741007196, + "grad_norm": 0.15835519134998322, + "learning_rate": 6.133972275992707e-05, + "loss": 0.0221, + "step": 27350 + }, + { + "action_loss": 0.0024552172981202602, + "epoch": 24.595323741007196, + "step": 27350 + }, + { + "epoch": 24.595323741007196, + "step": 27350, + "torque_loss": 0.12071740627288818 + }, + { + "epoch": 24.595323741007196, + "force_loss": 0.0018101140158250928, + "step": 27350 + }, + { + "epoch": 24.60431654676259, + "grad_norm": 0.2091076374053955, + "learning_rate": 6.131288129281342e-05, + "loss": 0.0217, + "step": 27360 + }, + { + "action_loss": 0.010779518634080887, + "epoch": 24.60431654676259, + "step": 27360 + }, + { + "epoch": 24.60431654676259, + "step": 27360, + "torque_loss": 0.1460753232240677 + }, + { + "epoch": 24.60431654676259, + "force_loss": 0.01828053593635559, + "step": 27360 + }, + { + "epoch": 24.613309352517987, + "grad_norm": 0.36564478278160095, + "learning_rate": 6.128603638914516e-05, + "loss": 0.0209, + "step": 27370 + }, + { + "action_loss": 0.006637075450271368, + "epoch": 24.613309352517987, + "step": 27370 + }, + { + "epoch": 24.613309352517987, + "step": 27370, + "torque_loss": 0.11818021535873413 + }, + { + "epoch": 24.613309352517987, + "force_loss": 0.005635035689920187, + "step": 27370 + }, + { + "epoch": 24.62230215827338, + "grad_norm": 0.7099848985671997, + "learning_rate": 6.125918805707704e-05, + "loss": 0.0184, + "step": 27380 + }, + { + "action_loss": 0.014135800302028656, + "epoch": 24.62230215827338, + "step": 27380 + }, + { + "epoch": 24.62230215827338, + "step": 27380, + "torque_loss": 0.14051954448223114 + }, + { + "epoch": 24.62230215827338, + "force_loss": 0.018400678411126137, + "step": 27380 + }, + { + "epoch": 24.631294964028775, + "grad_norm": 0.37354403734207153, + "learning_rate": 6.123233630476485e-05, + "loss": 0.0227, + "step": 27390 + }, + { + "action_loss": 0.0023275529965758324, + "epoch": 24.631294964028775, + "step": 27390 + }, + { + "epoch": 24.631294964028775, + "step": 27390, + "torque_loss": 0.08032269030809402 + }, + { + "epoch": 24.631294964028775, + "force_loss": 0.004027464892715216, + "step": 27390 + }, + { + "epoch": 24.640287769784173, + "grad_norm": 0.19231006503105164, + "learning_rate": 6.120548114036547e-05, + "loss": 0.0193, + "step": 27400 + }, + { + "action_loss": 0.010985583066940308, + "epoch": 24.640287769784173, + "step": 27400 + }, + { + "epoch": 24.640287769784173, + "step": 27400, + "torque_loss": 0.12024138122797012 + }, + { + "epoch": 24.640287769784173, + "force_loss": 0.008251228369772434, + "step": 27400 + }, + { + "epoch": 24.649280575539567, + "grad_norm": 0.4444057047367096, + "learning_rate": 6.117862257203679e-05, + "loss": 0.0203, + "step": 27410 + }, + { + "action_loss": 0.0030823620036244392, + "epoch": 24.649280575539567, + "step": 27410 + }, + { + "epoch": 24.649280575539567, + "step": 27410, + "torque_loss": 0.0979972556233406 + }, + { + "epoch": 24.649280575539567, + "force_loss": 0.0031780097633600235, + "step": 27410 + }, + { + "epoch": 24.658273381294965, + "grad_norm": 0.617821216583252, + "learning_rate": 6.115176060793771e-05, + "loss": 0.0225, + "step": 27420 + }, + { + "action_loss": 0.0027088781353086233, + "epoch": 24.658273381294965, + "step": 27420 + }, + { + "epoch": 24.658273381294965, + "step": 27420, + "torque_loss": 0.17940177023410797 + }, + { + "epoch": 24.658273381294965, + "force_loss": 0.002800578251481056, + "step": 27420 + }, + { + "epoch": 24.66726618705036, + "grad_norm": 0.2714875638484955, + "learning_rate": 6.112489525622822e-05, + "loss": 0.017, + "step": 27430 + }, + { + "action_loss": 0.003958713263273239, + "epoch": 24.66726618705036, + "step": 27430 + }, + { + "epoch": 24.66726618705036, + "step": 27430, + "torque_loss": 0.10730186849832535 + }, + { + "epoch": 24.66726618705036, + "force_loss": 0.004312272649258375, + "step": 27430 + }, + { + "epoch": 24.676258992805757, + "grad_norm": 0.4587785005569458, + "learning_rate": 6.109802652506928e-05, + "loss": 0.0188, + "step": 27440 + }, + { + "action_loss": 0.005403958726674318, + "epoch": 24.676258992805757, + "step": 27440 + }, + { + "epoch": 24.676258992805757, + "step": 27440, + "torque_loss": 0.11038515716791153 + }, + { + "epoch": 24.676258992805757, + "force_loss": 0.006019880995154381, + "step": 27440 + }, + { + "epoch": 24.68525179856115, + "grad_norm": 0.5030703544616699, + "learning_rate": 6.107115442262291e-05, + "loss": 0.0195, + "step": 27450 + }, + { + "action_loss": 0.00563558004796505, + "epoch": 24.68525179856115, + "step": 27450 + }, + { + "epoch": 24.68525179856115, + "step": 27450, + "torque_loss": 0.16837310791015625 + }, + { + "epoch": 24.68525179856115, + "force_loss": 0.008612028323113918, + "step": 27450 + }, + { + "epoch": 24.694244604316548, + "grad_norm": 0.3643386960029602, + "learning_rate": 6.104427895705214e-05, + "loss": 0.0206, + "step": 27460 + }, + { + "action_loss": 0.0025047778617590666, + "epoch": 24.694244604316548, + "step": 27460 + }, + { + "epoch": 24.694244604316548, + "step": 27460, + "torque_loss": 0.10358033329248428 + }, + { + "epoch": 24.694244604316548, + "force_loss": 0.005856382194906473, + "step": 27460 + }, + { + "epoch": 24.703237410071942, + "grad_norm": 0.2279098778963089, + "learning_rate": 6.101740013652103e-05, + "loss": 0.0176, + "step": 27470 + }, + { + "action_loss": 0.006774865090847015, + "epoch": 24.703237410071942, + "step": 27470 + }, + { + "epoch": 24.703237410071942, + "step": 27470, + "torque_loss": 0.15938641130924225 + }, + { + "epoch": 24.703237410071942, + "force_loss": 0.005501022096723318, + "step": 27470 + }, + { + "epoch": 24.71223021582734, + "grad_norm": 0.18981735408306122, + "learning_rate": 6.099051796919465e-05, + "loss": 0.0224, + "step": 27480 + }, + { + "action_loss": 0.06200120225548744, + "epoch": 24.71223021582734, + "step": 27480 + }, + { + "epoch": 24.71223021582734, + "step": 27480, + "torque_loss": 0.22226791083812714 + }, + { + "epoch": 24.71223021582734, + "force_loss": 0.0631442740559578, + "step": 27480 + }, + { + "epoch": 24.721223021582734, + "grad_norm": 0.28622543811798096, + "learning_rate": 6.096363246323911e-05, + "loss": 0.0246, + "step": 27490 + }, + { + "action_loss": 0.0039014138747006655, + "epoch": 24.721223021582734, + "step": 27490 + }, + { + "epoch": 24.721223021582734, + "step": 27490, + "torque_loss": 0.16382472217082977 + }, + { + "epoch": 24.721223021582734, + "force_loss": 0.0027531490195542574, + "step": 27490 + }, + { + "epoch": 24.730215827338128, + "grad_norm": 0.28025224804878235, + "learning_rate": 6.0936743626821504e-05, + "loss": 0.024, + "step": 27500 + }, + { + "action_loss": 0.003201183630153537, + "epoch": 24.730215827338128, + "step": 27500 + }, + { + "epoch": 24.730215827338128, + "step": 27500, + "torque_loss": 0.1074504628777504 + }, + { + "epoch": 24.730215827338128, + "force_loss": 0.004344044253230095, + "step": 27500 + }, + { + "epoch": 24.739208633093526, + "grad_norm": 0.49105799198150635, + "learning_rate": 6.090985146810996e-05, + "loss": 0.0193, + "step": 27510 + }, + { + "action_loss": 0.00403236597776413, + "epoch": 24.739208633093526, + "step": 27510 + }, + { + "epoch": 24.739208633093526, + "step": 27510, + "torque_loss": 0.12196534872055054 + }, + { + "epoch": 24.739208633093526, + "force_loss": 0.006179175805300474, + "step": 27510 + }, + { + "epoch": 24.74820143884892, + "grad_norm": 0.4832867681980133, + "learning_rate": 6.088295599527357e-05, + "loss": 0.0201, + "step": 27520 + }, + { + "action_loss": 0.00697707012295723, + "epoch": 24.74820143884892, + "step": 27520 + }, + { + "epoch": 24.74820143884892, + "step": 27520, + "torque_loss": 0.1521146595478058 + }, + { + "epoch": 24.74820143884892, + "force_loss": 0.01482886541634798, + "step": 27520 + }, + { + "epoch": 24.757194244604317, + "grad_norm": 0.20004992187023163, + "learning_rate": 6.085605721648252e-05, + "loss": 0.0186, + "step": 27530 + }, + { + "action_loss": 0.009972660802304745, + "epoch": 24.757194244604317, + "step": 27530 + }, + { + "epoch": 24.757194244604317, + "step": 27530, + "torque_loss": 0.1113722026348114 + }, + { + "epoch": 24.757194244604317, + "force_loss": 0.016840727999806404, + "step": 27530 + }, + { + "epoch": 24.76618705035971, + "grad_norm": 0.6739809513092041, + "learning_rate": 6.082915513990792e-05, + "loss": 0.0205, + "step": 27540 + }, + { + "action_loss": 0.004202758893370628, + "epoch": 24.76618705035971, + "step": 27540 + }, + { + "epoch": 24.76618705035971, + "step": 27540, + "torque_loss": 0.11291039735078812 + }, + { + "epoch": 24.76618705035971, + "force_loss": 0.00447657098993659, + "step": 27540 + }, + { + "epoch": 24.77517985611511, + "grad_norm": 0.20661500096321106, + "learning_rate": 6.080224977372192e-05, + "loss": 0.0196, + "step": 27550 + }, + { + "action_loss": 0.015018738806247711, + "epoch": 24.77517985611511, + "step": 27550 + }, + { + "epoch": 24.77517985611511, + "step": 27550, + "torque_loss": 0.12736612558364868 + }, + { + "epoch": 24.77517985611511, + "force_loss": 0.008811368606984615, + "step": 27550 + }, + { + "epoch": 24.784172661870503, + "grad_norm": 0.30827438831329346, + "learning_rate": 6.0775341126097666e-05, + "loss": 0.0184, + "step": 27560 + }, + { + "action_loss": 0.004452539142221212, + "epoch": 24.784172661870503, + "step": 27560 + }, + { + "epoch": 24.784172661870503, + "step": 27560, + "torque_loss": 0.13114501535892487 + }, + { + "epoch": 24.784172661870503, + "force_loss": 0.005534987431019545, + "step": 27560 + }, + { + "epoch": 24.7931654676259, + "grad_norm": 0.21373234689235687, + "learning_rate": 6.074842920520926e-05, + "loss": 0.0215, + "step": 27570 + }, + { + "action_loss": 0.003416118212044239, + "epoch": 24.7931654676259, + "step": 27570 + }, + { + "epoch": 24.7931654676259, + "step": 27570, + "torque_loss": 0.13711723685264587 + }, + { + "epoch": 24.7931654676259, + "force_loss": 0.003428607014939189, + "step": 27570 + }, + { + "epoch": 24.802158273381295, + "grad_norm": 0.23638705909252167, + "learning_rate": 6.072151401923186e-05, + "loss": 0.0183, + "step": 27580 + }, + { + "action_loss": 0.005607984960079193, + "epoch": 24.802158273381295, + "step": 27580 + }, + { + "epoch": 24.802158273381295, + "step": 27580, + "torque_loss": 0.1508052498102188 + }, + { + "epoch": 24.802158273381295, + "force_loss": 0.005074174143373966, + "step": 27580 + }, + { + "epoch": 24.81115107913669, + "grad_norm": 0.21366402506828308, + "learning_rate": 6.069459557634159e-05, + "loss": 0.0222, + "step": 27590 + }, + { + "action_loss": 0.0031138500198721886, + "epoch": 24.81115107913669, + "step": 27590 + }, + { + "epoch": 24.81115107913669, + "step": 27590, + "torque_loss": 0.12468931823968887 + }, + { + "epoch": 24.81115107913669, + "force_loss": 0.007567358668893576, + "step": 27590 + }, + { + "epoch": 24.820143884892087, + "grad_norm": 0.4918735921382904, + "learning_rate": 6.066767388471557e-05, + "loss": 0.0226, + "step": 27600 + }, + { + "action_loss": 0.012953785248100758, + "epoch": 24.820143884892087, + "step": 27600 + }, + { + "epoch": 24.820143884892087, + "step": 27600, + "torque_loss": 0.16156236827373505 + }, + { + "epoch": 24.820143884892087, + "force_loss": 0.018003307282924652, + "step": 27600 + }, + { + "epoch": 24.82913669064748, + "grad_norm": 0.2150358259677887, + "learning_rate": 6.064074895253188e-05, + "loss": 0.0219, + "step": 27610 + }, + { + "action_loss": 0.007434766739606857, + "epoch": 24.82913669064748, + "step": 27610 + }, + { + "epoch": 24.82913669064748, + "step": 27610, + "torque_loss": 0.12523068487644196 + }, + { + "epoch": 24.82913669064748, + "force_loss": 0.011812425218522549, + "step": 27610 + }, + { + "epoch": 24.83812949640288, + "grad_norm": 0.22323743999004364, + "learning_rate": 6.061382078796961e-05, + "loss": 0.0252, + "step": 27620 + }, + { + "action_loss": 0.01067497581243515, + "epoch": 24.83812949640288, + "step": 27620 + }, + { + "epoch": 24.83812949640288, + "step": 27620, + "torque_loss": 0.17736369371414185 + }, + { + "epoch": 24.83812949640288, + "force_loss": 0.01045769453048706, + "step": 27620 + }, + { + "epoch": 24.847122302158272, + "grad_norm": 0.2883903682231903, + "learning_rate": 6.0586889399208814e-05, + "loss": 0.0194, + "step": 27630 + }, + { + "action_loss": 0.004756091628223658, + "epoch": 24.847122302158272, + "step": 27630 + }, + { + "epoch": 24.847122302158272, + "step": 27630, + "torque_loss": 0.13929414749145508 + }, + { + "epoch": 24.847122302158272, + "force_loss": 0.005363310221582651, + "step": 27630 + }, + { + "epoch": 24.85611510791367, + "grad_norm": 0.6270283460617065, + "learning_rate": 6.0559954794430565e-05, + "loss": 0.0241, + "step": 27640 + }, + { + "action_loss": 0.023654451593756676, + "epoch": 24.85611510791367, + "step": 27640 + }, + { + "epoch": 24.85611510791367, + "step": 27640, + "torque_loss": 0.15856419503688812 + }, + { + "epoch": 24.85611510791367, + "force_loss": 0.030192309990525246, + "step": 27640 + }, + { + "epoch": 24.865107913669064, + "grad_norm": 0.2929843068122864, + "learning_rate": 6.053301698181687e-05, + "loss": 0.0207, + "step": 27650 + }, + { + "action_loss": 0.002177464310079813, + "epoch": 24.865107913669064, + "step": 27650 + }, + { + "epoch": 24.865107913669064, + "step": 27650, + "torque_loss": 0.17063532769680023 + }, + { + "epoch": 24.865107913669064, + "force_loss": 0.0013247322058305144, + "step": 27650 + }, + { + "epoch": 24.87410071942446, + "grad_norm": 0.1990029364824295, + "learning_rate": 6.0506075969550725e-05, + "loss": 0.0195, + "step": 27660 + }, + { + "action_loss": 0.003913574386388063, + "epoch": 24.87410071942446, + "step": 27660 + }, + { + "epoch": 24.87410071942446, + "step": 27660, + "torque_loss": 0.16323544085025787 + }, + { + "epoch": 24.87410071942446, + "force_loss": 0.0020988702308386564, + "step": 27660 + }, + { + "epoch": 24.883093525179856, + "grad_norm": 0.1366661936044693, + "learning_rate": 6.047913176581609e-05, + "loss": 0.0202, + "step": 27670 + }, + { + "action_loss": 0.0026836830656975508, + "epoch": 24.883093525179856, + "step": 27670 + }, + { + "epoch": 24.883093525179856, + "step": 27670, + "torque_loss": 0.1376323252916336 + }, + { + "epoch": 24.883093525179856, + "force_loss": 0.0028415534179657698, + "step": 27670 + }, + { + "epoch": 24.892086330935253, + "grad_norm": 0.7001839280128479, + "learning_rate": 6.0452184378797904e-05, + "loss": 0.0192, + "step": 27680 + }, + { + "action_loss": 0.006577400956302881, + "epoch": 24.892086330935253, + "step": 27680 + }, + { + "epoch": 24.892086330935253, + "step": 27680, + "torque_loss": 0.10539466142654419 + }, + { + "epoch": 24.892086330935253, + "force_loss": 0.013131454586982727, + "step": 27680 + }, + { + "epoch": 24.901079136690647, + "grad_norm": 0.1965656727552414, + "learning_rate": 6.042523381668209e-05, + "loss": 0.0192, + "step": 27690 + }, + { + "action_loss": 0.007105072494596243, + "epoch": 24.901079136690647, + "step": 27690 + }, + { + "epoch": 24.901079136690647, + "step": 27690, + "torque_loss": 0.08462647348642349 + }, + { + "epoch": 24.901079136690647, + "force_loss": 0.003805331187322736, + "step": 27690 + }, + { + "epoch": 24.91007194244604, + "grad_norm": 0.30609291791915894, + "learning_rate": 6.03982800876555e-05, + "loss": 0.0189, + "step": 27700 + }, + { + "action_loss": 0.03476320207118988, + "epoch": 24.91007194244604, + "step": 27700 + }, + { + "epoch": 24.91007194244604, + "step": 27700, + "torque_loss": 0.16271285712718964 + }, + { + "epoch": 24.91007194244604, + "force_loss": 0.019819073379039764, + "step": 27700 + }, + { + "epoch": 24.91906474820144, + "grad_norm": 0.28838419914245605, + "learning_rate": 6.0371323199905975e-05, + "loss": 0.024, + "step": 27710 + }, + { + "action_loss": 0.0034765799064189196, + "epoch": 24.91906474820144, + "step": 27710 + }, + { + "epoch": 24.91906474820144, + "step": 27710, + "torque_loss": 0.13389629125595093 + }, + { + "epoch": 24.91906474820144, + "force_loss": 0.004095780197530985, + "step": 27710 + }, + { + "epoch": 24.928057553956833, + "grad_norm": 0.2344946414232254, + "learning_rate": 6.03443631616223e-05, + "loss": 0.0207, + "step": 27720 + }, + { + "action_loss": 0.001939621870405972, + "epoch": 24.928057553956833, + "step": 27720 + }, + { + "epoch": 24.928057553956833, + "step": 27720, + "torque_loss": 0.10942911356687546 + }, + { + "epoch": 24.928057553956833, + "force_loss": 0.0010275017702952027, + "step": 27720 + }, + { + "epoch": 24.93705035971223, + "grad_norm": 0.470210999250412, + "learning_rate": 6.031739998099421e-05, + "loss": 0.0183, + "step": 27730 + }, + { + "action_loss": 0.0030129754450172186, + "epoch": 24.93705035971223, + "step": 27730 + }, + { + "epoch": 24.93705035971223, + "step": 27730, + "torque_loss": 0.13927406072616577 + }, + { + "epoch": 24.93705035971223, + "force_loss": 0.003387420205399394, + "step": 27730 + }, + { + "epoch": 24.946043165467625, + "grad_norm": 0.27437713742256165, + "learning_rate": 6.029043366621243e-05, + "loss": 0.0165, + "step": 27740 + }, + { + "action_loss": 0.0032256992999464273, + "epoch": 24.946043165467625, + "step": 27740 + }, + { + "epoch": 24.946043165467625, + "step": 27740, + "torque_loss": 0.1392812728881836 + }, + { + "epoch": 24.946043165467625, + "force_loss": 0.002643261104822159, + "step": 27740 + }, + { + "epoch": 24.955035971223023, + "grad_norm": 0.2755166292190552, + "learning_rate": 6.0263464225468615e-05, + "loss": 0.0226, + "step": 27750 + }, + { + "action_loss": 0.0022924989461898804, + "epoch": 24.955035971223023, + "step": 27750 + }, + { + "epoch": 24.955035971223023, + "step": 27750, + "torque_loss": 0.1275307983160019 + }, + { + "epoch": 24.955035971223023, + "force_loss": 0.003888562321662903, + "step": 27750 + }, + { + "epoch": 24.964028776978417, + "grad_norm": 0.2888088524341583, + "learning_rate": 6.023649166695534e-05, + "loss": 0.0183, + "step": 27760 + }, + { + "action_loss": 0.004289816599339247, + "epoch": 24.964028776978417, + "step": 27760 + }, + { + "epoch": 24.964028776978417, + "step": 27760, + "torque_loss": 0.14794236421585083 + }, + { + "epoch": 24.964028776978417, + "force_loss": 0.008980989456176758, + "step": 27760 + }, + { + "epoch": 24.973021582733814, + "grad_norm": 0.21840260922908783, + "learning_rate": 6.0209515998866186e-05, + "loss": 0.0196, + "step": 27770 + }, + { + "action_loss": 0.0021711259614676237, + "epoch": 24.973021582733814, + "step": 27770 + }, + { + "epoch": 24.973021582733814, + "step": 27770, + "torque_loss": 0.12228868156671524 + }, + { + "epoch": 24.973021582733814, + "force_loss": 0.003480076789855957, + "step": 27770 + }, + { + "epoch": 24.98201438848921, + "grad_norm": 0.12162240594625473, + "learning_rate": 6.018253722939563e-05, + "loss": 0.0203, + "step": 27780 + }, + { + "action_loss": 0.009355143643915653, + "epoch": 24.98201438848921, + "step": 27780 + }, + { + "epoch": 24.98201438848921, + "step": 27780, + "torque_loss": 0.09987518936395645 + }, + { + "epoch": 24.98201438848921, + "force_loss": 0.004401336889714003, + "step": 27780 + }, + { + "epoch": 24.991007194244606, + "grad_norm": 0.512770414352417, + "learning_rate": 6.015555536673914e-05, + "loss": 0.0214, + "step": 27790 + }, + { + "action_loss": 0.003254072042182088, + "epoch": 24.991007194244606, + "step": 27790 + }, + { + "epoch": 24.991007194244606, + "step": 27790, + "torque_loss": 0.14153435826301575 + }, + { + "epoch": 24.991007194244606, + "force_loss": 0.001677804277278483, + "step": 27790 + }, + { + "epoch": 25.0, + "grad_norm": 0.2779077887535095, + "learning_rate": 6.0128570419093054e-05, + "loss": 0.0235, + "step": 27800 + }, + { + "action_loss": 0.009039516560733318, + "epoch": 25.0, + "step": 27800 + }, + { + "epoch": 25.0, + "step": 27800, + "torque_loss": 0.09193138033151627 + }, + { + "epoch": 25.0, + "force_loss": 0.0035536736249923706, + "step": 27800 + }, + { + "epoch": 25.008992805755394, + "grad_norm": 0.13003653287887573, + "learning_rate": 6.010158239465471e-05, + "loss": 0.0196, + "step": 27810 + }, + { + "action_loss": 0.009136388078331947, + "epoch": 25.008992805755394, + "step": 27810 + }, + { + "epoch": 25.008992805755394, + "step": 27810, + "torque_loss": 0.13190436363220215 + }, + { + "epoch": 25.008992805755394, + "force_loss": 0.008251729421317577, + "step": 27810 + }, + { + "epoch": 25.01798561151079, + "grad_norm": 0.4156569540500641, + "learning_rate": 6.007459130162235e-05, + "loss": 0.0207, + "step": 27820 + }, + { + "action_loss": 0.003521420294418931, + "epoch": 25.01798561151079, + "step": 27820 + }, + { + "epoch": 25.01798561151079, + "step": 27820, + "torque_loss": 0.115324966609478 + }, + { + "epoch": 25.01798561151079, + "force_loss": 0.009384633041918278, + "step": 27820 + }, + { + "epoch": 25.026978417266186, + "grad_norm": 0.24368155002593994, + "learning_rate": 6.004759714819516e-05, + "loss": 0.0211, + "step": 27830 + }, + { + "action_loss": 0.001910856575705111, + "epoch": 25.026978417266186, + "step": 27830 + }, + { + "epoch": 25.026978417266186, + "step": 27830, + "torque_loss": 0.10204290598630905 + }, + { + "epoch": 25.026978417266186, + "force_loss": 0.0018200107151642442, + "step": 27830 + }, + { + "epoch": 25.035971223021583, + "grad_norm": 0.23502390086650848, + "learning_rate": 6.002059994257323e-05, + "loss": 0.0236, + "step": 27840 + }, + { + "action_loss": 0.006032480392605066, + "epoch": 25.035971223021583, + "step": 27840 + }, + { + "epoch": 25.035971223021583, + "step": 27840, + "torque_loss": 0.10046689957380295 + }, + { + "epoch": 25.035971223021583, + "force_loss": 0.004050326067954302, + "step": 27840 + }, + { + "epoch": 25.044964028776977, + "grad_norm": 0.37774401903152466, + "learning_rate": 5.999359969295764e-05, + "loss": 0.0204, + "step": 27850 + }, + { + "action_loss": 0.0031546875834465027, + "epoch": 25.044964028776977, + "step": 27850 + }, + { + "epoch": 25.044964028776977, + "step": 27850, + "torque_loss": 0.14888004958629608 + }, + { + "epoch": 25.044964028776977, + "force_loss": 0.00252740946598351, + "step": 27850 + }, + { + "epoch": 25.053956834532375, + "grad_norm": 0.2149512618780136, + "learning_rate": 5.9966596407550314e-05, + "loss": 0.0173, + "step": 27860 + }, + { + "action_loss": 0.011078891344368458, + "epoch": 25.053956834532375, + "step": 27860 + }, + { + "epoch": 25.053956834532375, + "step": 27860, + "torque_loss": 0.13848333060741425 + }, + { + "epoch": 25.053956834532375, + "force_loss": 0.011729366146028042, + "step": 27860 + }, + { + "epoch": 25.06294964028777, + "grad_norm": 0.2878969609737396, + "learning_rate": 5.993959009455416e-05, + "loss": 0.0229, + "step": 27870 + }, + { + "action_loss": 0.007101371884346008, + "epoch": 25.06294964028777, + "step": 27870 + }, + { + "epoch": 25.06294964028777, + "step": 27870, + "torque_loss": 0.1358853280544281 + }, + { + "epoch": 25.06294964028777, + "force_loss": 0.009204776957631111, + "step": 27870 + }, + { + "epoch": 25.071942446043167, + "grad_norm": 0.19891761243343353, + "learning_rate": 5.991258076217298e-05, + "loss": 0.0171, + "step": 27880 + }, + { + "action_loss": 0.013965328224003315, + "epoch": 25.071942446043167, + "step": 27880 + }, + { + "epoch": 25.071942446043167, + "step": 27880, + "torque_loss": 0.15057577192783356 + }, + { + "epoch": 25.071942446043167, + "force_loss": 0.014601573348045349, + "step": 27880 + }, + { + "epoch": 25.08093525179856, + "grad_norm": 0.31790658831596375, + "learning_rate": 5.988556841861147e-05, + "loss": 0.022, + "step": 27890 + }, + { + "action_loss": 0.006994236260652542, + "epoch": 25.08093525179856, + "step": 27890 + }, + { + "epoch": 25.08093525179856, + "step": 27890, + "torque_loss": 0.13063372671604156 + }, + { + "epoch": 25.08093525179856, + "force_loss": 0.0073255919851362705, + "step": 27890 + }, + { + "epoch": 25.08992805755396, + "grad_norm": 0.5693853497505188, + "learning_rate": 5.985855307207531e-05, + "loss": 0.0238, + "step": 27900 + }, + { + "action_loss": 0.012989816255867481, + "epoch": 25.08992805755396, + "step": 27900 + }, + { + "epoch": 25.08992805755396, + "step": 27900, + "torque_loss": 0.11889564990997314 + }, + { + "epoch": 25.08992805755396, + "force_loss": 0.008619645610451698, + "step": 27900 + }, + { + "epoch": 25.098920863309353, + "grad_norm": 0.5171234011650085, + "learning_rate": 5.9831534730771e-05, + "loss": 0.0183, + "step": 27910 + }, + { + "action_loss": 0.005285435821861029, + "epoch": 25.098920863309353, + "step": 27910 + }, + { + "epoch": 25.098920863309353, + "step": 27910, + "torque_loss": 0.113880455493927 + }, + { + "epoch": 25.098920863309353, + "force_loss": 0.004809692036360502, + "step": 27910 + }, + { + "epoch": 25.107913669064747, + "grad_norm": 0.21858294308185577, + "learning_rate": 5.980451340290605e-05, + "loss": 0.0193, + "step": 27920 + }, + { + "action_loss": 0.0023250191006809473, + "epoch": 25.107913669064747, + "step": 27920 + }, + { + "epoch": 25.107913669064747, + "step": 27920, + "torque_loss": 0.10454988479614258 + }, + { + "epoch": 25.107913669064747, + "force_loss": 0.0014954075450077653, + "step": 27920 + }, + { + "epoch": 25.116906474820144, + "grad_norm": 0.19935208559036255, + "learning_rate": 5.97774890966888e-05, + "loss": 0.0173, + "step": 27930 + }, + { + "action_loss": 0.005066411104053259, + "epoch": 25.116906474820144, + "step": 27930 + }, + { + "epoch": 25.116906474820144, + "step": 27930, + "torque_loss": 0.11119184643030167 + }, + { + "epoch": 25.116906474820144, + "force_loss": 0.002239025430753827, + "step": 27930 + }, + { + "epoch": 25.12589928057554, + "grad_norm": 0.4909704923629761, + "learning_rate": 5.975046182032851e-05, + "loss": 0.0201, + "step": 27940 + }, + { + "action_loss": 0.021295903250575066, + "epoch": 25.12589928057554, + "step": 27940 + }, + { + "epoch": 25.12589928057554, + "step": 27940, + "torque_loss": 0.10453003644943237 + }, + { + "epoch": 25.12589928057554, + "force_loss": 0.02358761429786682, + "step": 27940 + }, + { + "epoch": 25.134892086330936, + "grad_norm": 0.2959257960319519, + "learning_rate": 5.972343158203537e-05, + "loss": 0.0202, + "step": 27950 + }, + { + "action_loss": 0.010482084937393665, + "epoch": 25.134892086330936, + "step": 27950 + }, + { + "epoch": 25.134892086330936, + "step": 27950, + "torque_loss": 0.1191159263253212 + }, + { + "epoch": 25.134892086330936, + "force_loss": 0.013482670299708843, + "step": 27950 + }, + { + "epoch": 25.14388489208633, + "grad_norm": 0.3135308027267456, + "learning_rate": 5.969639839002045e-05, + "loss": 0.0183, + "step": 27960 + }, + { + "action_loss": 0.0136274928227067, + "epoch": 25.14388489208633, + "step": 27960 + }, + { + "epoch": 25.14388489208633, + "step": 27960, + "torque_loss": 0.1246398463845253 + }, + { + "epoch": 25.14388489208633, + "force_loss": 0.01063587237149477, + "step": 27960 + }, + { + "epoch": 25.152877697841728, + "grad_norm": 0.19451689720153809, + "learning_rate": 5.966936225249572e-05, + "loss": 0.0198, + "step": 27970 + }, + { + "action_loss": 0.008372939191758633, + "epoch": 25.152877697841728, + "step": 27970 + }, + { + "epoch": 25.152877697841728, + "step": 27970, + "torque_loss": 0.09813759475946426 + }, + { + "epoch": 25.152877697841728, + "force_loss": 0.008229688741266727, + "step": 27970 + }, + { + "epoch": 25.16187050359712, + "grad_norm": 0.24217577278614044, + "learning_rate": 5.9642323177674044e-05, + "loss": 0.0178, + "step": 27980 + }, + { + "action_loss": 0.0019485506927594543, + "epoch": 25.16187050359712, + "step": 27980 + }, + { + "epoch": 25.16187050359712, + "step": 27980, + "torque_loss": 0.12314663082361221 + }, + { + "epoch": 25.16187050359712, + "force_loss": 0.0016669895267114043, + "step": 27980 + }, + { + "epoch": 25.17086330935252, + "grad_norm": 0.32251855731010437, + "learning_rate": 5.9615281173769154e-05, + "loss": 0.0187, + "step": 27990 + }, + { + "action_loss": 0.02054518461227417, + "epoch": 25.17086330935252, + "step": 27990 + }, + { + "epoch": 25.17086330935252, + "step": 27990, + "torque_loss": 0.17750942707061768 + }, + { + "epoch": 25.17086330935252, + "force_loss": 0.012470967136323452, + "step": 27990 + }, + { + "epoch": 25.179856115107913, + "grad_norm": 0.1701742261648178, + "learning_rate": 5.958823624899574e-05, + "loss": 0.0176, + "step": 28000 + }, + { + "action_loss": 0.006592078600078821, + "epoch": 25.179856115107913, + "step": 28000 + }, + { + "epoch": 25.179856115107913, + "step": 28000, + "torque_loss": 0.1097002848982811 + }, + { + "epoch": 25.179856115107913, + "force_loss": 0.002601605374366045, + "step": 28000 + }, + { + "epoch": 25.18884892086331, + "grad_norm": 0.3739883303642273, + "learning_rate": 5.956118841156933e-05, + "loss": 0.0189, + "step": 28010 + }, + { + "action_loss": 0.00389285571873188, + "epoch": 25.18884892086331, + "step": 28010 + }, + { + "epoch": 25.18884892086331, + "step": 28010, + "torque_loss": 0.15238027274608612 + }, + { + "epoch": 25.18884892086331, + "force_loss": 0.006241050083190203, + "step": 28010 + }, + { + "epoch": 25.197841726618705, + "grad_norm": 0.14512798190116882, + "learning_rate": 5.953413766970631e-05, + "loss": 0.0209, + "step": 28020 + }, + { + "action_loss": 0.004007064737379551, + "epoch": 25.197841726618705, + "step": 28020 + }, + { + "epoch": 25.197841726618705, + "step": 28020, + "torque_loss": 0.15665273368358612 + }, + { + "epoch": 25.197841726618705, + "force_loss": 0.002668733475729823, + "step": 28020 + }, + { + "epoch": 25.2068345323741, + "grad_norm": 0.24719534814357758, + "learning_rate": 5.9507084031624e-05, + "loss": 0.0213, + "step": 28030 + }, + { + "action_loss": 0.0036468645557761192, + "epoch": 25.2068345323741, + "step": 28030 + }, + { + "epoch": 25.2068345323741, + "step": 28030, + "torque_loss": 0.11742961406707764 + }, + { + "epoch": 25.2068345323741, + "force_loss": 0.0037753216456621885, + "step": 28030 + }, + { + "epoch": 25.215827338129497, + "grad_norm": 0.4668901860713959, + "learning_rate": 5.948002750554058e-05, + "loss": 0.0225, + "step": 28040 + }, + { + "action_loss": 0.006935764104127884, + "epoch": 25.215827338129497, + "step": 28040 + }, + { + "epoch": 25.215827338129497, + "step": 28040, + "torque_loss": 0.15631915628910065 + }, + { + "epoch": 25.215827338129497, + "force_loss": 0.018520066514611244, + "step": 28040 + }, + { + "epoch": 25.22482014388489, + "grad_norm": 0.34241315722465515, + "learning_rate": 5.9452968099675124e-05, + "loss": 0.0187, + "step": 28050 + }, + { + "action_loss": 0.00364665687084198, + "epoch": 25.22482014388489, + "step": 28050 + }, + { + "epoch": 25.22482014388489, + "step": 28050, + "torque_loss": 0.12001895904541016 + }, + { + "epoch": 25.22482014388489, + "force_loss": 0.002398020587861538, + "step": 28050 + }, + { + "epoch": 25.23381294964029, + "grad_norm": 0.3288611173629761, + "learning_rate": 5.9425905822247527e-05, + "loss": 0.0208, + "step": 28060 + }, + { + "action_loss": 0.008102469146251678, + "epoch": 25.23381294964029, + "step": 28060 + }, + { + "epoch": 25.23381294964029, + "step": 28060, + "torque_loss": 0.1445932537317276 + }, + { + "epoch": 25.23381294964029, + "force_loss": 0.003417715197429061, + "step": 28060 + }, + { + "epoch": 25.242805755395683, + "grad_norm": 0.8010976910591125, + "learning_rate": 5.939884068147864e-05, + "loss": 0.0198, + "step": 28070 + }, + { + "action_loss": 0.0019338391721248627, + "epoch": 25.242805755395683, + "step": 28070 + }, + { + "epoch": 25.242805755395683, + "step": 28070, + "torque_loss": 0.10716181993484497 + }, + { + "epoch": 25.242805755395683, + "force_loss": 0.0027629092801362276, + "step": 28070 + }, + { + "epoch": 25.25179856115108, + "grad_norm": 0.2820752263069153, + "learning_rate": 5.937177268559011e-05, + "loss": 0.0183, + "step": 28080 + }, + { + "action_loss": 0.004666687920689583, + "epoch": 25.25179856115108, + "step": 28080 + }, + { + "epoch": 25.25179856115108, + "step": 28080, + "torque_loss": 0.11430569738149643 + }, + { + "epoch": 25.25179856115108, + "force_loss": 0.0045473817735910416, + "step": 28080 + }, + { + "epoch": 25.260791366906474, + "grad_norm": 0.2296970933675766, + "learning_rate": 5.934470184280448e-05, + "loss": 0.0161, + "step": 28090 + }, + { + "action_loss": 0.0031554270535707474, + "epoch": 25.260791366906474, + "step": 28090 + }, + { + "epoch": 25.260791366906474, + "step": 28090, + "torque_loss": 0.1344473659992218 + }, + { + "epoch": 25.260791366906474, + "force_loss": 0.0032503902912139893, + "step": 28090 + }, + { + "epoch": 25.269784172661872, + "grad_norm": 0.3302646577358246, + "learning_rate": 5.931762816134516e-05, + "loss": 0.0189, + "step": 28100 + }, + { + "action_loss": 0.00474116625264287, + "epoch": 25.269784172661872, + "step": 28100 + }, + { + "epoch": 25.269784172661872, + "step": 28100, + "torque_loss": 0.17696833610534668 + }, + { + "epoch": 25.269784172661872, + "force_loss": 0.0024352208711206913, + "step": 28100 + }, + { + "epoch": 25.278776978417266, + "grad_norm": 0.2758291959762573, + "learning_rate": 5.9290551649436434e-05, + "loss": 0.0204, + "step": 28110 + }, + { + "action_loss": 0.004586617927998304, + "epoch": 25.278776978417266, + "step": 28110 + }, + { + "epoch": 25.278776978417266, + "step": 28110, + "torque_loss": 0.13379128277301788 + }, + { + "epoch": 25.278776978417266, + "force_loss": 0.003668044460937381, + "step": 28110 + }, + { + "epoch": 25.28776978417266, + "grad_norm": 0.47487664222717285, + "learning_rate": 5.9263472315303416e-05, + "loss": 0.0208, + "step": 28120 + }, + { + "action_loss": 0.0049309092573821545, + "epoch": 25.28776978417266, + "step": 28120 + }, + { + "epoch": 25.28776978417266, + "step": 28120, + "torque_loss": 0.13639463484287262 + }, + { + "epoch": 25.28776978417266, + "force_loss": 0.007970382459461689, + "step": 28120 + }, + { + "epoch": 25.296762589928058, + "grad_norm": 0.5768781304359436, + "learning_rate": 5.9236390167172096e-05, + "loss": 0.0196, + "step": 28130 + }, + { + "action_loss": 0.00518127903342247, + "epoch": 25.296762589928058, + "step": 28130 + }, + { + "epoch": 25.296762589928058, + "step": 28130, + "torque_loss": 0.1471085548400879 + }, + { + "epoch": 25.296762589928058, + "force_loss": 0.0037640798836946487, + "step": 28130 + }, + { + "epoch": 25.305755395683452, + "grad_norm": 0.3404545485973358, + "learning_rate": 5.920930521326932e-05, + "loss": 0.0204, + "step": 28140 + }, + { + "action_loss": 0.02042549103498459, + "epoch": 25.305755395683452, + "step": 28140 + }, + { + "epoch": 25.305755395683452, + "step": 28140, + "torque_loss": 0.2146511673927307 + }, + { + "epoch": 25.305755395683452, + "force_loss": 0.03472762927412987, + "step": 28140 + }, + { + "epoch": 25.31474820143885, + "grad_norm": 0.1535160094499588, + "learning_rate": 5.918221746182276e-05, + "loss": 0.0211, + "step": 28150 + }, + { + "action_loss": 0.009396887384355068, + "epoch": 25.31474820143885, + "step": 28150 + }, + { + "epoch": 25.31474820143885, + "step": 28150, + "torque_loss": 0.12705738842487335 + }, + { + "epoch": 25.31474820143885, + "force_loss": 0.0069696661084890366, + "step": 28150 + }, + { + "epoch": 25.323741007194243, + "grad_norm": 0.16612853109836578, + "learning_rate": 5.9155126921061e-05, + "loss": 0.0181, + "step": 28160 + }, + { + "action_loss": 0.0062751155346632, + "epoch": 25.323741007194243, + "step": 28160 + }, + { + "epoch": 25.323741007194243, + "step": 28160, + "torque_loss": 0.13922913372516632 + }, + { + "epoch": 25.323741007194243, + "force_loss": 0.010980424471199512, + "step": 28160 + }, + { + "epoch": 25.33273381294964, + "grad_norm": 0.19602370262145996, + "learning_rate": 5.91280335992134e-05, + "loss": 0.0165, + "step": 28170 + }, + { + "action_loss": 0.0038515112828463316, + "epoch": 25.33273381294964, + "step": 28170 + }, + { + "epoch": 25.33273381294964, + "step": 28170, + "torque_loss": 0.1181967630982399 + }, + { + "epoch": 25.33273381294964, + "force_loss": 0.003768881084397435, + "step": 28170 + }, + { + "epoch": 25.341726618705035, + "grad_norm": 0.2185714840888977, + "learning_rate": 5.91009375045102e-05, + "loss": 0.0195, + "step": 28180 + }, + { + "action_loss": 0.002243555150926113, + "epoch": 25.341726618705035, + "step": 28180 + }, + { + "epoch": 25.341726618705035, + "step": 28180, + "torque_loss": 0.11303848028182983 + }, + { + "epoch": 25.341726618705035, + "force_loss": 0.003053064690902829, + "step": 28180 + }, + { + "epoch": 25.350719424460433, + "grad_norm": 0.21856456995010376, + "learning_rate": 5.9073838645182476e-05, + "loss": 0.0177, + "step": 28190 + }, + { + "action_loss": 0.001767792389728129, + "epoch": 25.350719424460433, + "step": 28190 + }, + { + "epoch": 25.350719424460433, + "step": 28190, + "torque_loss": 0.1569480150938034 + }, + { + "epoch": 25.350719424460433, + "force_loss": 0.0018111895769834518, + "step": 28190 + }, + { + "epoch": 25.359712230215827, + "grad_norm": 0.34505677223205566, + "learning_rate": 5.904673702946217e-05, + "loss": 0.0214, + "step": 28200 + }, + { + "action_loss": 0.008137241937220097, + "epoch": 25.359712230215827, + "step": 28200 + }, + { + "epoch": 25.359712230215827, + "step": 28200, + "torque_loss": 0.11258846521377563 + }, + { + "epoch": 25.359712230215827, + "force_loss": 0.0054153744131326675, + "step": 28200 + }, + { + "epoch": 25.368705035971225, + "grad_norm": 0.21682202816009521, + "learning_rate": 5.9019632665582004e-05, + "loss": 0.0177, + "step": 28210 + }, + { + "action_loss": 0.0020642413292080164, + "epoch": 25.368705035971225, + "step": 28210 + }, + { + "epoch": 25.368705035971225, + "step": 28210, + "torque_loss": 0.1540975421667099 + }, + { + "epoch": 25.368705035971225, + "force_loss": 0.002290738048031926, + "step": 28210 + }, + { + "epoch": 25.37769784172662, + "grad_norm": 0.5267677307128906, + "learning_rate": 5.899252556177559e-05, + "loss": 0.0181, + "step": 28220 + }, + { + "action_loss": 0.009411846287548542, + "epoch": 25.37769784172662, + "step": 28220 + }, + { + "epoch": 25.37769784172662, + "step": 28220, + "torque_loss": 0.09601055830717087 + }, + { + "epoch": 25.37769784172662, + "force_loss": 0.006265006959438324, + "step": 28220 + }, + { + "epoch": 25.386690647482013, + "grad_norm": 0.1786697804927826, + "learning_rate": 5.896541572627735e-05, + "loss": 0.0193, + "step": 28230 + }, + { + "action_loss": 0.002352232113480568, + "epoch": 25.386690647482013, + "step": 28230 + }, + { + "epoch": 25.386690647482013, + "step": 28230, + "torque_loss": 0.1546822041273117 + }, + { + "epoch": 25.386690647482013, + "force_loss": 0.005397506523877382, + "step": 28230 + }, + { + "epoch": 25.39568345323741, + "grad_norm": 0.23854495584964752, + "learning_rate": 5.893830316732253e-05, + "loss": 0.018, + "step": 28240 + }, + { + "action_loss": 0.002737324684858322, + "epoch": 25.39568345323741, + "step": 28240 + }, + { + "epoch": 25.39568345323741, + "step": 28240, + "torque_loss": 0.10519962757825851 + }, + { + "epoch": 25.39568345323741, + "force_loss": 0.004168535117059946, + "step": 28240 + }, + { + "epoch": 25.404676258992804, + "grad_norm": 0.4477432370185852, + "learning_rate": 5.8911187893147214e-05, + "loss": 0.0201, + "step": 28250 + }, + { + "action_loss": 0.0025989983696490526, + "epoch": 25.404676258992804, + "step": 28250 + }, + { + "epoch": 25.404676258992804, + "step": 28250, + "torque_loss": 0.1423255205154419 + }, + { + "epoch": 25.404676258992804, + "force_loss": 0.0019936368335038424, + "step": 28250 + }, + { + "epoch": 25.413669064748202, + "grad_norm": 0.3411311209201813, + "learning_rate": 5.888406991198828e-05, + "loss": 0.0195, + "step": 28260 + }, + { + "action_loss": 0.0035081792157143354, + "epoch": 25.413669064748202, + "step": 28260 + }, + { + "epoch": 25.413669064748202, + "step": 28260, + "torque_loss": 0.11614874005317688 + }, + { + "epoch": 25.413669064748202, + "force_loss": 0.003963755909353495, + "step": 28260 + }, + { + "epoch": 25.422661870503596, + "grad_norm": 0.30606362223625183, + "learning_rate": 5.885694923208349e-05, + "loss": 0.0232, + "step": 28270 + }, + { + "action_loss": 0.009681626223027706, + "epoch": 25.422661870503596, + "step": 28270 + }, + { + "epoch": 25.422661870503596, + "step": 28270, + "torque_loss": 0.12211836129426956 + }, + { + "epoch": 25.422661870503596, + "force_loss": 0.006401500198990107, + "step": 28270 + }, + { + "epoch": 25.431654676258994, + "grad_norm": 0.18935605883598328, + "learning_rate": 5.882982586167138e-05, + "loss": 0.0204, + "step": 28280 + }, + { + "action_loss": 0.0030593425035476685, + "epoch": 25.431654676258994, + "step": 28280 + }, + { + "epoch": 25.431654676258994, + "step": 28280, + "torque_loss": 0.11507879942655563 + }, + { + "epoch": 25.431654676258994, + "force_loss": 0.003175983903929591, + "step": 28280 + }, + { + "epoch": 25.440647482014388, + "grad_norm": 0.16558599472045898, + "learning_rate": 5.880269980899131e-05, + "loss": 0.0191, + "step": 28290 + }, + { + "action_loss": 0.00271633081138134, + "epoch": 25.440647482014388, + "step": 28290 + }, + { + "epoch": 25.440647482014388, + "step": 28290, + "torque_loss": 0.12530149519443512 + }, + { + "epoch": 25.440647482014388, + "force_loss": 0.0022884663194417953, + "step": 28290 + }, + { + "epoch": 25.449640287769785, + "grad_norm": 0.19729189574718475, + "learning_rate": 5.8775571082283465e-05, + "loss": 0.0178, + "step": 28300 + }, + { + "action_loss": 0.004341150168329477, + "epoch": 25.449640287769785, + "step": 28300 + }, + { + "epoch": 25.449640287769785, + "step": 28300, + "torque_loss": 0.11109278351068497 + }, + { + "epoch": 25.449640287769785, + "force_loss": 0.014054469764232635, + "step": 28300 + }, + { + "epoch": 25.45863309352518, + "grad_norm": 0.11685407161712646, + "learning_rate": 5.8748439689788824e-05, + "loss": 0.0211, + "step": 28310 + }, + { + "action_loss": 0.0035375170409679413, + "epoch": 25.45863309352518, + "step": 28310 + }, + { + "epoch": 25.45863309352518, + "step": 28310, + "torque_loss": 0.1559673398733139 + }, + { + "epoch": 25.45863309352518, + "force_loss": 0.004950309172272682, + "step": 28310 + }, + { + "epoch": 25.467625899280577, + "grad_norm": 0.22283989191055298, + "learning_rate": 5.87213056397492e-05, + "loss": 0.0184, + "step": 28320 + }, + { + "action_loss": 0.013143475167453289, + "epoch": 25.467625899280577, + "step": 28320 + }, + { + "epoch": 25.467625899280577, + "step": 28320, + "torque_loss": 0.1311284452676773 + }, + { + "epoch": 25.467625899280577, + "force_loss": 0.013893249444663525, + "step": 28320 + }, + { + "epoch": 25.47661870503597, + "grad_norm": 0.16183613240718842, + "learning_rate": 5.869416894040719e-05, + "loss": 0.0249, + "step": 28330 + }, + { + "action_loss": 0.00408804090693593, + "epoch": 25.47661870503597, + "step": 28330 + }, + { + "epoch": 25.47661870503597, + "step": 28330, + "torque_loss": 0.12245913594961166 + }, + { + "epoch": 25.47661870503597, + "force_loss": 0.003849039552733302, + "step": 28330 + }, + { + "epoch": 25.485611510791365, + "grad_norm": 0.10859288275241852, + "learning_rate": 5.866702960000621e-05, + "loss": 0.0191, + "step": 28340 + }, + { + "action_loss": 0.0031465962529182434, + "epoch": 25.485611510791365, + "step": 28340 + }, + { + "epoch": 25.485611510791365, + "step": 28340, + "torque_loss": 0.1350121945142746 + }, + { + "epoch": 25.485611510791365, + "force_loss": 0.006646886467933655, + "step": 28340 + }, + { + "epoch": 25.494604316546763, + "grad_norm": 0.3632952868938446, + "learning_rate": 5.863988762679048e-05, + "loss": 0.0212, + "step": 28350 + }, + { + "action_loss": 0.0031209969893097878, + "epoch": 25.494604316546763, + "step": 28350 + }, + { + "epoch": 25.494604316546763, + "step": 28350, + "torque_loss": 0.12694773077964783 + }, + { + "epoch": 25.494604316546763, + "force_loss": 0.0029710112139582634, + "step": 28350 + }, + { + "epoch": 25.503597122302157, + "grad_norm": 0.33126339316368103, + "learning_rate": 5.8612743029005e-05, + "loss": 0.0176, + "step": 28360 + }, + { + "action_loss": 0.001578190946020186, + "epoch": 25.503597122302157, + "step": 28360 + }, + { + "epoch": 25.503597122302157, + "step": 28360, + "torque_loss": 0.1360173374414444 + }, + { + "epoch": 25.503597122302157, + "force_loss": 0.0012496812269091606, + "step": 28360 + }, + { + "epoch": 25.512589928057555, + "grad_norm": 0.26428744196891785, + "learning_rate": 5.858559581489561e-05, + "loss": 0.0189, + "step": 28370 + }, + { + "action_loss": 0.017098193988204002, + "epoch": 25.512589928057555, + "step": 28370 + }, + { + "epoch": 25.512589928057555, + "step": 28370, + "torque_loss": 0.17933158576488495 + }, + { + "epoch": 25.512589928057555, + "force_loss": 0.010257538408041, + "step": 28370 + }, + { + "epoch": 25.52158273381295, + "grad_norm": 0.17129705846309662, + "learning_rate": 5.85584459927089e-05, + "loss": 0.0204, + "step": 28380 + }, + { + "action_loss": 0.011494812555611134, + "epoch": 25.52158273381295, + "step": 28380 + }, + { + "epoch": 25.52158273381295, + "step": 28380, + "torque_loss": 0.13039420545101166 + }, + { + "epoch": 25.52158273381295, + "force_loss": 0.016698889434337616, + "step": 28380 + }, + { + "epoch": 25.530575539568346, + "grad_norm": 0.18305902183055878, + "learning_rate": 5.853129357069227e-05, + "loss": 0.0187, + "step": 28390 + }, + { + "action_loss": 0.0047340672463178635, + "epoch": 25.530575539568346, + "step": 28390 + }, + { + "epoch": 25.530575539568346, + "step": 28390, + "torque_loss": 0.1560722142457962 + }, + { + "epoch": 25.530575539568346, + "force_loss": 0.004592828452587128, + "step": 28390 + }, + { + "epoch": 25.53956834532374, + "grad_norm": 0.34422537684440613, + "learning_rate": 5.8504138557093913e-05, + "loss": 0.0203, + "step": 28400 + }, + { + "action_loss": 0.015056215226650238, + "epoch": 25.53956834532374, + "step": 28400 + }, + { + "epoch": 25.53956834532374, + "step": 28400, + "torque_loss": 0.15267789363861084 + }, + { + "epoch": 25.53956834532374, + "force_loss": 0.0188570749014616, + "step": 28400 + }, + { + "epoch": 25.548561151079138, + "grad_norm": 0.23806895315647125, + "learning_rate": 5.8476980960162784e-05, + "loss": 0.0198, + "step": 28410 + }, + { + "action_loss": 0.004191949497908354, + "epoch": 25.548561151079138, + "step": 28410 + }, + { + "epoch": 25.548561151079138, + "step": 28410, + "torque_loss": 0.1481306254863739 + }, + { + "epoch": 25.548561151079138, + "force_loss": 0.003596404567360878, + "step": 28410 + }, + { + "epoch": 25.557553956834532, + "grad_norm": 0.21229428052902222, + "learning_rate": 5.844982078814868e-05, + "loss": 0.0206, + "step": 28420 + }, + { + "action_loss": 0.004433061927556992, + "epoch": 25.557553956834532, + "step": 28420 + }, + { + "epoch": 25.557553956834532, + "step": 28420, + "torque_loss": 0.1579483300447464 + }, + { + "epoch": 25.557553956834532, + "force_loss": 0.004343610722571611, + "step": 28420 + }, + { + "epoch": 25.56654676258993, + "grad_norm": 0.4190731346607208, + "learning_rate": 5.842265804930211e-05, + "loss": 0.0238, + "step": 28430 + }, + { + "action_loss": 0.0030550810042768717, + "epoch": 25.56654676258993, + "step": 28430 + }, + { + "epoch": 25.56654676258993, + "step": 28430, + "torque_loss": 0.12878479063510895 + }, + { + "epoch": 25.56654676258993, + "force_loss": 0.0020467147696763277, + "step": 28430 + }, + { + "epoch": 25.575539568345324, + "grad_norm": 0.2838549315929413, + "learning_rate": 5.839549275187444e-05, + "loss": 0.018, + "step": 28440 + }, + { + "action_loss": 0.012333524413406849, + "epoch": 25.575539568345324, + "step": 28440 + }, + { + "epoch": 25.575539568345324, + "step": 28440, + "torque_loss": 0.15713512897491455 + }, + { + "epoch": 25.575539568345324, + "force_loss": 0.01004349160939455, + "step": 28440 + }, + { + "epoch": 25.584532374100718, + "grad_norm": 0.5280764102935791, + "learning_rate": 5.836832490411771e-05, + "loss": 0.0199, + "step": 28450 + }, + { + "action_loss": 0.002958972007036209, + "epoch": 25.584532374100718, + "step": 28450 + }, + { + "epoch": 25.584532374100718, + "step": 28450, + "torque_loss": 0.14007197320461273 + }, + { + "epoch": 25.584532374100718, + "force_loss": 0.002835891442373395, + "step": 28450 + }, + { + "epoch": 25.593525179856115, + "grad_norm": 0.2873149812221527, + "learning_rate": 5.834115451428485e-05, + "loss": 0.0199, + "step": 28460 + }, + { + "action_loss": 0.007463912013918161, + "epoch": 25.593525179856115, + "step": 28460 + }, + { + "epoch": 25.593525179856115, + "step": 28460, + "torque_loss": 0.09955960512161255 + }, + { + "epoch": 25.593525179856115, + "force_loss": 0.006850830744951963, + "step": 28460 + }, + { + "epoch": 25.60251798561151, + "grad_norm": 0.16323289275169373, + "learning_rate": 5.831398159062946e-05, + "loss": 0.0179, + "step": 28470 + }, + { + "action_loss": 0.002226385287940502, + "epoch": 25.60251798561151, + "step": 28470 + }, + { + "epoch": 25.60251798561151, + "step": 28470, + "torque_loss": 0.17174698412418365 + }, + { + "epoch": 25.60251798561151, + "force_loss": 0.0024590285029262304, + "step": 28470 + }, + { + "epoch": 25.611510791366907, + "grad_norm": 0.27848368883132935, + "learning_rate": 5.828680614140599e-05, + "loss": 0.0184, + "step": 28480 + }, + { + "action_loss": 0.01565668359398842, + "epoch": 25.611510791366907, + "step": 28480 + }, + { + "epoch": 25.611510791366907, + "step": 28480, + "torque_loss": 0.1555856466293335 + }, + { + "epoch": 25.611510791366907, + "force_loss": 0.023662244901061058, + "step": 28480 + }, + { + "epoch": 25.6205035971223, + "grad_norm": 0.5817075371742249, + "learning_rate": 5.825962817486962e-05, + "loss": 0.0215, + "step": 28490 + }, + { + "action_loss": 0.004613556433469057, + "epoch": 25.6205035971223, + "step": 28490 + }, + { + "epoch": 25.6205035971223, + "step": 28490, + "torque_loss": 0.10848802328109741 + }, + { + "epoch": 25.6205035971223, + "force_loss": 0.0048154438845813274, + "step": 28490 + }, + { + "epoch": 25.6294964028777, + "grad_norm": 0.15831421315670013, + "learning_rate": 5.823244769927629e-05, + "loss": 0.0195, + "step": 28500 + }, + { + "action_loss": 0.005210065748542547, + "epoch": 25.6294964028777, + "step": 28500 + }, + { + "epoch": 25.6294964028777, + "step": 28500, + "torque_loss": 0.12404090911149979 + }, + { + "epoch": 25.6294964028777, + "force_loss": 0.011453814804553986, + "step": 28500 + }, + { + "epoch": 25.638489208633093, + "grad_norm": 0.2887755036354065, + "learning_rate": 5.8205264722882716e-05, + "loss": 0.0266, + "step": 28510 + }, + { + "action_loss": 0.002105491003021598, + "epoch": 25.638489208633093, + "step": 28510 + }, + { + "epoch": 25.638489208633093, + "step": 28510, + "torque_loss": 0.0879632830619812 + }, + { + "epoch": 25.638489208633093, + "force_loss": 0.002024345565587282, + "step": 28510 + }, + { + "epoch": 25.64748201438849, + "grad_norm": 0.46345382928848267, + "learning_rate": 5.817807925394636e-05, + "loss": 0.0182, + "step": 28520 + }, + { + "action_loss": 0.0032017540652304888, + "epoch": 25.64748201438849, + "step": 28520 + }, + { + "epoch": 25.64748201438849, + "step": 28520, + "torque_loss": 0.16883087158203125 + }, + { + "epoch": 25.64748201438849, + "force_loss": 0.0020397247280925512, + "step": 28520 + }, + { + "epoch": 25.656474820143885, + "grad_norm": 0.2489829808473587, + "learning_rate": 5.815089130072546e-05, + "loss": 0.0188, + "step": 28530 + }, + { + "action_loss": 0.004855359438806772, + "epoch": 25.656474820143885, + "step": 28530 + }, + { + "epoch": 25.656474820143885, + "step": 28530, + "torque_loss": 0.08536690473556519 + }, + { + "epoch": 25.656474820143885, + "force_loss": 0.008929177187383175, + "step": 28530 + }, + { + "epoch": 25.665467625899282, + "grad_norm": 0.18484389781951904, + "learning_rate": 5.8123700871479e-05, + "loss": 0.0161, + "step": 28540 + }, + { + "action_loss": 0.0024327461142092943, + "epoch": 25.665467625899282, + "step": 28540 + }, + { + "epoch": 25.665467625899282, + "step": 28540, + "torque_loss": 0.1123182475566864 + }, + { + "epoch": 25.665467625899282, + "force_loss": 0.0032714849803596735, + "step": 28540 + }, + { + "epoch": 25.674460431654676, + "grad_norm": 0.10311053693294525, + "learning_rate": 5.809650797446671e-05, + "loss": 0.018, + "step": 28550 + }, + { + "action_loss": 0.007736308965831995, + "epoch": 25.674460431654676, + "step": 28550 + }, + { + "epoch": 25.674460431654676, + "step": 28550, + "torque_loss": 0.1223251223564148 + }, + { + "epoch": 25.674460431654676, + "force_loss": 0.0098481560125947, + "step": 28550 + }, + { + "epoch": 25.68345323741007, + "grad_norm": 0.195352703332901, + "learning_rate": 5.806931261794907e-05, + "loss": 0.0194, + "step": 28560 + }, + { + "action_loss": 0.001999068772420287, + "epoch": 25.68345323741007, + "step": 28560 + }, + { + "epoch": 25.68345323741007, + "step": 28560, + "torque_loss": 0.12419679760932922 + }, + { + "epoch": 25.68345323741007, + "force_loss": 0.003961937036365271, + "step": 28560 + }, + { + "epoch": 25.692446043165468, + "grad_norm": 0.13347913324832916, + "learning_rate": 5.804211481018731e-05, + "loss": 0.0153, + "step": 28570 + }, + { + "action_loss": 0.01377819012850523, + "epoch": 25.692446043165468, + "step": 28570 + }, + { + "epoch": 25.692446043165468, + "step": 28570, + "torque_loss": 0.16717176139354706 + }, + { + "epoch": 25.692446043165468, + "force_loss": 0.009943501092493534, + "step": 28570 + }, + { + "epoch": 25.701438848920862, + "grad_norm": 0.1758003681898117, + "learning_rate": 5.801491455944341e-05, + "loss": 0.0181, + "step": 28580 + }, + { + "action_loss": 0.003849254921078682, + "epoch": 25.701438848920862, + "step": 28580 + }, + { + "epoch": 25.701438848920862, + "step": 28580, + "torque_loss": 0.11864777654409409 + }, + { + "epoch": 25.701438848920862, + "force_loss": 0.002902369014918804, + "step": 28580 + }, + { + "epoch": 25.71043165467626, + "grad_norm": 0.2609722912311554, + "learning_rate": 5.79877118739801e-05, + "loss": 0.0185, + "step": 28590 + }, + { + "action_loss": 0.007190841715782881, + "epoch": 25.71043165467626, + "step": 28590 + }, + { + "epoch": 25.71043165467626, + "step": 28590, + "torque_loss": 0.14964285492897034 + }, + { + "epoch": 25.71043165467626, + "force_loss": 0.009538150392472744, + "step": 28590 + }, + { + "epoch": 25.719424460431654, + "grad_norm": 0.3223738968372345, + "learning_rate": 5.7960506762060816e-05, + "loss": 0.0196, + "step": 28600 + }, + { + "action_loss": 0.005924099590629339, + "epoch": 25.719424460431654, + "step": 28600 + }, + { + "epoch": 25.719424460431654, + "step": 28600, + "torque_loss": 0.10150185972452164 + }, + { + "epoch": 25.719424460431654, + "force_loss": 0.009191926568746567, + "step": 28600 + }, + { + "epoch": 25.72841726618705, + "grad_norm": 0.4019058644771576, + "learning_rate": 5.793329923194977e-05, + "loss": 0.0167, + "step": 28610 + }, + { + "action_loss": 0.003359417198225856, + "epoch": 25.72841726618705, + "step": 28610 + }, + { + "epoch": 25.72841726618705, + "step": 28610, + "torque_loss": 0.12953747808933258 + }, + { + "epoch": 25.72841726618705, + "force_loss": 0.004174624104052782, + "step": 28610 + }, + { + "epoch": 25.737410071942445, + "grad_norm": 0.4349093735218048, + "learning_rate": 5.790608929191187e-05, + "loss": 0.0191, + "step": 28620 + }, + { + "action_loss": 0.003896950511261821, + "epoch": 25.737410071942445, + "step": 28620 + }, + { + "epoch": 25.737410071942445, + "step": 28620, + "torque_loss": 0.13709606230258942 + }, + { + "epoch": 25.737410071942445, + "force_loss": 0.0024140216410160065, + "step": 28620 + }, + { + "epoch": 25.746402877697843, + "grad_norm": 0.22858905792236328, + "learning_rate": 5.78788769502128e-05, + "loss": 0.0231, + "step": 28630 + }, + { + "action_loss": 0.008807025849819183, + "epoch": 25.746402877697843, + "step": 28630 + }, + { + "epoch": 25.746402877697843, + "step": 28630, + "torque_loss": 0.15493394434452057 + }, + { + "epoch": 25.746402877697843, + "force_loss": 0.011327589862048626, + "step": 28630 + }, + { + "epoch": 25.755395683453237, + "grad_norm": 0.32832226157188416, + "learning_rate": 5.785166221511894e-05, + "loss": 0.0208, + "step": 28640 + }, + { + "action_loss": 0.003166560083627701, + "epoch": 25.755395683453237, + "step": 28640 + }, + { + "epoch": 25.755395683453237, + "step": 28640, + "torque_loss": 0.110824815928936 + }, + { + "epoch": 25.755395683453237, + "force_loss": 0.0033940270077437162, + "step": 28640 + }, + { + "epoch": 25.764388489208635, + "grad_norm": 0.47827088832855225, + "learning_rate": 5.7824445094897415e-05, + "loss": 0.0229, + "step": 28650 + }, + { + "action_loss": 0.005630446132272482, + "epoch": 25.764388489208635, + "step": 28650 + }, + { + "epoch": 25.764388489208635, + "step": 28650, + "torque_loss": 0.13710464537143707 + }, + { + "epoch": 25.764388489208635, + "force_loss": 0.0062186564318835735, + "step": 28650 + }, + { + "epoch": 25.77338129496403, + "grad_norm": 0.4051119089126587, + "learning_rate": 5.7797225597816065e-05, + "loss": 0.0217, + "step": 28660 + }, + { + "action_loss": 0.005888625513762236, + "epoch": 25.77338129496403, + "step": 28660 + }, + { + "epoch": 25.77338129496403, + "step": 28660, + "torque_loss": 0.10672199726104736 + }, + { + "epoch": 25.77338129496403, + "force_loss": 0.005437417421489954, + "step": 28660 + }, + { + "epoch": 25.782374100719423, + "grad_norm": 0.27474355697631836, + "learning_rate": 5.777000373214345e-05, + "loss": 0.0185, + "step": 28670 + }, + { + "action_loss": 0.003132052021101117, + "epoch": 25.782374100719423, + "step": 28670 + }, + { + "epoch": 25.782374100719423, + "step": 28670, + "torque_loss": 0.10297843813896179 + }, + { + "epoch": 25.782374100719423, + "force_loss": 0.0027979493606835604, + "step": 28670 + }, + { + "epoch": 25.79136690647482, + "grad_norm": 0.30740973353385925, + "learning_rate": 5.774277950614885e-05, + "loss": 0.024, + "step": 28680 + }, + { + "action_loss": 0.008867667056620121, + "epoch": 25.79136690647482, + "step": 28680 + }, + { + "epoch": 25.79136690647482, + "step": 28680, + "torque_loss": 0.12430638074874878 + }, + { + "epoch": 25.79136690647482, + "force_loss": 0.007247579749673605, + "step": 28680 + }, + { + "epoch": 25.800359712230215, + "grad_norm": 0.20495644211769104, + "learning_rate": 5.771555292810227e-05, + "loss": 0.0191, + "step": 28690 + }, + { + "action_loss": 0.0036730438005179167, + "epoch": 25.800359712230215, + "step": 28690 + }, + { + "epoch": 25.800359712230215, + "step": 28690, + "torque_loss": 0.12670987844467163 + }, + { + "epoch": 25.800359712230215, + "force_loss": 0.003855082904919982, + "step": 28690 + }, + { + "epoch": 25.809352517985612, + "grad_norm": 0.4939091205596924, + "learning_rate": 5.768832400627444e-05, + "loss": 0.0182, + "step": 28700 + }, + { + "action_loss": 0.005129032302647829, + "epoch": 25.809352517985612, + "step": 28700 + }, + { + "epoch": 25.809352517985612, + "step": 28700, + "torque_loss": 0.1300128698348999 + }, + { + "epoch": 25.809352517985612, + "force_loss": 0.012985593639314175, + "step": 28700 + }, + { + "epoch": 25.818345323741006, + "grad_norm": 0.5835957527160645, + "learning_rate": 5.7661092748936775e-05, + "loss": 0.0219, + "step": 28710 + }, + { + "action_loss": 0.0049516293220222, + "epoch": 25.818345323741006, + "step": 28710 + }, + { + "epoch": 25.818345323741006, + "step": 28710, + "torque_loss": 0.09968063980340958 + }, + { + "epoch": 25.818345323741006, + "force_loss": 0.0068278550170362, + "step": 28710 + }, + { + "epoch": 25.827338129496404, + "grad_norm": 0.21389810740947723, + "learning_rate": 5.76338591643614e-05, + "loss": 0.025, + "step": 28720 + }, + { + "action_loss": 0.005397242959588766, + "epoch": 25.827338129496404, + "step": 28720 + }, + { + "epoch": 25.827338129496404, + "step": 28720, + "torque_loss": 0.10741696506738663 + }, + { + "epoch": 25.827338129496404, + "force_loss": 0.004168794024735689, + "step": 28720 + }, + { + "epoch": 25.836330935251798, + "grad_norm": 0.16188180446624756, + "learning_rate": 5.760662326082118e-05, + "loss": 0.0238, + "step": 28730 + }, + { + "action_loss": 0.003408056451007724, + "epoch": 25.836330935251798, + "step": 28730 + }, + { + "epoch": 25.836330935251798, + "step": 28730, + "torque_loss": 0.12680047750473022 + }, + { + "epoch": 25.836330935251798, + "force_loss": 0.006992408540099859, + "step": 28730 + }, + { + "epoch": 25.845323741007196, + "grad_norm": 0.36538028717041016, + "learning_rate": 5.757938504658965e-05, + "loss": 0.0204, + "step": 28740 + }, + { + "action_loss": 0.009054531343281269, + "epoch": 25.845323741007196, + "step": 28740 + }, + { + "epoch": 25.845323741007196, + "step": 28740, + "torque_loss": 0.11967268586158752 + }, + { + "epoch": 25.845323741007196, + "force_loss": 0.004422473255544901, + "step": 28740 + }, + { + "epoch": 25.85431654676259, + "grad_norm": 0.20116569101810455, + "learning_rate": 5.755214452994107e-05, + "loss": 0.0199, + "step": 28750 + }, + { + "action_loss": 0.005058668088167906, + "epoch": 25.85431654676259, + "step": 28750 + }, + { + "epoch": 25.85431654676259, + "step": 28750, + "torque_loss": 0.14605657756328583 + }, + { + "epoch": 25.85431654676259, + "force_loss": 0.0036439981777220964, + "step": 28750 + }, + { + "epoch": 25.863309352517987, + "grad_norm": 0.17597800493240356, + "learning_rate": 5.752490171915039e-05, + "loss": 0.0222, + "step": 28760 + }, + { + "action_loss": 0.002458389615640044, + "epoch": 25.863309352517987, + "step": 28760 + }, + { + "epoch": 25.863309352517987, + "step": 28760, + "torque_loss": 0.1379869431257248 + }, + { + "epoch": 25.863309352517987, + "force_loss": 0.00291249412111938, + "step": 28760 + }, + { + "epoch": 25.87230215827338, + "grad_norm": 0.3704226315021515, + "learning_rate": 5.749765662249324e-05, + "loss": 0.0239, + "step": 28770 + }, + { + "action_loss": 0.005187605042010546, + "epoch": 25.87230215827338, + "step": 28770 + }, + { + "epoch": 25.87230215827338, + "step": 28770, + "torque_loss": 0.13833649456501007 + }, + { + "epoch": 25.87230215827338, + "force_loss": 0.007807048503309488, + "step": 28770 + }, + { + "epoch": 25.881294964028775, + "grad_norm": 0.18758834898471832, + "learning_rate": 5.747040924824596e-05, + "loss": 0.0161, + "step": 28780 + }, + { + "action_loss": 0.0019194980850443244, + "epoch": 25.881294964028775, + "step": 28780 + }, + { + "epoch": 25.881294964028775, + "step": 28780, + "torque_loss": 0.09164443612098694 + }, + { + "epoch": 25.881294964028775, + "force_loss": 0.0008873723563738167, + "step": 28780 + }, + { + "epoch": 25.890287769784173, + "grad_norm": 0.29400554299354553, + "learning_rate": 5.7443159604685613e-05, + "loss": 0.0198, + "step": 28790 + }, + { + "action_loss": 0.010663188993930817, + "epoch": 25.890287769784173, + "step": 28790 + }, + { + "epoch": 25.890287769784173, + "step": 28790, + "torque_loss": 0.14566320180892944 + }, + { + "epoch": 25.890287769784173, + "force_loss": 0.013719928450882435, + "step": 28790 + }, + { + "epoch": 25.899280575539567, + "grad_norm": 0.30214011669158936, + "learning_rate": 5.74159077000899e-05, + "loss": 0.0217, + "step": 28800 + }, + { + "action_loss": 0.00703857047483325, + "epoch": 25.899280575539567, + "step": 28800 + }, + { + "epoch": 25.899280575539567, + "step": 28800, + "torque_loss": 0.12661753594875336 + }, + { + "epoch": 25.899280575539567, + "force_loss": 0.007603875827044249, + "step": 28800 + }, + { + "epoch": 25.908273381294965, + "grad_norm": 0.5199934840202332, + "learning_rate": 5.7388653542737235e-05, + "loss": 0.0203, + "step": 28810 + }, + { + "action_loss": 0.0063810646533966064, + "epoch": 25.908273381294965, + "step": 28810 + }, + { + "epoch": 25.908273381294965, + "step": 28810, + "torque_loss": 0.14510506391525269 + }, + { + "epoch": 25.908273381294965, + "force_loss": 0.010953851044178009, + "step": 28810 + }, + { + "epoch": 25.91726618705036, + "grad_norm": 0.2643183469772339, + "learning_rate": 5.736139714090672e-05, + "loss": 0.0205, + "step": 28820 + }, + { + "action_loss": 0.03102993406355381, + "epoch": 25.91726618705036, + "step": 28820 + }, + { + "epoch": 25.91726618705036, + "step": 28820, + "torque_loss": 0.1717318296432495 + }, + { + "epoch": 25.91726618705036, + "force_loss": 0.02739032171666622, + "step": 28820 + }, + { + "epoch": 25.926258992805757, + "grad_norm": 0.34062471985816956, + "learning_rate": 5.73341385028781e-05, + "loss": 0.022, + "step": 28830 + }, + { + "action_loss": 0.0028952148277312517, + "epoch": 25.926258992805757, + "step": 28830 + }, + { + "epoch": 25.926258992805757, + "step": 28830, + "torque_loss": 0.0797184482216835 + }, + { + "epoch": 25.926258992805757, + "force_loss": 0.002789615886285901, + "step": 28830 + }, + { + "epoch": 25.93525179856115, + "grad_norm": 0.28866004943847656, + "learning_rate": 5.7306877636931855e-05, + "loss": 0.0164, + "step": 28840 + }, + { + "action_loss": 0.003368654288351536, + "epoch": 25.93525179856115, + "step": 28840 + }, + { + "epoch": 25.93525179856115, + "step": 28840, + "torque_loss": 0.10057816654443741 + }, + { + "epoch": 25.93525179856115, + "force_loss": 0.0037018340080976486, + "step": 28840 + }, + { + "epoch": 25.944244604316548, + "grad_norm": 0.1584034115076065, + "learning_rate": 5.7279614551349125e-05, + "loss": 0.0173, + "step": 28850 + }, + { + "action_loss": 0.0019302628934383392, + "epoch": 25.944244604316548, + "step": 28850 + }, + { + "epoch": 25.944244604316548, + "step": 28850, + "torque_loss": 0.15853749215602875 + }, + { + "epoch": 25.944244604316548, + "force_loss": 0.0033526085317134857, + "step": 28850 + }, + { + "epoch": 25.953237410071942, + "grad_norm": 0.1823311746120453, + "learning_rate": 5.725234925441169e-05, + "loss": 0.0164, + "step": 28860 + }, + { + "action_loss": 0.0013245727168396115, + "epoch": 25.953237410071942, + "step": 28860 + }, + { + "epoch": 25.953237410071942, + "step": 28860, + "torque_loss": 0.09203815460205078 + }, + { + "epoch": 25.953237410071942, + "force_loss": 0.0012865253956988454, + "step": 28860 + }, + { + "epoch": 25.96223021582734, + "grad_norm": 0.6473246216773987, + "learning_rate": 5.7225081754402044e-05, + "loss": 0.0171, + "step": 28870 + }, + { + "action_loss": 0.00738756125792861, + "epoch": 25.96223021582734, + "step": 28870 + }, + { + "epoch": 25.96223021582734, + "step": 28870, + "torque_loss": 0.12493404000997543 + }, + { + "epoch": 25.96223021582734, + "force_loss": 0.005382902454584837, + "step": 28870 + }, + { + "epoch": 25.971223021582734, + "grad_norm": 0.21924792230129242, + "learning_rate": 5.7197812059603326e-05, + "loss": 0.0185, + "step": 28880 + }, + { + "action_loss": 0.008321907371282578, + "epoch": 25.971223021582734, + "step": 28880 + }, + { + "epoch": 25.971223021582734, + "step": 28880, + "torque_loss": 0.13138620555400848 + }, + { + "epoch": 25.971223021582734, + "force_loss": 0.005462937522679567, + "step": 28880 + }, + { + "epoch": 25.980215827338128, + "grad_norm": 0.3269522786140442, + "learning_rate": 5.717054017829934e-05, + "loss": 0.0213, + "step": 28890 + }, + { + "action_loss": 0.002165122888982296, + "epoch": 25.980215827338128, + "step": 28890 + }, + { + "epoch": 25.980215827338128, + "step": 28890, + "torque_loss": 0.10722139477729797 + }, + { + "epoch": 25.980215827338128, + "force_loss": 0.0011654613772407174, + "step": 28890 + }, + { + "epoch": 25.989208633093526, + "grad_norm": 0.3549667298793793, + "learning_rate": 5.7143266118774584e-05, + "loss": 0.0287, + "step": 28900 + }, + { + "action_loss": 0.0026221785228699446, + "epoch": 25.989208633093526, + "step": 28900 + }, + { + "epoch": 25.989208633093526, + "step": 28900, + "torque_loss": 0.12324017286300659 + }, + { + "epoch": 25.989208633093526, + "force_loss": 0.0051282597705721855, + "step": 28900 + }, + { + "epoch": 25.99820143884892, + "grad_norm": 0.28808143734931946, + "learning_rate": 5.711598988931418e-05, + "loss": 0.0188, + "step": 28910 + }, + { + "action_loss": 0.003999910783022642, + "epoch": 25.99820143884892, + "step": 28910 + }, + { + "epoch": 25.99820143884892, + "step": 28910, + "torque_loss": 0.1733754426240921 + }, + { + "epoch": 25.99820143884892, + "force_loss": 0.002792246639728546, + "step": 28910 + }, + { + "epoch": 26.007194244604317, + "grad_norm": 0.32484543323516846, + "learning_rate": 5.7088711498203954e-05, + "loss": 0.0195, + "step": 28920 + }, + { + "action_loss": 0.00449376692995429, + "epoch": 26.007194244604317, + "step": 28920 + }, + { + "epoch": 26.007194244604317, + "step": 28920, + "torque_loss": 0.1329164057970047 + }, + { + "epoch": 26.007194244604317, + "force_loss": 0.005821609403938055, + "step": 28920 + }, + { + "epoch": 26.01618705035971, + "grad_norm": 0.14284273982048035, + "learning_rate": 5.706143095373033e-05, + "loss": 0.021, + "step": 28930 + }, + { + "action_loss": 0.0031853385735303164, + "epoch": 26.01618705035971, + "step": 28930 + }, + { + "epoch": 26.01618705035971, + "step": 28930, + "torque_loss": 0.1327640861272812 + }, + { + "epoch": 26.01618705035971, + "force_loss": 0.005952553357928991, + "step": 28930 + }, + { + "epoch": 26.02517985611511, + "grad_norm": 0.5342223048210144, + "learning_rate": 5.703414826418042e-05, + "loss": 0.0194, + "step": 28940 + }, + { + "action_loss": 0.009315653704106808, + "epoch": 26.02517985611511, + "step": 28940 + }, + { + "epoch": 26.02517985611511, + "step": 28940, + "torque_loss": 0.1404716521501541 + }, + { + "epoch": 26.02517985611511, + "force_loss": 0.006468483712524176, + "step": 28940 + }, + { + "epoch": 26.034172661870503, + "grad_norm": 0.2200510948896408, + "learning_rate": 5.7006863437842007e-05, + "loss": 0.0199, + "step": 28950 + }, + { + "action_loss": 0.005508519243448973, + "epoch": 26.034172661870503, + "step": 28950 + }, + { + "epoch": 26.034172661870503, + "step": 28950, + "torque_loss": 0.14688707888126373 + }, + { + "epoch": 26.034172661870503, + "force_loss": 0.003007271094247699, + "step": 28950 + }, + { + "epoch": 26.0431654676259, + "grad_norm": 0.3171609044075012, + "learning_rate": 5.697957648300348e-05, + "loss": 0.0274, + "step": 28960 + }, + { + "action_loss": 0.0037698314990848303, + "epoch": 26.0431654676259, + "step": 28960 + }, + { + "epoch": 26.0431654676259, + "step": 28960, + "torque_loss": 0.12635333836078644 + }, + { + "epoch": 26.0431654676259, + "force_loss": 0.0025174252223223448, + "step": 28960 + }, + { + "epoch": 26.052158273381295, + "grad_norm": 0.20609548687934875, + "learning_rate": 5.695228740795391e-05, + "loss": 0.0216, + "step": 28970 + }, + { + "action_loss": 0.0028065908700227737, + "epoch": 26.052158273381295, + "step": 28970 + }, + { + "epoch": 26.052158273381295, + "step": 28970, + "torque_loss": 0.13230383396148682 + }, + { + "epoch": 26.052158273381295, + "force_loss": 0.003663417650386691, + "step": 28970 + }, + { + "epoch": 26.06115107913669, + "grad_norm": 0.42974433302879333, + "learning_rate": 5.6924996220982985e-05, + "loss": 0.0196, + "step": 28980 + }, + { + "action_loss": 0.002372222952544689, + "epoch": 26.06115107913669, + "step": 28980 + }, + { + "epoch": 26.06115107913669, + "step": 28980, + "torque_loss": 0.1182672381401062 + }, + { + "epoch": 26.06115107913669, + "force_loss": 0.0022993513848632574, + "step": 28980 + }, + { + "epoch": 26.070143884892087, + "grad_norm": 0.16391532123088837, + "learning_rate": 5.6897702930381045e-05, + "loss": 0.0167, + "step": 28990 + }, + { + "action_loss": 0.0022096759639680386, + "epoch": 26.070143884892087, + "step": 28990 + }, + { + "epoch": 26.070143884892087, + "step": 28990, + "torque_loss": 0.08953986316919327 + }, + { + "epoch": 26.070143884892087, + "force_loss": 0.003215731354430318, + "step": 28990 + }, + { + "epoch": 26.07913669064748, + "grad_norm": 0.12691636383533478, + "learning_rate": 5.687040754443908e-05, + "loss": 0.0164, + "step": 29000 + }, + { + "action_loss": 0.002988505410030484, + "epoch": 26.07913669064748, + "step": 29000 + }, + { + "epoch": 26.07913669064748, + "step": 29000, + "torque_loss": 0.12361162900924683 + }, + { + "epoch": 26.07913669064748, + "force_loss": 0.004084745887666941, + "step": 29000 + }, + { + "epoch": 26.08812949640288, + "grad_norm": 0.20146793127059937, + "learning_rate": 5.6843110071448725e-05, + "loss": 0.0192, + "step": 29010 + }, + { + "action_loss": 0.003533797338604927, + "epoch": 26.08812949640288, + "step": 29010 + }, + { + "epoch": 26.08812949640288, + "step": 29010, + "torque_loss": 0.11133870482444763 + }, + { + "epoch": 26.08812949640288, + "force_loss": 0.004257683642208576, + "step": 29010 + }, + { + "epoch": 26.097122302158272, + "grad_norm": 0.308725506067276, + "learning_rate": 5.6815810519702194e-05, + "loss": 0.0195, + "step": 29020 + }, + { + "action_loss": 0.0036037983372807503, + "epoch": 26.097122302158272, + "step": 29020 + }, + { + "epoch": 26.097122302158272, + "step": 29020, + "torque_loss": 0.08814121037721634 + }, + { + "epoch": 26.097122302158272, + "force_loss": 0.0018510861555114388, + "step": 29020 + }, + { + "epoch": 26.10611510791367, + "grad_norm": 0.3805563449859619, + "learning_rate": 5.6788508897492396e-05, + "loss": 0.0168, + "step": 29030 + }, + { + "action_loss": 0.002368503948673606, + "epoch": 26.10611510791367, + "step": 29030 + }, + { + "epoch": 26.10611510791367, + "step": 29030, + "torque_loss": 0.11723294109106064 + }, + { + "epoch": 26.10611510791367, + "force_loss": 0.0014365953393280506, + "step": 29030 + }, + { + "epoch": 26.115107913669064, + "grad_norm": 0.3679788112640381, + "learning_rate": 5.676120521311282e-05, + "loss": 0.0184, + "step": 29040 + }, + { + "action_loss": 0.007685080636292696, + "epoch": 26.115107913669064, + "step": 29040 + }, + { + "epoch": 26.115107913669064, + "step": 29040, + "torque_loss": 0.1312701404094696 + }, + { + "epoch": 26.115107913669064, + "force_loss": 0.013920996338129044, + "step": 29040 + }, + { + "epoch": 26.12410071942446, + "grad_norm": 0.13891716301441193, + "learning_rate": 5.6733899474857634e-05, + "loss": 0.0196, + "step": 29050 + }, + { + "action_loss": 0.0030181354377418756, + "epoch": 26.12410071942446, + "step": 29050 + }, + { + "epoch": 26.12410071942446, + "step": 29050, + "torque_loss": 0.12050659209489822 + }, + { + "epoch": 26.12410071942446, + "force_loss": 0.003781926818192005, + "step": 29050 + }, + { + "epoch": 26.133093525179856, + "grad_norm": 0.18489331007003784, + "learning_rate": 5.670659169102157e-05, + "loss": 0.0169, + "step": 29060 + }, + { + "action_loss": 0.02172222174704075, + "epoch": 26.133093525179856, + "step": 29060 + }, + { + "epoch": 26.133093525179856, + "step": 29060, + "torque_loss": 0.20113885402679443 + }, + { + "epoch": 26.133093525179856, + "force_loss": 0.019537802785634995, + "step": 29060 + }, + { + "epoch": 26.142086330935253, + "grad_norm": 0.43244943022727966, + "learning_rate": 5.6679281869900044e-05, + "loss": 0.0251, + "step": 29070 + }, + { + "action_loss": 0.0025063930079340935, + "epoch": 26.142086330935253, + "step": 29070 + }, + { + "epoch": 26.142086330935253, + "step": 29070, + "torque_loss": 0.11865457147359848 + }, + { + "epoch": 26.142086330935253, + "force_loss": 0.0032049445435404778, + "step": 29070 + }, + { + "epoch": 26.151079136690647, + "grad_norm": 0.6014410853385925, + "learning_rate": 5.6651970019789045e-05, + "loss": 0.0191, + "step": 29080 + }, + { + "action_loss": 0.006351388990879059, + "epoch": 26.151079136690647, + "step": 29080 + }, + { + "epoch": 26.151079136690647, + "step": 29080, + "torque_loss": 0.1085766926407814 + }, + { + "epoch": 26.151079136690647, + "force_loss": 0.011536809615790844, + "step": 29080 + }, + { + "epoch": 26.16007194244604, + "grad_norm": 0.4379007816314697, + "learning_rate": 5.662465614898519e-05, + "loss": 0.0168, + "step": 29090 + }, + { + "action_loss": 0.002459225943312049, + "epoch": 26.16007194244604, + "step": 29090 + }, + { + "epoch": 26.16007194244604, + "step": 29090, + "torque_loss": 0.11237046867609024 + }, + { + "epoch": 26.16007194244604, + "force_loss": 0.003944239113479853, + "step": 29090 + }, + { + "epoch": 26.16906474820144, + "grad_norm": 0.2697606384754181, + "learning_rate": 5.6597340265785695e-05, + "loss": 0.024, + "step": 29100 + }, + { + "action_loss": 0.0027110965456813574, + "epoch": 26.16906474820144, + "step": 29100 + }, + { + "epoch": 26.16906474820144, + "step": 29100, + "torque_loss": 0.1028432548046112 + }, + { + "epoch": 26.16906474820144, + "force_loss": 0.002248074160888791, + "step": 29100 + }, + { + "epoch": 26.178057553956833, + "grad_norm": 0.13784776628017426, + "learning_rate": 5.657002237848843e-05, + "loss": 0.0193, + "step": 29110 + }, + { + "action_loss": 0.0029081034008413553, + "epoch": 26.178057553956833, + "step": 29110 + }, + { + "epoch": 26.178057553956833, + "step": 29110, + "torque_loss": 0.12708336114883423 + }, + { + "epoch": 26.178057553956833, + "force_loss": 0.002756238216534257, + "step": 29110 + }, + { + "epoch": 26.18705035971223, + "grad_norm": 0.27970412373542786, + "learning_rate": 5.654270249539183e-05, + "loss": 0.0177, + "step": 29120 + }, + { + "action_loss": 0.012793789617717266, + "epoch": 26.18705035971223, + "step": 29120 + }, + { + "epoch": 26.18705035971223, + "step": 29120, + "torque_loss": 0.1278197318315506 + }, + { + "epoch": 26.18705035971223, + "force_loss": 0.01568256877362728, + "step": 29120 + }, + { + "epoch": 26.196043165467625, + "grad_norm": 0.2465725541114807, + "learning_rate": 5.651538062479498e-05, + "loss": 0.0214, + "step": 29130 + }, + { + "action_loss": 0.01291924063116312, + "epoch": 26.196043165467625, + "step": 29130 + }, + { + "epoch": 26.196043165467625, + "step": 29130, + "torque_loss": 0.13238336145877838 + }, + { + "epoch": 26.196043165467625, + "force_loss": 0.01366290170699358, + "step": 29130 + }, + { + "epoch": 26.205035971223023, + "grad_norm": 0.28811484575271606, + "learning_rate": 5.648805677499751e-05, + "loss": 0.0196, + "step": 29140 + }, + { + "action_loss": 0.0030822723638266325, + "epoch": 26.205035971223023, + "step": 29140 + }, + { + "epoch": 26.205035971223023, + "step": 29140, + "torque_loss": 0.17272700369358063 + }, + { + "epoch": 26.205035971223023, + "force_loss": 0.002299186307936907, + "step": 29140 + }, + { + "epoch": 26.214028776978417, + "grad_norm": 0.18055102229118347, + "learning_rate": 5.646073095429969e-05, + "loss": 0.0221, + "step": 29150 + }, + { + "action_loss": 0.007661381736397743, + "epoch": 26.214028776978417, + "step": 29150 + }, + { + "epoch": 26.214028776978417, + "step": 29150, + "torque_loss": 0.13292427361011505 + }, + { + "epoch": 26.214028776978417, + "force_loss": 0.00930861011147499, + "step": 29150 + }, + { + "epoch": 26.223021582733814, + "grad_norm": 0.29005342721939087, + "learning_rate": 5.643340317100241e-05, + "loss": 0.0192, + "step": 29160 + }, + { + "action_loss": 0.0030203352216631174, + "epoch": 26.223021582733814, + "step": 29160 + }, + { + "epoch": 26.223021582733814, + "step": 29160, + "torque_loss": 0.12590394914150238 + }, + { + "epoch": 26.223021582733814, + "force_loss": 0.004403008613735437, + "step": 29160 + }, + { + "epoch": 26.23201438848921, + "grad_norm": 0.8712238073348999, + "learning_rate": 5.64060734334071e-05, + "loss": 0.02, + "step": 29170 + }, + { + "action_loss": 0.0030623984057456255, + "epoch": 26.23201438848921, + "step": 29170 + }, + { + "epoch": 26.23201438848921, + "step": 29170, + "torque_loss": 0.11342933028936386 + }, + { + "epoch": 26.23201438848921, + "force_loss": 0.003106334013864398, + "step": 29170 + }, + { + "epoch": 26.241007194244606, + "grad_norm": 0.22020100057125092, + "learning_rate": 5.637874174981583e-05, + "loss": 0.0179, + "step": 29180 + }, + { + "action_loss": 0.004982968792319298, + "epoch": 26.241007194244606, + "step": 29180 + }, + { + "epoch": 26.241007194244606, + "step": 29180, + "torque_loss": 0.1274523288011551 + }, + { + "epoch": 26.241007194244606, + "force_loss": 0.003581090597435832, + "step": 29180 + }, + { + "epoch": 26.25, + "grad_norm": 0.31618985533714294, + "learning_rate": 5.635140812853124e-05, + "loss": 0.0191, + "step": 29190 + }, + { + "action_loss": 0.0024433142971247435, + "epoch": 26.25, + "step": 29190 + }, + { + "epoch": 26.25, + "step": 29190, + "torque_loss": 0.10915540903806686 + }, + { + "epoch": 26.25, + "force_loss": 0.0013166745193302631, + "step": 29190 + }, + { + "epoch": 26.258992805755394, + "grad_norm": 0.26701149344444275, + "learning_rate": 5.6324072577856544e-05, + "loss": 0.0186, + "step": 29200 + }, + { + "action_loss": 0.01693028025329113, + "epoch": 26.258992805755394, + "step": 29200 + }, + { + "epoch": 26.258992805755394, + "step": 29200, + "torque_loss": 0.165596142411232 + }, + { + "epoch": 26.258992805755394, + "force_loss": 0.020471083000302315, + "step": 29200 + }, + { + "epoch": 26.26798561151079, + "grad_norm": 0.29948124289512634, + "learning_rate": 5.629673510609559e-05, + "loss": 0.0191, + "step": 29210 + }, + { + "action_loss": 0.002859042724594474, + "epoch": 26.26798561151079, + "step": 29210 + }, + { + "epoch": 26.26798561151079, + "step": 29210, + "torque_loss": 0.12832000851631165 + }, + { + "epoch": 26.26798561151079, + "force_loss": 0.006577649619430304, + "step": 29210 + }, + { + "epoch": 26.276978417266186, + "grad_norm": 0.28308770060539246, + "learning_rate": 5.626939572155276e-05, + "loss": 0.0204, + "step": 29220 + }, + { + "action_loss": 0.0046681291423738, + "epoch": 26.276978417266186, + "step": 29220 + }, + { + "epoch": 26.276978417266186, + "step": 29220, + "torque_loss": 0.0951072946190834 + }, + { + "epoch": 26.276978417266186, + "force_loss": 0.007679952774196863, + "step": 29220 + }, + { + "epoch": 26.285971223021583, + "grad_norm": 0.21266824007034302, + "learning_rate": 5.6242054432533054e-05, + "loss": 0.0191, + "step": 29230 + }, + { + "action_loss": 0.004444051533937454, + "epoch": 26.285971223021583, + "step": 29230 + }, + { + "epoch": 26.285971223021583, + "step": 29230, + "torque_loss": 0.16044634580612183 + }, + { + "epoch": 26.285971223021583, + "force_loss": 0.006212200969457626, + "step": 29230 + }, + { + "epoch": 26.294964028776977, + "grad_norm": 0.5010757446289062, + "learning_rate": 5.621471124734201e-05, + "loss": 0.019, + "step": 29240 + }, + { + "action_loss": 0.004960187710821629, + "epoch": 26.294964028776977, + "step": 29240 + }, + { + "epoch": 26.294964028776977, + "step": 29240, + "torque_loss": 0.13931436836719513 + }, + { + "epoch": 26.294964028776977, + "force_loss": 0.0017164964228868484, + "step": 29240 + }, + { + "epoch": 26.303956834532375, + "grad_norm": 0.5563440918922424, + "learning_rate": 5.6187366174285794e-05, + "loss": 0.0248, + "step": 29250 + }, + { + "action_loss": 0.003914941567927599, + "epoch": 26.303956834532375, + "step": 29250 + }, + { + "epoch": 26.303956834532375, + "step": 29250, + "torque_loss": 0.19958682358264923 + }, + { + "epoch": 26.303956834532375, + "force_loss": 0.003421303117647767, + "step": 29250 + }, + { + "epoch": 26.31294964028777, + "grad_norm": 0.3078116476535797, + "learning_rate": 5.616001922167109e-05, + "loss": 0.0205, + "step": 29260 + }, + { + "action_loss": 0.0027273904997855425, + "epoch": 26.31294964028777, + "step": 29260 + }, + { + "epoch": 26.31294964028777, + "step": 29260, + "torque_loss": 0.12430980056524277 + }, + { + "epoch": 26.31294964028777, + "force_loss": 0.0021364546846598387, + "step": 29260 + }, + { + "epoch": 26.321942446043167, + "grad_norm": 0.5256577730178833, + "learning_rate": 5.61326703978052e-05, + "loss": 0.0209, + "step": 29270 + }, + { + "action_loss": 0.001910290215164423, + "epoch": 26.321942446043167, + "step": 29270 + }, + { + "epoch": 26.321942446043167, + "step": 29270, + "torque_loss": 0.12559562921524048 + }, + { + "epoch": 26.321942446043167, + "force_loss": 0.0015483573079109192, + "step": 29270 + }, + { + "epoch": 26.33093525179856, + "grad_norm": 0.42534586787223816, + "learning_rate": 5.6105319710995964e-05, + "loss": 0.0189, + "step": 29280 + }, + { + "action_loss": 0.0027956177946180105, + "epoch": 26.33093525179856, + "step": 29280 + }, + { + "epoch": 26.33093525179856, + "step": 29280, + "torque_loss": 0.13496915996074677 + }, + { + "epoch": 26.33093525179856, + "force_loss": 0.0017784927040338516, + "step": 29280 + }, + { + "epoch": 26.33992805755396, + "grad_norm": 0.22000323235988617, + "learning_rate": 5.60779671695518e-05, + "loss": 0.0192, + "step": 29290 + }, + { + "action_loss": 0.003970836754888296, + "epoch": 26.33992805755396, + "step": 29290 + }, + { + "epoch": 26.33992805755396, + "step": 29290, + "torque_loss": 0.1045316755771637 + }, + { + "epoch": 26.33992805755396, + "force_loss": 0.0021295619662851095, + "step": 29290 + }, + { + "epoch": 26.348920863309353, + "grad_norm": 0.2960430383682251, + "learning_rate": 5.6050612781781684e-05, + "loss": 0.0191, + "step": 29300 + }, + { + "action_loss": 0.0014416227350011468, + "epoch": 26.348920863309353, + "step": 29300 + }, + { + "epoch": 26.348920863309353, + "step": 29300, + "torque_loss": 0.12320087105035782 + }, + { + "epoch": 26.348920863309353, + "force_loss": 0.001710645854473114, + "step": 29300 + }, + { + "epoch": 26.357913669064747, + "grad_norm": 0.2702453136444092, + "learning_rate": 5.602325655599516e-05, + "loss": 0.0182, + "step": 29310 + }, + { + "action_loss": 0.012626200914382935, + "epoch": 26.357913669064747, + "step": 29310 + }, + { + "epoch": 26.357913669064747, + "step": 29310, + "torque_loss": 0.13518483936786652 + }, + { + "epoch": 26.357913669064747, + "force_loss": 0.011082902550697327, + "step": 29310 + }, + { + "epoch": 26.366906474820144, + "grad_norm": 0.2371370941400528, + "learning_rate": 5.599589850050234e-05, + "loss": 0.0187, + "step": 29320 + }, + { + "action_loss": 0.002585978014394641, + "epoch": 26.366906474820144, + "step": 29320 + }, + { + "epoch": 26.366906474820144, + "step": 29320, + "torque_loss": 0.14565639197826385 + }, + { + "epoch": 26.366906474820144, + "force_loss": 0.001961111556738615, + "step": 29320 + }, + { + "epoch": 26.37589928057554, + "grad_norm": 0.1642235964536667, + "learning_rate": 5.5968538623613874e-05, + "loss": 0.0189, + "step": 29330 + }, + { + "action_loss": 0.0021901302970945835, + "epoch": 26.37589928057554, + "step": 29330 + }, + { + "epoch": 26.37589928057554, + "step": 29330, + "torque_loss": 0.1027688980102539 + }, + { + "epoch": 26.37589928057554, + "force_loss": 0.0055335513316094875, + "step": 29330 + }, + { + "epoch": 26.384892086330936, + "grad_norm": 0.302939772605896, + "learning_rate": 5.594117693364095e-05, + "loss": 0.0178, + "step": 29340 + }, + { + "action_loss": 0.011924631893634796, + "epoch": 26.384892086330936, + "step": 29340 + }, + { + "epoch": 26.384892086330936, + "step": 29340, + "torque_loss": 0.12345170974731445 + }, + { + "epoch": 26.384892086330936, + "force_loss": 0.004974150564521551, + "step": 29340 + }, + { + "epoch": 26.39388489208633, + "grad_norm": 0.2409353405237198, + "learning_rate": 5.591381343889535e-05, + "loss": 0.0194, + "step": 29350 + }, + { + "action_loss": 0.008909042924642563, + "epoch": 26.39388489208633, + "step": 29350 + }, + { + "epoch": 26.39388489208633, + "step": 29350, + "torque_loss": 0.1389877200126648 + }, + { + "epoch": 26.39388489208633, + "force_loss": 0.010384388267993927, + "step": 29350 + }, + { + "epoch": 26.402877697841728, + "grad_norm": 0.14128169417381287, + "learning_rate": 5.5886448147689355e-05, + "loss": 0.02, + "step": 29360 + }, + { + "action_loss": 0.004463754128664732, + "epoch": 26.402877697841728, + "step": 29360 + }, + { + "epoch": 26.402877697841728, + "step": 29360, + "torque_loss": 0.13355658948421478 + }, + { + "epoch": 26.402877697841728, + "force_loss": 0.008837021887302399, + "step": 29360 + }, + { + "epoch": 26.41187050359712, + "grad_norm": 0.3179236650466919, + "learning_rate": 5.585908106833585e-05, + "loss": 0.0159, + "step": 29370 + }, + { + "action_loss": 0.0023016862105578184, + "epoch": 26.41187050359712, + "step": 29370 + }, + { + "epoch": 26.41187050359712, + "step": 29370, + "torque_loss": 0.1341308206319809 + }, + { + "epoch": 26.41187050359712, + "force_loss": 0.0015278387581929564, + "step": 29370 + }, + { + "epoch": 26.42086330935252, + "grad_norm": 0.21255384385585785, + "learning_rate": 5.5831712209148226e-05, + "loss": 0.0197, + "step": 29380 + }, + { + "action_loss": 0.014123695902526379, + "epoch": 26.42086330935252, + "step": 29380 + }, + { + "epoch": 26.42086330935252, + "step": 29380, + "torque_loss": 0.18206168711185455 + }, + { + "epoch": 26.42086330935252, + "force_loss": 0.023765431717038155, + "step": 29380 + }, + { + "epoch": 26.429856115107913, + "grad_norm": 0.23572397232055664, + "learning_rate": 5.58043415784404e-05, + "loss": 0.0207, + "step": 29390 + }, + { + "action_loss": 0.005147491116076708, + "epoch": 26.429856115107913, + "step": 29390 + }, + { + "epoch": 26.429856115107913, + "step": 29390, + "torque_loss": 0.12641461193561554 + }, + { + "epoch": 26.429856115107913, + "force_loss": 0.005691490601748228, + "step": 29390 + }, + { + "epoch": 26.43884892086331, + "grad_norm": 0.32689836621284485, + "learning_rate": 5.577696918452686e-05, + "loss": 0.0186, + "step": 29400 + }, + { + "action_loss": 0.0063631427474319935, + "epoch": 26.43884892086331, + "step": 29400 + }, + { + "epoch": 26.43884892086331, + "step": 29400, + "torque_loss": 0.2093459963798523 + }, + { + "epoch": 26.43884892086331, + "force_loss": 0.004051268566399813, + "step": 29400 + }, + { + "epoch": 26.447841726618705, + "grad_norm": 0.1445700079202652, + "learning_rate": 5.5749595035722604e-05, + "loss": 0.0345, + "step": 29410 + }, + { + "action_loss": 0.005245273467153311, + "epoch": 26.447841726618705, + "step": 29410 + }, + { + "epoch": 26.447841726618705, + "step": 29410, + "torque_loss": 0.10218346863985062 + }, + { + "epoch": 26.447841726618705, + "force_loss": 0.0031239886302500963, + "step": 29410 + }, + { + "epoch": 26.4568345323741, + "grad_norm": 0.1571255773305893, + "learning_rate": 5.5722219140343193e-05, + "loss": 0.0241, + "step": 29420 + }, + { + "action_loss": 0.003722322406247258, + "epoch": 26.4568345323741, + "step": 29420 + }, + { + "epoch": 26.4568345323741, + "step": 29420, + "torque_loss": 0.10764622688293457 + }, + { + "epoch": 26.4568345323741, + "force_loss": 0.0037944603245705366, + "step": 29420 + }, + { + "epoch": 26.465827338129497, + "grad_norm": 0.4807431399822235, + "learning_rate": 5.56948415067047e-05, + "loss": 0.0206, + "step": 29430 + }, + { + "action_loss": 0.018917372450232506, + "epoch": 26.465827338129497, + "step": 29430 + }, + { + "epoch": 26.465827338129497, + "step": 29430, + "torque_loss": 0.18301136791706085 + }, + { + "epoch": 26.465827338129497, + "force_loss": 0.020734211429953575, + "step": 29430 + }, + { + "epoch": 26.47482014388489, + "grad_norm": 0.232110857963562, + "learning_rate": 5.5667462143123704e-05, + "loss": 0.0245, + "step": 29440 + }, + { + "action_loss": 0.002458919771015644, + "epoch": 26.47482014388489, + "step": 29440 + }, + { + "epoch": 26.47482014388489, + "step": 29440, + "torque_loss": 0.15756447613239288 + }, + { + "epoch": 26.47482014388489, + "force_loss": 0.001853745779953897, + "step": 29440 + }, + { + "epoch": 26.48381294964029, + "grad_norm": 0.1807197630405426, + "learning_rate": 5.564008105791737e-05, + "loss": 0.0171, + "step": 29450 + }, + { + "action_loss": 0.004783970769494772, + "epoch": 26.48381294964029, + "step": 29450 + }, + { + "epoch": 26.48381294964029, + "step": 29450, + "torque_loss": 0.10162629932165146 + }, + { + "epoch": 26.48381294964029, + "force_loss": 0.0039339023642241955, + "step": 29450 + }, + { + "epoch": 26.492805755395683, + "grad_norm": 0.35580533742904663, + "learning_rate": 5.5612698259403316e-05, + "loss": 0.023, + "step": 29460 + }, + { + "action_loss": 0.0036192673724144697, + "epoch": 26.492805755395683, + "step": 29460 + }, + { + "epoch": 26.492805755395683, + "step": 29460, + "torque_loss": 0.10597450286149979 + }, + { + "epoch": 26.492805755395683, + "force_loss": 0.004650396760553122, + "step": 29460 + }, + { + "epoch": 26.50179856115108, + "grad_norm": 0.24788348376750946, + "learning_rate": 5.5585313755899724e-05, + "loss": 0.0224, + "step": 29470 + }, + { + "action_loss": 0.010820182971656322, + "epoch": 26.50179856115108, + "step": 29470 + }, + { + "epoch": 26.50179856115108, + "step": 29470, + "torque_loss": 0.11108478903770447 + }, + { + "epoch": 26.50179856115108, + "force_loss": 0.005138583481311798, + "step": 29470 + }, + { + "epoch": 26.510791366906474, + "grad_norm": 0.6962348222732544, + "learning_rate": 5.5557927555725285e-05, + "loss": 0.0243, + "step": 29480 + }, + { + "action_loss": 0.003334566717967391, + "epoch": 26.510791366906474, + "step": 29480 + }, + { + "epoch": 26.510791366906474, + "step": 29480, + "torque_loss": 0.1051768884062767 + }, + { + "epoch": 26.510791366906474, + "force_loss": 0.004038270562887192, + "step": 29480 + }, + { + "epoch": 26.519784172661872, + "grad_norm": 0.2491716593503952, + "learning_rate": 5.55305396671992e-05, + "loss": 0.0177, + "step": 29490 + }, + { + "action_loss": 0.004448357503861189, + "epoch": 26.519784172661872, + "step": 29490 + }, + { + "epoch": 26.519784172661872, + "step": 29490, + "torque_loss": 0.09822774678468704 + }, + { + "epoch": 26.519784172661872, + "force_loss": 0.00592459924519062, + "step": 29490 + }, + { + "epoch": 26.528776978417266, + "grad_norm": 0.4447100758552551, + "learning_rate": 5.55031500986412e-05, + "loss": 0.0238, + "step": 29500 + }, + { + "action_loss": 0.0025491274427622557, + "epoch": 26.528776978417266, + "step": 29500 + }, + { + "epoch": 26.528776978417266, + "step": 29500, + "torque_loss": 0.10773605853319168 + }, + { + "epoch": 26.528776978417266, + "force_loss": 0.0015268739080056548, + "step": 29500 + }, + { + "epoch": 26.53776978417266, + "grad_norm": 0.5370615720748901, + "learning_rate": 5.547575885837149e-05, + "loss": 0.021, + "step": 29510 + }, + { + "action_loss": 0.004978204611688852, + "epoch": 26.53776978417266, + "step": 29510 + }, + { + "epoch": 26.53776978417266, + "step": 29510, + "torque_loss": 0.16674725711345673 + }, + { + "epoch": 26.53776978417266, + "force_loss": 0.00587942311540246, + "step": 29510 + }, + { + "epoch": 26.546762589928058, + "grad_norm": 0.20173229277133942, + "learning_rate": 5.5448365954710825e-05, + "loss": 0.022, + "step": 29520 + }, + { + "action_loss": 0.018634973093867302, + "epoch": 26.546762589928058, + "step": 29520 + }, + { + "epoch": 26.546762589928058, + "step": 29520, + "torque_loss": 0.17407572269439697 + }, + { + "epoch": 26.546762589928058, + "force_loss": 0.0200891625136137, + "step": 29520 + }, + { + "epoch": 26.555755395683452, + "grad_norm": 0.4266655743122101, + "learning_rate": 5.5420971395980446e-05, + "loss": 0.0229, + "step": 29530 + }, + { + "action_loss": 0.011320815421640873, + "epoch": 26.555755395683452, + "step": 29530 + }, + { + "epoch": 26.555755395683452, + "step": 29530, + "torque_loss": 0.12391838431358337 + }, + { + "epoch": 26.555755395683452, + "force_loss": 0.011937855742871761, + "step": 29530 + }, + { + "epoch": 26.56474820143885, + "grad_norm": 0.3762349486351013, + "learning_rate": 5.539357519050209e-05, + "loss": 0.022, + "step": 29540 + }, + { + "action_loss": 0.004747358616441488, + "epoch": 26.56474820143885, + "step": 29540 + }, + { + "epoch": 26.56474820143885, + "step": 29540, + "torque_loss": 0.09421396255493164 + }, + { + "epoch": 26.56474820143885, + "force_loss": 0.004423636011779308, + "step": 29540 + }, + { + "epoch": 26.573741007194243, + "grad_norm": 0.2552447021007538, + "learning_rate": 5.536617734659799e-05, + "loss": 0.0224, + "step": 29550 + }, + { + "action_loss": 0.0025371320080012083, + "epoch": 26.573741007194243, + "step": 29550 + }, + { + "epoch": 26.573741007194243, + "step": 29550, + "torque_loss": 0.09377231448888779 + }, + { + "epoch": 26.573741007194243, + "force_loss": 0.0017519989050924778, + "step": 29550 + }, + { + "epoch": 26.58273381294964, + "grad_norm": 0.46805620193481445, + "learning_rate": 5.533877787259091e-05, + "loss": 0.0177, + "step": 29560 + }, + { + "action_loss": 0.004050894174724817, + "epoch": 26.58273381294964, + "step": 29560 + }, + { + "epoch": 26.58273381294964, + "step": 29560, + "torque_loss": 0.12956112623214722 + }, + { + "epoch": 26.58273381294964, + "force_loss": 0.0105274124071002, + "step": 29560 + }, + { + "epoch": 26.591726618705035, + "grad_norm": 0.22202341258525848, + "learning_rate": 5.5311376776804044e-05, + "loss": 0.0202, + "step": 29570 + }, + { + "action_loss": 0.0020610897336155176, + "epoch": 26.591726618705035, + "step": 29570 + }, + { + "epoch": 26.591726618705035, + "step": 29570, + "torque_loss": 0.1485256403684616 + }, + { + "epoch": 26.591726618705035, + "force_loss": 0.0010620758403092623, + "step": 29570 + }, + { + "epoch": 26.600719424460433, + "grad_norm": 0.22027987241744995, + "learning_rate": 5.528397406756118e-05, + "loss": 0.0213, + "step": 29580 + }, + { + "action_loss": 0.007421335205435753, + "epoch": 26.600719424460433, + "step": 29580 + }, + { + "epoch": 26.600719424460433, + "step": 29580, + "torque_loss": 0.15243853628635406 + }, + { + "epoch": 26.600719424460433, + "force_loss": 0.008105410262942314, + "step": 29580 + }, + { + "epoch": 26.609712230215827, + "grad_norm": 0.18835632503032684, + "learning_rate": 5.525656975318652e-05, + "loss": 0.0174, + "step": 29590 + }, + { + "action_loss": 0.0021108174696564674, + "epoch": 26.609712230215827, + "step": 29590 + }, + { + "epoch": 26.609712230215827, + "step": 29590, + "torque_loss": 0.15360350906848907 + }, + { + "epoch": 26.609712230215827, + "force_loss": 0.0019917127210646868, + "step": 29590 + }, + { + "epoch": 26.618705035971225, + "grad_norm": 0.1570572853088379, + "learning_rate": 5.522916384200474e-05, + "loss": 0.0158, + "step": 29600 + }, + { + "action_loss": 0.003603878431022167, + "epoch": 26.618705035971225, + "step": 29600 + }, + { + "epoch": 26.618705035971225, + "step": 29600, + "torque_loss": 0.1303596943616867 + }, + { + "epoch": 26.618705035971225, + "force_loss": 0.004055473022162914, + "step": 29600 + }, + { + "epoch": 26.62769784172662, + "grad_norm": 0.215669184923172, + "learning_rate": 5.520175634234106e-05, + "loss": 0.0182, + "step": 29610 + }, + { + "action_loss": 0.0026220858562737703, + "epoch": 26.62769784172662, + "step": 29610 + }, + { + "epoch": 26.62769784172662, + "step": 29610, + "torque_loss": 0.12466021627187729 + }, + { + "epoch": 26.62769784172662, + "force_loss": 0.002202659845352173, + "step": 29610 + }, + { + "epoch": 26.636690647482013, + "grad_norm": 0.45026540756225586, + "learning_rate": 5.517434726252113e-05, + "loss": 0.0201, + "step": 29620 + }, + { + "action_loss": 0.004916761536151171, + "epoch": 26.636690647482013, + "step": 29620 + }, + { + "epoch": 26.636690647482013, + "step": 29620, + "torque_loss": 0.13714423775672913 + }, + { + "epoch": 26.636690647482013, + "force_loss": 0.0044364831410348415, + "step": 29620 + }, + { + "epoch": 26.64568345323741, + "grad_norm": 0.2543215751647949, + "learning_rate": 5.514693661087113e-05, + "loss": 0.0187, + "step": 29630 + }, + { + "action_loss": 0.0063333637081086636, + "epoch": 26.64568345323741, + "step": 29630 + }, + { + "epoch": 26.64568345323741, + "step": 29630, + "torque_loss": 0.13830113410949707 + }, + { + "epoch": 26.64568345323741, + "force_loss": 0.004002048168331385, + "step": 29630 + }, + { + "epoch": 26.654676258992804, + "grad_norm": 0.30882659554481506, + "learning_rate": 5.511952439571769e-05, + "loss": 0.0176, + "step": 29640 + }, + { + "action_loss": 0.013768695294857025, + "epoch": 26.654676258992804, + "step": 29640 + }, + { + "epoch": 26.654676258992804, + "step": 29640, + "torque_loss": 0.13508917391300201 + }, + { + "epoch": 26.654676258992804, + "force_loss": 0.02008710615336895, + "step": 29640 + }, + { + "epoch": 26.663669064748202, + "grad_norm": 0.19365213811397552, + "learning_rate": 5.509211062538791e-05, + "loss": 0.0187, + "step": 29650 + }, + { + "action_loss": 0.013977513648569584, + "epoch": 26.663669064748202, + "step": 29650 + }, + { + "epoch": 26.663669064748202, + "step": 29650, + "torque_loss": 0.11426761001348495 + }, + { + "epoch": 26.663669064748202, + "force_loss": 0.006032731384038925, + "step": 29650 + }, + { + "epoch": 26.672661870503596, + "grad_norm": 0.3829696774482727, + "learning_rate": 5.506469530820939e-05, + "loss": 0.0214, + "step": 29660 + }, + { + "action_loss": 0.005374563857913017, + "epoch": 26.672661870503596, + "step": 29660 + }, + { + "epoch": 26.672661870503596, + "step": 29660, + "torque_loss": 0.09295514971017838 + }, + { + "epoch": 26.672661870503596, + "force_loss": 0.0066895452328026295, + "step": 29660 + }, + { + "epoch": 26.681654676258994, + "grad_norm": 0.2453082650899887, + "learning_rate": 5.503727845251014e-05, + "loss": 0.0187, + "step": 29670 + }, + { + "action_loss": 0.0045484015718102455, + "epoch": 26.681654676258994, + "step": 29670 + }, + { + "epoch": 26.681654676258994, + "step": 29670, + "torque_loss": 0.1226678192615509 + }, + { + "epoch": 26.681654676258994, + "force_loss": 0.0032400668133050203, + "step": 29670 + }, + { + "epoch": 26.690647482014388, + "grad_norm": 0.18619810044765472, + "learning_rate": 5.50098600666187e-05, + "loss": 0.021, + "step": 29680 + }, + { + "action_loss": 0.0037671327590942383, + "epoch": 26.690647482014388, + "step": 29680 + }, + { + "epoch": 26.690647482014388, + "step": 29680, + "torque_loss": 0.13072626292705536 + }, + { + "epoch": 26.690647482014388, + "force_loss": 0.005459554959088564, + "step": 29680 + }, + { + "epoch": 26.699640287769785, + "grad_norm": 0.3322921395301819, + "learning_rate": 5.498244015886406e-05, + "loss": 0.0188, + "step": 29690 + }, + { + "action_loss": 0.003932202234864235, + "epoch": 26.699640287769785, + "step": 29690 + }, + { + "epoch": 26.699640287769785, + "step": 29690, + "torque_loss": 0.15101401507854462 + }, + { + "epoch": 26.699640287769785, + "force_loss": 0.00469462713226676, + "step": 29690 + }, + { + "epoch": 26.70863309352518, + "grad_norm": 0.2403191328048706, + "learning_rate": 5.495501873757565e-05, + "loss": 0.0192, + "step": 29700 + }, + { + "action_loss": 0.005818183068186045, + "epoch": 26.70863309352518, + "step": 29700 + }, + { + "epoch": 26.70863309352518, + "step": 29700, + "torque_loss": 0.15367823839187622 + }, + { + "epoch": 26.70863309352518, + "force_loss": 0.007292923983186483, + "step": 29700 + }, + { + "epoch": 26.717625899280577, + "grad_norm": 0.21703964471817017, + "learning_rate": 5.492759581108336e-05, + "loss": 0.0188, + "step": 29710 + }, + { + "action_loss": 0.006756692659109831, + "epoch": 26.717625899280577, + "step": 29710 + }, + { + "epoch": 26.717625899280577, + "step": 29710, + "torque_loss": 0.13324739038944244 + }, + { + "epoch": 26.717625899280577, + "force_loss": 0.004057153128087521, + "step": 29710 + }, + { + "epoch": 26.72661870503597, + "grad_norm": 0.3107157349586487, + "learning_rate": 5.490017138771759e-05, + "loss": 0.021, + "step": 29720 + }, + { + "action_loss": 0.010310097597539425, + "epoch": 26.72661870503597, + "step": 29720 + }, + { + "epoch": 26.72661870503597, + "step": 29720, + "torque_loss": 0.15190069377422333 + }, + { + "epoch": 26.72661870503597, + "force_loss": 0.009750566445291042, + "step": 29720 + }, + { + "epoch": 26.735611510791365, + "grad_norm": 0.21111157536506653, + "learning_rate": 5.487274547580912e-05, + "loss": 0.0196, + "step": 29730 + }, + { + "action_loss": 0.013572419993579388, + "epoch": 26.735611510791365, + "step": 29730 + }, + { + "epoch": 26.735611510791365, + "step": 29730, + "torque_loss": 0.14122606813907623 + }, + { + "epoch": 26.735611510791365, + "force_loss": 0.013210773468017578, + "step": 29730 + }, + { + "epoch": 26.744604316546763, + "grad_norm": 0.30866900086402893, + "learning_rate": 5.484531808368923e-05, + "loss": 0.0198, + "step": 29740 + }, + { + "action_loss": 0.002811520593240857, + "epoch": 26.744604316546763, + "step": 29740 + }, + { + "epoch": 26.744604316546763, + "step": 29740, + "torque_loss": 0.13623277842998505 + }, + { + "epoch": 26.744604316546763, + "force_loss": 0.0024728986900299788, + "step": 29740 + }, + { + "epoch": 26.753597122302157, + "grad_norm": 0.2113165259361267, + "learning_rate": 5.4817889219689656e-05, + "loss": 0.0185, + "step": 29750 + }, + { + "action_loss": 0.00368649885058403, + "epoch": 26.753597122302157, + "step": 29750 + }, + { + "epoch": 26.753597122302157, + "step": 29750, + "torque_loss": 0.09717867523431778 + }, + { + "epoch": 26.753597122302157, + "force_loss": 0.0054848454892635345, + "step": 29750 + }, + { + "epoch": 26.762589928057555, + "grad_norm": 0.37648633122444153, + "learning_rate": 5.4790458892142536e-05, + "loss": 0.0188, + "step": 29760 + }, + { + "action_loss": 0.0013744235038757324, + "epoch": 26.762589928057555, + "step": 29760 + }, + { + "epoch": 26.762589928057555, + "step": 29760, + "torque_loss": 0.09067843109369278 + }, + { + "epoch": 26.762589928057555, + "force_loss": 0.0010968871647492051, + "step": 29760 + }, + { + "epoch": 26.77158273381295, + "grad_norm": 0.3552525043487549, + "learning_rate": 5.476302710938048e-05, + "loss": 0.0177, + "step": 29770 + }, + { + "action_loss": 0.005411986727267504, + "epoch": 26.77158273381295, + "step": 29770 + }, + { + "epoch": 26.77158273381295, + "step": 29770, + "torque_loss": 0.10153061151504517 + }, + { + "epoch": 26.77158273381295, + "force_loss": 0.008366730064153671, + "step": 29770 + }, + { + "epoch": 26.780575539568346, + "grad_norm": 0.13714280724525452, + "learning_rate": 5.473559387973657e-05, + "loss": 0.0202, + "step": 29780 + }, + { + "action_loss": 0.008373484946787357, + "epoch": 26.780575539568346, + "step": 29780 + }, + { + "epoch": 26.780575539568346, + "step": 29780, + "torque_loss": 0.13095788657665253 + }, + { + "epoch": 26.780575539568346, + "force_loss": 0.007003776729106903, + "step": 29780 + }, + { + "epoch": 26.78956834532374, + "grad_norm": 0.3039674162864685, + "learning_rate": 5.470815921154425e-05, + "loss": 0.0196, + "step": 29790 + }, + { + "action_loss": 0.0020388788543641567, + "epoch": 26.78956834532374, + "step": 29790 + }, + { + "epoch": 26.78956834532374, + "step": 29790, + "torque_loss": 0.09394717216491699 + }, + { + "epoch": 26.78956834532374, + "force_loss": 0.001609127502888441, + "step": 29790 + }, + { + "epoch": 26.798561151079138, + "grad_norm": 0.11895343661308289, + "learning_rate": 5.468072311313749e-05, + "loss": 0.0155, + "step": 29800 + }, + { + "action_loss": 0.003226721426472068, + "epoch": 26.798561151079138, + "step": 29800 + }, + { + "epoch": 26.798561151079138, + "step": 29800, + "torque_loss": 0.13606567680835724 + }, + { + "epoch": 26.798561151079138, + "force_loss": 0.005955897271633148, + "step": 29800 + }, + { + "epoch": 26.807553956834532, + "grad_norm": 0.21088936924934387, + "learning_rate": 5.465328559285063e-05, + "loss": 0.0186, + "step": 29810 + }, + { + "action_loss": 0.0025117734912782907, + "epoch": 26.807553956834532, + "step": 29810 + }, + { + "epoch": 26.807553956834532, + "step": 29810, + "torque_loss": 0.1175898090004921 + }, + { + "epoch": 26.807553956834532, + "force_loss": 0.0012616555904969573, + "step": 29810 + }, + { + "epoch": 26.81654676258993, + "grad_norm": 0.24970100820064545, + "learning_rate": 5.462584665901849e-05, + "loss": 0.0174, + "step": 29820 + }, + { + "action_loss": 0.0017017270438373089, + "epoch": 26.81654676258993, + "step": 29820 + }, + { + "epoch": 26.81654676258993, + "step": 29820, + "torque_loss": 0.12260963767766953 + }, + { + "epoch": 26.81654676258993, + "force_loss": 0.0018574738642200828, + "step": 29820 + }, + { + "epoch": 26.825539568345324, + "grad_norm": 0.5178494453430176, + "learning_rate": 5.4598406319976235e-05, + "loss": 0.0215, + "step": 29830 + }, + { + "action_loss": 0.040359657257795334, + "epoch": 26.825539568345324, + "step": 29830 + }, + { + "epoch": 26.825539568345324, + "step": 29830, + "torque_loss": 0.1942255049943924 + }, + { + "epoch": 26.825539568345324, + "force_loss": 0.04720534011721611, + "step": 29830 + }, + { + "epoch": 26.834532374100718, + "grad_norm": 0.4567413628101349, + "learning_rate": 5.457096458405958e-05, + "loss": 0.0269, + "step": 29840 + }, + { + "action_loss": 0.0025946127716451883, + "epoch": 26.834532374100718, + "step": 29840 + }, + { + "epoch": 26.834532374100718, + "step": 29840, + "torque_loss": 0.10776170343160629 + }, + { + "epoch": 26.834532374100718, + "force_loss": 0.003184608416631818, + "step": 29840 + }, + { + "epoch": 26.843525179856115, + "grad_norm": 0.3079434931278229, + "learning_rate": 5.454352145960457e-05, + "loss": 0.0194, + "step": 29850 + }, + { + "action_loss": 0.002686734078451991, + "epoch": 26.843525179856115, + "step": 29850 + }, + { + "epoch": 26.843525179856115, + "step": 29850, + "torque_loss": 0.14496742188930511 + }, + { + "epoch": 26.843525179856115, + "force_loss": 0.002164140110835433, + "step": 29850 + }, + { + "epoch": 26.85251798561151, + "grad_norm": 0.18310968577861786, + "learning_rate": 5.4516076954947715e-05, + "loss": 0.0181, + "step": 29860 + }, + { + "action_loss": 0.0035923875402659178, + "epoch": 26.85251798561151, + "step": 29860 + }, + { + "epoch": 26.85251798561151, + "step": 29860, + "torque_loss": 0.13412249088287354 + }, + { + "epoch": 26.85251798561151, + "force_loss": 0.002886866917833686, + "step": 29860 + }, + { + "epoch": 26.861510791366907, + "grad_norm": 0.5331716537475586, + "learning_rate": 5.448863107842591e-05, + "loss": 0.0201, + "step": 29870 + }, + { + "action_loss": 0.00453979754820466, + "epoch": 26.861510791366907, + "step": 29870 + }, + { + "epoch": 26.861510791366907, + "step": 29870, + "torque_loss": 0.13979299366474152 + }, + { + "epoch": 26.861510791366907, + "force_loss": 0.005563817452639341, + "step": 29870 + }, + { + "epoch": 26.8705035971223, + "grad_norm": 0.35070788860321045, + "learning_rate": 5.446118383837651e-05, + "loss": 0.0183, + "step": 29880 + }, + { + "action_loss": 0.019334664568305016, + "epoch": 26.8705035971223, + "step": 29880 + }, + { + "epoch": 26.8705035971223, + "step": 29880, + "torque_loss": 0.16253288090229034 + }, + { + "epoch": 26.8705035971223, + "force_loss": 0.03553791344165802, + "step": 29880 + }, + { + "epoch": 26.8794964028777, + "grad_norm": 0.3043481707572937, + "learning_rate": 5.443373524313722e-05, + "loss": 0.0217, + "step": 29890 + }, + { + "action_loss": 0.0036943492013961077, + "epoch": 26.8794964028777, + "step": 29890 + }, + { + "epoch": 26.8794964028777, + "step": 29890, + "torque_loss": 0.10507988184690475 + }, + { + "epoch": 26.8794964028777, + "force_loss": 0.002917099744081497, + "step": 29890 + }, + { + "epoch": 26.888489208633093, + "grad_norm": 0.21925991773605347, + "learning_rate": 5.440628530104626e-05, + "loss": 0.0176, + "step": 29900 + }, + { + "action_loss": 0.0013846675865352154, + "epoch": 26.888489208633093, + "step": 29900 + }, + { + "epoch": 26.888489208633093, + "step": 29900, + "torque_loss": 0.13571317493915558 + }, + { + "epoch": 26.888489208633093, + "force_loss": 0.001583204255439341, + "step": 29900 + }, + { + "epoch": 26.89748201438849, + "grad_norm": 0.11622114479541779, + "learning_rate": 5.4378834020442146e-05, + "loss": 0.0178, + "step": 29910 + }, + { + "action_loss": 0.005388239398598671, + "epoch": 26.89748201438849, + "step": 29910 + }, + { + "epoch": 26.89748201438849, + "step": 29910, + "torque_loss": 0.1476847529411316 + }, + { + "epoch": 26.89748201438849, + "force_loss": 0.0067005157470703125, + "step": 29910 + }, + { + "epoch": 26.906474820143885, + "grad_norm": 0.3066387474536896, + "learning_rate": 5.4351381409663884e-05, + "loss": 0.0168, + "step": 29920 + }, + { + "action_loss": 0.0020175655372440815, + "epoch": 26.906474820143885, + "step": 29920 + }, + { + "epoch": 26.906474820143885, + "step": 29920, + "torque_loss": 0.1331719160079956 + }, + { + "epoch": 26.906474820143885, + "force_loss": 0.002690813271328807, + "step": 29920 + }, + { + "epoch": 26.915467625899282, + "grad_norm": 0.27886462211608887, + "learning_rate": 5.432392747705084e-05, + "loss": 0.0173, + "step": 29930 + }, + { + "action_loss": 0.004540421534329653, + "epoch": 26.915467625899282, + "step": 29930 + }, + { + "epoch": 26.915467625899282, + "step": 29930, + "torque_loss": 0.16133111715316772 + }, + { + "epoch": 26.915467625899282, + "force_loss": 0.0035180847626179457, + "step": 29930 + }, + { + "epoch": 26.924460431654676, + "grad_norm": 0.13023129105567932, + "learning_rate": 5.429647223094278e-05, + "loss": 0.017, + "step": 29940 + }, + { + "action_loss": 0.007440451066941023, + "epoch": 26.924460431654676, + "step": 29940 + }, + { + "epoch": 26.924460431654676, + "step": 29940, + "torque_loss": 0.13692821562290192 + }, + { + "epoch": 26.924460431654676, + "force_loss": 0.007159040775150061, + "step": 29940 + }, + { + "epoch": 26.93345323741007, + "grad_norm": 0.124339759349823, + "learning_rate": 5.4269015679679924e-05, + "loss": 0.019, + "step": 29950 + }, + { + "action_loss": 0.0022405320778489113, + "epoch": 26.93345323741007, + "step": 29950 + }, + { + "epoch": 26.93345323741007, + "step": 29950, + "torque_loss": 0.114387147128582 + }, + { + "epoch": 26.93345323741007, + "force_loss": 0.0014298348687589169, + "step": 29950 + }, + { + "epoch": 26.942446043165468, + "grad_norm": 0.37802475690841675, + "learning_rate": 5.424155783160281e-05, + "loss": 0.0153, + "step": 29960 + }, + { + "action_loss": 0.009256296791136265, + "epoch": 26.942446043165468, + "step": 29960 + }, + { + "epoch": 26.942446043165468, + "step": 29960, + "torque_loss": 0.1303991824388504 + }, + { + "epoch": 26.942446043165468, + "force_loss": 0.003956787753850222, + "step": 29960 + }, + { + "epoch": 26.951438848920862, + "grad_norm": 0.27281561493873596, + "learning_rate": 5.4214098695052415e-05, + "loss": 0.0213, + "step": 29970 + }, + { + "action_loss": 0.019115492701530457, + "epoch": 26.951438848920862, + "step": 29970 + }, + { + "epoch": 26.951438848920862, + "step": 29970, + "torque_loss": 0.1212594285607338 + }, + { + "epoch": 26.951438848920862, + "force_loss": 0.017049849033355713, + "step": 29970 + }, + { + "epoch": 26.96043165467626, + "grad_norm": 0.28068768978118896, + "learning_rate": 5.418663827837012e-05, + "loss": 0.0226, + "step": 29980 + }, + { + "action_loss": 0.0035388183314353228, + "epoch": 26.96043165467626, + "step": 29980 + }, + { + "epoch": 26.96043165467626, + "step": 29980, + "torque_loss": 0.12805412709712982 + }, + { + "epoch": 26.96043165467626, + "force_loss": 0.0059658922255039215, + "step": 29980 + }, + { + "epoch": 26.969424460431654, + "grad_norm": 0.18451905250549316, + "learning_rate": 5.415917658989763e-05, + "loss": 0.0173, + "step": 29990 + }, + { + "action_loss": 0.0013209698954597116, + "epoch": 26.969424460431654, + "step": 29990 + }, + { + "epoch": 26.969424460431654, + "step": 29990, + "torque_loss": 0.09117806702852249 + }, + { + "epoch": 26.969424460431654, + "force_loss": 0.0021667832043021917, + "step": 29990 + }, + { + "epoch": 26.97841726618705, + "grad_norm": 0.26481354236602783, + "learning_rate": 5.413171363797713e-05, + "loss": 0.0207, + "step": 30000 + }, + { + "action_loss": 0.00438087759539485, + "epoch": 26.97841726618705, + "step": 30000 + }, + { + "epoch": 26.97841726618705, + "step": 30000, + "torque_loss": 0.17940616607666016 + }, + { + "epoch": 26.97841726618705, + "force_loss": 0.0053337570279836655, + "step": 30000 + }, + { + "epoch": 26.987410071942445, + "grad_norm": 0.2747499346733093, + "learning_rate": 5.4104249430951116e-05, + "loss": 0.0176, + "step": 30010 + }, + { + "action_loss": 0.0031281306874006987, + "epoch": 26.987410071942445, + "step": 30010 + }, + { + "epoch": 26.987410071942445, + "step": 30010, + "torque_loss": 0.15624935925006866 + }, + { + "epoch": 26.987410071942445, + "force_loss": 0.006158431526273489, + "step": 30010 + }, + { + "epoch": 26.996402877697843, + "grad_norm": 0.525160014629364, + "learning_rate": 5.4076783977162494e-05, + "loss": 0.019, + "step": 30020 + }, + { + "action_loss": 0.005360439419746399, + "epoch": 26.996402877697843, + "step": 30020 + }, + { + "epoch": 26.996402877697843, + "step": 30020, + "torque_loss": 0.12258223444223404 + }, + { + "epoch": 26.996402877697843, + "force_loss": 0.009998547844588757, + "step": 30020 + }, + { + "epoch": 27.005395683453237, + "grad_norm": 0.27929526567459106, + "learning_rate": 5.4049317284954525e-05, + "loss": 0.0178, + "step": 30030 + }, + { + "action_loss": 0.003984289709478617, + "epoch": 27.005395683453237, + "step": 30030 + }, + { + "epoch": 27.005395683453237, + "step": 30030, + "torque_loss": 0.1588083952665329 + }, + { + "epoch": 27.005395683453237, + "force_loss": 0.015379909425973892, + "step": 30030 + }, + { + "epoch": 27.014388489208635, + "grad_norm": 0.17650508880615234, + "learning_rate": 5.4021849362670884e-05, + "loss": 0.0235, + "step": 30040 + }, + { + "action_loss": 0.0028035659343004227, + "epoch": 27.014388489208635, + "step": 30040 + }, + { + "epoch": 27.014388489208635, + "step": 30040, + "torque_loss": 0.08240079879760742 + }, + { + "epoch": 27.014388489208635, + "force_loss": 0.0018409885233268142, + "step": 30040 + }, + { + "epoch": 27.02338129496403, + "grad_norm": 0.2881733775138855, + "learning_rate": 5.3994380218655604e-05, + "loss": 0.0209, + "step": 30050 + }, + { + "action_loss": 0.005864724982529879, + "epoch": 27.02338129496403, + "step": 30050 + }, + { + "epoch": 27.02338129496403, + "step": 30050, + "torque_loss": 0.11897751688957214 + }, + { + "epoch": 27.02338129496403, + "force_loss": 0.007947350852191448, + "step": 30050 + }, + { + "epoch": 27.032374100719423, + "grad_norm": 0.28892263770103455, + "learning_rate": 5.396690986125309e-05, + "loss": 0.0198, + "step": 30060 + }, + { + "action_loss": 0.0025683999992907047, + "epoch": 27.032374100719423, + "step": 30060 + }, + { + "epoch": 27.032374100719423, + "step": 30060, + "torque_loss": 0.13800550997257233 + }, + { + "epoch": 27.032374100719423, + "force_loss": 0.0018803342245519161, + "step": 30060 + }, + { + "epoch": 27.04136690647482, + "grad_norm": 0.3276672959327698, + "learning_rate": 5.3939438298808075e-05, + "loss": 0.0204, + "step": 30070 + }, + { + "action_loss": 0.0027023537550121546, + "epoch": 27.04136690647482, + "step": 30070 + }, + { + "epoch": 27.04136690647482, + "step": 30070, + "torque_loss": 0.13884642720222473 + }, + { + "epoch": 27.04136690647482, + "force_loss": 0.003128311363980174, + "step": 30070 + }, + { + "epoch": 27.050359712230215, + "grad_norm": 0.450895756483078, + "learning_rate": 5.3911965539665744e-05, + "loss": 0.0216, + "step": 30080 + }, + { + "action_loss": 0.007366721052676439, + "epoch": 27.050359712230215, + "step": 30080 + }, + { + "epoch": 27.050359712230215, + "step": 30080, + "torque_loss": 0.11305730789899826 + }, + { + "epoch": 27.050359712230215, + "force_loss": 0.0037769523914903402, + "step": 30080 + }, + { + "epoch": 27.059352517985612, + "grad_norm": 0.21291057765483856, + "learning_rate": 5.388449159217156e-05, + "loss": 0.0203, + "step": 30090 + }, + { + "action_loss": 0.004496064502745867, + "epoch": 27.059352517985612, + "step": 30090 + }, + { + "epoch": 27.059352517985612, + "step": 30090, + "torque_loss": 0.1814671754837036 + }, + { + "epoch": 27.059352517985612, + "force_loss": 0.004324872046709061, + "step": 30090 + }, + { + "epoch": 27.068345323741006, + "grad_norm": 0.15907500684261322, + "learning_rate": 5.3857016464671385e-05, + "loss": 0.0182, + "step": 30100 + }, + { + "action_loss": 0.0033876586239784956, + "epoch": 27.068345323741006, + "step": 30100 + }, + { + "epoch": 27.068345323741006, + "step": 30100, + "torque_loss": 0.12786339223384857 + }, + { + "epoch": 27.068345323741006, + "force_loss": 0.0033877205569297075, + "step": 30100 + }, + { + "epoch": 27.077338129496404, + "grad_norm": 0.1309467852115631, + "learning_rate": 5.382954016551146e-05, + "loss": 0.0171, + "step": 30110 + }, + { + "action_loss": 0.002663847291842103, + "epoch": 27.077338129496404, + "step": 30110 + }, + { + "epoch": 27.077338129496404, + "step": 30110, + "torque_loss": 0.11073434352874756 + }, + { + "epoch": 27.077338129496404, + "force_loss": 0.004452723544090986, + "step": 30110 + }, + { + "epoch": 27.086330935251798, + "grad_norm": 0.46172890067100525, + "learning_rate": 5.380206270303835e-05, + "loss": 0.0196, + "step": 30120 + }, + { + "action_loss": 0.004135601222515106, + "epoch": 27.086330935251798, + "step": 30120 + }, + { + "epoch": 27.086330935251798, + "step": 30120, + "torque_loss": 0.138643279671669 + }, + { + "epoch": 27.086330935251798, + "force_loss": 0.00646270951256156, + "step": 30120 + }, + { + "epoch": 27.095323741007196, + "grad_norm": 0.17744766175746918, + "learning_rate": 5.377458408559897e-05, + "loss": 0.019, + "step": 30130 + }, + { + "action_loss": 0.005453914403915405, + "epoch": 27.095323741007196, + "step": 30130 + }, + { + "epoch": 27.095323741007196, + "step": 30130, + "torque_loss": 0.1135575994849205 + }, + { + "epoch": 27.095323741007196, + "force_loss": 0.003186618909239769, + "step": 30130 + }, + { + "epoch": 27.10431654676259, + "grad_norm": 0.2936372756958008, + "learning_rate": 5.374710432154061e-05, + "loss": 0.0193, + "step": 30140 + }, + { + "action_loss": 0.006100647617131472, + "epoch": 27.10431654676259, + "step": 30140 + }, + { + "epoch": 27.10431654676259, + "step": 30140, + "torque_loss": 0.17243479192256927 + }, + { + "epoch": 27.10431654676259, + "force_loss": 0.004507630597800016, + "step": 30140 + }, + { + "epoch": 27.113309352517987, + "grad_norm": 0.20095570385456085, + "learning_rate": 5.3719623419210886e-05, + "loss": 0.0181, + "step": 30150 + }, + { + "action_loss": 0.004813058767467737, + "epoch": 27.113309352517987, + "step": 30150 + }, + { + "epoch": 27.113309352517987, + "step": 30150, + "torque_loss": 0.17041879892349243 + }, + { + "epoch": 27.113309352517987, + "force_loss": 0.004747232887893915, + "step": 30150 + }, + { + "epoch": 27.12230215827338, + "grad_norm": 0.3171005845069885, + "learning_rate": 5.3692141386957786e-05, + "loss": 0.0172, + "step": 30160 + }, + { + "action_loss": 0.004677954595535994, + "epoch": 27.12230215827338, + "step": 30160 + }, + { + "epoch": 27.12230215827338, + "step": 30160, + "torque_loss": 0.0752430260181427 + }, + { + "epoch": 27.12230215827338, + "force_loss": 0.007557352539151907, + "step": 30160 + }, + { + "epoch": 27.131294964028775, + "grad_norm": 0.15077587962150574, + "learning_rate": 5.3664658233129616e-05, + "loss": 0.021, + "step": 30170 + }, + { + "action_loss": 0.005904344841837883, + "epoch": 27.131294964028775, + "step": 30170 + }, + { + "epoch": 27.131294964028775, + "step": 30170, + "torque_loss": 0.1867804378271103 + }, + { + "epoch": 27.131294964028775, + "force_loss": 0.0032666174229234457, + "step": 30170 + }, + { + "epoch": 27.140287769784173, + "grad_norm": 0.27926909923553467, + "learning_rate": 5.363717396607504e-05, + "loss": 0.0193, + "step": 30180 + }, + { + "action_loss": 0.0019679819233715534, + "epoch": 27.140287769784173, + "step": 30180 + }, + { + "epoch": 27.140287769784173, + "step": 30180, + "torque_loss": 0.09325297921895981 + }, + { + "epoch": 27.140287769784173, + "force_loss": 0.0022979744244366884, + "step": 30180 + }, + { + "epoch": 27.149280575539567, + "grad_norm": 0.21495363116264343, + "learning_rate": 5.360968859414305e-05, + "loss": 0.0173, + "step": 30190 + }, + { + "action_loss": 0.005858736112713814, + "epoch": 27.149280575539567, + "step": 30190 + }, + { + "epoch": 27.149280575539567, + "step": 30190, + "torque_loss": 0.13998264074325562 + }, + { + "epoch": 27.149280575539567, + "force_loss": 0.008681301027536392, + "step": 30190 + }, + { + "epoch": 27.158273381294965, + "grad_norm": 0.18916158378124237, + "learning_rate": 5.358220212568295e-05, + "loss": 0.0167, + "step": 30200 + }, + { + "action_loss": 0.022345414385199547, + "epoch": 27.158273381294965, + "step": 30200 + }, + { + "epoch": 27.158273381294965, + "step": 30200, + "torque_loss": 0.17376093566417694 + }, + { + "epoch": 27.158273381294965, + "force_loss": 0.023212112486362457, + "step": 30200 + }, + { + "epoch": 27.16726618705036, + "grad_norm": 0.17001235485076904, + "learning_rate": 5.355471456904444e-05, + "loss": 0.0179, + "step": 30210 + }, + { + "action_loss": 0.03106633387506008, + "epoch": 27.16726618705036, + "step": 30210 + }, + { + "epoch": 27.16726618705036, + "step": 30210, + "torque_loss": 0.19194336235523224 + }, + { + "epoch": 27.16726618705036, + "force_loss": 0.028198689222335815, + "step": 30210 + }, + { + "epoch": 27.176258992805757, + "grad_norm": 0.5119553804397583, + "learning_rate": 5.3527225932577495e-05, + "loss": 0.0241, + "step": 30220 + }, + { + "action_loss": 0.028138378635048866, + "epoch": 27.176258992805757, + "step": 30220 + }, + { + "epoch": 27.176258992805757, + "step": 30220, + "torque_loss": 0.22290174663066864 + }, + { + "epoch": 27.176258992805757, + "force_loss": 0.04579739645123482, + "step": 30220 + }, + { + "epoch": 27.18525179856115, + "grad_norm": 0.32575497031211853, + "learning_rate": 5.349973622463246e-05, + "loss": 0.0238, + "step": 30230 + }, + { + "action_loss": 0.008045557886362076, + "epoch": 27.18525179856115, + "step": 30230 + }, + { + "epoch": 27.18525179856115, + "step": 30230, + "torque_loss": 0.1500275880098343 + }, + { + "epoch": 27.18525179856115, + "force_loss": 0.023438720032572746, + "step": 30230 + }, + { + "epoch": 27.194244604316548, + "grad_norm": 0.20883388817310333, + "learning_rate": 5.3472245453559956e-05, + "loss": 0.0179, + "step": 30240 + }, + { + "action_loss": 0.004487939178943634, + "epoch": 27.194244604316548, + "step": 30240 + }, + { + "epoch": 27.194244604316548, + "step": 30240, + "torque_loss": 0.14595277607440948 + }, + { + "epoch": 27.194244604316548, + "force_loss": 0.009683411568403244, + "step": 30240 + }, + { + "epoch": 27.203237410071942, + "grad_norm": 0.23641663789749146, + "learning_rate": 5.3444753627710955e-05, + "loss": 0.0207, + "step": 30250 + }, + { + "action_loss": 0.005362626165151596, + "epoch": 27.203237410071942, + "step": 30250 + }, + { + "epoch": 27.203237410071942, + "step": 30250, + "torque_loss": 0.12094292789697647 + }, + { + "epoch": 27.203237410071942, + "force_loss": 0.004349933937191963, + "step": 30250 + }, + { + "epoch": 27.21223021582734, + "grad_norm": 0.3433016240596771, + "learning_rate": 5.341726075543676e-05, + "loss": 0.0185, + "step": 30260 + }, + { + "action_loss": 0.0029825549572706223, + "epoch": 27.21223021582734, + "step": 30260 + }, + { + "epoch": 27.21223021582734, + "step": 30260, + "torque_loss": 0.15964572131633759 + }, + { + "epoch": 27.21223021582734, + "force_loss": 0.0013917317846789956, + "step": 30260 + }, + { + "epoch": 27.221223021582734, + "grad_norm": 0.7519928216934204, + "learning_rate": 5.338976684508898e-05, + "loss": 0.019, + "step": 30270 + }, + { + "action_loss": 0.021886220201849937, + "epoch": 27.221223021582734, + "step": 30270 + }, + { + "epoch": 27.221223021582734, + "step": 30270, + "torque_loss": 0.15924613177776337 + }, + { + "epoch": 27.221223021582734, + "force_loss": 0.016282370314002037, + "step": 30270 + }, + { + "epoch": 27.230215827338128, + "grad_norm": 0.1829599291086197, + "learning_rate": 5.336227190501953e-05, + "loss": 0.0182, + "step": 30280 + }, + { + "action_loss": 0.0018750490853562951, + "epoch": 27.230215827338128, + "step": 30280 + }, + { + "epoch": 27.230215827338128, + "step": 30280, + "torque_loss": 0.12079592794179916 + }, + { + "epoch": 27.230215827338128, + "force_loss": 0.0016502775251865387, + "step": 30280 + }, + { + "epoch": 27.239208633093526, + "grad_norm": 0.08746183663606644, + "learning_rate": 5.3334775943580664e-05, + "loss": 0.0193, + "step": 30290 + }, + { + "action_loss": 0.0016757560661062598, + "epoch": 27.239208633093526, + "step": 30290 + }, + { + "epoch": 27.239208633093526, + "step": 30290, + "torque_loss": 0.08195025473833084 + }, + { + "epoch": 27.239208633093526, + "force_loss": 0.0013245666632428765, + "step": 30290 + }, + { + "epoch": 27.24820143884892, + "grad_norm": 0.18650297820568085, + "learning_rate": 5.330727896912491e-05, + "loss": 0.0151, + "step": 30300 + }, + { + "action_loss": 0.003503574291244149, + "epoch": 27.24820143884892, + "step": 30300 + }, + { + "epoch": 27.24820143884892, + "step": 30300, + "torque_loss": 0.08101185411214828 + }, + { + "epoch": 27.24820143884892, + "force_loss": 0.004829870071262121, + "step": 30300 + }, + { + "epoch": 27.257194244604317, + "grad_norm": 0.3734641671180725, + "learning_rate": 5.327978099000511e-05, + "loss": 0.0167, + "step": 30310 + }, + { + "action_loss": 0.0027369551826268435, + "epoch": 27.257194244604317, + "step": 30310 + }, + { + "epoch": 27.257194244604317, + "step": 30310, + "torque_loss": 0.11790794134140015 + }, + { + "epoch": 27.257194244604317, + "force_loss": 0.00968188513070345, + "step": 30310 + }, + { + "epoch": 27.26618705035971, + "grad_norm": 0.20427779853343964, + "learning_rate": 5.3252282014574465e-05, + "loss": 0.018, + "step": 30320 + }, + { + "action_loss": 0.005135164130479097, + "epoch": 27.26618705035971, + "step": 30320 + }, + { + "epoch": 27.26618705035971, + "step": 30320, + "torque_loss": 0.11646892875432968 + }, + { + "epoch": 27.26618705035971, + "force_loss": 0.00904246885329485, + "step": 30320 + }, + { + "epoch": 27.27517985611511, + "grad_norm": 0.09920324385166168, + "learning_rate": 5.322478205118641e-05, + "loss": 0.0157, + "step": 30330 + }, + { + "action_loss": 0.00667195999994874, + "epoch": 27.27517985611511, + "step": 30330 + }, + { + "epoch": 27.27517985611511, + "step": 30330, + "torque_loss": 0.1300421804189682 + }, + { + "epoch": 27.27517985611511, + "force_loss": 0.0047349329106509686, + "step": 30330 + }, + { + "epoch": 27.284172661870503, + "grad_norm": 0.300405889749527, + "learning_rate": 5.3197281108194704e-05, + "loss": 0.0222, + "step": 30340 + }, + { + "action_loss": 0.004093907307833433, + "epoch": 27.284172661870503, + "step": 30340 + }, + { + "epoch": 27.284172661870503, + "step": 30340, + "torque_loss": 0.0945718064904213 + }, + { + "epoch": 27.284172661870503, + "force_loss": 0.009305437095463276, + "step": 30340 + }, + { + "epoch": 27.2931654676259, + "grad_norm": 0.27889901399612427, + "learning_rate": 5.316977919395342e-05, + "loss": 0.0174, + "step": 30350 + }, + { + "action_loss": 0.006806804332882166, + "epoch": 27.2931654676259, + "step": 30350 + }, + { + "epoch": 27.2931654676259, + "step": 30350, + "torque_loss": 0.15492509305477142 + }, + { + "epoch": 27.2931654676259, + "force_loss": 0.007464412599802017, + "step": 30350 + }, + { + "epoch": 27.302158273381295, + "grad_norm": 0.35391148924827576, + "learning_rate": 5.314227631681691e-05, + "loss": 0.0191, + "step": 30360 + }, + { + "action_loss": 0.006740959826856852, + "epoch": 27.302158273381295, + "step": 30360 + }, + { + "epoch": 27.302158273381295, + "step": 30360, + "torque_loss": 0.1226671114563942 + }, + { + "epoch": 27.302158273381295, + "force_loss": 0.004497290588915348, + "step": 30360 + }, + { + "epoch": 27.31115107913669, + "grad_norm": 0.17393139004707336, + "learning_rate": 5.311477248513982e-05, + "loss": 0.0171, + "step": 30370 + }, + { + "action_loss": 0.0036024879664182663, + "epoch": 27.31115107913669, + "step": 30370 + }, + { + "epoch": 27.31115107913669, + "step": 30370, + "torque_loss": 0.11064424365758896 + }, + { + "epoch": 27.31115107913669, + "force_loss": 0.004538074601441622, + "step": 30370 + }, + { + "epoch": 27.320143884892087, + "grad_norm": 0.2588514983654022, + "learning_rate": 5.30872677072771e-05, + "loss": 0.0219, + "step": 30380 + }, + { + "action_loss": 0.002123519079759717, + "epoch": 27.320143884892087, + "step": 30380 + }, + { + "epoch": 27.320143884892087, + "step": 30380, + "torque_loss": 0.13704858720302582 + }, + { + "epoch": 27.320143884892087, + "force_loss": 0.001857923693023622, + "step": 30380 + }, + { + "epoch": 27.32913669064748, + "grad_norm": 0.11446554958820343, + "learning_rate": 5.3059761991583954e-05, + "loss": 0.0211, + "step": 30390 + }, + { + "action_loss": 0.017162993550300598, + "epoch": 27.32913669064748, + "step": 30390 + }, + { + "epoch": 27.32913669064748, + "step": 30390, + "torque_loss": 0.15154393017292023 + }, + { + "epoch": 27.32913669064748, + "force_loss": 0.015158439986407757, + "step": 30390 + }, + { + "epoch": 27.33812949640288, + "grad_norm": 0.14296375215053558, + "learning_rate": 5.303225534641592e-05, + "loss": 0.0185, + "step": 30400 + }, + { + "action_loss": 0.004359942860901356, + "epoch": 27.33812949640288, + "step": 30400 + }, + { + "epoch": 27.33812949640288, + "step": 30400, + "torque_loss": 0.12148231267929077 + }, + { + "epoch": 27.33812949640288, + "force_loss": 0.007306875195354223, + "step": 30400 + }, + { + "epoch": 27.347122302158272, + "grad_norm": 0.2763224244117737, + "learning_rate": 5.300474778012875e-05, + "loss": 0.0183, + "step": 30410 + }, + { + "action_loss": 0.0025603987742215395, + "epoch": 27.347122302158272, + "step": 30410 + }, + { + "epoch": 27.347122302158272, + "step": 30410, + "torque_loss": 0.13627688586711884 + }, + { + "epoch": 27.347122302158272, + "force_loss": 0.005227160174399614, + "step": 30410 + }, + { + "epoch": 27.35611510791367, + "grad_norm": 0.18527504801750183, + "learning_rate": 5.297723930107855e-05, + "loss": 0.0181, + "step": 30420 + }, + { + "action_loss": 0.008046342991292477, + "epoch": 27.35611510791367, + "step": 30420 + }, + { + "epoch": 27.35611510791367, + "step": 30420, + "torque_loss": 0.11432340741157532 + }, + { + "epoch": 27.35611510791367, + "force_loss": 0.01383527647703886, + "step": 30420 + }, + { + "epoch": 27.365107913669064, + "grad_norm": 0.17165902256965637, + "learning_rate": 5.294972991762167e-05, + "loss": 0.0186, + "step": 30430 + }, + { + "action_loss": 0.00251086731441319, + "epoch": 27.365107913669064, + "step": 30430 + }, + { + "epoch": 27.365107913669064, + "step": 30430, + "torque_loss": 0.13733571767807007 + }, + { + "epoch": 27.365107913669064, + "force_loss": 0.0028186349663883448, + "step": 30430 + }, + { + "epoch": 27.37410071942446, + "grad_norm": 0.1820177286863327, + "learning_rate": 5.292221963811472e-05, + "loss": 0.0161, + "step": 30440 + }, + { + "action_loss": 0.0028685119468718767, + "epoch": 27.37410071942446, + "step": 30440 + }, + { + "epoch": 27.37410071942446, + "step": 30440, + "torque_loss": 0.11441189050674438 + }, + { + "epoch": 27.37410071942446, + "force_loss": 0.00430305814370513, + "step": 30440 + }, + { + "epoch": 27.383093525179856, + "grad_norm": 0.2745693624019623, + "learning_rate": 5.28947084709146e-05, + "loss": 0.0183, + "step": 30450 + }, + { + "action_loss": 0.0054676649160683155, + "epoch": 27.383093525179856, + "step": 30450 + }, + { + "epoch": 27.383093525179856, + "step": 30450, + "torque_loss": 0.13324573636054993 + }, + { + "epoch": 27.383093525179856, + "force_loss": 0.003394828410819173, + "step": 30450 + }, + { + "epoch": 27.392086330935253, + "grad_norm": 0.41122880578041077, + "learning_rate": 5.2867196424378465e-05, + "loss": 0.0198, + "step": 30460 + }, + { + "action_loss": 0.0030081484001129866, + "epoch": 27.392086330935253, + "step": 30460 + }, + { + "epoch": 27.392086330935253, + "step": 30460, + "torque_loss": 0.0999179258942604 + }, + { + "epoch": 27.392086330935253, + "force_loss": 0.004558890126645565, + "step": 30460 + }, + { + "epoch": 27.401079136690647, + "grad_norm": 0.6369983553886414, + "learning_rate": 5.2839683506863765e-05, + "loss": 0.0183, + "step": 30470 + }, + { + "action_loss": 0.0035806421656161547, + "epoch": 27.401079136690647, + "step": 30470 + }, + { + "epoch": 27.401079136690647, + "step": 30470, + "torque_loss": 0.17002622783184052 + }, + { + "epoch": 27.401079136690647, + "force_loss": 0.002485469914972782, + "step": 30470 + }, + { + "epoch": 27.41007194244604, + "grad_norm": 0.2785317301750183, + "learning_rate": 5.281216972672821e-05, + "loss": 0.0186, + "step": 30480 + }, + { + "action_loss": 0.007724123075604439, + "epoch": 27.41007194244604, + "step": 30480 + }, + { + "epoch": 27.41007194244604, + "step": 30480, + "torque_loss": 0.08777821063995361 + }, + { + "epoch": 27.41007194244604, + "force_loss": 0.006125069689005613, + "step": 30480 + }, + { + "epoch": 27.41906474820144, + "grad_norm": 0.6122022271156311, + "learning_rate": 5.278465509232973e-05, + "loss": 0.0241, + "step": 30490 + }, + { + "action_loss": 0.0026271052192896605, + "epoch": 27.41906474820144, + "step": 30490 + }, + { + "epoch": 27.41906474820144, + "step": 30490, + "torque_loss": 0.14399898052215576 + }, + { + "epoch": 27.41906474820144, + "force_loss": 0.0026529596652835608, + "step": 30490 + }, + { + "epoch": 27.428057553956833, + "grad_norm": 0.2580064833164215, + "learning_rate": 5.275713961202655e-05, + "loss": 0.0191, + "step": 30500 + }, + { + "action_loss": 0.006645339075475931, + "epoch": 27.428057553956833, + "step": 30500 + }, + { + "epoch": 27.428057553956833, + "step": 30500, + "torque_loss": 0.13289107382297516 + }, + { + "epoch": 27.428057553956833, + "force_loss": 0.006983119994401932, + "step": 30500 + }, + { + "epoch": 27.43705035971223, + "grad_norm": 0.21055203676223755, + "learning_rate": 5.2729623294177165e-05, + "loss": 0.0169, + "step": 30510 + }, + { + "action_loss": 0.0030639495234936476, + "epoch": 27.43705035971223, + "step": 30510 + }, + { + "epoch": 27.43705035971223, + "step": 30510, + "torque_loss": 0.095901720225811 + }, + { + "epoch": 27.43705035971223, + "force_loss": 0.00638936460018158, + "step": 30510 + }, + { + "epoch": 27.446043165467625, + "grad_norm": 0.2256162315607071, + "learning_rate": 5.270210614714028e-05, + "loss": 0.0186, + "step": 30520 + }, + { + "action_loss": 0.009638610295951366, + "epoch": 27.446043165467625, + "step": 30520 + }, + { + "epoch": 27.446043165467625, + "step": 30520, + "torque_loss": 0.14846175909042358 + }, + { + "epoch": 27.446043165467625, + "force_loss": 0.007739686872810125, + "step": 30520 + }, + { + "epoch": 27.455035971223023, + "grad_norm": 0.10142328590154648, + "learning_rate": 5.267458817927491e-05, + "loss": 0.0201, + "step": 30530 + }, + { + "action_loss": 0.021939754486083984, + "epoch": 27.455035971223023, + "step": 30530 + }, + { + "epoch": 27.455035971223023, + "step": 30530, + "torque_loss": 0.14862318336963654 + }, + { + "epoch": 27.455035971223023, + "force_loss": 0.03013339452445507, + "step": 30530 + }, + { + "epoch": 27.464028776978417, + "grad_norm": 0.20716537535190582, + "learning_rate": 5.264706939894026e-05, + "loss": 0.0214, + "step": 30540 + }, + { + "action_loss": 0.00564810074865818, + "epoch": 27.464028776978417, + "step": 30540 + }, + { + "epoch": 27.464028776978417, + "step": 30540, + "torque_loss": 0.11731040477752686 + }, + { + "epoch": 27.464028776978417, + "force_loss": 0.004977627191692591, + "step": 30540 + }, + { + "epoch": 27.473021582733814, + "grad_norm": 0.1534290909767151, + "learning_rate": 5.261954981449584e-05, + "loss": 0.0188, + "step": 30550 + }, + { + "action_loss": 0.002763771452009678, + "epoch": 27.473021582733814, + "step": 30550 + }, + { + "epoch": 27.473021582733814, + "step": 30550, + "torque_loss": 0.09688911586999893 + }, + { + "epoch": 27.473021582733814, + "force_loss": 0.0027837317902594805, + "step": 30550 + }, + { + "epoch": 27.48201438848921, + "grad_norm": 0.3309992849826813, + "learning_rate": 5.2592029434301324e-05, + "loss": 0.0183, + "step": 30560 + }, + { + "action_loss": 0.012967114336788654, + "epoch": 27.48201438848921, + "step": 30560 + }, + { + "epoch": 27.48201438848921, + "step": 30560, + "torque_loss": 0.1345806121826172 + }, + { + "epoch": 27.48201438848921, + "force_loss": 0.00974154844880104, + "step": 30560 + }, + { + "epoch": 27.491007194244606, + "grad_norm": 0.329872190952301, + "learning_rate": 5.256450826671672e-05, + "loss": 0.0241, + "step": 30570 + }, + { + "action_loss": 0.006892070639878511, + "epoch": 27.491007194244606, + "step": 30570 + }, + { + "epoch": 27.491007194244606, + "step": 30570, + "torque_loss": 0.12427874654531479 + }, + { + "epoch": 27.491007194244606, + "force_loss": 0.00313993520103395, + "step": 30570 + }, + { + "epoch": 27.5, + "grad_norm": 0.2719530165195465, + "learning_rate": 5.253698632010221e-05, + "loss": 0.0201, + "step": 30580 + }, + { + "action_loss": 0.0027273735031485558, + "epoch": 27.5, + "step": 30580 + }, + { + "epoch": 27.5, + "step": 30580, + "torque_loss": 0.15049169957637787 + }, + { + "epoch": 27.5, + "force_loss": 0.001852459623478353, + "step": 30580 + }, + { + "epoch": 27.508992805755394, + "grad_norm": 0.2190684825181961, + "learning_rate": 5.2509463602818246e-05, + "loss": 0.0198, + "step": 30590 + }, + { + "action_loss": 0.006629563868045807, + "epoch": 27.508992805755394, + "step": 30590 + }, + { + "epoch": 27.508992805755394, + "step": 30590, + "torque_loss": 0.14348743855953217 + }, + { + "epoch": 27.508992805755394, + "force_loss": 0.010787595994770527, + "step": 30590 + }, + { + "epoch": 27.51798561151079, + "grad_norm": 0.38350728154182434, + "learning_rate": 5.248194012322549e-05, + "loss": 0.0178, + "step": 30600 + }, + { + "action_loss": 0.01122139673680067, + "epoch": 27.51798561151079, + "step": 30600 + }, + { + "epoch": 27.51798561151079, + "step": 30600, + "torque_loss": 0.10846803337335587 + }, + { + "epoch": 27.51798561151079, + "force_loss": 0.011705628596246243, + "step": 30600 + }, + { + "epoch": 27.526978417266186, + "grad_norm": 0.3710283935070038, + "learning_rate": 5.245441588968486e-05, + "loss": 0.019, + "step": 30610 + }, + { + "action_loss": 0.00363993551582098, + "epoch": 27.526978417266186, + "step": 30610 + }, + { + "epoch": 27.526978417266186, + "step": 30610, + "torque_loss": 0.11770457029342651 + }, + { + "epoch": 27.526978417266186, + "force_loss": 0.006347086746245623, + "step": 30610 + }, + { + "epoch": 27.535971223021583, + "grad_norm": 0.4064185321331024, + "learning_rate": 5.242689091055748e-05, + "loss": 0.0212, + "step": 30620 + }, + { + "action_loss": 0.0039041517302393913, + "epoch": 27.535971223021583, + "step": 30620 + }, + { + "epoch": 27.535971223021583, + "step": 30620, + "torque_loss": 0.18442851305007935 + }, + { + "epoch": 27.535971223021583, + "force_loss": 0.00466783344745636, + "step": 30620 + }, + { + "epoch": 27.544964028776977, + "grad_norm": 0.38118013739585876, + "learning_rate": 5.239936519420473e-05, + "loss": 0.0255, + "step": 30630 + }, + { + "action_loss": 0.0021402251441031694, + "epoch": 27.544964028776977, + "step": 30630 + }, + { + "epoch": 27.544964028776977, + "step": 30630, + "torque_loss": 0.11720553785562515 + }, + { + "epoch": 27.544964028776977, + "force_loss": 0.002472731051966548, + "step": 30630 + }, + { + "epoch": 27.553956834532375, + "grad_norm": 0.34569528698921204, + "learning_rate": 5.2371838748988175e-05, + "loss": 0.0221, + "step": 30640 + }, + { + "action_loss": 0.005891752894967794, + "epoch": 27.553956834532375, + "step": 30640 + }, + { + "epoch": 27.553956834532375, + "step": 30640, + "torque_loss": 0.11999966949224472 + }, + { + "epoch": 27.553956834532375, + "force_loss": 0.007090532686561346, + "step": 30640 + }, + { + "epoch": 27.56294964028777, + "grad_norm": 0.26249584555625916, + "learning_rate": 5.234431158326965e-05, + "loss": 0.0224, + "step": 30650 + }, + { + "action_loss": 0.00848119705915451, + "epoch": 27.56294964028777, + "step": 30650 + }, + { + "epoch": 27.56294964028777, + "step": 30650, + "torque_loss": 0.15211278200149536 + }, + { + "epoch": 27.56294964028777, + "force_loss": 0.011708579957485199, + "step": 30650 + }, + { + "epoch": 27.571942446043167, + "grad_norm": 0.233008474111557, + "learning_rate": 5.231678370541115e-05, + "loss": 0.0201, + "step": 30660 + }, + { + "action_loss": 0.01889168657362461, + "epoch": 27.571942446043167, + "step": 30660 + }, + { + "epoch": 27.571942446043167, + "step": 30660, + "torque_loss": 0.1622740626335144 + }, + { + "epoch": 27.571942446043167, + "force_loss": 0.031231537461280823, + "step": 30660 + }, + { + "epoch": 27.58093525179856, + "grad_norm": 0.1595122516155243, + "learning_rate": 5.228925512377495e-05, + "loss": 0.0198, + "step": 30670 + }, + { + "action_loss": 0.00274129887111485, + "epoch": 27.58093525179856, + "step": 30670 + }, + { + "epoch": 27.58093525179856, + "step": 30670, + "torque_loss": 0.17599289119243622 + }, + { + "epoch": 27.58093525179856, + "force_loss": 0.001764418208040297, + "step": 30670 + }, + { + "epoch": 27.58992805755396, + "grad_norm": 0.19278109073638916, + "learning_rate": 5.2261725846723465e-05, + "loss": 0.0185, + "step": 30680 + }, + { + "action_loss": 0.008990000002086163, + "epoch": 27.58992805755396, + "step": 30680 + }, + { + "epoch": 27.58992805755396, + "step": 30680, + "torque_loss": 0.1137491837143898 + }, + { + "epoch": 27.58992805755396, + "force_loss": 0.014333575963973999, + "step": 30680 + }, + { + "epoch": 27.598920863309353, + "grad_norm": 0.16543430089950562, + "learning_rate": 5.22341958826194e-05, + "loss": 0.0192, + "step": 30690 + }, + { + "action_loss": 0.007123140152543783, + "epoch": 27.598920863309353, + "step": 30690 + }, + { + "epoch": 27.598920863309353, + "step": 30690, + "torque_loss": 0.11266070604324341 + }, + { + "epoch": 27.598920863309353, + "force_loss": 0.004885811358690262, + "step": 30690 + }, + { + "epoch": 27.607913669064747, + "grad_norm": 0.16478745639324188, + "learning_rate": 5.22066652398256e-05, + "loss": 0.0175, + "step": 30700 + }, + { + "action_loss": 0.003914508502930403, + "epoch": 27.607913669064747, + "step": 30700 + }, + { + "epoch": 27.607913669064747, + "step": 30700, + "torque_loss": 0.10943523794412613 + }, + { + "epoch": 27.607913669064747, + "force_loss": 0.005109761841595173, + "step": 30700 + }, + { + "epoch": 27.616906474820144, + "grad_norm": 0.26371338963508606, + "learning_rate": 5.2179133926705185e-05, + "loss": 0.0169, + "step": 30710 + }, + { + "action_loss": 0.006401343736797571, + "epoch": 27.616906474820144, + "step": 30710 + }, + { + "epoch": 27.616906474820144, + "step": 30710, + "torque_loss": 0.1256575733423233 + }, + { + "epoch": 27.616906474820144, + "force_loss": 0.003221782622858882, + "step": 30710 + }, + { + "epoch": 27.62589928057554, + "grad_norm": 0.13252998888492584, + "learning_rate": 5.215160195162141e-05, + "loss": 0.0186, + "step": 30720 + }, + { + "action_loss": 0.0028024381026625633, + "epoch": 27.62589928057554, + "step": 30720 + }, + { + "epoch": 27.62589928057554, + "step": 30720, + "torque_loss": 0.10049884766340256 + }, + { + "epoch": 27.62589928057554, + "force_loss": 0.004064956679940224, + "step": 30720 + }, + { + "epoch": 27.634892086330936, + "grad_norm": 0.18607401847839355, + "learning_rate": 5.212406932293776e-05, + "loss": 0.0165, + "step": 30730 + }, + { + "action_loss": 0.003135338658466935, + "epoch": 27.634892086330936, + "step": 30730 + }, + { + "epoch": 27.634892086330936, + "step": 30730, + "torque_loss": 0.12078125029802322 + }, + { + "epoch": 27.634892086330936, + "force_loss": 0.002940795151516795, + "step": 30730 + }, + { + "epoch": 27.64388489208633, + "grad_norm": 0.13245603442192078, + "learning_rate": 5.209653604901795e-05, + "loss": 0.0178, + "step": 30740 + }, + { + "action_loss": 0.005995666608214378, + "epoch": 27.64388489208633, + "step": 30740 + }, + { + "epoch": 27.64388489208633, + "step": 30740, + "torque_loss": 0.24334824085235596 + }, + { + "epoch": 27.64388489208633, + "force_loss": 0.00946358498185873, + "step": 30740 + }, + { + "epoch": 27.652877697841728, + "grad_norm": 0.28509190678596497, + "learning_rate": 5.206900213822584e-05, + "loss": 0.0184, + "step": 30750 + }, + { + "action_loss": 0.009454579092562199, + "epoch": 27.652877697841728, + "step": 30750 + }, + { + "epoch": 27.652877697841728, + "step": 30750, + "torque_loss": 0.1258021593093872 + }, + { + "epoch": 27.652877697841728, + "force_loss": 0.0027389039751142263, + "step": 30750 + }, + { + "epoch": 27.66187050359712, + "grad_norm": 0.152934268116951, + "learning_rate": 5.204146759892551e-05, + "loss": 0.0201, + "step": 30760 + }, + { + "action_loss": 0.001829686458222568, + "epoch": 27.66187050359712, + "step": 30760 + }, + { + "epoch": 27.66187050359712, + "step": 30760, + "torque_loss": 0.10581382364034653 + }, + { + "epoch": 27.66187050359712, + "force_loss": 0.001624145545065403, + "step": 30760 + }, + { + "epoch": 27.67086330935252, + "grad_norm": 0.2874409854412079, + "learning_rate": 5.2013932439481216e-05, + "loss": 0.0186, + "step": 30770 + }, + { + "action_loss": 0.01221607904881239, + "epoch": 27.67086330935252, + "step": 30770 + }, + { + "epoch": 27.67086330935252, + "step": 30770, + "torque_loss": 0.18106383085250854 + }, + { + "epoch": 27.67086330935252, + "force_loss": 0.01274307444691658, + "step": 30770 + }, + { + "epoch": 27.679856115107913, + "grad_norm": 0.241510808467865, + "learning_rate": 5.198639666825743e-05, + "loss": 0.0267, + "step": 30780 + }, + { + "action_loss": 0.006198238581418991, + "epoch": 27.679856115107913, + "step": 30780 + }, + { + "epoch": 27.679856115107913, + "step": 30780, + "torque_loss": 0.10350052267313004 + }, + { + "epoch": 27.679856115107913, + "force_loss": 0.0062993853352963924, + "step": 30780 + }, + { + "epoch": 27.68884892086331, + "grad_norm": 0.16487713158130646, + "learning_rate": 5.195886029361877e-05, + "loss": 0.019, + "step": 30790 + }, + { + "action_loss": 0.009002833627164364, + "epoch": 27.68884892086331, + "step": 30790 + }, + { + "epoch": 27.68884892086331, + "step": 30790, + "torque_loss": 0.15934205055236816 + }, + { + "epoch": 27.68884892086331, + "force_loss": 0.006100728642195463, + "step": 30790 + }, + { + "epoch": 27.697841726618705, + "grad_norm": 0.5091961622238159, + "learning_rate": 5.193132332393009e-05, + "loss": 0.0231, + "step": 30800 + }, + { + "action_loss": 0.004027887247502804, + "epoch": 27.697841726618705, + "step": 30800 + }, + { + "epoch": 27.697841726618705, + "step": 30800, + "torque_loss": 0.17949558794498444 + }, + { + "epoch": 27.697841726618705, + "force_loss": 0.0030570297967642546, + "step": 30800 + }, + { + "epoch": 27.7068345323741, + "grad_norm": 0.3422536551952362, + "learning_rate": 5.1903785767556376e-05, + "loss": 0.0201, + "step": 30810 + }, + { + "action_loss": 0.004884196911007166, + "epoch": 27.7068345323741, + "step": 30810 + }, + { + "epoch": 27.7068345323741, + "step": 30810, + "torque_loss": 0.07200178503990173 + }, + { + "epoch": 27.7068345323741, + "force_loss": 0.006248340476304293, + "step": 30810 + }, + { + "epoch": 27.715827338129497, + "grad_norm": 0.1799556463956833, + "learning_rate": 5.187624763286282e-05, + "loss": 0.0164, + "step": 30820 + }, + { + "action_loss": 0.008699198253452778, + "epoch": 27.715827338129497, + "step": 30820 + }, + { + "epoch": 27.715827338129497, + "step": 30820, + "torque_loss": 0.10013705492019653 + }, + { + "epoch": 27.715827338129497, + "force_loss": 0.007091708946973085, + "step": 30820 + }, + { + "epoch": 27.72482014388489, + "grad_norm": 0.6433326005935669, + "learning_rate": 5.184870892821475e-05, + "loss": 0.0243, + "step": 30830 + }, + { + "action_loss": 0.007349391933530569, + "epoch": 27.72482014388489, + "step": 30830 + }, + { + "epoch": 27.72482014388489, + "step": 30830, + "torque_loss": 0.1412038803100586 + }, + { + "epoch": 27.72482014388489, + "force_loss": 0.005403654184192419, + "step": 30830 + }, + { + "epoch": 27.73381294964029, + "grad_norm": 0.13341479003429413, + "learning_rate": 5.182116966197773e-05, + "loss": 0.0191, + "step": 30840 + }, + { + "action_loss": 0.005824791267514229, + "epoch": 27.73381294964029, + "step": 30840 + }, + { + "epoch": 27.73381294964029, + "step": 30840, + "torque_loss": 0.1492571383714676 + }, + { + "epoch": 27.73381294964029, + "force_loss": 0.006928595248609781, + "step": 30840 + }, + { + "epoch": 27.742805755395683, + "grad_norm": 0.1712639182806015, + "learning_rate": 5.1793629842517466e-05, + "loss": 0.0184, + "step": 30850 + }, + { + "action_loss": 0.004385871347039938, + "epoch": 27.742805755395683, + "step": 30850 + }, + { + "epoch": 27.742805755395683, + "step": 30850, + "torque_loss": 0.17530196905136108 + }, + { + "epoch": 27.742805755395683, + "force_loss": 0.002255291910842061, + "step": 30850 + }, + { + "epoch": 27.75179856115108, + "grad_norm": 0.15210753679275513, + "learning_rate": 5.17660894781998e-05, + "loss": 0.0184, + "step": 30860 + }, + { + "action_loss": 0.0028193567413836718, + "epoch": 27.75179856115108, + "step": 30860 + }, + { + "epoch": 27.75179856115108, + "step": 30860, + "torque_loss": 0.11524689197540283 + }, + { + "epoch": 27.75179856115108, + "force_loss": 0.0037801319267600775, + "step": 30860 + }, + { + "epoch": 27.760791366906474, + "grad_norm": 0.41138550639152527, + "learning_rate": 5.173854857739079e-05, + "loss": 0.0189, + "step": 30870 + }, + { + "action_loss": 0.004349606577306986, + "epoch": 27.760791366906474, + "step": 30870 + }, + { + "epoch": 27.760791366906474, + "step": 30870, + "torque_loss": 0.1613604575395584 + }, + { + "epoch": 27.760791366906474, + "force_loss": 0.005117138847708702, + "step": 30870 + }, + { + "epoch": 27.769784172661872, + "grad_norm": 0.35171759128570557, + "learning_rate": 5.171100714845661e-05, + "loss": 0.0179, + "step": 30880 + }, + { + "action_loss": 0.010870051570236683, + "epoch": 27.769784172661872, + "step": 30880 + }, + { + "epoch": 27.769784172661872, + "step": 30880, + "torque_loss": 0.1550101935863495 + }, + { + "epoch": 27.769784172661872, + "force_loss": 0.013455321080982685, + "step": 30880 + }, + { + "epoch": 27.778776978417266, + "grad_norm": 0.3980942964553833, + "learning_rate": 5.1683465199763646e-05, + "loss": 0.0211, + "step": 30890 + }, + { + "action_loss": 0.002072709845378995, + "epoch": 27.778776978417266, + "step": 30890 + }, + { + "epoch": 27.778776978417266, + "step": 30890, + "torque_loss": 0.1543998122215271 + }, + { + "epoch": 27.778776978417266, + "force_loss": 0.0012014691019430757, + "step": 30890 + }, + { + "epoch": 27.78776978417266, + "grad_norm": 0.21990813314914703, + "learning_rate": 5.16559227396784e-05, + "loss": 0.0171, + "step": 30900 + }, + { + "action_loss": 0.005485077854245901, + "epoch": 27.78776978417266, + "step": 30900 + }, + { + "epoch": 27.78776978417266, + "step": 30900, + "torque_loss": 0.13829806447029114 + }, + { + "epoch": 27.78776978417266, + "force_loss": 0.00719534931704402, + "step": 30900 + }, + { + "epoch": 27.796762589928058, + "grad_norm": 0.21184466779232025, + "learning_rate": 5.1628379776567556e-05, + "loss": 0.0196, + "step": 30910 + }, + { + "action_loss": 0.0020861951634287834, + "epoch": 27.796762589928058, + "step": 30910 + }, + { + "epoch": 27.796762589928058, + "step": 30910, + "torque_loss": 0.08997714519500732 + }, + { + "epoch": 27.796762589928058, + "force_loss": 0.0049504549242556095, + "step": 30910 + }, + { + "epoch": 27.805755395683452, + "grad_norm": 0.3364858627319336, + "learning_rate": 5.160083631879792e-05, + "loss": 0.0186, + "step": 30920 + }, + { + "action_loss": 0.004167437087744474, + "epoch": 27.805755395683452, + "step": 30920 + }, + { + "epoch": 27.805755395683452, + "step": 30920, + "torque_loss": 0.0762174054980278 + }, + { + "epoch": 27.805755395683452, + "force_loss": 0.010606507770717144, + "step": 30920 + }, + { + "epoch": 27.81474820143885, + "grad_norm": 0.28972136974334717, + "learning_rate": 5.1573292374736484e-05, + "loss": 0.0178, + "step": 30930 + }, + { + "action_loss": 0.0051741148345172405, + "epoch": 27.81474820143885, + "step": 30930 + }, + { + "epoch": 27.81474820143885, + "step": 30930, + "torque_loss": 0.16094911098480225 + }, + { + "epoch": 27.81474820143885, + "force_loss": 0.002703087404370308, + "step": 30930 + }, + { + "epoch": 27.823741007194243, + "grad_norm": 0.32346779108047485, + "learning_rate": 5.1545747952750356e-05, + "loss": 0.0203, + "step": 30940 + }, + { + "action_loss": 0.0021596148144453764, + "epoch": 27.823741007194243, + "step": 30940 + }, + { + "epoch": 27.823741007194243, + "step": 30940, + "torque_loss": 0.10066640377044678 + }, + { + "epoch": 27.823741007194243, + "force_loss": 0.006780712399631739, + "step": 30940 + }, + { + "epoch": 27.83273381294964, + "grad_norm": 0.22288697957992554, + "learning_rate": 5.151820306120682e-05, + "loss": 0.0164, + "step": 30950 + }, + { + "action_loss": 0.002462455304339528, + "epoch": 27.83273381294964, + "step": 30950 + }, + { + "epoch": 27.83273381294964, + "step": 30950, + "torque_loss": 0.11628085374832153 + }, + { + "epoch": 27.83273381294964, + "force_loss": 0.0019165457924827933, + "step": 30950 + }, + { + "epoch": 27.841726618705035, + "grad_norm": 0.20578458905220032, + "learning_rate": 5.149065770847328e-05, + "loss": 0.0176, + "step": 30960 + }, + { + "action_loss": 0.00621893210336566, + "epoch": 27.841726618705035, + "step": 30960 + }, + { + "epoch": 27.841726618705035, + "step": 30960, + "torque_loss": 0.1364258974790573 + }, + { + "epoch": 27.841726618705035, + "force_loss": 0.00447434326633811, + "step": 30960 + }, + { + "epoch": 27.850719424460433, + "grad_norm": 0.3081282079219818, + "learning_rate": 5.1463111902917297e-05, + "loss": 0.021, + "step": 30970 + }, + { + "action_loss": 0.004593221936374903, + "epoch": 27.850719424460433, + "step": 30970 + }, + { + "epoch": 27.850719424460433, + "step": 30970, + "torque_loss": 0.16942958533763885 + }, + { + "epoch": 27.850719424460433, + "force_loss": 0.004925244953483343, + "step": 30970 + }, + { + "epoch": 27.859712230215827, + "grad_norm": 0.2719041705131531, + "learning_rate": 5.143556565290654e-05, + "loss": 0.0194, + "step": 30980 + }, + { + "action_loss": 0.007191583979874849, + "epoch": 27.859712230215827, + "step": 30980 + }, + { + "epoch": 27.859712230215827, + "step": 30980, + "torque_loss": 0.15952707827091217 + }, + { + "epoch": 27.859712230215827, + "force_loss": 0.007490354124456644, + "step": 30980 + }, + { + "epoch": 27.868705035971225, + "grad_norm": 0.4152081608772278, + "learning_rate": 5.140801896680882e-05, + "loss": 0.0184, + "step": 30990 + }, + { + "action_loss": 0.007137230131775141, + "epoch": 27.868705035971225, + "step": 30990 + }, + { + "epoch": 27.868705035971225, + "step": 30990, + "torque_loss": 0.14315825700759888 + }, + { + "epoch": 27.868705035971225, + "force_loss": 0.007594504859298468, + "step": 30990 + }, + { + "epoch": 27.87769784172662, + "grad_norm": 0.15846173465251923, + "learning_rate": 5.1380471852992144e-05, + "loss": 0.0186, + "step": 31000 + }, + { + "action_loss": 0.004153303802013397, + "epoch": 27.87769784172662, + "step": 31000 + }, + { + "epoch": 27.87769784172662, + "step": 31000, + "torque_loss": 0.1316547840833664 + }, + { + "epoch": 27.87769784172662, + "force_loss": 0.00436300178989768, + "step": 31000 + }, + { + "epoch": 27.886690647482013, + "grad_norm": 0.22018012404441833, + "learning_rate": 5.135292431982457e-05, + "loss": 0.0185, + "step": 31010 + }, + { + "action_loss": 0.002149861538782716, + "epoch": 27.886690647482013, + "step": 31010 + }, + { + "epoch": 27.886690647482013, + "step": 31010, + "torque_loss": 0.12831994891166687 + }, + { + "epoch": 27.886690647482013, + "force_loss": 0.004885531030595303, + "step": 31010 + }, + { + "epoch": 27.89568345323741, + "grad_norm": 0.2307564616203308, + "learning_rate": 5.1325376375674294e-05, + "loss": 0.0175, + "step": 31020 + }, + { + "action_loss": 0.010385863482952118, + "epoch": 27.89568345323741, + "step": 31020 + }, + { + "epoch": 27.89568345323741, + "step": 31020, + "torque_loss": 0.16394855082035065 + }, + { + "epoch": 27.89568345323741, + "force_loss": 0.017414994537830353, + "step": 31020 + }, + { + "epoch": 27.904676258992804, + "grad_norm": 0.161410853266716, + "learning_rate": 5.129782802890968e-05, + "loss": 0.0199, + "step": 31030 + }, + { + "action_loss": 0.004073642659932375, + "epoch": 27.904676258992804, + "step": 31030 + }, + { + "epoch": 27.904676258992804, + "step": 31030, + "torque_loss": 0.15164317190647125 + }, + { + "epoch": 27.904676258992804, + "force_loss": 0.0034698061645030975, + "step": 31030 + }, + { + "epoch": 27.913669064748202, + "grad_norm": 0.2141055315732956, + "learning_rate": 5.127027928789916e-05, + "loss": 0.0181, + "step": 31040 + }, + { + "action_loss": 0.0021478633861988783, + "epoch": 27.913669064748202, + "step": 31040 + }, + { + "epoch": 27.913669064748202, + "step": 31040, + "torque_loss": 0.11276334524154663 + }, + { + "epoch": 27.913669064748202, + "force_loss": 0.0028508733958005905, + "step": 31040 + }, + { + "epoch": 27.922661870503596, + "grad_norm": 0.1585572212934494, + "learning_rate": 5.124273016101135e-05, + "loss": 0.028, + "step": 31050 + }, + { + "action_loss": 0.005104747600853443, + "epoch": 27.922661870503596, + "step": 31050 + }, + { + "epoch": 27.922661870503596, + "step": 31050, + "torque_loss": 0.15689681470394135 + }, + { + "epoch": 27.922661870503596, + "force_loss": 0.003291598754003644, + "step": 31050 + }, + { + "epoch": 27.931654676258994, + "grad_norm": 0.20900021493434906, + "learning_rate": 5.121518065661492e-05, + "loss": 0.0198, + "step": 31060 + }, + { + "action_loss": 0.0021802065894007683, + "epoch": 27.931654676258994, + "step": 31060 + }, + { + "epoch": 27.931654676258994, + "step": 31060, + "torque_loss": 0.1086922362446785 + }, + { + "epoch": 27.931654676258994, + "force_loss": 0.0023348331451416016, + "step": 31060 + }, + { + "epoch": 27.940647482014388, + "grad_norm": 0.25486835837364197, + "learning_rate": 5.11876307830787e-05, + "loss": 0.0176, + "step": 31070 + }, + { + "action_loss": 0.010240408591926098, + "epoch": 27.940647482014388, + "step": 31070 + }, + { + "epoch": 27.940647482014388, + "step": 31070, + "torque_loss": 0.12701404094696045 + }, + { + "epoch": 27.940647482014388, + "force_loss": 0.006227945443242788, + "step": 31070 + }, + { + "epoch": 27.949640287769785, + "grad_norm": 0.21160483360290527, + "learning_rate": 5.1160080548771596e-05, + "loss": 0.022, + "step": 31080 + }, + { + "action_loss": 0.0023566593881696463, + "epoch": 27.949640287769785, + "step": 31080 + }, + { + "epoch": 27.949640287769785, + "step": 31080, + "torque_loss": 0.14099116623401642 + }, + { + "epoch": 27.949640287769785, + "force_loss": 0.0016366178169846535, + "step": 31080 + }, + { + "epoch": 27.95863309352518, + "grad_norm": 0.5432224869728088, + "learning_rate": 5.1132529962062656e-05, + "loss": 0.019, + "step": 31090 + }, + { + "action_loss": 0.002328429138287902, + "epoch": 27.95863309352518, + "step": 31090 + }, + { + "epoch": 27.95863309352518, + "step": 31090, + "torque_loss": 0.15630583465099335 + }, + { + "epoch": 27.95863309352518, + "force_loss": 0.001649947720579803, + "step": 31090 + }, + { + "epoch": 27.967625899280577, + "grad_norm": 0.2914307415485382, + "learning_rate": 5.110497903132101e-05, + "loss": 0.0198, + "step": 31100 + }, + { + "action_loss": 0.0015938254073262215, + "epoch": 27.967625899280577, + "step": 31100 + }, + { + "epoch": 27.967625899280577, + "step": 31100, + "torque_loss": 0.0902237519621849 + }, + { + "epoch": 27.967625899280577, + "force_loss": 0.0016521824290975928, + "step": 31100 + }, + { + "epoch": 27.97661870503597, + "grad_norm": 0.2809683680534363, + "learning_rate": 5.107742776491592e-05, + "loss": 0.0191, + "step": 31110 + }, + { + "action_loss": 0.0029244229663163424, + "epoch": 27.97661870503597, + "step": 31110 + }, + { + "epoch": 27.97661870503597, + "step": 31110, + "torque_loss": 0.13668620586395264 + }, + { + "epoch": 27.97661870503597, + "force_loss": 0.002639237092807889, + "step": 31110 + }, + { + "epoch": 27.985611510791365, + "grad_norm": 0.16187016665935516, + "learning_rate": 5.104987617121673e-05, + "loss": 0.0189, + "step": 31120 + }, + { + "action_loss": 0.005097375717014074, + "epoch": 27.985611510791365, + "step": 31120 + }, + { + "epoch": 27.985611510791365, + "step": 31120, + "torque_loss": 0.0963716208934784 + }, + { + "epoch": 27.985611510791365, + "force_loss": 0.008771772496402264, + "step": 31120 + }, + { + "epoch": 27.994604316546763, + "grad_norm": 0.19888824224472046, + "learning_rate": 5.102232425859287e-05, + "loss": 0.0183, + "step": 31130 + }, + { + "action_loss": 0.001765283290296793, + "epoch": 27.994604316546763, + "step": 31130 + }, + { + "epoch": 27.994604316546763, + "step": 31130, + "torque_loss": 0.1185249462723732 + }, + { + "epoch": 27.994604316546763, + "force_loss": 0.001336389104835689, + "step": 31130 + }, + { + "epoch": 28.003597122302157, + "grad_norm": 0.23520608246326447, + "learning_rate": 5.09947720354139e-05, + "loss": 0.0175, + "step": 31140 + }, + { + "action_loss": 0.003011924447491765, + "epoch": 28.003597122302157, + "step": 31140 + }, + { + "epoch": 28.003597122302157, + "step": 31140, + "torque_loss": 0.08293508738279343 + }, + { + "epoch": 28.003597122302157, + "force_loss": 0.0032558289822191, + "step": 31140 + }, + { + "epoch": 28.012589928057555, + "grad_norm": 0.3765396177768707, + "learning_rate": 5.096721951004942e-05, + "loss": 0.0187, + "step": 31150 + }, + { + "action_loss": 0.01665198989212513, + "epoch": 28.012589928057555, + "step": 31150 + }, + { + "epoch": 28.012589928057555, + "step": 31150, + "torque_loss": 0.10503220558166504 + }, + { + "epoch": 28.012589928057555, + "force_loss": 0.027542075142264366, + "step": 31150 + }, + { + "epoch": 28.02158273381295, + "grad_norm": 0.26802268624305725, + "learning_rate": 5.0939666690869227e-05, + "loss": 0.0198, + "step": 31160 + }, + { + "action_loss": 0.00382077693939209, + "epoch": 28.02158273381295, + "step": 31160 + }, + { + "epoch": 28.02158273381295, + "step": 31160, + "torque_loss": 0.12123435735702515 + }, + { + "epoch": 28.02158273381295, + "force_loss": 0.0025932935532182455, + "step": 31160 + }, + { + "epoch": 28.030575539568346, + "grad_norm": 0.3540857434272766, + "learning_rate": 5.0912113586243096e-05, + "loss": 0.0189, + "step": 31170 + }, + { + "action_loss": 0.0028544620145112276, + "epoch": 28.030575539568346, + "step": 31170 + }, + { + "epoch": 28.030575539568346, + "step": 31170, + "torque_loss": 0.12507446110248566 + }, + { + "epoch": 28.030575539568346, + "force_loss": 0.004324841778725386, + "step": 31170 + }, + { + "epoch": 28.03956834532374, + "grad_norm": 0.616055428981781, + "learning_rate": 5.0884560204540935e-05, + "loss": 0.0203, + "step": 31180 + }, + { + "action_loss": 0.011461793445050716, + "epoch": 28.03956834532374, + "step": 31180 + }, + { + "epoch": 28.03956834532374, + "step": 31180, + "torque_loss": 0.08699599653482437 + }, + { + "epoch": 28.03956834532374, + "force_loss": 0.014050401747226715, + "step": 31180 + }, + { + "epoch": 28.048561151079138, + "grad_norm": 0.1850016564130783, + "learning_rate": 5.0857006554132736e-05, + "loss": 0.0189, + "step": 31190 + }, + { + "action_loss": 0.008330575190484524, + "epoch": 28.048561151079138, + "step": 31190 + }, + { + "epoch": 28.048561151079138, + "step": 31190, + "torque_loss": 0.22060860693454742 + }, + { + "epoch": 28.048561151079138, + "force_loss": 0.009200750850141048, + "step": 31190 + }, + { + "epoch": 28.057553956834532, + "grad_norm": 0.1515791118144989, + "learning_rate": 5.0829452643388575e-05, + "loss": 0.0172, + "step": 31200 + }, + { + "action_loss": 0.003502137726172805, + "epoch": 28.057553956834532, + "step": 31200 + }, + { + "epoch": 28.057553956834532, + "step": 31200, + "torque_loss": 0.12400698661804199 + }, + { + "epoch": 28.057553956834532, + "force_loss": 0.004505326505750418, + "step": 31200 + }, + { + "epoch": 28.06654676258993, + "grad_norm": 0.2560032904148102, + "learning_rate": 5.08018984806786e-05, + "loss": 0.0167, + "step": 31210 + }, + { + "action_loss": 0.0030723672825843096, + "epoch": 28.06654676258993, + "step": 31210 + }, + { + "epoch": 28.06654676258993, + "step": 31210, + "torque_loss": 0.12460774928331375 + }, + { + "epoch": 28.06654676258993, + "force_loss": 0.0016551404260098934, + "step": 31210 + }, + { + "epoch": 28.075539568345324, + "grad_norm": 0.3447650372982025, + "learning_rate": 5.0774344074373036e-05, + "loss": 0.0199, + "step": 31220 + }, + { + "action_loss": 0.002020900370553136, + "epoch": 28.075539568345324, + "step": 31220 + }, + { + "epoch": 28.075539568345324, + "step": 31220, + "torque_loss": 0.14845220744609833 + }, + { + "epoch": 28.075539568345324, + "force_loss": 0.0034141484647989273, + "step": 31220 + }, + { + "epoch": 28.084532374100718, + "grad_norm": 0.2701382040977478, + "learning_rate": 5.07467894328422e-05, + "loss": 0.0191, + "step": 31230 + }, + { + "action_loss": 0.0048328242264688015, + "epoch": 28.084532374100718, + "step": 31230 + }, + { + "epoch": 28.084532374100718, + "step": 31230, + "torque_loss": 0.10102403163909912 + }, + { + "epoch": 28.084532374100718, + "force_loss": 0.0024929549545049667, + "step": 31230 + }, + { + "epoch": 28.093525179856115, + "grad_norm": 0.4575992226600647, + "learning_rate": 5.0719234564456454e-05, + "loss": 0.018, + "step": 31240 + }, + { + "action_loss": 0.0035630997736006975, + "epoch": 28.093525179856115, + "step": 31240 + }, + { + "epoch": 28.093525179856115, + "step": 31240, + "torque_loss": 0.14600805938243866 + }, + { + "epoch": 28.093525179856115, + "force_loss": 0.003792768344283104, + "step": 31240 + }, + { + "epoch": 28.10251798561151, + "grad_norm": 0.14927834272384644, + "learning_rate": 5.0691679477586216e-05, + "loss": 0.0147, + "step": 31250 + }, + { + "action_loss": 0.0029871861916035414, + "epoch": 28.10251798561151, + "step": 31250 + }, + { + "epoch": 28.10251798561151, + "step": 31250, + "torque_loss": 0.13111555576324463 + }, + { + "epoch": 28.10251798561151, + "force_loss": 0.005156335886567831, + "step": 31250 + }, + { + "epoch": 28.111510791366907, + "grad_norm": 0.21014977991580963, + "learning_rate": 5.0664124180602035e-05, + "loss": 0.017, + "step": 31260 + }, + { + "action_loss": 0.0018609591061249375, + "epoch": 28.111510791366907, + "step": 31260 + }, + { + "epoch": 28.111510791366907, + "step": 31260, + "torque_loss": 0.08364728838205338 + }, + { + "epoch": 28.111510791366907, + "force_loss": 0.003630842315033078, + "step": 31260 + }, + { + "epoch": 28.1205035971223, + "grad_norm": 0.2883286476135254, + "learning_rate": 5.063656868187447e-05, + "loss": 0.0174, + "step": 31270 + }, + { + "action_loss": 0.004610724281519651, + "epoch": 28.1205035971223, + "step": 31270 + }, + { + "epoch": 28.1205035971223, + "step": 31270, + "torque_loss": 0.11254767328500748 + }, + { + "epoch": 28.1205035971223, + "force_loss": 0.008744189515709877, + "step": 31270 + }, + { + "epoch": 28.1294964028777, + "grad_norm": 0.26366499066352844, + "learning_rate": 5.060901298977413e-05, + "loss": 0.0185, + "step": 31280 + }, + { + "action_loss": 0.011458448134362698, + "epoch": 28.1294964028777, + "step": 31280 + }, + { + "epoch": 28.1294964028777, + "step": 31280, + "torque_loss": 0.13499057292938232 + }, + { + "epoch": 28.1294964028777, + "force_loss": 0.010758512653410435, + "step": 31280 + }, + { + "epoch": 28.138489208633093, + "grad_norm": 0.42329901456832886, + "learning_rate": 5.0581457112671725e-05, + "loss": 0.028, + "step": 31290 + }, + { + "action_loss": 0.006546385586261749, + "epoch": 28.138489208633093, + "step": 31290 + }, + { + "epoch": 28.138489208633093, + "step": 31290, + "torque_loss": 0.10845958441495895 + }, + { + "epoch": 28.138489208633093, + "force_loss": 0.010698576457798481, + "step": 31290 + }, + { + "epoch": 28.14748201438849, + "grad_norm": 0.28220894932746887, + "learning_rate": 5.0553901058938016e-05, + "loss": 0.0177, + "step": 31300 + }, + { + "action_loss": 0.00202188896946609, + "epoch": 28.14748201438849, + "step": 31300 + }, + { + "epoch": 28.14748201438849, + "step": 31300, + "torque_loss": 0.09039351344108582 + }, + { + "epoch": 28.14748201438849, + "force_loss": 0.0016027702949941158, + "step": 31300 + }, + { + "epoch": 28.156474820143885, + "grad_norm": 0.1555168777704239, + "learning_rate": 5.052634483694377e-05, + "loss": 0.0178, + "step": 31310 + }, + { + "action_loss": 0.003646890399977565, + "epoch": 28.156474820143885, + "step": 31310 + }, + { + "epoch": 28.156474820143885, + "step": 31310, + "torque_loss": 0.1680331975221634 + }, + { + "epoch": 28.156474820143885, + "force_loss": 0.002329886658117175, + "step": 31310 + }, + { + "epoch": 28.165467625899282, + "grad_norm": 0.3172502815723419, + "learning_rate": 5.049878845505988e-05, + "loss": 0.0232, + "step": 31320 + }, + { + "action_loss": 0.0020603330340236425, + "epoch": 28.165467625899282, + "step": 31320 + }, + { + "epoch": 28.165467625899282, + "step": 31320, + "torque_loss": 0.12851010262966156 + }, + { + "epoch": 28.165467625899282, + "force_loss": 0.001722722896374762, + "step": 31320 + }, + { + "epoch": 28.174460431654676, + "grad_norm": 0.1463720053434372, + "learning_rate": 5.047123192165721e-05, + "loss": 0.0195, + "step": 31330 + }, + { + "action_loss": 0.003479136386886239, + "epoch": 28.174460431654676, + "step": 31330 + }, + { + "epoch": 28.174460431654676, + "step": 31330, + "torque_loss": 0.11305113881826401 + }, + { + "epoch": 28.174460431654676, + "force_loss": 0.003512629074975848, + "step": 31330 + }, + { + "epoch": 28.18345323741007, + "grad_norm": 0.18156354129314423, + "learning_rate": 5.0443675245106735e-05, + "loss": 0.0184, + "step": 31340 + }, + { + "action_loss": 0.0027709228452295065, + "epoch": 28.18345323741007, + "step": 31340 + }, + { + "epoch": 28.18345323741007, + "step": 31340, + "torque_loss": 0.12847156822681427 + }, + { + "epoch": 28.18345323741007, + "force_loss": 0.002399100223556161, + "step": 31340 + }, + { + "epoch": 28.192446043165468, + "grad_norm": 0.24686631560325623, + "learning_rate": 5.0416118433779426e-05, + "loss": 0.0165, + "step": 31350 + }, + { + "action_loss": 0.007418935652822256, + "epoch": 28.192446043165468, + "step": 31350 + }, + { + "epoch": 28.192446043165468, + "step": 31350, + "torque_loss": 0.1823035627603531 + }, + { + "epoch": 28.192446043165468, + "force_loss": 0.00943506695330143, + "step": 31350 + }, + { + "epoch": 28.201438848920862, + "grad_norm": 0.21008306741714478, + "learning_rate": 5.038856149604633e-05, + "loss": 0.0178, + "step": 31360 + }, + { + "action_loss": 0.009561304934322834, + "epoch": 28.201438848920862, + "step": 31360 + }, + { + "epoch": 28.201438848920862, + "step": 31360, + "torque_loss": 0.17719674110412598 + }, + { + "epoch": 28.201438848920862, + "force_loss": 0.009404500015079975, + "step": 31360 + }, + { + "epoch": 28.21043165467626, + "grad_norm": 0.28640440106391907, + "learning_rate": 5.03610044402785e-05, + "loss": 0.0197, + "step": 31370 + }, + { + "action_loss": 0.0022651939652860165, + "epoch": 28.21043165467626, + "step": 31370 + }, + { + "epoch": 28.21043165467626, + "step": 31370, + "torque_loss": 0.12357058376073837 + }, + { + "epoch": 28.21043165467626, + "force_loss": 0.0031259600073099136, + "step": 31370 + }, + { + "epoch": 28.219424460431654, + "grad_norm": 0.1845446527004242, + "learning_rate": 5.033344727484707e-05, + "loss": 0.0214, + "step": 31380 + }, + { + "action_loss": 0.00495353527367115, + "epoch": 28.219424460431654, + "step": 31380 + }, + { + "epoch": 28.219424460431654, + "step": 31380, + "torque_loss": 0.16547518968582153 + }, + { + "epoch": 28.219424460431654, + "force_loss": 0.005297411698848009, + "step": 31380 + }, + { + "epoch": 28.22841726618705, + "grad_norm": 0.15085051953792572, + "learning_rate": 5.030589000812315e-05, + "loss": 0.0173, + "step": 31390 + }, + { + "action_loss": 0.003035797504708171, + "epoch": 28.22841726618705, + "step": 31390 + }, + { + "epoch": 28.22841726618705, + "step": 31390, + "torque_loss": 0.12055474519729614 + }, + { + "epoch": 28.22841726618705, + "force_loss": 0.006290100514888763, + "step": 31390 + }, + { + "epoch": 28.237410071942445, + "grad_norm": 0.36627402901649475, + "learning_rate": 5.027833264847793e-05, + "loss": 0.0178, + "step": 31400 + }, + { + "action_loss": 0.005598003510385752, + "epoch": 28.237410071942445, + "step": 31400 + }, + { + "epoch": 28.237410071942445, + "step": 31400, + "torque_loss": 0.1297248750925064 + }, + { + "epoch": 28.237410071942445, + "force_loss": 0.0020954047795385122, + "step": 31400 + }, + { + "epoch": 28.246402877697843, + "grad_norm": 0.22061501443386078, + "learning_rate": 5.025077520428258e-05, + "loss": 0.0213, + "step": 31410 + }, + { + "action_loss": 0.0025652453768998384, + "epoch": 28.246402877697843, + "step": 31410 + }, + { + "epoch": 28.246402877697843, + "step": 31410, + "torque_loss": 0.12894245982170105 + }, + { + "epoch": 28.246402877697843, + "force_loss": 0.003746572183445096, + "step": 31410 + }, + { + "epoch": 28.255395683453237, + "grad_norm": 0.15218910574913025, + "learning_rate": 5.022321768390837e-05, + "loss": 0.0166, + "step": 31420 + }, + { + "action_loss": 0.014568574726581573, + "epoch": 28.255395683453237, + "step": 31420 + }, + { + "epoch": 28.255395683453237, + "step": 31420, + "torque_loss": 0.12638191878795624 + }, + { + "epoch": 28.255395683453237, + "force_loss": 0.01743035577237606, + "step": 31420 + }, + { + "epoch": 28.264388489208635, + "grad_norm": 0.25253164768218994, + "learning_rate": 5.0195660095726516e-05, + "loss": 0.0199, + "step": 31430 + }, + { + "action_loss": 0.0025993958115577698, + "epoch": 28.264388489208635, + "step": 31430 + }, + { + "epoch": 28.264388489208635, + "step": 31430, + "torque_loss": 0.11008652299642563 + }, + { + "epoch": 28.264388489208635, + "force_loss": 0.0024869602639228106, + "step": 31430 + }, + { + "epoch": 28.27338129496403, + "grad_norm": 0.147151917219162, + "learning_rate": 5.016810244810829e-05, + "loss": 0.0192, + "step": 31440 + }, + { + "action_loss": 0.00280805304646492, + "epoch": 28.27338129496403, + "step": 31440 + }, + { + "epoch": 28.27338129496403, + "step": 31440, + "torque_loss": 0.1111692562699318 + }, + { + "epoch": 28.27338129496403, + "force_loss": 0.0022786271292716265, + "step": 31440 + }, + { + "epoch": 28.282374100719423, + "grad_norm": 0.56572425365448, + "learning_rate": 5.0140544749424976e-05, + "loss": 0.017, + "step": 31450 + }, + { + "action_loss": 0.00293734110891819, + "epoch": 28.282374100719423, + "step": 31450 + }, + { + "epoch": 28.282374100719423, + "step": 31450, + "torque_loss": 0.16455556452274323 + }, + { + "epoch": 28.282374100719423, + "force_loss": 0.002204336691647768, + "step": 31450 + }, + { + "epoch": 28.29136690647482, + "grad_norm": 0.27453145384788513, + "learning_rate": 5.0112987008047874e-05, + "loss": 0.0163, + "step": 31460 + }, + { + "action_loss": 0.019957998767495155, + "epoch": 28.29136690647482, + "step": 31460 + }, + { + "epoch": 28.29136690647482, + "step": 31460, + "torque_loss": 0.15143389999866486 + }, + { + "epoch": 28.29136690647482, + "force_loss": 0.025196874514222145, + "step": 31460 + }, + { + "epoch": 28.300359712230215, + "grad_norm": 0.1736888736486435, + "learning_rate": 5.008542923234831e-05, + "loss": 0.0197, + "step": 31470 + }, + { + "action_loss": 0.00510973297059536, + "epoch": 28.300359712230215, + "step": 31470 + }, + { + "epoch": 28.300359712230215, + "step": 31470, + "torque_loss": 0.11906745284795761 + }, + { + "epoch": 28.300359712230215, + "force_loss": 0.009846851229667664, + "step": 31470 + }, + { + "epoch": 28.309352517985612, + "grad_norm": 0.1319504976272583, + "learning_rate": 5.00578714306976e-05, + "loss": 0.0198, + "step": 31480 + }, + { + "action_loss": 0.00496891001239419, + "epoch": 28.309352517985612, + "step": 31480 + }, + { + "epoch": 28.309352517985612, + "step": 31480, + "torque_loss": 0.13380688428878784 + }, + { + "epoch": 28.309352517985612, + "force_loss": 0.004541265778243542, + "step": 31480 + }, + { + "epoch": 28.318345323741006, + "grad_norm": 0.2568466067314148, + "learning_rate": 5.0030313611467084e-05, + "loss": 0.0192, + "step": 31490 + }, + { + "action_loss": 0.0056123011745512486, + "epoch": 28.318345323741006, + "step": 31490 + }, + { + "epoch": 28.318345323741006, + "step": 31490, + "torque_loss": 0.12392618507146835 + }, + { + "epoch": 28.318345323741006, + "force_loss": 0.00703589478507638, + "step": 31490 + }, + { + "epoch": 28.327338129496404, + "grad_norm": 0.33176225423812866, + "learning_rate": 5.0002755783028074e-05, + "loss": 0.0177, + "step": 31500 + }, + { + "action_loss": 0.012295238673686981, + "epoch": 28.327338129496404, + "step": 31500 + }, + { + "epoch": 28.327338129496404, + "step": 31500, + "torque_loss": 0.12550227344036102 + }, + { + "epoch": 28.327338129496404, + "force_loss": 0.013909243047237396, + "step": 31500 + }, + { + "epoch": 28.336330935251798, + "grad_norm": 0.6499210000038147, + "learning_rate": 4.997519795375194e-05, + "loss": 0.0191, + "step": 31510 + }, + { + "action_loss": 0.0024094032123684883, + "epoch": 28.336330935251798, + "step": 31510 + }, + { + "epoch": 28.336330935251798, + "step": 31510, + "torque_loss": 0.11365920305252075 + }, + { + "epoch": 28.336330935251798, + "force_loss": 0.0022959013003855944, + "step": 31510 + }, + { + "epoch": 28.345323741007196, + "grad_norm": 0.14598187804222107, + "learning_rate": 4.9947640132010016e-05, + "loss": 0.0191, + "step": 31520 + }, + { + "action_loss": 0.010777813382446766, + "epoch": 28.345323741007196, + "step": 31520 + }, + { + "epoch": 28.345323741007196, + "step": 31520, + "torque_loss": 0.13041585683822632 + }, + { + "epoch": 28.345323741007196, + "force_loss": 0.01159286592155695, + "step": 31520 + }, + { + "epoch": 28.35431654676259, + "grad_norm": 0.28974649310112, + "learning_rate": 4.9920082326173625e-05, + "loss": 0.0189, + "step": 31530 + }, + { + "action_loss": 0.007011393550783396, + "epoch": 28.35431654676259, + "step": 31530 + }, + { + "epoch": 28.35431654676259, + "step": 31530, + "torque_loss": 0.12972255051136017 + }, + { + "epoch": 28.35431654676259, + "force_loss": 0.003591722808778286, + "step": 31530 + }, + { + "epoch": 28.363309352517987, + "grad_norm": 0.2784913182258606, + "learning_rate": 4.9892524544614114e-05, + "loss": 0.0204, + "step": 31540 + }, + { + "action_loss": 0.00864789541810751, + "epoch": 28.363309352517987, + "step": 31540 + }, + { + "epoch": 28.363309352517987, + "step": 31540, + "torque_loss": 0.12829583883285522 + }, + { + "epoch": 28.363309352517987, + "force_loss": 0.009747441858053207, + "step": 31540 + }, + { + "epoch": 28.37230215827338, + "grad_norm": 0.2597323954105377, + "learning_rate": 4.986496679570283e-05, + "loss": 0.0197, + "step": 31550 + }, + { + "action_loss": 0.009286990389227867, + "epoch": 28.37230215827338, + "step": 31550 + }, + { + "epoch": 28.37230215827338, + "step": 31550, + "torque_loss": 0.11492288112640381 + }, + { + "epoch": 28.37230215827338, + "force_loss": 0.006870301906019449, + "step": 31550 + }, + { + "epoch": 28.381294964028775, + "grad_norm": 0.6100925207138062, + "learning_rate": 4.983740908781105e-05, + "loss": 0.0217, + "step": 31560 + }, + { + "action_loss": 0.002930929185822606, + "epoch": 28.381294964028775, + "step": 31560 + }, + { + "epoch": 28.381294964028775, + "step": 31560, + "torque_loss": 0.15469162166118622 + }, + { + "epoch": 28.381294964028775, + "force_loss": 0.003778379410505295, + "step": 31560 + }, + { + "epoch": 28.390287769784173, + "grad_norm": 0.3829963505268097, + "learning_rate": 4.9809851429310116e-05, + "loss": 0.0175, + "step": 31570 + }, + { + "action_loss": 0.003712309757247567, + "epoch": 28.390287769784173, + "step": 31570 + }, + { + "epoch": 28.390287769784173, + "step": 31570, + "torque_loss": 0.12879160046577454 + }, + { + "epoch": 28.390287769784173, + "force_loss": 0.0027420343831181526, + "step": 31570 + }, + { + "epoch": 28.399280575539567, + "grad_norm": 0.23992829024791718, + "learning_rate": 4.9782293828571275e-05, + "loss": 0.022, + "step": 31580 + }, + { + "action_loss": 0.004066178109496832, + "epoch": 28.399280575539567, + "step": 31580 + }, + { + "epoch": 28.399280575539567, + "step": 31580, + "torque_loss": 0.08748013526201248 + }, + { + "epoch": 28.399280575539567, + "force_loss": 0.002831103513017297, + "step": 31580 + }, + { + "epoch": 28.408273381294965, + "grad_norm": 0.2187284380197525, + "learning_rate": 4.9754736293965846e-05, + "loss": 0.0167, + "step": 31590 + }, + { + "action_loss": 0.0063866134732961655, + "epoch": 28.408273381294965, + "step": 31590 + }, + { + "epoch": 28.408273381294965, + "step": 31590, + "torque_loss": 0.15923090279102325 + }, + { + "epoch": 28.408273381294965, + "force_loss": 0.0038600738625973463, + "step": 31590 + }, + { + "epoch": 28.41726618705036, + "grad_norm": 0.5560687780380249, + "learning_rate": 4.972717883386502e-05, + "loss": 0.0219, + "step": 31600 + }, + { + "action_loss": 0.0023293609265238047, + "epoch": 28.41726618705036, + "step": 31600 + }, + { + "epoch": 28.41726618705036, + "step": 31600, + "torque_loss": 0.1120397076010704 + }, + { + "epoch": 28.41726618705036, + "force_loss": 0.001863547950051725, + "step": 31600 + }, + { + "epoch": 28.426258992805757, + "grad_norm": 0.2414456009864807, + "learning_rate": 4.9699621456640075e-05, + "loss": 0.0192, + "step": 31610 + }, + { + "action_loss": 0.0022713118232786655, + "epoch": 28.426258992805757, + "step": 31610 + }, + { + "epoch": 28.426258992805757, + "step": 31610, + "torque_loss": 0.09746291488409042 + }, + { + "epoch": 28.426258992805757, + "force_loss": 0.0019517071777954698, + "step": 31610 + }, + { + "epoch": 28.43525179856115, + "grad_norm": 0.44207125902175903, + "learning_rate": 4.9672064170662214e-05, + "loss": 0.0166, + "step": 31620 + }, + { + "action_loss": 0.0016194864874705672, + "epoch": 28.43525179856115, + "step": 31620 + }, + { + "epoch": 28.43525179856115, + "step": 31620, + "torque_loss": 0.11972985416650772 + }, + { + "epoch": 28.43525179856115, + "force_loss": 0.0015085259219631553, + "step": 31620 + }, + { + "epoch": 28.444244604316548, + "grad_norm": 0.6432380080223083, + "learning_rate": 4.9644506984302583e-05, + "loss": 0.0229, + "step": 31630 + }, + { + "action_loss": 0.0028622543904930353, + "epoch": 28.444244604316548, + "step": 31630 + }, + { + "epoch": 28.444244604316548, + "step": 31630, + "torque_loss": 0.11647132784128189 + }, + { + "epoch": 28.444244604316548, + "force_loss": 0.0022364670876413584, + "step": 31630 + }, + { + "epoch": 28.453237410071942, + "grad_norm": 0.2759113013744354, + "learning_rate": 4.9616949905932356e-05, + "loss": 0.0184, + "step": 31640 + }, + { + "action_loss": 0.0022402871400117874, + "epoch": 28.453237410071942, + "step": 31640 + }, + { + "epoch": 28.453237410071942, + "step": 31640, + "torque_loss": 0.07226861268281937 + }, + { + "epoch": 28.453237410071942, + "force_loss": 0.002943779109045863, + "step": 31640 + }, + { + "epoch": 28.46223021582734, + "grad_norm": 0.26935675740242004, + "learning_rate": 4.9589392943922615e-05, + "loss": 0.02, + "step": 31650 + }, + { + "action_loss": 0.003252230817452073, + "epoch": 28.46223021582734, + "step": 31650 + }, + { + "epoch": 28.46223021582734, + "step": 31650, + "torque_loss": 0.13012158870697021 + }, + { + "epoch": 28.46223021582734, + "force_loss": 0.0021820305846631527, + "step": 31650 + }, + { + "epoch": 28.471223021582734, + "grad_norm": 0.3346239924430847, + "learning_rate": 4.956183610664447e-05, + "loss": 0.0207, + "step": 31660 + }, + { + "action_loss": 0.004996235948055983, + "epoch": 28.471223021582734, + "step": 31660 + }, + { + "epoch": 28.471223021582734, + "step": 31660, + "torque_loss": 0.1323365569114685 + }, + { + "epoch": 28.471223021582734, + "force_loss": 0.006417748052626848, + "step": 31660 + }, + { + "epoch": 28.480215827338128, + "grad_norm": 0.2346462607383728, + "learning_rate": 4.9534279402468945e-05, + "loss": 0.0172, + "step": 31670 + }, + { + "action_loss": 0.015941092744469643, + "epoch": 28.480215827338128, + "step": 31670 + }, + { + "epoch": 28.480215827338128, + "step": 31670, + "torque_loss": 0.1559155136346817 + }, + { + "epoch": 28.480215827338128, + "force_loss": 0.019919589161872864, + "step": 31670 + }, + { + "epoch": 28.489208633093526, + "grad_norm": 0.11588010936975479, + "learning_rate": 4.9506722839767036e-05, + "loss": 0.0182, + "step": 31680 + }, + { + "action_loss": 0.008000590838491917, + "epoch": 28.489208633093526, + "step": 31680 + }, + { + "epoch": 28.489208633093526, + "step": 31680, + "torque_loss": 0.12118353694677353 + }, + { + "epoch": 28.489208633093526, + "force_loss": 0.0066853477619588375, + "step": 31680 + }, + { + "epoch": 28.49820143884892, + "grad_norm": 0.18816101551055908, + "learning_rate": 4.947916642690972e-05, + "loss": 0.026, + "step": 31690 + }, + { + "action_loss": 0.0025853884872049093, + "epoch": 28.49820143884892, + "step": 31690 + }, + { + "epoch": 28.49820143884892, + "step": 31690, + "torque_loss": 0.12101741880178452 + }, + { + "epoch": 28.49820143884892, + "force_loss": 0.0020138428080826998, + "step": 31690 + }, + { + "epoch": 28.507194244604317, + "grad_norm": 0.16970165073871613, + "learning_rate": 4.9451610172267874e-05, + "loss": 0.0155, + "step": 31700 + }, + { + "action_loss": 0.00584329292178154, + "epoch": 28.507194244604317, + "step": 31700 + }, + { + "epoch": 28.507194244604317, + "step": 31700, + "torque_loss": 0.13173158466815948 + }, + { + "epoch": 28.507194244604317, + "force_loss": 0.006607357412576675, + "step": 31700 + }, + { + "epoch": 28.51618705035971, + "grad_norm": 0.5986936688423157, + "learning_rate": 4.9424054084212376e-05, + "loss": 0.0179, + "step": 31710 + }, + { + "action_loss": 0.002945384941995144, + "epoch": 28.51618705035971, + "step": 31710 + }, + { + "epoch": 28.51618705035971, + "step": 31710, + "torque_loss": 0.1225002184510231 + }, + { + "epoch": 28.51618705035971, + "force_loss": 0.00539809325709939, + "step": 31710 + }, + { + "epoch": 28.52517985611511, + "grad_norm": 0.25696679949760437, + "learning_rate": 4.939649817111407e-05, + "loss": 0.0181, + "step": 31720 + }, + { + "action_loss": 0.0038010275457054377, + "epoch": 28.52517985611511, + "step": 31720 + }, + { + "epoch": 28.52517985611511, + "step": 31720, + "torque_loss": 0.14548729360103607 + }, + { + "epoch": 28.52517985611511, + "force_loss": 0.0033391022589057684, + "step": 31720 + }, + { + "epoch": 28.534172661870503, + "grad_norm": 0.23569805920124054, + "learning_rate": 4.936894244134365e-05, + "loss": 0.0171, + "step": 31730 + }, + { + "action_loss": 0.0066145905293524265, + "epoch": 28.534172661870503, + "step": 31730 + }, + { + "epoch": 28.534172661870503, + "step": 31730, + "torque_loss": 0.17376501858234406 + }, + { + "epoch": 28.534172661870503, + "force_loss": 0.005036663729697466, + "step": 31730 + }, + { + "epoch": 28.5431654676259, + "grad_norm": 0.27967146039009094, + "learning_rate": 4.9341386903271886e-05, + "loss": 0.0185, + "step": 31740 + }, + { + "action_loss": 0.014002300798892975, + "epoch": 28.5431654676259, + "step": 31740 + }, + { + "epoch": 28.5431654676259, + "step": 31740, + "torque_loss": 0.1286267787218094 + }, + { + "epoch": 28.5431654676259, + "force_loss": 0.014078046195209026, + "step": 31740 + }, + { + "epoch": 28.552158273381295, + "grad_norm": 0.2546086013317108, + "learning_rate": 4.931383156526936e-05, + "loss": 0.0229, + "step": 31750 + }, + { + "action_loss": 0.015389028005301952, + "epoch": 28.552158273381295, + "step": 31750 + }, + { + "epoch": 28.552158273381295, + "step": 31750, + "torque_loss": 0.10529843717813492 + }, + { + "epoch": 28.552158273381295, + "force_loss": 0.013241861946880817, + "step": 31750 + }, + { + "epoch": 28.56115107913669, + "grad_norm": 0.19817949831485748, + "learning_rate": 4.92862764357067e-05, + "loss": 0.0211, + "step": 31760 + }, + { + "action_loss": 0.0013725543394684792, + "epoch": 28.56115107913669, + "step": 31760 + }, + { + "epoch": 28.56115107913669, + "step": 31760, + "torque_loss": 0.10182621330022812 + }, + { + "epoch": 28.56115107913669, + "force_loss": 0.0017656233394518495, + "step": 31760 + }, + { + "epoch": 28.570143884892087, + "grad_norm": 0.13827678561210632, + "learning_rate": 4.925872152295443e-05, + "loss": 0.0171, + "step": 31770 + }, + { + "action_loss": 0.0016914677107706666, + "epoch": 28.570143884892087, + "step": 31770 + }, + { + "epoch": 28.570143884892087, + "step": 31770, + "torque_loss": 0.07993989437818527 + }, + { + "epoch": 28.570143884892087, + "force_loss": 0.0034969402477145195, + "step": 31770 + }, + { + "epoch": 28.57913669064748, + "grad_norm": 0.2877127528190613, + "learning_rate": 4.923116683538296e-05, + "loss": 0.0188, + "step": 31780 + }, + { + "action_loss": 0.010105691850185394, + "epoch": 28.57913669064748, + "step": 31780 + }, + { + "epoch": 28.57913669064748, + "step": 31780, + "torque_loss": 0.11479947715997696 + }, + { + "epoch": 28.57913669064748, + "force_loss": 0.011219676584005356, + "step": 31780 + }, + { + "epoch": 28.58812949640288, + "grad_norm": 0.11982277780771255, + "learning_rate": 4.920361238136273e-05, + "loss": 0.0191, + "step": 31790 + }, + { + "action_loss": 0.007315091788768768, + "epoch": 28.58812949640288, + "step": 31790 + }, + { + "epoch": 28.58812949640288, + "step": 31790, + "torque_loss": 0.1151256263256073 + }, + { + "epoch": 28.58812949640288, + "force_loss": 0.010056677274405956, + "step": 31790 + }, + { + "epoch": 28.597122302158272, + "grad_norm": 0.09665153920650482, + "learning_rate": 4.9176058169264014e-05, + "loss": 0.0193, + "step": 31800 + }, + { + "action_loss": 0.00902259536087513, + "epoch": 28.597122302158272, + "step": 31800 + }, + { + "epoch": 28.597122302158272, + "step": 31800, + "torque_loss": 0.10882830619812012 + }, + { + "epoch": 28.597122302158272, + "force_loss": 0.022170864045619965, + "step": 31800 + }, + { + "epoch": 28.60611510791367, + "grad_norm": 0.16901050508022308, + "learning_rate": 4.9148504207457074e-05, + "loss": 0.0212, + "step": 31810 + }, + { + "action_loss": 0.002538155997171998, + "epoch": 28.60611510791367, + "step": 31810 + }, + { + "epoch": 28.60611510791367, + "step": 31810, + "torque_loss": 0.15164512395858765 + }, + { + "epoch": 28.60611510791367, + "force_loss": 0.002965029561892152, + "step": 31810 + }, + { + "epoch": 28.615107913669064, + "grad_norm": 0.2416938692331314, + "learning_rate": 4.912095050431208e-05, + "loss": 0.0217, + "step": 31820 + }, + { + "action_loss": 0.004177636466920376, + "epoch": 28.615107913669064, + "step": 31820 + }, + { + "epoch": 28.615107913669064, + "step": 31820, + "torque_loss": 0.09113707393407822 + }, + { + "epoch": 28.615107913669064, + "force_loss": 0.008358192630112171, + "step": 31820 + }, + { + "epoch": 28.62410071942446, + "grad_norm": 0.1306232362985611, + "learning_rate": 4.909339706819911e-05, + "loss": 0.0194, + "step": 31830 + }, + { + "action_loss": 0.00744277611374855, + "epoch": 28.62410071942446, + "step": 31830 + }, + { + "epoch": 28.62410071942446, + "step": 31830, + "torque_loss": 0.15952813625335693 + }, + { + "epoch": 28.62410071942446, + "force_loss": 0.004684206563979387, + "step": 31830 + }, + { + "epoch": 28.633093525179856, + "grad_norm": 0.11685939133167267, + "learning_rate": 4.906584390748819e-05, + "loss": 0.0181, + "step": 31840 + }, + { + "action_loss": 0.010659020394086838, + "epoch": 28.633093525179856, + "step": 31840 + }, + { + "epoch": 28.633093525179856, + "step": 31840, + "torque_loss": 0.12336523085832596 + }, + { + "epoch": 28.633093525179856, + "force_loss": 0.023605575785040855, + "step": 31840 + }, + { + "epoch": 28.642086330935253, + "grad_norm": 0.3025333881378174, + "learning_rate": 4.9038291030549195e-05, + "loss": 0.0285, + "step": 31850 + }, + { + "action_loss": 0.0026493777986615896, + "epoch": 28.642086330935253, + "step": 31850 + }, + { + "epoch": 28.642086330935253, + "step": 31850, + "torque_loss": 0.1358613222837448 + }, + { + "epoch": 28.642086330935253, + "force_loss": 0.003288527950644493, + "step": 31850 + }, + { + "epoch": 28.651079136690647, + "grad_norm": 0.1424875408411026, + "learning_rate": 4.9010738445751995e-05, + "loss": 0.0187, + "step": 31860 + }, + { + "action_loss": 0.006690997630357742, + "epoch": 28.651079136690647, + "step": 31860 + }, + { + "epoch": 28.651079136690647, + "step": 31860, + "torque_loss": 0.1267218440771103 + }, + { + "epoch": 28.651079136690647, + "force_loss": 0.010846694000065327, + "step": 31860 + }, + { + "epoch": 28.66007194244604, + "grad_norm": 0.10953143239021301, + "learning_rate": 4.8983186161466364e-05, + "loss": 0.0172, + "step": 31870 + }, + { + "action_loss": 0.0069437469355762005, + "epoch": 28.66007194244604, + "step": 31870 + }, + { + "epoch": 28.66007194244604, + "step": 31870, + "torque_loss": 0.13778366148471832 + }, + { + "epoch": 28.66007194244604, + "force_loss": 0.0042550694197416306, + "step": 31870 + }, + { + "epoch": 28.66906474820144, + "grad_norm": 0.22482098639011383, + "learning_rate": 4.89556341860619e-05, + "loss": 0.0185, + "step": 31880 + }, + { + "action_loss": 0.00789960753172636, + "epoch": 28.66906474820144, + "step": 31880 + }, + { + "epoch": 28.66906474820144, + "step": 31880, + "torque_loss": 0.10606253147125244 + }, + { + "epoch": 28.66906474820144, + "force_loss": 0.01030336506664753, + "step": 31880 + }, + { + "epoch": 28.678057553956833, + "grad_norm": 0.25729864835739136, + "learning_rate": 4.892808252790822e-05, + "loss": 0.0199, + "step": 31890 + }, + { + "action_loss": 0.0035116756334900856, + "epoch": 28.678057553956833, + "step": 31890 + }, + { + "epoch": 28.678057553956833, + "step": 31890, + "torque_loss": 0.13032568991184235 + }, + { + "epoch": 28.678057553956833, + "force_loss": 0.002061855746433139, + "step": 31890 + }, + { + "epoch": 28.68705035971223, + "grad_norm": 0.21324051916599274, + "learning_rate": 4.890053119537475e-05, + "loss": 0.0184, + "step": 31900 + }, + { + "action_loss": 0.009989923797547817, + "epoch": 28.68705035971223, + "step": 31900 + }, + { + "epoch": 28.68705035971223, + "step": 31900, + "torque_loss": 0.11143625527620316 + }, + { + "epoch": 28.68705035971223, + "force_loss": 0.014830250293016434, + "step": 31900 + }, + { + "epoch": 28.696043165467625, + "grad_norm": 0.19445998966693878, + "learning_rate": 4.887298019683087e-05, + "loss": 0.0224, + "step": 31910 + }, + { + "action_loss": 0.004206265322864056, + "epoch": 28.696043165467625, + "step": 31910 + }, + { + "epoch": 28.696043165467625, + "step": 31910, + "torque_loss": 0.10461030155420303 + }, + { + "epoch": 28.696043165467625, + "force_loss": 0.002050551353022456, + "step": 31910 + }, + { + "epoch": 28.705035971223023, + "grad_norm": 0.11839746683835983, + "learning_rate": 4.884542954064587e-05, + "loss": 0.0231, + "step": 31920 + }, + { + "action_loss": 0.00904050562530756, + "epoch": 28.705035971223023, + "step": 31920 + }, + { + "epoch": 28.705035971223023, + "step": 31920, + "torque_loss": 0.16358019411563873 + }, + { + "epoch": 28.705035971223023, + "force_loss": 0.007803190499544144, + "step": 31920 + }, + { + "epoch": 28.714028776978417, + "grad_norm": 0.1934649497270584, + "learning_rate": 4.881787923518887e-05, + "loss": 0.0199, + "step": 31930 + }, + { + "action_loss": 0.004410514608025551, + "epoch": 28.714028776978417, + "step": 31930 + }, + { + "epoch": 28.714028776978417, + "step": 31930, + "torque_loss": 0.0705103725194931 + }, + { + "epoch": 28.714028776978417, + "force_loss": 0.007975791580975056, + "step": 31930 + }, + { + "epoch": 28.723021582733814, + "grad_norm": 0.44208401441574097, + "learning_rate": 4.879032928882896e-05, + "loss": 0.0177, + "step": 31940 + }, + { + "action_loss": 0.0029375366866588593, + "epoch": 28.723021582733814, + "step": 31940 + }, + { + "epoch": 28.723021582733814, + "step": 31940, + "torque_loss": 0.1289682239294052 + }, + { + "epoch": 28.723021582733814, + "force_loss": 0.0037312351632863283, + "step": 31940 + }, + { + "epoch": 28.73201438848921, + "grad_norm": 0.14763537049293518, + "learning_rate": 4.876277970993505e-05, + "loss": 0.0171, + "step": 31950 + }, + { + "action_loss": 0.007328465580940247, + "epoch": 28.73201438848921, + "step": 31950 + }, + { + "epoch": 28.73201438848921, + "step": 31950, + "torque_loss": 0.19901573657989502 + }, + { + "epoch": 28.73201438848921, + "force_loss": 0.008131143637001514, + "step": 31950 + }, + { + "epoch": 28.741007194244606, + "grad_norm": 0.1703796088695526, + "learning_rate": 4.873523050687602e-05, + "loss": 0.0209, + "step": 31960 + }, + { + "action_loss": 0.003658779663965106, + "epoch": 28.741007194244606, + "step": 31960 + }, + { + "epoch": 28.741007194244606, + "step": 31960, + "torque_loss": 0.11144045740365982 + }, + { + "epoch": 28.741007194244606, + "force_loss": 0.002541125286370516, + "step": 31960 + }, + { + "epoch": 28.75, + "grad_norm": 0.12297157943248749, + "learning_rate": 4.870768168802056e-05, + "loss": 0.0169, + "step": 31970 + }, + { + "action_loss": 0.0037621979136019945, + "epoch": 28.75, + "step": 31970 + }, + { + "epoch": 28.75, + "step": 31970, + "torque_loss": 0.12326759099960327 + }, + { + "epoch": 28.75, + "force_loss": 0.0085930610075593, + "step": 31970 + }, + { + "epoch": 28.758992805755394, + "grad_norm": 0.192536398768425, + "learning_rate": 4.868013326173728e-05, + "loss": 0.0177, + "step": 31980 + }, + { + "action_loss": 0.003589194267988205, + "epoch": 28.758992805755394, + "step": 31980 + }, + { + "epoch": 28.758992805755394, + "step": 31980, + "torque_loss": 0.11283278465270996 + }, + { + "epoch": 28.758992805755394, + "force_loss": 0.010618864558637142, + "step": 31980 + }, + { + "epoch": 28.76798561151079, + "grad_norm": 0.3223935663700104, + "learning_rate": 4.865258523639468e-05, + "loss": 0.0172, + "step": 31990 + }, + { + "action_loss": 0.0027538712602108717, + "epoch": 28.76798561151079, + "step": 31990 + }, + { + "epoch": 28.76798561151079, + "step": 31990, + "torque_loss": 0.15496884286403656 + }, + { + "epoch": 28.76798561151079, + "force_loss": 0.003215826814994216, + "step": 31990 + }, + { + "epoch": 28.776978417266186, + "grad_norm": 0.18327292799949646, + "learning_rate": 4.862503762036109e-05, + "loss": 0.0189, + "step": 32000 + }, + { + "action_loss": 0.012443861924111843, + "epoch": 28.776978417266186, + "step": 32000 + }, + { + "epoch": 28.776978417266186, + "step": 32000, + "torque_loss": 0.13682183623313904 + }, + { + "epoch": 28.776978417266186, + "force_loss": 0.010915939696133137, + "step": 32000 + }, + { + "epoch": 28.785971223021583, + "grad_norm": 0.13749296963214874, + "learning_rate": 4.859749042200478e-05, + "loss": 0.0184, + "step": 32010 + }, + { + "action_loss": 0.0048415870405733585, + "epoch": 28.785971223021583, + "step": 32010 + }, + { + "epoch": 28.785971223021583, + "step": 32010, + "torque_loss": 0.1794721633195877 + }, + { + "epoch": 28.785971223021583, + "force_loss": 0.007957744412124157, + "step": 32010 + }, + { + "epoch": 28.794964028776977, + "grad_norm": 0.16890855133533478, + "learning_rate": 4.856994364969384e-05, + "loss": 0.0186, + "step": 32020 + }, + { + "action_loss": 0.0025698344688862562, + "epoch": 28.794964028776977, + "step": 32020 + }, + { + "epoch": 28.794964028776977, + "step": 32020, + "torque_loss": 0.11772950738668442 + }, + { + "epoch": 28.794964028776977, + "force_loss": 0.0036689986009150743, + "step": 32020 + }, + { + "epoch": 28.803956834532375, + "grad_norm": 0.11281709372997284, + "learning_rate": 4.854239731179625e-05, + "loss": 0.0153, + "step": 32030 + }, + { + "action_loss": 0.0019129790598526597, + "epoch": 28.803956834532375, + "step": 32030 + }, + { + "epoch": 28.803956834532375, + "step": 32030, + "torque_loss": 0.1618196964263916 + }, + { + "epoch": 28.803956834532375, + "force_loss": 0.0015649019042029977, + "step": 32030 + }, + { + "epoch": 28.81294964028777, + "grad_norm": 0.2970485985279083, + "learning_rate": 4.85148514166799e-05, + "loss": 0.0177, + "step": 32040 + }, + { + "action_loss": 0.0009857051772996783, + "epoch": 28.81294964028777, + "step": 32040 + }, + { + "epoch": 28.81294964028777, + "step": 32040, + "torque_loss": 0.10178893059492111 + }, + { + "epoch": 28.81294964028777, + "force_loss": 0.0007166683790273964, + "step": 32040 + }, + { + "epoch": 28.821942446043167, + "grad_norm": 0.17108264565467834, + "learning_rate": 4.8487305972712456e-05, + "loss": 0.0178, + "step": 32050 + }, + { + "action_loss": 0.007683031260967255, + "epoch": 28.821942446043167, + "step": 32050 + }, + { + "epoch": 28.821942446043167, + "step": 32050, + "torque_loss": 0.1575421839952469 + }, + { + "epoch": 28.821942446043167, + "force_loss": 0.007862170226871967, + "step": 32050 + }, + { + "epoch": 28.83093525179856, + "grad_norm": 0.24892657995224, + "learning_rate": 4.8459760988261526e-05, + "loss": 0.0165, + "step": 32060 + }, + { + "action_loss": 0.0024876922834664583, + "epoch": 28.83093525179856, + "step": 32060 + }, + { + "epoch": 28.83093525179856, + "step": 32060, + "torque_loss": 0.14223970472812653 + }, + { + "epoch": 28.83093525179856, + "force_loss": 0.001781096332706511, + "step": 32060 + }, + { + "epoch": 28.83992805755396, + "grad_norm": 0.1431387960910797, + "learning_rate": 4.843221647169453e-05, + "loss": 0.0207, + "step": 32070 + }, + { + "action_loss": 0.005526240915060043, + "epoch": 28.83992805755396, + "step": 32070 + }, + { + "epoch": 28.83992805755396, + "step": 32070, + "torque_loss": 0.10630917549133301 + }, + { + "epoch": 28.83992805755396, + "force_loss": 0.007075069472193718, + "step": 32070 + }, + { + "epoch": 28.848920863309353, + "grad_norm": 0.17426855862140656, + "learning_rate": 4.840467243137878e-05, + "loss": 0.0206, + "step": 32080 + }, + { + "action_loss": 0.002417096635326743, + "epoch": 28.848920863309353, + "step": 32080 + }, + { + "epoch": 28.848920863309353, + "step": 32080, + "torque_loss": 0.13800916075706482 + }, + { + "epoch": 28.848920863309353, + "force_loss": 0.0017435640329495072, + "step": 32080 + }, + { + "epoch": 28.857913669064747, + "grad_norm": 0.3956768214702606, + "learning_rate": 4.837712887568143e-05, + "loss": 0.0209, + "step": 32090 + }, + { + "action_loss": 0.004531675949692726, + "epoch": 28.857913669064747, + "step": 32090 + }, + { + "epoch": 28.857913669064747, + "step": 32090, + "torque_loss": 0.13119183480739594 + }, + { + "epoch": 28.857913669064747, + "force_loss": 0.006331259850412607, + "step": 32090 + }, + { + "epoch": 28.866906474820144, + "grad_norm": 0.24706391990184784, + "learning_rate": 4.8349585812969464e-05, + "loss": 0.0191, + "step": 32100 + }, + { + "action_loss": 0.006737230345606804, + "epoch": 28.866906474820144, + "step": 32100 + }, + { + "epoch": 28.866906474820144, + "step": 32100, + "torque_loss": 0.08923038095235825 + }, + { + "epoch": 28.866906474820144, + "force_loss": 0.006674828007817268, + "step": 32100 + }, + { + "epoch": 28.87589928057554, + "grad_norm": 0.19916436076164246, + "learning_rate": 4.8322043251609775e-05, + "loss": 0.0185, + "step": 32110 + }, + { + "action_loss": 0.001197325182147324, + "epoch": 28.87589928057554, + "step": 32110 + }, + { + "epoch": 28.87589928057554, + "step": 32110, + "torque_loss": 0.13352930545806885 + }, + { + "epoch": 28.87589928057554, + "force_loss": 0.0032023098319768906, + "step": 32110 + }, + { + "epoch": 28.884892086330936, + "grad_norm": 0.293727308511734, + "learning_rate": 4.8294501199969015e-05, + "loss": 0.0178, + "step": 32120 + }, + { + "action_loss": 0.0013039332116022706, + "epoch": 28.884892086330936, + "step": 32120 + }, + { + "epoch": 28.884892086330936, + "step": 32120, + "torque_loss": 0.09936841577291489 + }, + { + "epoch": 28.884892086330936, + "force_loss": 0.002565234201028943, + "step": 32120 + }, + { + "epoch": 28.89388489208633, + "grad_norm": 0.1831299513578415, + "learning_rate": 4.826695966641376e-05, + "loss": 0.0174, + "step": 32130 + }, + { + "action_loss": 0.0028378423303365707, + "epoch": 28.89388489208633, + "step": 32130 + }, + { + "epoch": 28.89388489208633, + "step": 32130, + "torque_loss": 0.1480691134929657 + }, + { + "epoch": 28.89388489208633, + "force_loss": 0.0015282105887308717, + "step": 32130 + }, + { + "epoch": 28.902877697841728, + "grad_norm": 0.27493205666542053, + "learning_rate": 4.823941865931043e-05, + "loss": 0.0192, + "step": 32140 + }, + { + "action_loss": 0.0119972825050354, + "epoch": 28.902877697841728, + "step": 32140 + }, + { + "epoch": 28.902877697841728, + "step": 32140, + "torque_loss": 0.16558034718036652 + }, + { + "epoch": 28.902877697841728, + "force_loss": 0.013252168893814087, + "step": 32140 + }, + { + "epoch": 28.91187050359712, + "grad_norm": 0.27799856662750244, + "learning_rate": 4.82118781870252e-05, + "loss": 0.0218, + "step": 32150 + }, + { + "action_loss": 0.0039338041096925735, + "epoch": 28.91187050359712, + "step": 32150 + }, + { + "epoch": 28.91187050359712, + "step": 32150, + "torque_loss": 0.1132541075348854 + }, + { + "epoch": 28.91187050359712, + "force_loss": 0.011024055071175098, + "step": 32150 + }, + { + "epoch": 28.92086330935252, + "grad_norm": 0.4246889650821686, + "learning_rate": 4.8184338257924185e-05, + "loss": 0.0188, + "step": 32160 + }, + { + "action_loss": 0.01281951367855072, + "epoch": 28.92086330935252, + "step": 32160 + }, + { + "epoch": 28.92086330935252, + "step": 32160, + "torque_loss": 0.14115017652511597 + }, + { + "epoch": 28.92086330935252, + "force_loss": 0.010982475243508816, + "step": 32160 + }, + { + "epoch": 28.929856115107913, + "grad_norm": 0.3644884526729584, + "learning_rate": 4.815679888037324e-05, + "loss": 0.0215, + "step": 32170 + }, + { + "action_loss": 0.001956092892214656, + "epoch": 28.929856115107913, + "step": 32170 + }, + { + "epoch": 28.929856115107913, + "step": 32170, + "torque_loss": 0.11333179473876953 + }, + { + "epoch": 28.929856115107913, + "force_loss": 0.0022038861643522978, + "step": 32170 + }, + { + "epoch": 28.93884892086331, + "grad_norm": 0.2975490093231201, + "learning_rate": 4.8129260062738135e-05, + "loss": 0.0165, + "step": 32180 + }, + { + "action_loss": 0.0011761150090023875, + "epoch": 28.93884892086331, + "step": 32180 + }, + { + "epoch": 28.93884892086331, + "step": 32180, + "torque_loss": 0.0744553878903389 + }, + { + "epoch": 28.93884892086331, + "force_loss": 0.0010741964215412736, + "step": 32180 + }, + { + "epoch": 28.947841726618705, + "grad_norm": 0.2999323606491089, + "learning_rate": 4.810172181338445e-05, + "loss": 0.0182, + "step": 32190 + }, + { + "action_loss": 0.023830803111195564, + "epoch": 28.947841726618705, + "step": 32190 + }, + { + "epoch": 28.947841726618705, + "step": 32190, + "torque_loss": 0.10264798998832703 + }, + { + "epoch": 28.947841726618705, + "force_loss": 0.03188973665237427, + "step": 32190 + }, + { + "epoch": 28.9568345323741, + "grad_norm": 0.1338382512331009, + "learning_rate": 4.807418414067753e-05, + "loss": 0.0183, + "step": 32200 + }, + { + "action_loss": 0.0027587839867919683, + "epoch": 28.9568345323741, + "step": 32200 + }, + { + "epoch": 28.9568345323741, + "step": 32200, + "torque_loss": 0.10562457889318466 + }, + { + "epoch": 28.9568345323741, + "force_loss": 0.002024925546720624, + "step": 32200 + }, + { + "epoch": 28.965827338129497, + "grad_norm": 0.13481660187244415, + "learning_rate": 4.804664705298264e-05, + "loss": 0.0214, + "step": 32210 + }, + { + "action_loss": 0.0025620502419769764, + "epoch": 28.965827338129497, + "step": 32210 + }, + { + "epoch": 28.965827338129497, + "step": 32210, + "torque_loss": 0.12761642038822174 + }, + { + "epoch": 28.965827338129497, + "force_loss": 0.004373255651444197, + "step": 32210 + }, + { + "epoch": 28.97482014388489, + "grad_norm": 0.2162230908870697, + "learning_rate": 4.80191105586648e-05, + "loss": 0.0172, + "step": 32220 + }, + { + "action_loss": 0.002540873596444726, + "epoch": 28.97482014388489, + "step": 32220 + }, + { + "epoch": 28.97482014388489, + "step": 32220, + "torque_loss": 0.16925716400146484 + }, + { + "epoch": 28.97482014388489, + "force_loss": 0.002540864981710911, + "step": 32220 + }, + { + "epoch": 28.98381294964029, + "grad_norm": 0.4287576973438263, + "learning_rate": 4.799157466608886e-05, + "loss": 0.0177, + "step": 32230 + }, + { + "action_loss": 0.005182547960430384, + "epoch": 28.98381294964029, + "step": 32230 + }, + { + "epoch": 28.98381294964029, + "step": 32230, + "torque_loss": 0.15841057896614075 + }, + { + "epoch": 28.98381294964029, + "force_loss": 0.002919477177783847, + "step": 32230 + }, + { + "epoch": 28.992805755395683, + "grad_norm": 0.10567975044250488, + "learning_rate": 4.796403938361951e-05, + "loss": 0.0198, + "step": 32240 + }, + { + "action_loss": 0.0012668374693021178, + "epoch": 28.992805755395683, + "step": 32240 + }, + { + "epoch": 28.992805755395683, + "step": 32240, + "torque_loss": 0.11107317358255386 + }, + { + "epoch": 28.992805755395683, + "force_loss": 0.001408244133926928, + "step": 32240 + }, + { + "epoch": 29.00179856115108, + "grad_norm": 0.41919589042663574, + "learning_rate": 4.793650471962123e-05, + "loss": 0.0184, + "step": 32250 + }, + { + "action_loss": 0.0016538627678528428, + "epoch": 29.00179856115108, + "step": 32250 + }, + { + "epoch": 29.00179856115108, + "step": 32250, + "torque_loss": 0.09937354922294617 + }, + { + "epoch": 29.00179856115108, + "force_loss": 0.0019216047367081046, + "step": 32250 + }, + { + "epoch": 29.010791366906474, + "grad_norm": 0.27208903431892395, + "learning_rate": 4.790897068245835e-05, + "loss": 0.0183, + "step": 32260 + }, + { + "action_loss": 0.007077125366777182, + "epoch": 29.010791366906474, + "step": 32260 + }, + { + "epoch": 29.010791366906474, + "step": 32260, + "torque_loss": 0.09648188948631287 + }, + { + "epoch": 29.010791366906474, + "force_loss": 0.00590849993750453, + "step": 32260 + }, + { + "epoch": 29.019784172661872, + "grad_norm": 0.14177268743515015, + "learning_rate": 4.7881437280494954e-05, + "loss": 0.0174, + "step": 32270 + }, + { + "action_loss": 0.0015265486435964704, + "epoch": 29.019784172661872, + "step": 32270 + }, + { + "epoch": 29.019784172661872, + "step": 32270, + "torque_loss": 0.10215165466070175 + }, + { + "epoch": 29.019784172661872, + "force_loss": 0.0038197848480194807, + "step": 32270 + }, + { + "epoch": 29.028776978417266, + "grad_norm": 0.23310257494449615, + "learning_rate": 4.7853904522094965e-05, + "loss": 0.0214, + "step": 32280 + }, + { + "action_loss": 0.0017850763397291303, + "epoch": 29.028776978417266, + "step": 32280 + }, + { + "epoch": 29.028776978417266, + "step": 32280, + "torque_loss": 0.10656794160604477 + }, + { + "epoch": 29.028776978417266, + "force_loss": 0.001972500467672944, + "step": 32280 + }, + { + "epoch": 29.037769784172664, + "grad_norm": 0.21488600969314575, + "learning_rate": 4.782637241562215e-05, + "loss": 0.0199, + "step": 32290 + }, + { + "action_loss": 0.003017843933776021, + "epoch": 29.037769784172664, + "step": 32290 + }, + { + "epoch": 29.037769784172664, + "step": 32290, + "torque_loss": 0.11039341241121292 + }, + { + "epoch": 29.037769784172664, + "force_loss": 0.008591358549892902, + "step": 32290 + }, + { + "epoch": 29.046762589928058, + "grad_norm": 0.18550518155097961, + "learning_rate": 4.779884096943997e-05, + "loss": 0.0172, + "step": 32300 + }, + { + "action_loss": 0.0059019471518695354, + "epoch": 29.046762589928058, + "step": 32300 + }, + { + "epoch": 29.046762589928058, + "step": 32300, + "torque_loss": 0.1212267205119133 + }, + { + "epoch": 29.046762589928058, + "force_loss": 0.01663847826421261, + "step": 32300 + }, + { + "epoch": 29.055755395683452, + "grad_norm": 0.45165735483169556, + "learning_rate": 4.777131019191182e-05, + "loss": 0.0277, + "step": 32310 + }, + { + "action_loss": 0.0054965815506875515, + "epoch": 29.055755395683452, + "step": 32310 + }, + { + "epoch": 29.055755395683452, + "step": 32310, + "torque_loss": 0.10573756694793701 + }, + { + "epoch": 29.055755395683452, + "force_loss": 0.0016368599608540535, + "step": 32310 + }, + { + "epoch": 29.06474820143885, + "grad_norm": 0.2798215448856354, + "learning_rate": 4.774378009140076e-05, + "loss": 0.0189, + "step": 32320 + }, + { + "action_loss": 0.01228462252765894, + "epoch": 29.06474820143885, + "step": 32320 + }, + { + "epoch": 29.06474820143885, + "step": 32320, + "torque_loss": 0.10527908802032471 + }, + { + "epoch": 29.06474820143885, + "force_loss": 0.010610915720462799, + "step": 32320 + }, + { + "epoch": 29.073741007194243, + "grad_norm": 0.4051395654678345, + "learning_rate": 4.7716250676269735e-05, + "loss": 0.018, + "step": 32330 + }, + { + "action_loss": 0.004625875037163496, + "epoch": 29.073741007194243, + "step": 32330 + }, + { + "epoch": 29.073741007194243, + "step": 32330, + "torque_loss": 0.12205108255147934 + }, + { + "epoch": 29.073741007194243, + "force_loss": 0.004206816665828228, + "step": 32330 + }, + { + "epoch": 29.08273381294964, + "grad_norm": 0.1723063588142395, + "learning_rate": 4.7688721954881485e-05, + "loss": 0.0186, + "step": 32340 + }, + { + "action_loss": 0.0039085340686142445, + "epoch": 29.08273381294964, + "step": 32340 + }, + { + "epoch": 29.08273381294964, + "step": 32340, + "torque_loss": 0.18239738047122955 + }, + { + "epoch": 29.08273381294964, + "force_loss": 0.002797734225168824, + "step": 32340 + }, + { + "epoch": 29.091726618705035, + "grad_norm": 0.2537984848022461, + "learning_rate": 4.7661193935598446e-05, + "loss": 0.0277, + "step": 32350 + }, + { + "action_loss": 0.004381152801215649, + "epoch": 29.091726618705035, + "step": 32350 + }, + { + "epoch": 29.091726618705035, + "step": 32350, + "torque_loss": 0.095862478017807 + }, + { + "epoch": 29.091726618705035, + "force_loss": 0.006126149091869593, + "step": 32350 + }, + { + "epoch": 29.100719424460433, + "grad_norm": 0.2501719295978546, + "learning_rate": 4.763366662678296e-05, + "loss": 0.019, + "step": 32360 + }, + { + "action_loss": 0.004788230173289776, + "epoch": 29.100719424460433, + "step": 32360 + }, + { + "epoch": 29.100719424460433, + "step": 32360, + "torque_loss": 0.11477348953485489 + }, + { + "epoch": 29.100719424460433, + "force_loss": 0.003308061510324478, + "step": 32360 + }, + { + "epoch": 29.109712230215827, + "grad_norm": 0.30460652709007263, + "learning_rate": 4.7606140036797064e-05, + "loss": 0.0206, + "step": 32370 + }, + { + "action_loss": 0.0032112309709191322, + "epoch": 29.109712230215827, + "step": 32370 + }, + { + "epoch": 29.109712230215827, + "step": 32370, + "torque_loss": 0.15254874527454376 + }, + { + "epoch": 29.109712230215827, + "force_loss": 0.008447595871984959, + "step": 32370 + }, + { + "epoch": 29.118705035971225, + "grad_norm": 0.23224332928657532, + "learning_rate": 4.7578614174002614e-05, + "loss": 0.0178, + "step": 32380 + }, + { + "action_loss": 0.003697851672768593, + "epoch": 29.118705035971225, + "step": 32380 + }, + { + "epoch": 29.118705035971225, + "step": 32380, + "torque_loss": 0.15930971503257751 + }, + { + "epoch": 29.118705035971225, + "force_loss": 0.006726616993546486, + "step": 32380 + }, + { + "epoch": 29.12769784172662, + "grad_norm": 0.2128390222787857, + "learning_rate": 4.755108904676125e-05, + "loss": 0.0173, + "step": 32390 + }, + { + "action_loss": 0.007075525354593992, + "epoch": 29.12769784172662, + "step": 32390 + }, + { + "epoch": 29.12769784172662, + "step": 32390, + "torque_loss": 0.14321278035640717 + }, + { + "epoch": 29.12769784172662, + "force_loss": 0.012733793817460537, + "step": 32390 + }, + { + "epoch": 29.136690647482013, + "grad_norm": 0.17491494119167328, + "learning_rate": 4.752356466343436e-05, + "loss": 0.0243, + "step": 32400 + }, + { + "action_loss": 0.0029850397258996964, + "epoch": 29.136690647482013, + "step": 32400 + }, + { + "epoch": 29.136690647482013, + "step": 32400, + "torque_loss": 0.11526184529066086 + }, + { + "epoch": 29.136690647482013, + "force_loss": 0.005373217165470123, + "step": 32400 + }, + { + "epoch": 29.14568345323741, + "grad_norm": 0.140650674700737, + "learning_rate": 4.7496041032383174e-05, + "loss": 0.0174, + "step": 32410 + }, + { + "action_loss": 0.0027762267272919416, + "epoch": 29.14568345323741, + "step": 32410 + }, + { + "epoch": 29.14568345323741, + "step": 32410, + "torque_loss": 0.08548658341169357 + }, + { + "epoch": 29.14568345323741, + "force_loss": 0.0017282020999118686, + "step": 32410 + }, + { + "epoch": 29.154676258992804, + "grad_norm": 0.521045982837677, + "learning_rate": 4.746851816196858e-05, + "loss": 0.0161, + "step": 32420 + }, + { + "action_loss": 0.00275931297801435, + "epoch": 29.154676258992804, + "step": 32420 + }, + { + "epoch": 29.154676258992804, + "step": 32420, + "torque_loss": 0.11880142241716385 + }, + { + "epoch": 29.154676258992804, + "force_loss": 0.0073845237493515015, + "step": 32420 + }, + { + "epoch": 29.163669064748202, + "grad_norm": 0.2052893489599228, + "learning_rate": 4.744099606055135e-05, + "loss": 0.0176, + "step": 32430 + }, + { + "action_loss": 0.015046007931232452, + "epoch": 29.163669064748202, + "step": 32430 + }, + { + "epoch": 29.163669064748202, + "step": 32430, + "torque_loss": 0.1811208575963974 + }, + { + "epoch": 29.163669064748202, + "force_loss": 0.014213413000106812, + "step": 32430 + }, + { + "epoch": 29.172661870503596, + "grad_norm": 0.27192258834838867, + "learning_rate": 4.741347473649193e-05, + "loss": 0.0193, + "step": 32440 + }, + { + "action_loss": 0.005661696195602417, + "epoch": 29.172661870503596, + "step": 32440 + }, + { + "epoch": 29.172661870503596, + "step": 32440, + "torque_loss": 0.0931566059589386 + }, + { + "epoch": 29.172661870503596, + "force_loss": 0.0030042286962270737, + "step": 32440 + }, + { + "epoch": 29.181654676258994, + "grad_norm": 0.10476517677307129, + "learning_rate": 4.738595419815058e-05, + "loss": 0.0175, + "step": 32450 + }, + { + "action_loss": 0.005921449512243271, + "epoch": 29.181654676258994, + "step": 32450 + }, + { + "epoch": 29.181654676258994, + "step": 32450, + "torque_loss": 0.10327693819999695 + }, + { + "epoch": 29.181654676258994, + "force_loss": 0.009495220147073269, + "step": 32450 + }, + { + "epoch": 29.190647482014388, + "grad_norm": 0.22602613270282745, + "learning_rate": 4.7358434453887365e-05, + "loss": 0.0197, + "step": 32460 + }, + { + "action_loss": 0.008681467734277248, + "epoch": 29.190647482014388, + "step": 32460 + }, + { + "epoch": 29.190647482014388, + "step": 32460, + "torque_loss": 0.07998576760292053 + }, + { + "epoch": 29.190647482014388, + "force_loss": 0.0065096099860966206, + "step": 32460 + }, + { + "epoch": 29.199640287769785, + "grad_norm": 0.2433185875415802, + "learning_rate": 4.7330915512061976e-05, + "loss": 0.0184, + "step": 32470 + }, + { + "action_loss": 0.003693939186632633, + "epoch": 29.199640287769785, + "step": 32470 + }, + { + "epoch": 29.199640287769785, + "step": 32470, + "torque_loss": 0.10094495862722397 + }, + { + "epoch": 29.199640287769785, + "force_loss": 0.007911822758615017, + "step": 32470 + }, + { + "epoch": 29.20863309352518, + "grad_norm": 0.2534589469432831, + "learning_rate": 4.730339738103402e-05, + "loss": 0.0224, + "step": 32480 + }, + { + "action_loss": 0.004707122687250376, + "epoch": 29.20863309352518, + "step": 32480 + }, + { + "epoch": 29.20863309352518, + "step": 32480, + "torque_loss": 0.132128044962883 + }, + { + "epoch": 29.20863309352518, + "force_loss": 0.00502332067117095, + "step": 32480 + }, + { + "epoch": 29.217625899280577, + "grad_norm": 0.2297942191362381, + "learning_rate": 4.727588006916271e-05, + "loss": 0.0214, + "step": 32490 + }, + { + "action_loss": 0.002568019786849618, + "epoch": 29.217625899280577, + "step": 32490 + }, + { + "epoch": 29.217625899280577, + "step": 32490, + "torque_loss": 0.11688452214002609 + }, + { + "epoch": 29.217625899280577, + "force_loss": 0.002252336358651519, + "step": 32490 + }, + { + "epoch": 29.22661870503597, + "grad_norm": 0.2703711688518524, + "learning_rate": 4.724836358480711e-05, + "loss": 0.0169, + "step": 32500 + }, + { + "action_loss": 0.01019375305622816, + "epoch": 29.22661870503597, + "step": 32500 + }, + { + "epoch": 29.22661870503597, + "step": 32500, + "torque_loss": 0.1894327849149704 + }, + { + "epoch": 29.22661870503597, + "force_loss": 0.02001328580081463, + "step": 32500 + }, + { + "epoch": 29.235611510791365, + "grad_norm": 0.2101975381374359, + "learning_rate": 4.722084793632601e-05, + "loss": 0.0195, + "step": 32510 + }, + { + "action_loss": 0.006329949479550123, + "epoch": 29.235611510791365, + "step": 32510 + }, + { + "epoch": 29.235611510791365, + "step": 32510, + "torque_loss": 0.14494441449642181 + }, + { + "epoch": 29.235611510791365, + "force_loss": 0.00831457506865263, + "step": 32510 + }, + { + "epoch": 29.244604316546763, + "grad_norm": 0.2567432224750519, + "learning_rate": 4.719333313207792e-05, + "loss": 0.0197, + "step": 32520 + }, + { + "action_loss": 0.0057547432370483875, + "epoch": 29.244604316546763, + "step": 32520 + }, + { + "epoch": 29.244604316546763, + "step": 32520, + "torque_loss": 0.14106887578964233 + }, + { + "epoch": 29.244604316546763, + "force_loss": 0.004578470718115568, + "step": 32520 + }, + { + "epoch": 29.253597122302157, + "grad_norm": 0.27158123254776, + "learning_rate": 4.716581918042114e-05, + "loss": 0.0194, + "step": 32530 + }, + { + "action_loss": 0.008752552792429924, + "epoch": 29.253597122302157, + "step": 32530 + }, + { + "epoch": 29.253597122302157, + "step": 32530, + "torque_loss": 0.1290217787027359 + }, + { + "epoch": 29.253597122302157, + "force_loss": 0.014507760293781757, + "step": 32530 + }, + { + "epoch": 29.262589928057555, + "grad_norm": 0.17042985558509827, + "learning_rate": 4.7138306089713636e-05, + "loss": 0.0172, + "step": 32540 + }, + { + "action_loss": 0.0013127584243193269, + "epoch": 29.262589928057555, + "step": 32540 + }, + { + "epoch": 29.262589928057555, + "step": 32540, + "torque_loss": 0.07442139834165573 + }, + { + "epoch": 29.262589928057555, + "force_loss": 0.001402603113092482, + "step": 32540 + }, + { + "epoch": 29.27158273381295, + "grad_norm": 0.14285852015018463, + "learning_rate": 4.7110793868313183e-05, + "loss": 0.0153, + "step": 32550 + }, + { + "action_loss": 0.00603299168869853, + "epoch": 29.27158273381295, + "step": 32550 + }, + { + "epoch": 29.27158273381295, + "step": 32550, + "torque_loss": 0.09963829070329666 + }, + { + "epoch": 29.27158273381295, + "force_loss": 0.0076375603675842285, + "step": 32550 + }, + { + "epoch": 29.280575539568346, + "grad_norm": 0.513832688331604, + "learning_rate": 4.708328252457729e-05, + "loss": 0.0185, + "step": 32560 + }, + { + "action_loss": 0.011000323109328747, + "epoch": 29.280575539568346, + "step": 32560 + }, + { + "epoch": 29.280575539568346, + "step": 32560, + "torque_loss": 0.13765794038772583 + }, + { + "epoch": 29.280575539568346, + "force_loss": 0.015668898820877075, + "step": 32560 + }, + { + "epoch": 29.28956834532374, + "grad_norm": 0.18672128021717072, + "learning_rate": 4.7055772066863135e-05, + "loss": 0.0236, + "step": 32570 + }, + { + "action_loss": 0.00420399708673358, + "epoch": 29.28956834532374, + "step": 32570 + }, + { + "epoch": 29.28956834532374, + "step": 32570, + "torque_loss": 0.1491629034280777 + }, + { + "epoch": 29.28956834532374, + "force_loss": 0.005232276860624552, + "step": 32570 + }, + { + "epoch": 29.298561151079138, + "grad_norm": 0.1859084963798523, + "learning_rate": 4.702826250352771e-05, + "loss": 0.0161, + "step": 32580 + }, + { + "action_loss": 0.005574436392635107, + "epoch": 29.298561151079138, + "step": 32580 + }, + { + "epoch": 29.298561151079138, + "step": 32580, + "torque_loss": 0.1271819919347763 + }, + { + "epoch": 29.298561151079138, + "force_loss": 0.009282768703997135, + "step": 32580 + }, + { + "epoch": 29.307553956834532, + "grad_norm": 0.2615342140197754, + "learning_rate": 4.7000753842927653e-05, + "loss": 0.0181, + "step": 32590 + }, + { + "action_loss": 0.0013217715313658118, + "epoch": 29.307553956834532, + "step": 32590 + }, + { + "epoch": 29.307553956834532, + "step": 32590, + "torque_loss": 0.09290742874145508 + }, + { + "epoch": 29.307553956834532, + "force_loss": 0.0027795464266091585, + "step": 32590 + }, + { + "epoch": 29.31654676258993, + "grad_norm": 0.35575351119041443, + "learning_rate": 4.6973246093419384e-05, + "loss": 0.0183, + "step": 32600 + }, + { + "action_loss": 0.0021544245537370443, + "epoch": 29.31654676258993, + "step": 32600 + }, + { + "epoch": 29.31654676258993, + "step": 32600, + "torque_loss": 0.08024931699037552 + }, + { + "epoch": 29.31654676258993, + "force_loss": 0.0024534789845347404, + "step": 32600 + }, + { + "epoch": 29.325539568345324, + "grad_norm": 0.31143516302108765, + "learning_rate": 4.694573926335906e-05, + "loss": 0.021, + "step": 32610 + }, + { + "action_loss": 0.0023949090391397476, + "epoch": 29.325539568345324, + "step": 32610 + }, + { + "epoch": 29.325539568345324, + "step": 32610, + "torque_loss": 0.0861029401421547 + }, + { + "epoch": 29.325539568345324, + "force_loss": 0.0039823721162974834, + "step": 32610 + }, + { + "epoch": 29.334532374100718, + "grad_norm": 0.3357939124107361, + "learning_rate": 4.6918233361102476e-05, + "loss": 0.0172, + "step": 32620 + }, + { + "action_loss": 0.0055252485908567905, + "epoch": 29.334532374100718, + "step": 32620 + }, + { + "epoch": 29.334532374100718, + "step": 32620, + "torque_loss": 0.11591412872076035 + }, + { + "epoch": 29.334532374100718, + "force_loss": 0.008989318273961544, + "step": 32620 + }, + { + "epoch": 29.343525179856115, + "grad_norm": 0.5258576273918152, + "learning_rate": 4.689072839500525e-05, + "loss": 0.0187, + "step": 32630 + }, + { + "action_loss": 0.006293279584497213, + "epoch": 29.343525179856115, + "step": 32630 + }, + { + "epoch": 29.343525179856115, + "step": 32630, + "torque_loss": 0.11771956831216812 + }, + { + "epoch": 29.343525179856115, + "force_loss": 0.0033822599798440933, + "step": 32630 + }, + { + "epoch": 29.35251798561151, + "grad_norm": 0.5485567450523376, + "learning_rate": 4.6863224373422635e-05, + "loss": 0.0181, + "step": 32640 + }, + { + "action_loss": 0.004397273063659668, + "epoch": 29.35251798561151, + "step": 32640 + }, + { + "epoch": 29.35251798561151, + "step": 32640, + "torque_loss": 0.10283096879720688 + }, + { + "epoch": 29.35251798561151, + "force_loss": 0.0037207824643701315, + "step": 32640 + }, + { + "epoch": 29.361510791366907, + "grad_norm": 0.09677112102508545, + "learning_rate": 4.683572130470962e-05, + "loss": 0.0177, + "step": 32650 + }, + { + "action_loss": 0.001985314302146435, + "epoch": 29.361510791366907, + "step": 32650 + }, + { + "epoch": 29.361510791366907, + "step": 32650, + "torque_loss": 0.10480914264917374 + }, + { + "epoch": 29.361510791366907, + "force_loss": 0.0034442029427736998, + "step": 32650 + }, + { + "epoch": 29.3705035971223, + "grad_norm": 0.1992332637310028, + "learning_rate": 4.680821919722094e-05, + "loss": 0.0206, + "step": 32660 + }, + { + "action_loss": 0.011067663319408894, + "epoch": 29.3705035971223, + "step": 32660 + }, + { + "epoch": 29.3705035971223, + "step": 32660, + "torque_loss": 0.12920941412448883 + }, + { + "epoch": 29.3705035971223, + "force_loss": 0.008987017907202244, + "step": 32660 + }, + { + "epoch": 29.3794964028777, + "grad_norm": 0.3118537962436676, + "learning_rate": 4.6780718059310975e-05, + "loss": 0.0217, + "step": 32670 + }, + { + "action_loss": 0.004925935994833708, + "epoch": 29.3794964028777, + "step": 32670 + }, + { + "epoch": 29.3794964028777, + "step": 32670, + "torque_loss": 0.1036626324057579 + }, + { + "epoch": 29.3794964028777, + "force_loss": 0.0030708431731909513, + "step": 32670 + }, + { + "epoch": 29.388489208633093, + "grad_norm": 0.16304568946361542, + "learning_rate": 4.675321789933389e-05, + "loss": 0.0171, + "step": 32680 + }, + { + "action_loss": 0.0061505562625825405, + "epoch": 29.388489208633093, + "step": 32680 + }, + { + "epoch": 29.388489208633093, + "step": 32680, + "torque_loss": 0.10227325558662415 + }, + { + "epoch": 29.388489208633093, + "force_loss": 0.001908316626213491, + "step": 32680 + }, + { + "epoch": 29.39748201438849, + "grad_norm": 0.20562444627285004, + "learning_rate": 4.6725718725643464e-05, + "loss": 0.0151, + "step": 32690 + }, + { + "action_loss": 0.02423258125782013, + "epoch": 29.39748201438849, + "step": 32690 + }, + { + "epoch": 29.39748201438849, + "step": 32690, + "torque_loss": 0.16639037430286407 + }, + { + "epoch": 29.39748201438849, + "force_loss": 0.038983047008514404, + "step": 32690 + }, + { + "epoch": 29.406474820143885, + "grad_norm": 0.15895070135593414, + "learning_rate": 4.669822054659323e-05, + "loss": 0.0209, + "step": 32700 + }, + { + "action_loss": 0.01568661816418171, + "epoch": 29.406474820143885, + "step": 32700 + }, + { + "epoch": 29.406474820143885, + "step": 32700, + "torque_loss": 0.09653004258871078 + }, + { + "epoch": 29.406474820143885, + "force_loss": 0.0159252118319273, + "step": 32700 + }, + { + "epoch": 29.415467625899282, + "grad_norm": 0.3581959307193756, + "learning_rate": 4.667072337053644e-05, + "loss": 0.0184, + "step": 32710 + }, + { + "action_loss": 0.010245249606668949, + "epoch": 29.415467625899282, + "step": 32710 + }, + { + "epoch": 29.415467625899282, + "step": 32710, + "torque_loss": 0.11866069585084915 + }, + { + "epoch": 29.415467625899282, + "force_loss": 0.006856624037027359, + "step": 32710 + }, + { + "epoch": 29.424460431654676, + "grad_norm": 0.17700177431106567, + "learning_rate": 4.6643227205825965e-05, + "loss": 0.0205, + "step": 32720 + }, + { + "action_loss": 0.0032879996579140425, + "epoch": 29.424460431654676, + "step": 32720 + }, + { + "epoch": 29.424460431654676, + "step": 32720, + "torque_loss": 0.11360689997673035 + }, + { + "epoch": 29.424460431654676, + "force_loss": 0.004228583537042141, + "step": 32720 + }, + { + "epoch": 29.43345323741007, + "grad_norm": 0.2889029085636139, + "learning_rate": 4.6615732060814454e-05, + "loss": 0.018, + "step": 32730 + }, + { + "action_loss": 0.003413207596167922, + "epoch": 29.43345323741007, + "step": 32730 + }, + { + "epoch": 29.43345323741007, + "step": 32730, + "torque_loss": 0.15451616048812866 + }, + { + "epoch": 29.43345323741007, + "force_loss": 0.004634483251720667, + "step": 32730 + }, + { + "epoch": 29.442446043165468, + "grad_norm": 0.1654779464006424, + "learning_rate": 4.658823794385417e-05, + "loss": 0.019, + "step": 32740 + }, + { + "action_loss": 0.005410236772149801, + "epoch": 29.442446043165468, + "step": 32740 + }, + { + "epoch": 29.442446043165468, + "step": 32740, + "torque_loss": 0.09852194786071777 + }, + { + "epoch": 29.442446043165468, + "force_loss": 0.0046052876859903336, + "step": 32740 + }, + { + "epoch": 29.451438848920862, + "grad_norm": 0.4740029573440552, + "learning_rate": 4.6560744863297115e-05, + "loss": 0.0191, + "step": 32750 + }, + { + "action_loss": 0.0026829184498637915, + "epoch": 29.451438848920862, + "step": 32750 + }, + { + "epoch": 29.451438848920862, + "step": 32750, + "torque_loss": 0.10991368442773819 + }, + { + "epoch": 29.451438848920862, + "force_loss": 0.002388973021879792, + "step": 32750 + }, + { + "epoch": 29.46043165467626, + "grad_norm": 0.39620524644851685, + "learning_rate": 4.653325282749498e-05, + "loss": 0.0205, + "step": 32760 + }, + { + "action_loss": 0.0012941869208589196, + "epoch": 29.46043165467626, + "step": 32760 + }, + { + "epoch": 29.46043165467626, + "step": 32760, + "torque_loss": 0.13842102885246277 + }, + { + "epoch": 29.46043165467626, + "force_loss": 0.0016389338998124003, + "step": 32760 + }, + { + "epoch": 29.469424460431654, + "grad_norm": 0.27129366993904114, + "learning_rate": 4.6505761844799075e-05, + "loss": 0.019, + "step": 32770 + }, + { + "action_loss": 0.004043588414788246, + "epoch": 29.469424460431654, + "step": 32770 + }, + { + "epoch": 29.469424460431654, + "step": 32770, + "torque_loss": 0.15159566700458527 + }, + { + "epoch": 29.469424460431654, + "force_loss": 0.009783412329852581, + "step": 32770 + }, + { + "epoch": 29.47841726618705, + "grad_norm": 0.3575635850429535, + "learning_rate": 4.647827192356048e-05, + "loss": 0.0191, + "step": 32780 + }, + { + "action_loss": 0.004523263778537512, + "epoch": 29.47841726618705, + "step": 32780 + }, + { + "epoch": 29.47841726618705, + "step": 32780, + "torque_loss": 0.12116900831460953 + }, + { + "epoch": 29.47841726618705, + "force_loss": 0.008808523416519165, + "step": 32780 + }, + { + "epoch": 29.487410071942445, + "grad_norm": 0.29399749636650085, + "learning_rate": 4.645078307212989e-05, + "loss": 0.0202, + "step": 32790 + }, + { + "action_loss": 0.004080316983163357, + "epoch": 29.487410071942445, + "step": 32790 + }, + { + "epoch": 29.487410071942445, + "step": 32790, + "torque_loss": 0.10931116342544556 + }, + { + "epoch": 29.487410071942445, + "force_loss": 0.003771195886656642, + "step": 32790 + }, + { + "epoch": 29.496402877697843, + "grad_norm": 0.23084160685539246, + "learning_rate": 4.642329529885768e-05, + "loss": 0.0168, + "step": 32800 + }, + { + "action_loss": 0.003108937991783023, + "epoch": 29.496402877697843, + "step": 32800 + }, + { + "epoch": 29.496402877697843, + "step": 32800, + "torque_loss": 0.06809798628091812 + }, + { + "epoch": 29.496402877697843, + "force_loss": 0.005005677696317434, + "step": 32800 + }, + { + "epoch": 29.505395683453237, + "grad_norm": 0.5844730138778687, + "learning_rate": 4.639580861209393e-05, + "loss": 0.0175, + "step": 32810 + }, + { + "action_loss": 0.00632280670106411, + "epoch": 29.505395683453237, + "step": 32810 + }, + { + "epoch": 29.505395683453237, + "step": 32810, + "torque_loss": 0.15455208718776703 + }, + { + "epoch": 29.505395683453237, + "force_loss": 0.006788300815969706, + "step": 32810 + }, + { + "epoch": 29.514388489208635, + "grad_norm": 0.2245039939880371, + "learning_rate": 4.636832302018835e-05, + "loss": 0.0208, + "step": 32820 + }, + { + "action_loss": 0.0030229396652430296, + "epoch": 29.514388489208635, + "step": 32820 + }, + { + "epoch": 29.514388489208635, + "step": 32820, + "torque_loss": 0.13324254751205444 + }, + { + "epoch": 29.514388489208635, + "force_loss": 0.004435569513589144, + "step": 32820 + }, + { + "epoch": 29.52338129496403, + "grad_norm": 0.1462528258562088, + "learning_rate": 4.6340838531490365e-05, + "loss": 0.0166, + "step": 32830 + }, + { + "action_loss": 0.00136242201551795, + "epoch": 29.52338129496403, + "step": 32830 + }, + { + "epoch": 29.52338129496403, + "step": 32830, + "torque_loss": 0.0685676857829094 + }, + { + "epoch": 29.52338129496403, + "force_loss": 0.0013977460330352187, + "step": 32830 + }, + { + "epoch": 29.532374100719423, + "grad_norm": 0.22453230619430542, + "learning_rate": 4.6313355154349e-05, + "loss": 0.0176, + "step": 32840 + }, + { + "action_loss": 0.005980899091809988, + "epoch": 29.532374100719423, + "step": 32840 + }, + { + "epoch": 29.532374100719423, + "step": 32840, + "torque_loss": 0.10074014216661453 + }, + { + "epoch": 29.532374100719423, + "force_loss": 0.004690586123615503, + "step": 32840 + }, + { + "epoch": 29.54136690647482, + "grad_norm": 0.1655723750591278, + "learning_rate": 4.6285872897113025e-05, + "loss": 0.017, + "step": 32850 + }, + { + "action_loss": 0.004144648090004921, + "epoch": 29.54136690647482, + "step": 32850 + }, + { + "epoch": 29.54136690647482, + "step": 32850, + "torque_loss": 0.1202431321144104 + }, + { + "epoch": 29.54136690647482, + "force_loss": 0.0019840833265334368, + "step": 32850 + }, + { + "epoch": 29.550359712230215, + "grad_norm": 0.1184563934803009, + "learning_rate": 4.625839176813077e-05, + "loss": 0.0163, + "step": 32860 + }, + { + "action_loss": 0.0019806192722171545, + "epoch": 29.550359712230215, + "step": 32860 + }, + { + "epoch": 29.550359712230215, + "step": 32860, + "torque_loss": 0.10810966044664383 + }, + { + "epoch": 29.550359712230215, + "force_loss": 0.004407880827784538, + "step": 32860 + }, + { + "epoch": 29.559352517985612, + "grad_norm": 0.12117161601781845, + "learning_rate": 4.623091177575031e-05, + "loss": 0.0179, + "step": 32870 + }, + { + "action_loss": 0.003812926122918725, + "epoch": 29.559352517985612, + "step": 32870 + }, + { + "epoch": 29.559352517985612, + "step": 32870, + "torque_loss": 0.08688423782587051 + }, + { + "epoch": 29.559352517985612, + "force_loss": 0.005498215556144714, + "step": 32870 + }, + { + "epoch": 29.568345323741006, + "grad_norm": 0.16760703921318054, + "learning_rate": 4.620343292831936e-05, + "loss": 0.018, + "step": 32880 + }, + { + "action_loss": 0.003354416461661458, + "epoch": 29.568345323741006, + "step": 32880 + }, + { + "epoch": 29.568345323741006, + "step": 32880, + "torque_loss": 0.09763610363006592 + }, + { + "epoch": 29.568345323741006, + "force_loss": 0.0025031135883182287, + "step": 32880 + }, + { + "epoch": 29.577338129496404, + "grad_norm": 0.2660999894142151, + "learning_rate": 4.6175955234185206e-05, + "loss": 0.0181, + "step": 32890 + }, + { + "action_loss": 0.009728002361953259, + "epoch": 29.577338129496404, + "step": 32890 + }, + { + "epoch": 29.577338129496404, + "step": 32890, + "torque_loss": 0.1275230199098587 + }, + { + "epoch": 29.577338129496404, + "force_loss": 0.010925649665296078, + "step": 32890 + }, + { + "epoch": 29.586330935251798, + "grad_norm": 0.3399328887462616, + "learning_rate": 4.614847870169492e-05, + "loss": 0.0168, + "step": 32900 + }, + { + "action_loss": 0.004511337727308273, + "epoch": 29.586330935251798, + "step": 32900 + }, + { + "epoch": 29.586330935251798, + "step": 32900, + "torque_loss": 0.1351112574338913 + }, + { + "epoch": 29.586330935251798, + "force_loss": 0.006338247563689947, + "step": 32900 + }, + { + "epoch": 29.595323741007196, + "grad_norm": 0.16598787903785706, + "learning_rate": 4.612100333919509e-05, + "loss": 0.0164, + "step": 32910 + }, + { + "action_loss": 0.0022442378103733063, + "epoch": 29.595323741007196, + "step": 32910 + }, + { + "epoch": 29.595323741007196, + "step": 32910, + "torque_loss": 0.1249852403998375 + }, + { + "epoch": 29.595323741007196, + "force_loss": 0.0027145585045218468, + "step": 32910 + }, + { + "epoch": 29.60431654676259, + "grad_norm": 0.8419445157051086, + "learning_rate": 4.609352915503202e-05, + "loss": 0.0193, + "step": 32920 + }, + { + "action_loss": 0.0026799149345606565, + "epoch": 29.60431654676259, + "step": 32920 + }, + { + "epoch": 29.60431654676259, + "step": 32920, + "torque_loss": 0.10197170823812485 + }, + { + "epoch": 29.60431654676259, + "force_loss": 0.004200031515210867, + "step": 32920 + }, + { + "epoch": 29.613309352517987, + "grad_norm": 0.17716027796268463, + "learning_rate": 4.606605615755166e-05, + "loss": 0.0169, + "step": 32930 + }, + { + "action_loss": 0.003672495484352112, + "epoch": 29.613309352517987, + "step": 32930 + }, + { + "epoch": 29.613309352517987, + "step": 32930, + "torque_loss": 0.09633766859769821 + }, + { + "epoch": 29.613309352517987, + "force_loss": 0.002823582151904702, + "step": 32930 + }, + { + "epoch": 29.62230215827338, + "grad_norm": 0.1838403046131134, + "learning_rate": 4.6038584355099576e-05, + "loss": 0.0195, + "step": 32940 + }, + { + "action_loss": 0.0030016282107681036, + "epoch": 29.62230215827338, + "step": 32940 + }, + { + "epoch": 29.62230215827338, + "step": 32940, + "torque_loss": 0.10197654366493225 + }, + { + "epoch": 29.62230215827338, + "force_loss": 0.0039540971629321575, + "step": 32940 + }, + { + "epoch": 29.631294964028775, + "grad_norm": 0.1331952065229416, + "learning_rate": 4.6011113756020964e-05, + "loss": 0.0195, + "step": 32950 + }, + { + "action_loss": 0.00984635204076767, + "epoch": 29.631294964028775, + "step": 32950 + }, + { + "epoch": 29.631294964028775, + "step": 32950, + "torque_loss": 0.13478004932403564 + }, + { + "epoch": 29.631294964028775, + "force_loss": 0.020974332466721535, + "step": 32950 + }, + { + "epoch": 29.640287769784173, + "grad_norm": 0.14265209436416626, + "learning_rate": 4.598364436866066e-05, + "loss": 0.0202, + "step": 32960 + }, + { + "action_loss": 0.003692147321999073, + "epoch": 29.640287769784173, + "step": 32960 + }, + { + "epoch": 29.640287769784173, + "step": 32960, + "torque_loss": 0.11055556684732437 + }, + { + "epoch": 29.640287769784173, + "force_loss": 0.008668371476233006, + "step": 32960 + }, + { + "epoch": 29.649280575539567, + "grad_norm": 0.303299218416214, + "learning_rate": 4.595617620136316e-05, + "loss": 0.02, + "step": 32970 + }, + { + "action_loss": 0.007363330572843552, + "epoch": 29.649280575539567, + "step": 32970 + }, + { + "epoch": 29.649280575539567, + "step": 32970, + "torque_loss": 0.13789105415344238 + }, + { + "epoch": 29.649280575539567, + "force_loss": 0.010800435207784176, + "step": 32970 + }, + { + "epoch": 29.658273381294965, + "grad_norm": 0.20006980001926422, + "learning_rate": 4.592870926247257e-05, + "loss": 0.0207, + "step": 32980 + }, + { + "action_loss": 0.0036609049420803785, + "epoch": 29.658273381294965, + "step": 32980 + }, + { + "epoch": 29.658273381294965, + "step": 32980, + "torque_loss": 0.1461556851863861 + }, + { + "epoch": 29.658273381294965, + "force_loss": 0.00689267972484231, + "step": 32980 + }, + { + "epoch": 29.66726618705036, + "grad_norm": 0.19028274714946747, + "learning_rate": 4.5901243560332594e-05, + "loss": 0.0186, + "step": 32990 + }, + { + "action_loss": 0.003864791477099061, + "epoch": 29.66726618705036, + "step": 32990 + }, + { + "epoch": 29.66726618705036, + "step": 32990, + "torque_loss": 0.11155102401971817 + }, + { + "epoch": 29.66726618705036, + "force_loss": 0.004080370534211397, + "step": 32990 + }, + { + "epoch": 29.676258992805757, + "grad_norm": 0.22748075425624847, + "learning_rate": 4.587377910328662e-05, + "loss": 0.0164, + "step": 33000 + }, + { + "action_loss": 0.0033679369371384382, + "epoch": 29.676258992805757, + "step": 33000 + }, + { + "epoch": 29.676258992805757, + "step": 33000, + "torque_loss": 0.12033047527074814 + }, + { + "epoch": 29.676258992805757, + "force_loss": 0.002878366969525814, + "step": 33000 + }, + { + "epoch": 29.68525179856115, + "grad_norm": 0.11324349790811539, + "learning_rate": 4.5846315899677586e-05, + "loss": 0.0165, + "step": 33010 + }, + { + "action_loss": 0.002293139696121216, + "epoch": 29.68525179856115, + "step": 33010 + }, + { + "epoch": 29.68525179856115, + "step": 33010, + "torque_loss": 0.11542467027902603 + }, + { + "epoch": 29.68525179856115, + "force_loss": 0.004007941577583551, + "step": 33010 + }, + { + "epoch": 29.694244604316548, + "grad_norm": 0.23012064397335052, + "learning_rate": 4.5818853957848114e-05, + "loss": 0.0175, + "step": 33020 + }, + { + "action_loss": 0.007706517819315195, + "epoch": 29.694244604316548, + "step": 33020 + }, + { + "epoch": 29.694244604316548, + "step": 33020, + "torque_loss": 0.1225409284234047 + }, + { + "epoch": 29.694244604316548, + "force_loss": 0.007782285567373037, + "step": 33020 + }, + { + "epoch": 29.703237410071942, + "grad_norm": 0.2857019305229187, + "learning_rate": 4.579139328614043e-05, + "loss": 0.0183, + "step": 33030 + }, + { + "action_loss": 0.0113174794241786, + "epoch": 29.703237410071942, + "step": 33030 + }, + { + "epoch": 29.703237410071942, + "step": 33030, + "torque_loss": 0.14130271971225739 + }, + { + "epoch": 29.703237410071942, + "force_loss": 0.008874858729541302, + "step": 33030 + }, + { + "epoch": 29.71223021582734, + "grad_norm": 0.21743011474609375, + "learning_rate": 4.576393389289633e-05, + "loss": 0.0187, + "step": 33040 + }, + { + "action_loss": 0.004120629280805588, + "epoch": 29.71223021582734, + "step": 33040 + }, + { + "epoch": 29.71223021582734, + "step": 33040, + "torque_loss": 0.1165727749466896 + }, + { + "epoch": 29.71223021582734, + "force_loss": 0.00596576975658536, + "step": 33040 + }, + { + "epoch": 29.721223021582734, + "grad_norm": 0.14444954693317413, + "learning_rate": 4.573647578645728e-05, + "loss": 0.02, + "step": 33050 + }, + { + "action_loss": 0.002949989167973399, + "epoch": 29.721223021582734, + "step": 33050 + }, + { + "epoch": 29.721223021582734, + "step": 33050, + "torque_loss": 0.13620251417160034 + }, + { + "epoch": 29.721223021582734, + "force_loss": 0.0032610802445560694, + "step": 33050 + }, + { + "epoch": 29.730215827338128, + "grad_norm": 0.11691472679376602, + "learning_rate": 4.57090189751643e-05, + "loss": 0.0153, + "step": 33060 + }, + { + "action_loss": 0.0019554237369447947, + "epoch": 29.730215827338128, + "step": 33060 + }, + { + "epoch": 29.730215827338128, + "step": 33060, + "torque_loss": 0.12198758125305176 + }, + { + "epoch": 29.730215827338128, + "force_loss": 0.0035621251445263624, + "step": 33060 + }, + { + "epoch": 29.739208633093526, + "grad_norm": 0.3457503020763397, + "learning_rate": 4.568156346735806e-05, + "loss": 0.0186, + "step": 33070 + }, + { + "action_loss": 0.002127479063346982, + "epoch": 29.739208633093526, + "step": 33070 + }, + { + "epoch": 29.739208633093526, + "step": 33070, + "torque_loss": 0.11081872135400772 + }, + { + "epoch": 29.739208633093526, + "force_loss": 0.003237922443076968, + "step": 33070 + }, + { + "epoch": 29.74820143884892, + "grad_norm": 0.14352914690971375, + "learning_rate": 4.565410927137882e-05, + "loss": 0.0172, + "step": 33080 + }, + { + "action_loss": 0.0018772659823298454, + "epoch": 29.74820143884892, + "step": 33080 + }, + { + "epoch": 29.74820143884892, + "step": 33080, + "torque_loss": 0.11937590688467026 + }, + { + "epoch": 29.74820143884892, + "force_loss": 0.003528344677761197, + "step": 33080 + }, + { + "epoch": 29.757194244604317, + "grad_norm": 0.2091064751148224, + "learning_rate": 4.562665639556644e-05, + "loss": 0.0208, + "step": 33090 + }, + { + "action_loss": 0.0020316801965236664, + "epoch": 29.757194244604317, + "step": 33090 + }, + { + "epoch": 29.757194244604317, + "step": 33090, + "torque_loss": 0.10107707977294922 + }, + { + "epoch": 29.757194244604317, + "force_loss": 0.003617035923525691, + "step": 33090 + }, + { + "epoch": 29.76618705035971, + "grad_norm": 0.18773119151592255, + "learning_rate": 4.559920484826037e-05, + "loss": 0.0176, + "step": 33100 + }, + { + "action_loss": 0.0025949233677238226, + "epoch": 29.76618705035971, + "step": 33100 + }, + { + "epoch": 29.76618705035971, + "step": 33100, + "torque_loss": 0.0989958867430687 + }, + { + "epoch": 29.76618705035971, + "force_loss": 0.0015219500055536628, + "step": 33100 + }, + { + "epoch": 29.77517985611511, + "grad_norm": 0.3331056535243988, + "learning_rate": 4.5571754637799665e-05, + "loss": 0.0184, + "step": 33110 + }, + { + "action_loss": 0.009025699459016323, + "epoch": 29.77517985611511, + "step": 33110 + }, + { + "epoch": 29.77517985611511, + "step": 33110, + "torque_loss": 0.10117089003324509 + }, + { + "epoch": 29.77517985611511, + "force_loss": 0.006383243948221207, + "step": 33110 + }, + { + "epoch": 29.784172661870503, + "grad_norm": 0.23136396706104279, + "learning_rate": 4.554430577252298e-05, + "loss": 0.0166, + "step": 33120 + }, + { + "action_loss": 0.008841276168823242, + "epoch": 29.784172661870503, + "step": 33120 + }, + { + "epoch": 29.784172661870503, + "step": 33120, + "torque_loss": 0.15196190774440765 + }, + { + "epoch": 29.784172661870503, + "force_loss": 0.005283138249069452, + "step": 33120 + }, + { + "epoch": 29.7931654676259, + "grad_norm": 0.17802201211452484, + "learning_rate": 4.551685826076858e-05, + "loss": 0.0186, + "step": 33130 + }, + { + "action_loss": 0.0073266527615487576, + "epoch": 29.7931654676259, + "step": 33130 + }, + { + "epoch": 29.7931654676259, + "step": 33130, + "torque_loss": 0.14417017996311188 + }, + { + "epoch": 29.7931654676259, + "force_loss": 0.0027394357603043318, + "step": 33130 + }, + { + "epoch": 29.802158273381295, + "grad_norm": 0.10232173651456833, + "learning_rate": 4.5489412110874246e-05, + "loss": 0.0186, + "step": 33140 + }, + { + "action_loss": 0.0026322982739657164, + "epoch": 29.802158273381295, + "step": 33140 + }, + { + "epoch": 29.802158273381295, + "step": 33140, + "torque_loss": 0.09275990724563599 + }, + { + "epoch": 29.802158273381295, + "force_loss": 0.0025260073598474264, + "step": 33140 + }, + { + "epoch": 29.81115107913669, + "grad_norm": 0.2526633143424988, + "learning_rate": 4.5461967331177444e-05, + "loss": 0.0191, + "step": 33150 + }, + { + "action_loss": 0.0031592827290296555, + "epoch": 29.81115107913669, + "step": 33150 + }, + { + "epoch": 29.81115107913669, + "step": 33150, + "torque_loss": 0.08308272808790207 + }, + { + "epoch": 29.81115107913669, + "force_loss": 0.0032640835270285606, + "step": 33150 + }, + { + "epoch": 29.820143884892087, + "grad_norm": 0.23503777384757996, + "learning_rate": 4.5434523930015115e-05, + "loss": 0.0185, + "step": 33160 + }, + { + "action_loss": 0.014097201637923717, + "epoch": 29.820143884892087, + "step": 33160 + }, + { + "epoch": 29.820143884892087, + "step": 33160, + "torque_loss": 0.15288548171520233 + }, + { + "epoch": 29.820143884892087, + "force_loss": 0.0197067242115736, + "step": 33160 + }, + { + "epoch": 29.82913669064748, + "grad_norm": 0.5467575788497925, + "learning_rate": 4.540708191572388e-05, + "loss": 0.0205, + "step": 33170 + }, + { + "action_loss": 0.0020516656804829836, + "epoch": 29.82913669064748, + "step": 33170 + }, + { + "epoch": 29.82913669064748, + "step": 33170, + "torque_loss": 0.07463323324918747 + }, + { + "epoch": 29.82913669064748, + "force_loss": 0.0032436437904834747, + "step": 33170 + }, + { + "epoch": 29.83812949640288, + "grad_norm": 0.31882551312446594, + "learning_rate": 4.537964129663991e-05, + "loss": 0.0173, + "step": 33180 + }, + { + "action_loss": 0.002108158776536584, + "epoch": 29.83812949640288, + "step": 33180 + }, + { + "epoch": 29.83812949640288, + "step": 33180, + "torque_loss": 0.15018528699874878 + }, + { + "epoch": 29.83812949640288, + "force_loss": 0.002047862159088254, + "step": 33180 + }, + { + "epoch": 29.847122302158272, + "grad_norm": 0.3126389980316162, + "learning_rate": 4.535220208109889e-05, + "loss": 0.0201, + "step": 33190 + }, + { + "action_loss": 0.0034148755948990583, + "epoch": 29.847122302158272, + "step": 33190 + }, + { + "epoch": 29.847122302158272, + "step": 33190, + "torque_loss": 0.14748704433441162 + }, + { + "epoch": 29.847122302158272, + "force_loss": 0.0017256079008802772, + "step": 33190 + }, + { + "epoch": 29.85611510791367, + "grad_norm": 0.15697187185287476, + "learning_rate": 4.5324764277436194e-05, + "loss": 0.0164, + "step": 33200 + }, + { + "action_loss": 0.0034842982422560453, + "epoch": 29.85611510791367, + "step": 33200 + }, + { + "epoch": 29.85611510791367, + "step": 33200, + "torque_loss": 0.13564208149909973 + }, + { + "epoch": 29.85611510791367, + "force_loss": 0.001287777558900416, + "step": 33200 + }, + { + "epoch": 29.865107913669064, + "grad_norm": 0.24154828488826752, + "learning_rate": 4.529732789398664e-05, + "loss": 0.0201, + "step": 33210 + }, + { + "action_loss": 0.003454518737271428, + "epoch": 29.865107913669064, + "step": 33210 + }, + { + "epoch": 29.865107913669064, + "step": 33210, + "torque_loss": 0.17831595242023468 + }, + { + "epoch": 29.865107913669064, + "force_loss": 0.001054757391102612, + "step": 33210 + }, + { + "epoch": 29.87410071942446, + "grad_norm": 0.1497463583946228, + "learning_rate": 4.526989293908472e-05, + "loss": 0.0208, + "step": 33220 + }, + { + "action_loss": 0.002602895488962531, + "epoch": 29.87410071942446, + "step": 33220 + }, + { + "epoch": 29.87410071942446, + "step": 33220, + "torque_loss": 0.15655624866485596 + }, + { + "epoch": 29.87410071942446, + "force_loss": 0.0034274160861968994, + "step": 33220 + }, + { + "epoch": 29.883093525179856, + "grad_norm": 0.2563171982765198, + "learning_rate": 4.524245942106442e-05, + "loss": 0.0184, + "step": 33230 + }, + { + "action_loss": 0.007358677219599485, + "epoch": 29.883093525179856, + "step": 33230 + }, + { + "epoch": 29.883093525179856, + "step": 33230, + "torque_loss": 0.16274546086788177 + }, + { + "epoch": 29.883093525179856, + "force_loss": 0.007875451818108559, + "step": 33230 + }, + { + "epoch": 29.892086330935253, + "grad_norm": 0.2812725007534027, + "learning_rate": 4.5215027348259345e-05, + "loss": 0.0209, + "step": 33240 + }, + { + "action_loss": 0.004986295476555824, + "epoch": 29.892086330935253, + "step": 33240 + }, + { + "epoch": 29.892086330935253, + "step": 33240, + "torque_loss": 0.11599931865930557 + }, + { + "epoch": 29.892086330935253, + "force_loss": 0.019289052113890648, + "step": 33240 + }, + { + "epoch": 29.901079136690647, + "grad_norm": 0.18221893906593323, + "learning_rate": 4.5187596729002616e-05, + "loss": 0.0196, + "step": 33250 + }, + { + "action_loss": 0.012487287633121014, + "epoch": 29.901079136690647, + "step": 33250 + }, + { + "epoch": 29.901079136690647, + "step": 33250, + "torque_loss": 0.07998945564031601 + }, + { + "epoch": 29.901079136690647, + "force_loss": 0.025370754301548004, + "step": 33250 + }, + { + "epoch": 29.91007194244604, + "grad_norm": 0.2595881521701813, + "learning_rate": 4.516016757162693e-05, + "loss": 0.0167, + "step": 33260 + }, + { + "action_loss": 0.019895339384675026, + "epoch": 29.91007194244604, + "step": 33260 + }, + { + "epoch": 29.91007194244604, + "step": 33260, + "torque_loss": 0.16868579387664795 + }, + { + "epoch": 29.91007194244604, + "force_loss": 0.008317751809954643, + "step": 33260 + }, + { + "epoch": 29.91906474820144, + "grad_norm": 0.16835442185401917, + "learning_rate": 4.513273988446457e-05, + "loss": 0.0187, + "step": 33270 + }, + { + "action_loss": 0.0020997589454054832, + "epoch": 29.91906474820144, + "step": 33270 + }, + { + "epoch": 29.91906474820144, + "step": 33270, + "torque_loss": 0.1342884749174118 + }, + { + "epoch": 29.91906474820144, + "force_loss": 0.005571839865297079, + "step": 33270 + }, + { + "epoch": 29.928057553956833, + "grad_norm": 0.22526520490646362, + "learning_rate": 4.5105313675847296e-05, + "loss": 0.0164, + "step": 33280 + }, + { + "action_loss": 0.004929483402520418, + "epoch": 29.928057553956833, + "step": 33280 + }, + { + "epoch": 29.928057553956833, + "step": 33280, + "torque_loss": 0.16145117580890656 + }, + { + "epoch": 29.928057553956833, + "force_loss": 0.005234928801655769, + "step": 33280 + }, + { + "epoch": 29.93705035971223, + "grad_norm": 0.20873883366584778, + "learning_rate": 4.5077888954106495e-05, + "loss": 0.0178, + "step": 33290 + }, + { + "action_loss": 0.0016989628784358501, + "epoch": 29.93705035971223, + "step": 33290 + }, + { + "epoch": 29.93705035971223, + "step": 33290, + "torque_loss": 0.09238481521606445 + }, + { + "epoch": 29.93705035971223, + "force_loss": 0.0024258980993181467, + "step": 33290 + }, + { + "epoch": 29.946043165467625, + "grad_norm": 0.1721733659505844, + "learning_rate": 4.505046572757309e-05, + "loss": 0.0202, + "step": 33300 + }, + { + "action_loss": 0.014291095547378063, + "epoch": 29.946043165467625, + "step": 33300 + }, + { + "epoch": 29.946043165467625, + "step": 33300, + "torque_loss": 0.13486896455287933 + }, + { + "epoch": 29.946043165467625, + "force_loss": 0.01131768524646759, + "step": 33300 + }, + { + "epoch": 29.955035971223023, + "grad_norm": 0.26423338055610657, + "learning_rate": 4.502304400457749e-05, + "loss": 0.0205, + "step": 33310 + }, + { + "action_loss": 0.002390447538346052, + "epoch": 29.955035971223023, + "step": 33310 + }, + { + "epoch": 29.955035971223023, + "step": 33310, + "torque_loss": 0.09509583562612534 + }, + { + "epoch": 29.955035971223023, + "force_loss": 0.0037280935794115067, + "step": 33310 + }, + { + "epoch": 29.964028776978417, + "grad_norm": 0.39813458919525146, + "learning_rate": 4.499562379344973e-05, + "loss": 0.0168, + "step": 33320 + }, + { + "action_loss": 0.01173321157693863, + "epoch": 29.964028776978417, + "step": 33320 + }, + { + "epoch": 29.964028776978417, + "step": 33320, + "torque_loss": 0.11689639091491699 + }, + { + "epoch": 29.964028776978417, + "force_loss": 0.011147972196340561, + "step": 33320 + }, + { + "epoch": 29.973021582733814, + "grad_norm": 0.09179895371198654, + "learning_rate": 4.4968205102519306e-05, + "loss": 0.0179, + "step": 33330 + }, + { + "action_loss": 0.0064166150987148285, + "epoch": 29.973021582733814, + "step": 33330 + }, + { + "epoch": 29.973021582733814, + "step": 33330, + "torque_loss": 0.1534060388803482 + }, + { + "epoch": 29.973021582733814, + "force_loss": 0.009151996113359928, + "step": 33330 + }, + { + "epoch": 29.98201438848921, + "grad_norm": 0.32343992590904236, + "learning_rate": 4.494078794011532e-05, + "loss": 0.0185, + "step": 33340 + }, + { + "action_loss": 0.0036133381072431803, + "epoch": 29.98201438848921, + "step": 33340 + }, + { + "epoch": 29.98201438848921, + "step": 33340, + "torque_loss": 0.11830791085958481 + }, + { + "epoch": 29.98201438848921, + "force_loss": 0.0028760216664522886, + "step": 33340 + }, + { + "epoch": 29.991007194244606, + "grad_norm": 0.4685978889465332, + "learning_rate": 4.491337231456639e-05, + "loss": 0.018, + "step": 33350 + }, + { + "action_loss": 0.004563137423247099, + "epoch": 29.991007194244606, + "step": 33350 + }, + { + "epoch": 29.991007194244606, + "step": 33350, + "torque_loss": 0.112477146089077 + }, + { + "epoch": 29.991007194244606, + "force_loss": 0.002000430366024375, + "step": 33350 + }, + { + "epoch": 30.0, + "grad_norm": 0.22582240402698517, + "learning_rate": 4.4885958234200634e-05, + "loss": 0.0164, + "step": 33360 + }, + { + "action_loss": 0.05387647822499275, + "epoch": 30.0, + "step": 33360 + }, + { + "epoch": 30.0, + "step": 33360, + "torque_loss": 0.15187954902648926 + }, + { + "epoch": 30.0, + "force_loss": 0.052053917199373245, + "step": 33360 + }, + { + "epoch": 30.008992805755394, + "grad_norm": 0.1293298304080963, + "learning_rate": 4.485854570734575e-05, + "loss": 0.0212, + "step": 33370 + }, + { + "action_loss": 0.02416323870420456, + "epoch": 30.008992805755394, + "step": 33370 + }, + { + "epoch": 30.008992805755394, + "step": 33370, + "torque_loss": 0.16026407480239868 + }, + { + "epoch": 30.008992805755394, + "force_loss": 0.03959124907851219, + "step": 33370 + }, + { + "epoch": 30.01798561151079, + "grad_norm": 0.4653620719909668, + "learning_rate": 4.483113474232891e-05, + "loss": 0.0203, + "step": 33380 + }, + { + "action_loss": 0.006471555680036545, + "epoch": 30.01798561151079, + "step": 33380 + }, + { + "epoch": 30.01798561151079, + "step": 33380, + "torque_loss": 0.10242074728012085 + }, + { + "epoch": 30.01798561151079, + "force_loss": 0.007715189829468727, + "step": 33380 + }, + { + "epoch": 30.026978417266186, + "grad_norm": 0.1794714629650116, + "learning_rate": 4.480372534747688e-05, + "loss": 0.0195, + "step": 33390 + }, + { + "action_loss": 0.002253317041322589, + "epoch": 30.026978417266186, + "step": 33390 + }, + { + "epoch": 30.026978417266186, + "step": 33390, + "torque_loss": 0.1080147996544838 + }, + { + "epoch": 30.026978417266186, + "force_loss": 0.0016623451374471188, + "step": 33390 + }, + { + "epoch": 30.035971223021583, + "grad_norm": 0.2829987704753876, + "learning_rate": 4.477631753111588e-05, + "loss": 0.022, + "step": 33400 + }, + { + "action_loss": 0.012816439382731915, + "epoch": 30.035971223021583, + "step": 33400 + }, + { + "epoch": 30.035971223021583, + "step": 33400, + "torque_loss": 0.12620994448661804 + }, + { + "epoch": 30.035971223021583, + "force_loss": 0.005510048475116491, + "step": 33400 + }, + { + "epoch": 30.044964028776977, + "grad_norm": 0.15960019826889038, + "learning_rate": 4.4748911301571686e-05, + "loss": 0.0188, + "step": 33410 + }, + { + "action_loss": 0.0019453917630016804, + "epoch": 30.044964028776977, + "step": 33410 + }, + { + "epoch": 30.044964028776977, + "step": 33410, + "torque_loss": 0.15751896798610687 + }, + { + "epoch": 30.044964028776977, + "force_loss": 0.0013027280801907182, + "step": 33410 + }, + { + "epoch": 30.053956834532375, + "grad_norm": 0.16101831197738647, + "learning_rate": 4.472150666716961e-05, + "loss": 0.0162, + "step": 33420 + }, + { + "action_loss": 0.0054649473167955875, + "epoch": 30.053956834532375, + "step": 33420 + }, + { + "epoch": 30.053956834532375, + "step": 33420, + "torque_loss": 0.1371350735425949 + }, + { + "epoch": 30.053956834532375, + "force_loss": 0.007342675235122442, + "step": 33420 + }, + { + "epoch": 30.06294964028777, + "grad_norm": 0.21752667427062988, + "learning_rate": 4.469410363623442e-05, + "loss": 0.0148, + "step": 33430 + }, + { + "action_loss": 0.0023852363228797913, + "epoch": 30.06294964028777, + "step": 33430 + }, + { + "epoch": 30.06294964028777, + "step": 33430, + "torque_loss": 0.11105093359947205 + }, + { + "epoch": 30.06294964028777, + "force_loss": 0.007129593286663294, + "step": 33430 + }, + { + "epoch": 30.071942446043167, + "grad_norm": 0.31025755405426025, + "learning_rate": 4.466670221709044e-05, + "loss": 0.0272, + "step": 33440 + }, + { + "action_loss": 0.0015622731298208237, + "epoch": 30.071942446043167, + "step": 33440 + }, + { + "epoch": 30.071942446043167, + "step": 33440, + "torque_loss": 0.07495087385177612 + }, + { + "epoch": 30.071942446043167, + "force_loss": 0.0027264754753559828, + "step": 33440 + }, + { + "epoch": 30.08093525179856, + "grad_norm": 0.16972821950912476, + "learning_rate": 4.463930241806154e-05, + "loss": 0.0186, + "step": 33450 + }, + { + "action_loss": 0.0013034323928877711, + "epoch": 30.08093525179856, + "step": 33450 + }, + { + "epoch": 30.08093525179856, + "step": 33450, + "torque_loss": 0.08380503207445145 + }, + { + "epoch": 30.08093525179856, + "force_loss": 0.0033943590242415667, + "step": 33450 + }, + { + "epoch": 30.08992805755396, + "grad_norm": 0.17786522209644318, + "learning_rate": 4.4611904247471006e-05, + "loss": 0.0155, + "step": 33460 + }, + { + "action_loss": 0.004154016263782978, + "epoch": 30.08992805755396, + "step": 33460 + }, + { + "epoch": 30.08992805755396, + "step": 33460, + "torque_loss": 0.12680579721927643 + }, + { + "epoch": 30.08992805755396, + "force_loss": 0.005134863313287497, + "step": 33460 + }, + { + "epoch": 30.098920863309353, + "grad_norm": 0.24578897655010223, + "learning_rate": 4.458450771364171e-05, + "loss": 0.0159, + "step": 33470 + }, + { + "action_loss": 0.0011644229525700212, + "epoch": 30.098920863309353, + "step": 33470 + }, + { + "epoch": 30.098920863309353, + "step": 33470, + "torque_loss": 0.08433590084314346 + }, + { + "epoch": 30.098920863309353, + "force_loss": 0.001824203529395163, + "step": 33470 + }, + { + "epoch": 30.107913669064747, + "grad_norm": 0.23850153386592865, + "learning_rate": 4.4557112824895965e-05, + "loss": 0.0155, + "step": 33480 + }, + { + "action_loss": 0.004469658713787794, + "epoch": 30.107913669064747, + "step": 33480 + }, + { + "epoch": 30.107913669064747, + "step": 33480, + "torque_loss": 0.13697348535060883 + }, + { + "epoch": 30.107913669064747, + "force_loss": 0.012753158807754517, + "step": 33480 + }, + { + "epoch": 30.116906474820144, + "grad_norm": 0.23066876828670502, + "learning_rate": 4.452971958955563e-05, + "loss": 0.0171, + "step": 33490 + }, + { + "action_loss": 0.004219897091388702, + "epoch": 30.116906474820144, + "step": 33490 + }, + { + "epoch": 30.116906474820144, + "step": 33490, + "torque_loss": 0.10240279883146286 + }, + { + "epoch": 30.116906474820144, + "force_loss": 0.0023729100357741117, + "step": 33490 + }, + { + "epoch": 30.12589928057554, + "grad_norm": 0.183693066239357, + "learning_rate": 4.450232801594208e-05, + "loss": 0.0166, + "step": 33500 + }, + { + "action_loss": 0.008158431388437748, + "epoch": 30.12589928057554, + "step": 33500 + }, + { + "epoch": 30.12589928057554, + "step": 33500, + "torque_loss": 0.11823907494544983 + }, + { + "epoch": 30.12589928057554, + "force_loss": 0.009602522477507591, + "step": 33500 + }, + { + "epoch": 30.134892086330936, + "grad_norm": 0.23512250185012817, + "learning_rate": 4.447493811237609e-05, + "loss": 0.0178, + "step": 33510 + }, + { + "action_loss": 0.012874695472419262, + "epoch": 30.134892086330936, + "step": 33510 + }, + { + "epoch": 30.134892086330936, + "step": 33510, + "torque_loss": 0.1644458770751953 + }, + { + "epoch": 30.134892086330936, + "force_loss": 0.018665602430701256, + "step": 33510 + }, + { + "epoch": 30.14388489208633, + "grad_norm": 0.2832042872905731, + "learning_rate": 4.444754988717804e-05, + "loss": 0.0197, + "step": 33520 + }, + { + "action_loss": 0.0023626608308404684, + "epoch": 30.14388489208633, + "step": 33520 + }, + { + "epoch": 30.14388489208633, + "step": 33520, + "torque_loss": 0.13426519930362701 + }, + { + "epoch": 30.14388489208633, + "force_loss": 0.0022349469363689423, + "step": 33520 + }, + { + "epoch": 30.152877697841728, + "grad_norm": 0.3160881996154785, + "learning_rate": 4.442016334866771e-05, + "loss": 0.0234, + "step": 33530 + }, + { + "action_loss": 0.007441936060786247, + "epoch": 30.152877697841728, + "step": 33530 + }, + { + "epoch": 30.152877697841728, + "step": 33530, + "torque_loss": 0.10917917639017105 + }, + { + "epoch": 30.152877697841728, + "force_loss": 0.0067154522985219955, + "step": 33530 + }, + { + "epoch": 30.16187050359712, + "grad_norm": 0.17749209702014923, + "learning_rate": 4.4392778505164445e-05, + "loss": 0.0173, + "step": 33540 + }, + { + "action_loss": 0.0013483191141858697, + "epoch": 30.16187050359712, + "step": 33540 + }, + { + "epoch": 30.16187050359712, + "step": 33540, + "torque_loss": 0.13838058710098267 + }, + { + "epoch": 30.16187050359712, + "force_loss": 0.0011774327140301466, + "step": 33540 + }, + { + "epoch": 30.17086330935252, + "grad_norm": 0.3133264482021332, + "learning_rate": 4.436539536498702e-05, + "loss": 0.0165, + "step": 33550 + }, + { + "action_loss": 0.00280605535954237, + "epoch": 30.17086330935252, + "step": 33550 + }, + { + "epoch": 30.17086330935252, + "step": 33550, + "torque_loss": 0.13557972013950348 + }, + { + "epoch": 30.17086330935252, + "force_loss": 0.0042430199682712555, + "step": 33550 + }, + { + "epoch": 30.179856115107913, + "grad_norm": 0.2542215883731842, + "learning_rate": 4.433801393645369e-05, + "loss": 0.0175, + "step": 33560 + }, + { + "action_loss": 0.004041679203510284, + "epoch": 30.179856115107913, + "step": 33560 + }, + { + "epoch": 30.179856115107913, + "step": 33560, + "torque_loss": 0.13326963782310486 + }, + { + "epoch": 30.179856115107913, + "force_loss": 0.0023123843129724264, + "step": 33560 + }, + { + "epoch": 30.18884892086331, + "grad_norm": 0.6925213932991028, + "learning_rate": 4.431063422788226e-05, + "loss": 0.0181, + "step": 33570 + }, + { + "action_loss": 0.003081059781834483, + "epoch": 30.18884892086331, + "step": 33570 + }, + { + "epoch": 30.18884892086331, + "step": 33570, + "torque_loss": 0.1330539584159851 + }, + { + "epoch": 30.18884892086331, + "force_loss": 0.007840510457754135, + "step": 33570 + }, + { + "epoch": 30.197841726618705, + "grad_norm": 0.3583317697048187, + "learning_rate": 4.428325624758991e-05, + "loss": 0.0177, + "step": 33580 + }, + { + "action_loss": 0.003772567957639694, + "epoch": 30.197841726618705, + "step": 33580 + }, + { + "epoch": 30.197841726618705, + "step": 33580, + "torque_loss": 0.10897797346115112 + }, + { + "epoch": 30.197841726618705, + "force_loss": 0.003163060173392296, + "step": 33580 + }, + { + "epoch": 30.2068345323741, + "grad_norm": 0.4390246868133545, + "learning_rate": 4.4255880003893366e-05, + "loss": 0.0217, + "step": 33590 + }, + { + "action_loss": 0.002452633110806346, + "epoch": 30.2068345323741, + "step": 33590 + }, + { + "epoch": 30.2068345323741, + "step": 33590, + "torque_loss": 0.13239872455596924 + }, + { + "epoch": 30.2068345323741, + "force_loss": 0.003955683205276728, + "step": 33590 + }, + { + "epoch": 30.215827338129497, + "grad_norm": 0.23491589725017548, + "learning_rate": 4.422850550510884e-05, + "loss": 0.0166, + "step": 33600 + }, + { + "action_loss": 0.007710794452577829, + "epoch": 30.215827338129497, + "step": 33600 + }, + { + "epoch": 30.215827338129497, + "step": 33600, + "torque_loss": 0.21398377418518066 + }, + { + "epoch": 30.215827338129497, + "force_loss": 0.012334768660366535, + "step": 33600 + }, + { + "epoch": 30.22482014388489, + "grad_norm": 0.29465538263320923, + "learning_rate": 4.4201132759551934e-05, + "loss": 0.0205, + "step": 33610 + }, + { + "action_loss": 0.004370903596282005, + "epoch": 30.22482014388489, + "step": 33610 + }, + { + "epoch": 30.22482014388489, + "step": 33610, + "torque_loss": 0.1381704956293106 + }, + { + "epoch": 30.22482014388489, + "force_loss": 0.0032118279486894608, + "step": 33610 + }, + { + "epoch": 30.23381294964029, + "grad_norm": 0.21574260294437408, + "learning_rate": 4.4173761775537804e-05, + "loss": 0.0207, + "step": 33620 + }, + { + "action_loss": 0.0028051387052983046, + "epoch": 30.23381294964029, + "step": 33620 + }, + { + "epoch": 30.23381294964029, + "step": 33620, + "torque_loss": 0.10356395691633224 + }, + { + "epoch": 30.23381294964029, + "force_loss": 0.0020543793216347694, + "step": 33620 + }, + { + "epoch": 30.242805755395683, + "grad_norm": 0.320326566696167, + "learning_rate": 4.414639256138099e-05, + "loss": 0.0185, + "step": 33630 + }, + { + "action_loss": 0.00179259292781353, + "epoch": 30.242805755395683, + "step": 33630 + }, + { + "epoch": 30.242805755395683, + "step": 33630, + "torque_loss": 0.1169067919254303 + }, + { + "epoch": 30.242805755395683, + "force_loss": 0.003782205516472459, + "step": 33630 + }, + { + "epoch": 30.25179856115108, + "grad_norm": 0.24306286871433258, + "learning_rate": 4.411902512539557e-05, + "loss": 0.0186, + "step": 33640 + }, + { + "action_loss": 0.002906916895881295, + "epoch": 30.25179856115108, + "step": 33640 + }, + { + "epoch": 30.25179856115108, + "step": 33640, + "torque_loss": 0.1894126981496811 + }, + { + "epoch": 30.25179856115108, + "force_loss": 0.0016277733957394958, + "step": 33640 + }, + { + "epoch": 30.260791366906474, + "grad_norm": 0.36424165964126587, + "learning_rate": 4.4091659475895044e-05, + "loss": 0.0173, + "step": 33650 + }, + { + "action_loss": 0.0023266819771379232, + "epoch": 30.260791366906474, + "step": 33650 + }, + { + "epoch": 30.260791366906474, + "step": 33650, + "torque_loss": 0.12251365184783936 + }, + { + "epoch": 30.260791366906474, + "force_loss": 0.005497979465872049, + "step": 33650 + }, + { + "epoch": 30.269784172661872, + "grad_norm": 0.23687084019184113, + "learning_rate": 4.406429562119235e-05, + "loss": 0.0184, + "step": 33660 + }, + { + "action_loss": 0.008143831975758076, + "epoch": 30.269784172661872, + "step": 33660 + }, + { + "epoch": 30.269784172661872, + "step": 33660, + "torque_loss": 0.13751937448978424 + }, + { + "epoch": 30.269784172661872, + "force_loss": 0.00900186225771904, + "step": 33660 + }, + { + "epoch": 30.278776978417266, + "grad_norm": 0.37069347500801086, + "learning_rate": 4.4036933569599945e-05, + "loss": 0.0192, + "step": 33670 + }, + { + "action_loss": 0.003267800435423851, + "epoch": 30.278776978417266, + "step": 33670 + }, + { + "epoch": 30.278776978417266, + "step": 33670, + "torque_loss": 0.14806006848812103 + }, + { + "epoch": 30.278776978417266, + "force_loss": 0.003686965210363269, + "step": 33670 + }, + { + "epoch": 30.28776978417266, + "grad_norm": 0.22585856914520264, + "learning_rate": 4.400957332942965e-05, + "loss": 0.0192, + "step": 33680 + }, + { + "action_loss": 0.004052708391100168, + "epoch": 30.28776978417266, + "step": 33680 + }, + { + "epoch": 30.28776978417266, + "step": 33680, + "torque_loss": 0.109751857817173 + }, + { + "epoch": 30.28776978417266, + "force_loss": 0.005819523707032204, + "step": 33680 + }, + { + "epoch": 30.296762589928058, + "grad_norm": 0.33284908533096313, + "learning_rate": 4.3982214908992844e-05, + "loss": 0.0173, + "step": 33690 + }, + { + "action_loss": 0.00896837655454874, + "epoch": 30.296762589928058, + "step": 33690 + }, + { + "epoch": 30.296762589928058, + "step": 33690, + "torque_loss": 0.12630636990070343 + }, + { + "epoch": 30.296762589928058, + "force_loss": 0.009608641266822815, + "step": 33690 + }, + { + "epoch": 30.305755395683452, + "grad_norm": 0.3137732148170471, + "learning_rate": 4.3954858316600235e-05, + "loss": 0.0176, + "step": 33700 + }, + { + "action_loss": 0.0027881485875695944, + "epoch": 30.305755395683452, + "step": 33700 + }, + { + "epoch": 30.305755395683452, + "step": 33700, + "torque_loss": 0.14884357154369354 + }, + { + "epoch": 30.305755395683452, + "force_loss": 0.0032020965591073036, + "step": 33700 + }, + { + "epoch": 30.31474820143885, + "grad_norm": 0.21497157216072083, + "learning_rate": 4.392750356056205e-05, + "loss": 0.0173, + "step": 33710 + }, + { + "action_loss": 0.0028638120274990797, + "epoch": 30.31474820143885, + "step": 33710 + }, + { + "epoch": 30.31474820143885, + "step": 33710, + "torque_loss": 0.07155754417181015 + }, + { + "epoch": 30.31474820143885, + "force_loss": 0.00342006329447031, + "step": 33710 + }, + { + "epoch": 30.323741007194243, + "grad_norm": 0.13051877915859222, + "learning_rate": 4.390015064918798e-05, + "loss": 0.0161, + "step": 33720 + }, + { + "action_loss": 0.0015397461829707026, + "epoch": 30.323741007194243, + "step": 33720 + }, + { + "epoch": 30.323741007194243, + "step": 33720, + "torque_loss": 0.17357449233531952 + }, + { + "epoch": 30.323741007194243, + "force_loss": 0.0026060156524181366, + "step": 33720 + }, + { + "epoch": 30.33273381294964, + "grad_norm": 0.13377784192562103, + "learning_rate": 4.387279959078705e-05, + "loss": 0.0165, + "step": 33730 + }, + { + "action_loss": 0.008763168938457966, + "epoch": 30.33273381294964, + "step": 33730 + }, + { + "epoch": 30.33273381294964, + "step": 33730, + "torque_loss": 0.14452257752418518 + }, + { + "epoch": 30.33273381294964, + "force_loss": 0.010376120917499065, + "step": 33730 + }, + { + "epoch": 30.341726618705035, + "grad_norm": 0.14394210278987885, + "learning_rate": 4.384545039366786e-05, + "loss": 0.0176, + "step": 33740 + }, + { + "action_loss": 0.005538098514080048, + "epoch": 30.341726618705035, + "step": 33740 + }, + { + "epoch": 30.341726618705035, + "step": 33740, + "torque_loss": 0.22733789682388306 + }, + { + "epoch": 30.341726618705035, + "force_loss": 0.004319025669246912, + "step": 33740 + }, + { + "epoch": 30.350719424460433, + "grad_norm": 0.3404535949230194, + "learning_rate": 4.381810306613831e-05, + "loss": 0.0216, + "step": 33750 + }, + { + "action_loss": 0.006918886210769415, + "epoch": 30.350719424460433, + "step": 33750 + }, + { + "epoch": 30.350719424460433, + "step": 33750, + "torque_loss": 0.1701337844133377 + }, + { + "epoch": 30.350719424460433, + "force_loss": 0.011591176502406597, + "step": 33750 + }, + { + "epoch": 30.359712230215827, + "grad_norm": 0.13359081745147705, + "learning_rate": 4.3790757616505826e-05, + "loss": 0.0171, + "step": 33760 + }, + { + "action_loss": 0.013710635714232922, + "epoch": 30.359712230215827, + "step": 33760 + }, + { + "epoch": 30.359712230215827, + "step": 33760, + "torque_loss": 0.10910716652870178 + }, + { + "epoch": 30.359712230215827, + "force_loss": 0.01313174981623888, + "step": 33760 + }, + { + "epoch": 30.368705035971225, + "grad_norm": 0.33932897448539734, + "learning_rate": 4.376341405307725e-05, + "loss": 0.0221, + "step": 33770 + }, + { + "action_loss": 0.0016877828165888786, + "epoch": 30.368705035971225, + "step": 33770 + }, + { + "epoch": 30.368705035971225, + "step": 33770, + "torque_loss": 0.12250272184610367 + }, + { + "epoch": 30.368705035971225, + "force_loss": 0.001253430382348597, + "step": 33770 + }, + { + "epoch": 30.37769784172662, + "grad_norm": 0.15971879661083221, + "learning_rate": 4.37360723841588e-05, + "loss": 0.0208, + "step": 33780 + }, + { + "action_loss": 0.006034780293703079, + "epoch": 30.37769784172662, + "step": 33780 + }, + { + "epoch": 30.37769784172662, + "step": 33780, + "torque_loss": 0.16161827743053436 + }, + { + "epoch": 30.37769784172662, + "force_loss": 0.004980245605111122, + "step": 33780 + }, + { + "epoch": 30.386690647482013, + "grad_norm": 0.3918313980102539, + "learning_rate": 4.370873261805619e-05, + "loss": 0.0245, + "step": 33790 + }, + { + "action_loss": 0.01284277718514204, + "epoch": 30.386690647482013, + "step": 33790 + }, + { + "epoch": 30.386690647482013, + "step": 33790, + "torque_loss": 0.16608965396881104 + }, + { + "epoch": 30.386690647482013, + "force_loss": 0.00825351383537054, + "step": 33790 + }, + { + "epoch": 30.39568345323741, + "grad_norm": 0.21983924508094788, + "learning_rate": 4.368139476307449e-05, + "loss": 0.0221, + "step": 33800 + }, + { + "action_loss": 0.007103261072188616, + "epoch": 30.39568345323741, + "step": 33800 + }, + { + "epoch": 30.39568345323741, + "step": 33800, + "torque_loss": 0.11381250619888306 + }, + { + "epoch": 30.39568345323741, + "force_loss": 0.007707206532359123, + "step": 33800 + }, + { + "epoch": 30.404676258992804, + "grad_norm": 0.32419559359550476, + "learning_rate": 4.365405882751822e-05, + "loss": 0.019, + "step": 33810 + }, + { + "action_loss": 0.0071796514093875885, + "epoch": 30.404676258992804, + "step": 33810 + }, + { + "epoch": 30.404676258992804, + "step": 33810, + "torque_loss": 0.16883157193660736 + }, + { + "epoch": 30.404676258992804, + "force_loss": 0.008002888411283493, + "step": 33810 + }, + { + "epoch": 30.413669064748202, + "grad_norm": 0.30898603796958923, + "learning_rate": 4.3626724819691326e-05, + "loss": 0.0164, + "step": 33820 + }, + { + "action_loss": 0.003020596457645297, + "epoch": 30.413669064748202, + "step": 33820 + }, + { + "epoch": 30.413669064748202, + "step": 33820, + "torque_loss": 0.08189874142408371 + }, + { + "epoch": 30.413669064748202, + "force_loss": 0.0019158223876729608, + "step": 33820 + }, + { + "epoch": 30.422661870503596, + "grad_norm": 0.18998531997203827, + "learning_rate": 4.359939274789715e-05, + "loss": 0.0165, + "step": 33830 + }, + { + "action_loss": 0.003167690709233284, + "epoch": 30.422661870503596, + "step": 33830 + }, + { + "epoch": 30.422661870503596, + "step": 33830, + "torque_loss": 0.11638975143432617 + }, + { + "epoch": 30.422661870503596, + "force_loss": 0.001651280210353434, + "step": 33830 + }, + { + "epoch": 30.431654676258994, + "grad_norm": 0.128376767039299, + "learning_rate": 4.357206262043848e-05, + "loss": 0.0191, + "step": 33840 + }, + { + "action_loss": 0.0014494335046038032, + "epoch": 30.431654676258994, + "step": 33840 + }, + { + "epoch": 30.431654676258994, + "step": 33840, + "torque_loss": 0.10283250361680984 + }, + { + "epoch": 30.431654676258994, + "force_loss": 0.0010659225517883897, + "step": 33840 + }, + { + "epoch": 30.440647482014388, + "grad_norm": 0.14935314655303955, + "learning_rate": 4.354473444561745e-05, + "loss": 0.0154, + "step": 33850 + }, + { + "action_loss": 0.0070334733463823795, + "epoch": 30.440647482014388, + "step": 33850 + }, + { + "epoch": 30.440647482014388, + "step": 33850, + "torque_loss": 0.13468003273010254 + }, + { + "epoch": 30.440647482014388, + "force_loss": 0.01708083599805832, + "step": 33850 + }, + { + "epoch": 30.449640287769785, + "grad_norm": 0.3647903501987457, + "learning_rate": 4.3517408231735644e-05, + "loss": 0.0179, + "step": 33860 + }, + { + "action_loss": 0.004732321482151747, + "epoch": 30.449640287769785, + "step": 33860 + }, + { + "epoch": 30.449640287769785, + "step": 33860, + "torque_loss": 0.12329337000846863 + }, + { + "epoch": 30.449640287769785, + "force_loss": 0.018216587603092194, + "step": 33860 + }, + { + "epoch": 30.45863309352518, + "grad_norm": 0.1677381694316864, + "learning_rate": 4.3490083987094086e-05, + "loss": 0.0162, + "step": 33870 + }, + { + "action_loss": 0.0013670871267095208, + "epoch": 30.45863309352518, + "step": 33870 + }, + { + "epoch": 30.45863309352518, + "step": 33870, + "torque_loss": 0.10352057218551636 + }, + { + "epoch": 30.45863309352518, + "force_loss": 0.0013810965465381742, + "step": 33870 + }, + { + "epoch": 30.467625899280577, + "grad_norm": 0.131908118724823, + "learning_rate": 4.34627617199931e-05, + "loss": 0.0206, + "step": 33880 + }, + { + "action_loss": 0.0023977644741535187, + "epoch": 30.467625899280577, + "step": 33880 + }, + { + "epoch": 30.467625899280577, + "step": 33880, + "torque_loss": 0.11434202641248703 + }, + { + "epoch": 30.467625899280577, + "force_loss": 0.002391970017924905, + "step": 33880 + }, + { + "epoch": 30.47661870503597, + "grad_norm": 0.3285118341445923, + "learning_rate": 4.3435441438732526e-05, + "loss": 0.0168, + "step": 33890 + }, + { + "action_loss": 0.003221740946173668, + "epoch": 30.47661870503597, + "step": 33890 + }, + { + "epoch": 30.47661870503597, + "step": 33890, + "torque_loss": 0.12017184495925903 + }, + { + "epoch": 30.47661870503597, + "force_loss": 0.002145914128050208, + "step": 33890 + }, + { + "epoch": 30.485611510791365, + "grad_norm": 0.11932337284088135, + "learning_rate": 4.340812315161149e-05, + "loss": 0.0183, + "step": 33900 + }, + { + "action_loss": 0.00820623617619276, + "epoch": 30.485611510791365, + "step": 33900 + }, + { + "epoch": 30.485611510791365, + "step": 33900, + "torque_loss": 0.13924716413021088 + }, + { + "epoch": 30.485611510791365, + "force_loss": 0.010649804025888443, + "step": 33900 + }, + { + "epoch": 30.494604316546763, + "grad_norm": 0.9350066184997559, + "learning_rate": 4.338080686692859e-05, + "loss": 0.0217, + "step": 33910 + }, + { + "action_loss": 0.004893421661108732, + "epoch": 30.494604316546763, + "step": 33910 + }, + { + "epoch": 30.494604316546763, + "step": 33910, + "torque_loss": 0.10739513486623764 + }, + { + "epoch": 30.494604316546763, + "force_loss": 0.009484733454883099, + "step": 33910 + }, + { + "epoch": 30.503597122302157, + "grad_norm": 0.2702327072620392, + "learning_rate": 4.3353492592981816e-05, + "loss": 0.0164, + "step": 33920 + }, + { + "action_loss": 0.0021730372682213783, + "epoch": 30.503597122302157, + "step": 33920 + }, + { + "epoch": 30.503597122302157, + "step": 33920, + "torque_loss": 0.14096295833587646 + }, + { + "epoch": 30.503597122302157, + "force_loss": 0.003891879925504327, + "step": 33920 + }, + { + "epoch": 30.512589928057555, + "grad_norm": 0.29781439900398254, + "learning_rate": 4.3326180338068485e-05, + "loss": 0.0224, + "step": 33930 + }, + { + "action_loss": 0.0019507581600919366, + "epoch": 30.512589928057555, + "step": 33930 + }, + { + "epoch": 30.512589928057555, + "step": 33930, + "torque_loss": 0.10935936123132706 + }, + { + "epoch": 30.512589928057555, + "force_loss": 0.004337312187999487, + "step": 33930 + }, + { + "epoch": 30.52158273381295, + "grad_norm": 0.18422941863536835, + "learning_rate": 4.3298870110485356e-05, + "loss": 0.0189, + "step": 33940 + }, + { + "action_loss": 0.003793962299823761, + "epoch": 30.52158273381295, + "step": 33940 + }, + { + "epoch": 30.52158273381295, + "step": 33940, + "torque_loss": 0.09891750663518906 + }, + { + "epoch": 30.52158273381295, + "force_loss": 0.003323591547086835, + "step": 33940 + }, + { + "epoch": 30.530575539568346, + "grad_norm": 0.13422386348247528, + "learning_rate": 4.3271561918528567e-05, + "loss": 0.0159, + "step": 33950 + }, + { + "action_loss": 0.0013124371180310845, + "epoch": 30.530575539568346, + "step": 33950 + }, + { + "epoch": 30.530575539568346, + "step": 33950, + "torque_loss": 0.1449468731880188 + }, + { + "epoch": 30.530575539568346, + "force_loss": 0.0011732857674360275, + "step": 33950 + }, + { + "epoch": 30.53956834532374, + "grad_norm": 0.12928429245948792, + "learning_rate": 4.324425577049359e-05, + "loss": 0.0188, + "step": 33960 + }, + { + "action_loss": 0.004642493557184935, + "epoch": 30.53956834532374, + "step": 33960 + }, + { + "epoch": 30.53956834532374, + "step": 33960, + "torque_loss": 0.1675863415002823 + }, + { + "epoch": 30.53956834532374, + "force_loss": 0.01138545572757721, + "step": 33960 + }, + { + "epoch": 30.548561151079138, + "grad_norm": 0.2427515685558319, + "learning_rate": 4.321695167467535e-05, + "loss": 0.0179, + "step": 33970 + }, + { + "action_loss": 0.0022519289050251245, + "epoch": 30.548561151079138, + "step": 33970 + }, + { + "epoch": 30.548561151079138, + "step": 33970, + "torque_loss": 0.09331681579351425 + }, + { + "epoch": 30.548561151079138, + "force_loss": 0.004880804102867842, + "step": 33970 + }, + { + "epoch": 30.557553956834532, + "grad_norm": 0.24680624902248383, + "learning_rate": 4.3189649639368093e-05, + "loss": 0.0223, + "step": 33980 + }, + { + "action_loss": 0.00781483668833971, + "epoch": 30.557553956834532, + "step": 33980 + }, + { + "epoch": 30.557553956834532, + "step": 33980, + "torque_loss": 0.10990267992019653 + }, + { + "epoch": 30.557553956834532, + "force_loss": 0.004496725741773844, + "step": 33980 + }, + { + "epoch": 30.56654676258993, + "grad_norm": 0.27627503871917725, + "learning_rate": 4.316234967286547e-05, + "loss": 0.018, + "step": 33990 + }, + { + "action_loss": 0.002367333509027958, + "epoch": 30.56654676258993, + "step": 33990 + }, + { + "epoch": 30.56654676258993, + "step": 33990, + "torque_loss": 0.17009912431240082 + }, + { + "epoch": 30.56654676258993, + "force_loss": 0.002819410292431712, + "step": 33990 + }, + { + "epoch": 30.575539568345324, + "grad_norm": 0.42873600125312805, + "learning_rate": 4.313505178346046e-05, + "loss": 0.0234, + "step": 34000 + }, + { + "action_loss": 0.0024660711642354727, + "epoch": 30.575539568345324, + "step": 34000 + }, + { + "epoch": 30.575539568345324, + "step": 34000, + "torque_loss": 0.0902329683303833 + }, + { + "epoch": 30.575539568345324, + "force_loss": 0.0014743782812729478, + "step": 34000 + }, + { + "epoch": 30.584532374100718, + "grad_norm": 0.245400533080101, + "learning_rate": 4.3107755979445465e-05, + "loss": 0.0189, + "step": 34010 + }, + { + "action_loss": 0.00279278215020895, + "epoch": 30.584532374100718, + "step": 34010 + }, + { + "epoch": 30.584532374100718, + "step": 34010, + "torque_loss": 0.18927471339702606 + }, + { + "epoch": 30.584532374100718, + "force_loss": 0.0023267928045243025, + "step": 34010 + }, + { + "epoch": 30.593525179856115, + "grad_norm": 0.14921408891677856, + "learning_rate": 4.308046226911224e-05, + "loss": 0.017, + "step": 34020 + }, + { + "action_loss": 0.0031119815539568663, + "epoch": 30.593525179856115, + "step": 34020 + }, + { + "epoch": 30.593525179856115, + "step": 34020, + "torque_loss": 0.11340288072824478 + }, + { + "epoch": 30.593525179856115, + "force_loss": 0.0018722632667049766, + "step": 34020 + }, + { + "epoch": 30.60251798561151, + "grad_norm": 0.1204448863863945, + "learning_rate": 4.305317066075185e-05, + "loss": 0.019, + "step": 34030 + }, + { + "action_loss": 0.00167891348246485, + "epoch": 30.60251798561151, + "step": 34030 + }, + { + "epoch": 30.60251798561151, + "step": 34030, + "torque_loss": 0.17503099143505096 + }, + { + "epoch": 30.60251798561151, + "force_loss": 0.0012211333960294724, + "step": 34030 + }, + { + "epoch": 30.611510791366907, + "grad_norm": 0.21496781706809998, + "learning_rate": 4.302588116265482e-05, + "loss": 0.0179, + "step": 34040 + }, + { + "action_loss": 0.006426831241697073, + "epoch": 30.611510791366907, + "step": 34040 + }, + { + "epoch": 30.611510791366907, + "step": 34040, + "torque_loss": 0.1717856377363205 + }, + { + "epoch": 30.611510791366907, + "force_loss": 0.009729974903166294, + "step": 34040 + }, + { + "epoch": 30.6205035971223, + "grad_norm": 0.2579571008682251, + "learning_rate": 4.299859378311094e-05, + "loss": 0.0224, + "step": 34050 + }, + { + "action_loss": 0.00375187024474144, + "epoch": 30.6205035971223, + "step": 34050 + }, + { + "epoch": 30.6205035971223, + "step": 34050, + "torque_loss": 0.10367164760828018 + }, + { + "epoch": 30.6205035971223, + "force_loss": 0.009184802882373333, + "step": 34050 + }, + { + "epoch": 30.6294964028777, + "grad_norm": 0.31832313537597656, + "learning_rate": 4.2971308530409424e-05, + "loss": 0.0164, + "step": 34060 + }, + { + "action_loss": 0.01362632866948843, + "epoch": 30.6294964028777, + "step": 34060 + }, + { + "epoch": 30.6294964028777, + "step": 34060, + "torque_loss": 0.13499194383621216 + }, + { + "epoch": 30.6294964028777, + "force_loss": 0.017313940450549126, + "step": 34060 + }, + { + "epoch": 30.638489208633093, + "grad_norm": 0.29899221658706665, + "learning_rate": 4.2944025412838765e-05, + "loss": 0.0167, + "step": 34070 + }, + { + "action_loss": 0.004860586486756802, + "epoch": 30.638489208633093, + "step": 34070 + }, + { + "epoch": 30.638489208633093, + "step": 34070, + "torque_loss": 0.16460739076137543 + }, + { + "epoch": 30.638489208633093, + "force_loss": 0.003158821724355221, + "step": 34070 + }, + { + "epoch": 30.64748201438849, + "grad_norm": 0.2701317071914673, + "learning_rate": 4.291674443868689e-05, + "loss": 0.0177, + "step": 34080 + }, + { + "action_loss": 0.004506182856857777, + "epoch": 30.64748201438849, + "step": 34080 + }, + { + "epoch": 30.64748201438849, + "step": 34080, + "torque_loss": 0.14024712145328522 + }, + { + "epoch": 30.64748201438849, + "force_loss": 0.0014873739564791322, + "step": 34080 + }, + { + "epoch": 30.656474820143885, + "grad_norm": 0.1805032640695572, + "learning_rate": 4.288946561624104e-05, + "loss": 0.0168, + "step": 34090 + }, + { + "action_loss": 0.002074779011309147, + "epoch": 30.656474820143885, + "step": 34090 + }, + { + "epoch": 30.656474820143885, + "step": 34090, + "torque_loss": 0.09850070625543594 + }, + { + "epoch": 30.656474820143885, + "force_loss": 0.0031660879030823708, + "step": 34090 + }, + { + "epoch": 30.665467625899282, + "grad_norm": 0.10007679462432861, + "learning_rate": 4.2862188953787794e-05, + "loss": 0.0182, + "step": 34100 + }, + { + "action_loss": 0.005797685589641333, + "epoch": 30.665467625899282, + "step": 34100 + }, + { + "epoch": 30.665467625899282, + "step": 34100, + "torque_loss": 0.12656909227371216 + }, + { + "epoch": 30.665467625899282, + "force_loss": 0.010923708789050579, + "step": 34100 + }, + { + "epoch": 30.674460431654676, + "grad_norm": 0.295224130153656, + "learning_rate": 4.283491445961308e-05, + "loss": 0.0178, + "step": 34110 + }, + { + "action_loss": 0.004737206734716892, + "epoch": 30.674460431654676, + "step": 34110 + }, + { + "epoch": 30.674460431654676, + "step": 34110, + "torque_loss": 0.12477246671915054 + }, + { + "epoch": 30.674460431654676, + "force_loss": 0.003697453299537301, + "step": 34110 + }, + { + "epoch": 30.68345323741007, + "grad_norm": 0.2363261878490448, + "learning_rate": 4.2807642142002155e-05, + "loss": 0.0179, + "step": 34120 + }, + { + "action_loss": 0.0015916851116344333, + "epoch": 30.68345323741007, + "step": 34120 + }, + { + "epoch": 30.68345323741007, + "step": 34120, + "torque_loss": 0.15076014399528503 + }, + { + "epoch": 30.68345323741007, + "force_loss": 0.0037832444068044424, + "step": 34120 + }, + { + "epoch": 30.692446043165468, + "grad_norm": 0.2545408606529236, + "learning_rate": 4.278037200923966e-05, + "loss": 0.0162, + "step": 34130 + }, + { + "action_loss": 0.0030171528924256563, + "epoch": 30.692446043165468, + "step": 34130 + }, + { + "epoch": 30.692446043165468, + "step": 34130, + "torque_loss": 0.12975893914699554 + }, + { + "epoch": 30.692446043165468, + "force_loss": 0.003769857110455632, + "step": 34130 + }, + { + "epoch": 30.701438848920862, + "grad_norm": 0.19470427930355072, + "learning_rate": 4.275310406960953e-05, + "loss": 0.0225, + "step": 34140 + }, + { + "action_loss": 0.02639020048081875, + "epoch": 30.701438848920862, + "step": 34140 + }, + { + "epoch": 30.701438848920862, + "step": 34140, + "torque_loss": 0.2032298594713211 + }, + { + "epoch": 30.701438848920862, + "force_loss": 0.044616106897592545, + "step": 34140 + }, + { + "epoch": 30.71043165467626, + "grad_norm": 0.19291940331459045, + "learning_rate": 4.272583833139502e-05, + "loss": 0.0237, + "step": 34150 + }, + { + "action_loss": 0.0019570773001760244, + "epoch": 30.71043165467626, + "step": 34150 + }, + { + "epoch": 30.71043165467626, + "step": 34150, + "torque_loss": 0.15377110242843628 + }, + { + "epoch": 30.71043165467626, + "force_loss": 0.0014714262215420604, + "step": 34150 + }, + { + "epoch": 30.719424460431654, + "grad_norm": 0.35958021879196167, + "learning_rate": 4.2698574802878794e-05, + "loss": 0.0187, + "step": 34160 + }, + { + "action_loss": 0.008195770904421806, + "epoch": 30.719424460431654, + "step": 34160 + }, + { + "epoch": 30.719424460431654, + "step": 34160, + "torque_loss": 0.10825852304697037 + }, + { + "epoch": 30.719424460431654, + "force_loss": 0.0059619080275297165, + "step": 34160 + }, + { + "epoch": 30.72841726618705, + "grad_norm": 0.25675928592681885, + "learning_rate": 4.2671313492342734e-05, + "loss": 0.0164, + "step": 34170 + }, + { + "action_loss": 0.004299210850149393, + "epoch": 30.72841726618705, + "step": 34170 + }, + { + "epoch": 30.72841726618705, + "step": 34170, + "torque_loss": 0.08962175995111465 + }, + { + "epoch": 30.72841726618705, + "force_loss": 0.009974678978323936, + "step": 34170 + }, + { + "epoch": 30.737410071942445, + "grad_norm": 0.337780624628067, + "learning_rate": 4.264405440806813e-05, + "loss": 0.0172, + "step": 34180 + }, + { + "action_loss": 0.0011797873303294182, + "epoch": 30.737410071942445, + "step": 34180 + }, + { + "epoch": 30.737410071942445, + "step": 34180, + "torque_loss": 0.12248945981264114 + }, + { + "epoch": 30.737410071942445, + "force_loss": 0.001072687329724431, + "step": 34180 + }, + { + "epoch": 30.746402877697843, + "grad_norm": 0.2946050465106964, + "learning_rate": 4.26167975583356e-05, + "loss": 0.0227, + "step": 34190 + }, + { + "action_loss": 0.0018826182931661606, + "epoch": 30.746402877697843, + "step": 34190 + }, + { + "epoch": 30.746402877697843, + "step": 34190, + "torque_loss": 0.1174575462937355 + }, + { + "epoch": 30.746402877697843, + "force_loss": 0.0018945131450891495, + "step": 34190 + }, + { + "epoch": 30.755395683453237, + "grad_norm": 0.23761264979839325, + "learning_rate": 4.2589542951425e-05, + "loss": 0.0165, + "step": 34200 + }, + { + "action_loss": 0.003781116334721446, + "epoch": 30.755395683453237, + "step": 34200 + }, + { + "epoch": 30.755395683453237, + "step": 34200, + "torque_loss": 0.15604843199253082 + }, + { + "epoch": 30.755395683453237, + "force_loss": 0.003606157610192895, + "step": 34200 + }, + { + "epoch": 30.764388489208635, + "grad_norm": 0.2848336696624756, + "learning_rate": 4.2562290595615615e-05, + "loss": 0.0181, + "step": 34210 + }, + { + "action_loss": 0.003935912624001503, + "epoch": 30.764388489208635, + "step": 34210 + }, + { + "epoch": 30.764388489208635, + "step": 34210, + "torque_loss": 0.11677604913711548 + }, + { + "epoch": 30.764388489208635, + "force_loss": 0.005298606585711241, + "step": 34210 + }, + { + "epoch": 30.77338129496403, + "grad_norm": 0.18371392786502838, + "learning_rate": 4.2535040499185946e-05, + "loss": 0.0206, + "step": 34220 + }, + { + "action_loss": 0.003213894786313176, + "epoch": 30.77338129496403, + "step": 34220 + }, + { + "epoch": 30.77338129496403, + "step": 34220, + "torque_loss": 0.12822218239307404 + }, + { + "epoch": 30.77338129496403, + "force_loss": 0.0016867819940671325, + "step": 34220 + }, + { + "epoch": 30.782374100719423, + "grad_norm": 0.4384494423866272, + "learning_rate": 4.250779267041387e-05, + "loss": 0.0197, + "step": 34230 + }, + { + "action_loss": 0.0026134636718779802, + "epoch": 30.782374100719423, + "step": 34230 + }, + { + "epoch": 30.782374100719423, + "step": 34230, + "torque_loss": 0.09193725138902664 + }, + { + "epoch": 30.782374100719423, + "force_loss": 0.002919525606557727, + "step": 34230 + }, + { + "epoch": 30.79136690647482, + "grad_norm": 0.3756982088088989, + "learning_rate": 4.248054711757657e-05, + "loss": 0.0195, + "step": 34240 + }, + { + "action_loss": 0.0046348064206540585, + "epoch": 30.79136690647482, + "step": 34240 + }, + { + "epoch": 30.79136690647482, + "step": 34240, + "torque_loss": 0.10517802834510803 + }, + { + "epoch": 30.79136690647482, + "force_loss": 0.0036902560386806726, + "step": 34240 + }, + { + "epoch": 30.800359712230215, + "grad_norm": 0.18024572730064392, + "learning_rate": 4.245330384895052e-05, + "loss": 0.0199, + "step": 34250 + }, + { + "action_loss": 0.016274841502308846, + "epoch": 30.800359712230215, + "step": 34250 + }, + { + "epoch": 30.800359712230215, + "step": 34250, + "torque_loss": 0.18422596156597137 + }, + { + "epoch": 30.800359712230215, + "force_loss": 0.01038005668669939, + "step": 34250 + }, + { + "epoch": 30.809352517985612, + "grad_norm": 0.2398492991924286, + "learning_rate": 4.242606287281151e-05, + "loss": 0.0241, + "step": 34260 + }, + { + "action_loss": 0.012687087059020996, + "epoch": 30.809352517985612, + "step": 34260 + }, + { + "epoch": 30.809352517985612, + "step": 34260, + "torque_loss": 0.14182384312152863 + }, + { + "epoch": 30.809352517985612, + "force_loss": 0.02039421536028385, + "step": 34260 + }, + { + "epoch": 30.818345323741006, + "grad_norm": 0.2453010231256485, + "learning_rate": 4.2398824197434595e-05, + "loss": 0.0252, + "step": 34270 + }, + { + "action_loss": 0.0049733868800103664, + "epoch": 30.818345323741006, + "step": 34270 + }, + { + "epoch": 30.818345323741006, + "step": 34270, + "torque_loss": 0.16707904636859894 + }, + { + "epoch": 30.818345323741006, + "force_loss": 0.010182465426623821, + "step": 34270 + }, + { + "epoch": 30.827338129496404, + "grad_norm": 0.11318911612033844, + "learning_rate": 4.23715878310942e-05, + "loss": 0.019, + "step": 34280 + }, + { + "action_loss": 0.0023354210425168276, + "epoch": 30.827338129496404, + "step": 34280 + }, + { + "epoch": 30.827338129496404, + "step": 34280, + "torque_loss": 0.08740288019180298 + }, + { + "epoch": 30.827338129496404, + "force_loss": 0.0018805539002642035, + "step": 34280 + }, + { + "epoch": 30.836330935251798, + "grad_norm": 0.27674686908721924, + "learning_rate": 4.234435378206402e-05, + "loss": 0.0171, + "step": 34290 + }, + { + "action_loss": 0.005275351461023092, + "epoch": 30.836330935251798, + "step": 34290 + }, + { + "epoch": 30.836330935251798, + "step": 34290, + "torque_loss": 0.1801472306251526 + }, + { + "epoch": 30.836330935251798, + "force_loss": 0.008779492229223251, + "step": 34290 + }, + { + "epoch": 30.845323741007196, + "grad_norm": 0.22926506400108337, + "learning_rate": 4.2317122058617006e-05, + "loss": 0.0189, + "step": 34300 + }, + { + "action_loss": 0.0030152134131640196, + "epoch": 30.845323741007196, + "step": 34300 + }, + { + "epoch": 30.845323741007196, + "step": 34300, + "torque_loss": 0.11216884106397629 + }, + { + "epoch": 30.845323741007196, + "force_loss": 0.0015955861890688539, + "step": 34300 + }, + { + "epoch": 30.85431654676259, + "grad_norm": 0.22880510985851288, + "learning_rate": 4.2289892669025485e-05, + "loss": 0.0185, + "step": 34310 + }, + { + "action_loss": 0.0020585735328495502, + "epoch": 30.85431654676259, + "step": 34310 + }, + { + "epoch": 30.85431654676259, + "step": 34310, + "torque_loss": 0.12581248581409454 + }, + { + "epoch": 30.85431654676259, + "force_loss": 0.0010630955221131444, + "step": 34310 + }, + { + "epoch": 30.863309352517987, + "grad_norm": 0.3811962902545929, + "learning_rate": 4.226266562156097e-05, + "loss": 0.0194, + "step": 34320 + }, + { + "action_loss": 0.0009385032462887466, + "epoch": 30.863309352517987, + "step": 34320 + }, + { + "epoch": 30.863309352517987, + "step": 34320, + "torque_loss": 0.10718772560358047 + }, + { + "epoch": 30.863309352517987, + "force_loss": 0.0010268172482028604, + "step": 34320 + }, + { + "epoch": 30.87230215827338, + "grad_norm": 0.2305983603000641, + "learning_rate": 4.223544092449435e-05, + "loss": 0.0182, + "step": 34330 + }, + { + "action_loss": 0.002569180214777589, + "epoch": 30.87230215827338, + "step": 34330 + }, + { + "epoch": 30.87230215827338, + "step": 34330, + "torque_loss": 0.1357460469007492 + }, + { + "epoch": 30.87230215827338, + "force_loss": 0.0052687483839690685, + "step": 34330 + }, + { + "epoch": 30.881294964028775, + "grad_norm": 0.1490369290113449, + "learning_rate": 4.2208218586095784e-05, + "loss": 0.0168, + "step": 34340 + }, + { + "action_loss": 0.002988257212564349, + "epoch": 30.881294964028775, + "step": 34340 + }, + { + "epoch": 30.881294964028775, + "step": 34340, + "torque_loss": 0.10673753172159195 + }, + { + "epoch": 30.881294964028775, + "force_loss": 0.0021501716692000628, + "step": 34340 + }, + { + "epoch": 30.890287769784173, + "grad_norm": 0.31900548934936523, + "learning_rate": 4.218099861463466e-05, + "loss": 0.0183, + "step": 34350 + }, + { + "action_loss": 0.0057869236916303635, + "epoch": 30.890287769784173, + "step": 34350 + }, + { + "epoch": 30.890287769784173, + "step": 34350, + "torque_loss": 0.13345248997211456 + }, + { + "epoch": 30.890287769784173, + "force_loss": 0.006804469972848892, + "step": 34350 + }, + { + "epoch": 30.899280575539567, + "grad_norm": 0.19831882417201996, + "learning_rate": 4.215378101837972e-05, + "loss": 0.0212, + "step": 34360 + }, + { + "action_loss": 0.004619716200977564, + "epoch": 30.899280575539567, + "step": 34360 + }, + { + "epoch": 30.899280575539567, + "step": 34360, + "torque_loss": 0.16431497037410736 + }, + { + "epoch": 30.899280575539567, + "force_loss": 0.003166493261232972, + "step": 34360 + }, + { + "epoch": 30.908273381294965, + "grad_norm": 0.11223962157964706, + "learning_rate": 4.2126565805598937e-05, + "loss": 0.0183, + "step": 34370 + }, + { + "action_loss": 0.0017998417606577277, + "epoch": 30.908273381294965, + "step": 34370 + }, + { + "epoch": 30.908273381294965, + "step": 34370, + "torque_loss": 0.09303891658782959 + }, + { + "epoch": 30.908273381294965, + "force_loss": 0.0026637546252459288, + "step": 34370 + }, + { + "epoch": 30.91726618705036, + "grad_norm": 0.29248949885368347, + "learning_rate": 4.209935298455957e-05, + "loss": 0.0165, + "step": 34380 + }, + { + "action_loss": 0.0026354186702519655, + "epoch": 30.91726618705036, + "step": 34380 + }, + { + "epoch": 30.91726618705036, + "step": 34380, + "torque_loss": 0.10319829732179642 + }, + { + "epoch": 30.91726618705036, + "force_loss": 0.0020492158364504576, + "step": 34380 + }, + { + "epoch": 30.926258992805757, + "grad_norm": 0.17651765048503876, + "learning_rate": 4.207214256352817e-05, + "loss": 0.0168, + "step": 34390 + }, + { + "action_loss": 0.0015649498673155904, + "epoch": 30.926258992805757, + "step": 34390 + }, + { + "epoch": 30.926258992805757, + "step": 34390, + "torque_loss": 0.0876346305012703 + }, + { + "epoch": 30.926258992805757, + "force_loss": 0.002223416231572628, + "step": 34390 + }, + { + "epoch": 30.93525179856115, + "grad_norm": 0.23415303230285645, + "learning_rate": 4.2044934550770524e-05, + "loss": 0.0206, + "step": 34400 + }, + { + "action_loss": 0.048367906361818314, + "epoch": 30.93525179856115, + "step": 34400 + }, + { + "epoch": 30.93525179856115, + "step": 34400, + "torque_loss": 0.17771399021148682 + }, + { + "epoch": 30.93525179856115, + "force_loss": 0.07640401273965836, + "step": 34400 + }, + { + "epoch": 30.944244604316548, + "grad_norm": 0.1210138276219368, + "learning_rate": 4.201772895455174e-05, + "loss": 0.0219, + "step": 34410 + }, + { + "action_loss": 0.002077364129945636, + "epoch": 30.944244604316548, + "step": 34410 + }, + { + "epoch": 30.944244604316548, + "step": 34410, + "torque_loss": 0.1436670571565628 + }, + { + "epoch": 30.944244604316548, + "force_loss": 0.0031349908094853163, + "step": 34410 + }, + { + "epoch": 30.953237410071942, + "grad_norm": 0.23080870509147644, + "learning_rate": 4.199052578313613e-05, + "loss": 0.0202, + "step": 34420 + }, + { + "action_loss": 0.0038766537327319384, + "epoch": 30.953237410071942, + "step": 34420 + }, + { + "epoch": 30.953237410071942, + "step": 34420, + "torque_loss": 0.12370244413614273 + }, + { + "epoch": 30.953237410071942, + "force_loss": 0.00461923610419035, + "step": 34420 + }, + { + "epoch": 30.96223021582734, + "grad_norm": 0.3497636020183563, + "learning_rate": 4.1963325044787294e-05, + "loss": 0.0196, + "step": 34430 + }, + { + "action_loss": 0.0013608367880806327, + "epoch": 30.96223021582734, + "step": 34430 + }, + { + "epoch": 30.96223021582734, + "step": 34430, + "torque_loss": 0.11652910709381104 + }, + { + "epoch": 30.96223021582734, + "force_loss": 0.00146779406350106, + "step": 34430 + }, + { + "epoch": 30.971223021582734, + "grad_norm": 0.18539556860923767, + "learning_rate": 4.193612674776814e-05, + "loss": 0.0167, + "step": 34440 + }, + { + "action_loss": 0.005846960935741663, + "epoch": 30.971223021582734, + "step": 34440 + }, + { + "epoch": 30.971223021582734, + "step": 34440, + "torque_loss": 0.14601987600326538 + }, + { + "epoch": 30.971223021582734, + "force_loss": 0.007143509108573198, + "step": 34440 + }, + { + "epoch": 30.980215827338128, + "grad_norm": 0.1306850165128708, + "learning_rate": 4.1908930900340745e-05, + "loss": 0.0184, + "step": 34450 + }, + { + "action_loss": 0.005658076610416174, + "epoch": 30.980215827338128, + "step": 34450 + }, + { + "epoch": 30.980215827338128, + "step": 34450, + "torque_loss": 0.13187600672245026 + }, + { + "epoch": 30.980215827338128, + "force_loss": 0.0058209956623613834, + "step": 34450 + }, + { + "epoch": 30.989208633093526, + "grad_norm": 0.21161605417728424, + "learning_rate": 4.1881737510766536e-05, + "loss": 0.0168, + "step": 34460 + }, + { + "action_loss": 0.002968036336824298, + "epoch": 30.989208633093526, + "step": 34460 + }, + { + "epoch": 30.989208633093526, + "step": 34460, + "torque_loss": 0.1066054105758667 + }, + { + "epoch": 30.989208633093526, + "force_loss": 0.0036938395351171494, + "step": 34460 + }, + { + "epoch": 30.99820143884892, + "grad_norm": 0.08903868496417999, + "learning_rate": 4.185454658730609e-05, + "loss": 0.016, + "step": 34470 + }, + { + "action_loss": 0.003074484644457698, + "epoch": 30.99820143884892, + "step": 34470 + }, + { + "epoch": 30.99820143884892, + "step": 34470, + "torque_loss": 0.17268311977386475 + }, + { + "epoch": 30.99820143884892, + "force_loss": 0.0022521994542330503, + "step": 34470 + }, + { + "epoch": 31.007194244604317, + "grad_norm": 0.28496021032333374, + "learning_rate": 4.1827358138219355e-05, + "loss": 0.0176, + "step": 34480 + }, + { + "action_loss": 0.0025270762853324413, + "epoch": 31.007194244604317, + "step": 34480 + }, + { + "epoch": 31.007194244604317, + "step": 34480, + "torque_loss": 0.11888081580400467 + }, + { + "epoch": 31.007194244604317, + "force_loss": 0.006741913501173258, + "step": 34480 + }, + { + "epoch": 31.01618705035971, + "grad_norm": 0.29523223638534546, + "learning_rate": 4.1800172171765404e-05, + "loss": 0.0273, + "step": 34490 + }, + { + "action_loss": 0.005727287847548723, + "epoch": 31.01618705035971, + "step": 34490 + }, + { + "epoch": 31.01618705035971, + "step": 34490, + "torque_loss": 0.08258908987045288 + }, + { + "epoch": 31.01618705035971, + "force_loss": 0.005919950548559427, + "step": 34490 + }, + { + "epoch": 31.02517985611511, + "grad_norm": 0.18150265514850616, + "learning_rate": 4.177298869620264e-05, + "loss": 0.0154, + "step": 34500 + }, + { + "action_loss": 0.0028090246487408876, + "epoch": 31.02517985611511, + "step": 34500 + }, + { + "epoch": 31.02517985611511, + "step": 34500, + "torque_loss": 0.14135436713695526 + }, + { + "epoch": 31.02517985611511, + "force_loss": 0.0021016087848693132, + "step": 34500 + }, + { + "epoch": 31.034172661870503, + "grad_norm": 0.13717269897460938, + "learning_rate": 4.1745807719788705e-05, + "loss": 0.019, + "step": 34510 + }, + { + "action_loss": 0.0021398004610091448, + "epoch": 31.034172661870503, + "step": 34510 + }, + { + "epoch": 31.034172661870503, + "step": 34510, + "torque_loss": 0.10547112673521042 + }, + { + "epoch": 31.034172661870503, + "force_loss": 0.0011371915461495519, + "step": 34510 + }, + { + "epoch": 31.0431654676259, + "grad_norm": 0.1943628489971161, + "learning_rate": 4.1718629250780445e-05, + "loss": 0.0178, + "step": 34520 + }, + { + "action_loss": 0.0027561886236071587, + "epoch": 31.0431654676259, + "step": 34520 + }, + { + "epoch": 31.0431654676259, + "step": 34520, + "torque_loss": 0.15017829835414886 + }, + { + "epoch": 31.0431654676259, + "force_loss": 0.009032641537487507, + "step": 34520 + }, + { + "epoch": 31.052158273381295, + "grad_norm": 0.11110750585794449, + "learning_rate": 4.1691453297433956e-05, + "loss": 0.0179, + "step": 34530 + }, + { + "action_loss": 0.005441917572170496, + "epoch": 31.052158273381295, + "step": 34530 + }, + { + "epoch": 31.052158273381295, + "step": 34530, + "torque_loss": 0.14995302259922028 + }, + { + "epoch": 31.052158273381295, + "force_loss": 0.002294342964887619, + "step": 34530 + }, + { + "epoch": 31.06115107913669, + "grad_norm": 0.5229678750038147, + "learning_rate": 4.166427986800457e-05, + "loss": 0.0196, + "step": 34540 + }, + { + "action_loss": 0.004152171779423952, + "epoch": 31.06115107913669, + "step": 34540 + }, + { + "epoch": 31.06115107913669, + "step": 34540, + "torque_loss": 0.12068601697683334 + }, + { + "epoch": 31.06115107913669, + "force_loss": 0.0015859386185184121, + "step": 34540 + }, + { + "epoch": 31.070143884892087, + "grad_norm": 0.19499029219150543, + "learning_rate": 4.163710897074688e-05, + "loss": 0.0172, + "step": 34550 + }, + { + "action_loss": 0.005862883757799864, + "epoch": 31.070143884892087, + "step": 34550 + }, + { + "epoch": 31.070143884892087, + "step": 34550, + "torque_loss": 0.09602329134941101 + }, + { + "epoch": 31.070143884892087, + "force_loss": 0.0055970698595047, + "step": 34550 + }, + { + "epoch": 31.07913669064748, + "grad_norm": 0.47074493765830994, + "learning_rate": 4.1609940613914686e-05, + "loss": 0.0237, + "step": 34560 + }, + { + "action_loss": 0.006736569106578827, + "epoch": 31.07913669064748, + "step": 34560 + }, + { + "epoch": 31.07913669064748, + "step": 34560, + "torque_loss": 0.1278313547372818 + }, + { + "epoch": 31.07913669064748, + "force_loss": 0.004935703705996275, + "step": 34560 + }, + { + "epoch": 31.08812949640288, + "grad_norm": 0.2099589854478836, + "learning_rate": 4.1582774805760996e-05, + "loss": 0.0176, + "step": 34570 + }, + { + "action_loss": 0.0035369135439395905, + "epoch": 31.08812949640288, + "step": 34570 + }, + { + "epoch": 31.08812949640288, + "step": 34570, + "torque_loss": 0.1384759545326233 + }, + { + "epoch": 31.08812949640288, + "force_loss": 0.004823770374059677, + "step": 34570 + }, + { + "epoch": 31.097122302158272, + "grad_norm": 0.35145261883735657, + "learning_rate": 4.155561155453809e-05, + "loss": 0.0179, + "step": 34580 + }, + { + "action_loss": 0.0020043589174747467, + "epoch": 31.097122302158272, + "step": 34580 + }, + { + "epoch": 31.097122302158272, + "step": 34580, + "torque_loss": 0.13524475693702698 + }, + { + "epoch": 31.097122302158272, + "force_loss": 0.0017389642307534814, + "step": 34580 + }, + { + "epoch": 31.10611510791367, + "grad_norm": 0.2711109519004822, + "learning_rate": 4.15284508684974e-05, + "loss": 0.0207, + "step": 34590 + }, + { + "action_loss": 0.001917586661875248, + "epoch": 31.10611510791367, + "step": 34590 + }, + { + "epoch": 31.10611510791367, + "step": 34590, + "torque_loss": 0.14557045698165894 + }, + { + "epoch": 31.10611510791367, + "force_loss": 0.002514700870960951, + "step": 34590 + }, + { + "epoch": 31.115107913669064, + "grad_norm": 0.2572793662548065, + "learning_rate": 4.1501292755889675e-05, + "loss": 0.0195, + "step": 34600 + }, + { + "action_loss": 0.00617902772501111, + "epoch": 31.115107913669064, + "step": 34600 + }, + { + "epoch": 31.115107913669064, + "step": 34600, + "torque_loss": 0.10713726282119751 + }, + { + "epoch": 31.115107913669064, + "force_loss": 0.009111873805522919, + "step": 34600 + }, + { + "epoch": 31.12410071942446, + "grad_norm": 0.3115302622318268, + "learning_rate": 4.1474137224964833e-05, + "loss": 0.0208, + "step": 34610 + }, + { + "action_loss": 0.004423326347023249, + "epoch": 31.12410071942446, + "step": 34610 + }, + { + "epoch": 31.12410071942446, + "step": 34610, + "torque_loss": 0.14599765837192535 + }, + { + "epoch": 31.12410071942446, + "force_loss": 0.016374656930565834, + "step": 34610 + }, + { + "epoch": 31.133093525179856, + "grad_norm": 0.338373601436615, + "learning_rate": 4.144698428397197e-05, + "loss": 0.0211, + "step": 34620 + }, + { + "action_loss": 0.003914201632142067, + "epoch": 31.133093525179856, + "step": 34620 + }, + { + "epoch": 31.133093525179856, + "step": 34620, + "torque_loss": 0.1427883356809616 + }, + { + "epoch": 31.133093525179856, + "force_loss": 0.002898996928706765, + "step": 34620 + }, + { + "epoch": 31.142086330935253, + "grad_norm": 0.09954176098108292, + "learning_rate": 4.1419833941159466e-05, + "loss": 0.0165, + "step": 34630 + }, + { + "action_loss": 0.0015698192873969674, + "epoch": 31.142086330935253, + "step": 34630 + }, + { + "epoch": 31.142086330935253, + "step": 34630, + "torque_loss": 0.11284714937210083 + }, + { + "epoch": 31.142086330935253, + "force_loss": 0.003355605760589242, + "step": 34630 + }, + { + "epoch": 31.151079136690647, + "grad_norm": 0.24442985653877258, + "learning_rate": 4.1392686204774846e-05, + "loss": 0.0167, + "step": 34640 + }, + { + "action_loss": 0.001921254675835371, + "epoch": 31.151079136690647, + "step": 34640 + }, + { + "epoch": 31.151079136690647, + "step": 34640, + "torque_loss": 0.13938771188259125 + }, + { + "epoch": 31.151079136690647, + "force_loss": 0.0030783768743276596, + "step": 34640 + }, + { + "epoch": 31.16007194244604, + "grad_norm": 0.1379452645778656, + "learning_rate": 4.13655410830649e-05, + "loss": 0.0155, + "step": 34650 + }, + { + "action_loss": 0.0036887743044644594, + "epoch": 31.16007194244604, + "step": 34650 + }, + { + "epoch": 31.16007194244604, + "step": 34650, + "torque_loss": 0.16108696162700653 + }, + { + "epoch": 31.16007194244604, + "force_loss": 0.005511805415153503, + "step": 34650 + }, + { + "epoch": 31.16906474820144, + "grad_norm": 0.3105632960796356, + "learning_rate": 4.1338398584275594e-05, + "loss": 0.02, + "step": 34660 + }, + { + "action_loss": 0.016464168205857277, + "epoch": 31.16906474820144, + "step": 34660 + }, + { + "epoch": 31.16906474820144, + "step": 34660, + "torque_loss": 0.10444417595863342 + }, + { + "epoch": 31.16906474820144, + "force_loss": 0.03631044924259186, + "step": 34660 + }, + { + "epoch": 31.178057553956833, + "grad_norm": 0.12203385680913925, + "learning_rate": 4.1311258716652104e-05, + "loss": 0.019, + "step": 34670 + }, + { + "action_loss": 0.00409175269305706, + "epoch": 31.178057553956833, + "step": 34670 + }, + { + "epoch": 31.178057553956833, + "step": 34670, + "torque_loss": 0.12638358771800995 + }, + { + "epoch": 31.178057553956833, + "force_loss": 0.01283965166658163, + "step": 34670 + }, + { + "epoch": 31.18705035971223, + "grad_norm": 0.17293165624141693, + "learning_rate": 4.128412148843881e-05, + "loss": 0.0178, + "step": 34680 + }, + { + "action_loss": 0.005139355082064867, + "epoch": 31.18705035971223, + "step": 34680 + }, + { + "epoch": 31.18705035971223, + "step": 34680, + "torque_loss": 0.13874301314353943 + }, + { + "epoch": 31.18705035971223, + "force_loss": 0.011036957614123821, + "step": 34680 + }, + { + "epoch": 31.196043165467625, + "grad_norm": 0.2990374267101288, + "learning_rate": 4.125698690787926e-05, + "loss": 0.0202, + "step": 34690 + }, + { + "action_loss": 0.005877554416656494, + "epoch": 31.196043165467625, + "step": 34690 + }, + { + "epoch": 31.196043165467625, + "step": 34690, + "torque_loss": 0.13707762956619263 + }, + { + "epoch": 31.196043165467625, + "force_loss": 0.005564381834119558, + "step": 34690 + }, + { + "epoch": 31.205035971223023, + "grad_norm": 0.2713855504989624, + "learning_rate": 4.1229854983216245e-05, + "loss": 0.0192, + "step": 34700 + }, + { + "action_loss": 0.0044330330565571785, + "epoch": 31.205035971223023, + "step": 34700 + }, + { + "epoch": 31.205035971223023, + "step": 34700, + "torque_loss": 0.15669254958629608 + }, + { + "epoch": 31.205035971223023, + "force_loss": 0.004603779409080744, + "step": 34700 + }, + { + "epoch": 31.214028776978417, + "grad_norm": 0.2822035551071167, + "learning_rate": 4.120272572269175e-05, + "loss": 0.0195, + "step": 34710 + }, + { + "action_loss": 0.00781963299959898, + "epoch": 31.214028776978417, + "step": 34710 + }, + { + "epoch": 31.214028776978417, + "step": 34710, + "torque_loss": 0.1621348261833191 + }, + { + "epoch": 31.214028776978417, + "force_loss": 0.013986830599606037, + "step": 34710 + }, + { + "epoch": 31.223021582733814, + "grad_norm": 0.33111459016799927, + "learning_rate": 4.117559913454687e-05, + "loss": 0.0193, + "step": 34720 + }, + { + "action_loss": 0.0026872241869568825, + "epoch": 31.223021582733814, + "step": 34720 + }, + { + "epoch": 31.223021582733814, + "step": 34720, + "torque_loss": 0.14999866485595703 + }, + { + "epoch": 31.223021582733814, + "force_loss": 0.00711226649582386, + "step": 34720 + }, + { + "epoch": 31.23201438848921, + "grad_norm": 0.20210996270179749, + "learning_rate": 4.114847522702201e-05, + "loss": 0.0193, + "step": 34730 + }, + { + "action_loss": 0.004023254383355379, + "epoch": 31.23201438848921, + "step": 34730 + }, + { + "epoch": 31.23201438848921, + "step": 34730, + "torque_loss": 0.13857853412628174 + }, + { + "epoch": 31.23201438848921, + "force_loss": 0.00549306022003293, + "step": 34730 + }, + { + "epoch": 31.241007194244606, + "grad_norm": 0.1407058835029602, + "learning_rate": 4.112135400835664e-05, + "loss": 0.0169, + "step": 34740 + }, + { + "action_loss": 0.005574887152761221, + "epoch": 31.241007194244606, + "step": 34740 + }, + { + "epoch": 31.241007194244606, + "step": 34740, + "torque_loss": 0.10836869478225708 + }, + { + "epoch": 31.241007194244606, + "force_loss": 0.0074792117811739445, + "step": 34740 + }, + { + "epoch": 31.25, + "grad_norm": 0.19229404628276825, + "learning_rate": 4.109423548678949e-05, + "loss": 0.0151, + "step": 34750 + }, + { + "action_loss": 0.0059226867742836475, + "epoch": 31.25, + "step": 34750 + }, + { + "epoch": 31.25, + "step": 34750, + "torque_loss": 0.12453003972768784 + }, + { + "epoch": 31.25, + "force_loss": 0.003832560032606125, + "step": 34750 + }, + { + "epoch": 31.258992805755394, + "grad_norm": 0.35395365953445435, + "learning_rate": 4.106711967055848e-05, + "loss": 0.0178, + "step": 34760 + }, + { + "action_loss": 0.003903917735442519, + "epoch": 31.258992805755394, + "step": 34760 + }, + { + "epoch": 31.258992805755394, + "step": 34760, + "torque_loss": 0.09834903478622437 + }, + { + "epoch": 31.258992805755394, + "force_loss": 0.007758116815239191, + "step": 34760 + }, + { + "epoch": 31.26798561151079, + "grad_norm": 0.2640562355518341, + "learning_rate": 4.1040006567900636e-05, + "loss": 0.0168, + "step": 34770 + }, + { + "action_loss": 0.0020288738887757063, + "epoch": 31.26798561151079, + "step": 34770 + }, + { + "epoch": 31.26798561151079, + "step": 34770, + "torque_loss": 0.11705183237791061 + }, + { + "epoch": 31.26798561151079, + "force_loss": 0.00440516322851181, + "step": 34770 + }, + { + "epoch": 31.276978417266186, + "grad_norm": 0.4126938581466675, + "learning_rate": 4.101289618705224e-05, + "loss": 0.0158, + "step": 34780 + }, + { + "action_loss": 0.0020967319142073393, + "epoch": 31.276978417266186, + "step": 34780 + }, + { + "epoch": 31.276978417266186, + "step": 34780, + "torque_loss": 0.12304145097732544 + }, + { + "epoch": 31.276978417266186, + "force_loss": 0.001527313725091517, + "step": 34780 + }, + { + "epoch": 31.285971223021583, + "grad_norm": 0.35957178473472595, + "learning_rate": 4.0985788536248675e-05, + "loss": 0.0175, + "step": 34790 + }, + { + "action_loss": 0.0024529846850782633, + "epoch": 31.285971223021583, + "step": 34790 + }, + { + "epoch": 31.285971223021583, + "step": 34790, + "torque_loss": 0.12623198330402374 + }, + { + "epoch": 31.285971223021583, + "force_loss": 0.0035021493677049875, + "step": 34790 + }, + { + "epoch": 31.294964028776977, + "grad_norm": 0.4234926104545593, + "learning_rate": 4.095868362372454e-05, + "loss": 0.017, + "step": 34800 + }, + { + "action_loss": 0.0035946767311543226, + "epoch": 31.294964028776977, + "step": 34800 + }, + { + "epoch": 31.294964028776977, + "step": 34800, + "torque_loss": 0.13089805841445923 + }, + { + "epoch": 31.294964028776977, + "force_loss": 0.001584723242558539, + "step": 34800 + }, + { + "epoch": 31.303956834532375, + "grad_norm": 0.33862611651420593, + "learning_rate": 4.0931581457713614e-05, + "loss": 0.0178, + "step": 34810 + }, + { + "action_loss": 0.007343566045165062, + "epoch": 31.303956834532375, + "step": 34810 + }, + { + "epoch": 31.303956834532375, + "step": 34810, + "torque_loss": 0.14513994753360748 + }, + { + "epoch": 31.303956834532375, + "force_loss": 0.008060489781200886, + "step": 34810 + }, + { + "epoch": 31.31294964028777, + "grad_norm": 0.15803027153015137, + "learning_rate": 4.09044820464488e-05, + "loss": 0.0188, + "step": 34820 + }, + { + "action_loss": 0.0023536905646324158, + "epoch": 31.31294964028777, + "step": 34820 + }, + { + "epoch": 31.31294964028777, + "step": 34820, + "torque_loss": 0.13476301729679108 + }, + { + "epoch": 31.31294964028777, + "force_loss": 0.0015555074205622077, + "step": 34820 + }, + { + "epoch": 31.321942446043167, + "grad_norm": 0.18544569611549377, + "learning_rate": 4.087738539816219e-05, + "loss": 0.0176, + "step": 34830 + }, + { + "action_loss": 0.01503182202577591, + "epoch": 31.321942446043167, + "step": 34830 + }, + { + "epoch": 31.321942446043167, + "step": 34830, + "torque_loss": 0.14875905215740204 + }, + { + "epoch": 31.321942446043167, + "force_loss": 0.017478706315159798, + "step": 34830 + }, + { + "epoch": 31.33093525179856, + "grad_norm": 1.007502555847168, + "learning_rate": 4.085029152108501e-05, + "loss": 0.022, + "step": 34840 + }, + { + "action_loss": 0.0013019072357565165, + "epoch": 31.33093525179856, + "step": 34840 + }, + { + "epoch": 31.33093525179856, + "step": 34840, + "torque_loss": 0.12316927313804626 + }, + { + "epoch": 31.33093525179856, + "force_loss": 0.0011967908358201385, + "step": 34840 + }, + { + "epoch": 31.33992805755396, + "grad_norm": 0.41491687297821045, + "learning_rate": 4.0823200423447714e-05, + "loss": 0.0196, + "step": 34850 + }, + { + "action_loss": 0.0028079384937882423, + "epoch": 31.33992805755396, + "step": 34850 + }, + { + "epoch": 31.33992805755396, + "step": 34850, + "torque_loss": 0.13180182874202728 + }, + { + "epoch": 31.33992805755396, + "force_loss": 0.004840850364416838, + "step": 34850 + }, + { + "epoch": 31.348920863309353, + "grad_norm": 0.1371115744113922, + "learning_rate": 4.079611211347981e-05, + "loss": 0.0241, + "step": 34860 + }, + { + "action_loss": 0.007915817201137543, + "epoch": 31.348920863309353, + "step": 34860 + }, + { + "epoch": 31.348920863309353, + "step": 34860, + "torque_loss": 0.15225368738174438 + }, + { + "epoch": 31.348920863309353, + "force_loss": 0.006319840904325247, + "step": 34860 + }, + { + "epoch": 31.357913669064747, + "grad_norm": 0.35097813606262207, + "learning_rate": 4.076902659941002e-05, + "loss": 0.0167, + "step": 34870 + }, + { + "action_loss": 0.002877927152439952, + "epoch": 31.357913669064747, + "step": 34870 + }, + { + "epoch": 31.357913669064747, + "step": 34870, + "torque_loss": 0.12672726809978485 + }, + { + "epoch": 31.357913669064747, + "force_loss": 0.0036767490673810244, + "step": 34870 + }, + { + "epoch": 31.366906474820144, + "grad_norm": 0.3157791495323181, + "learning_rate": 4.074194388946624e-05, + "loss": 0.0157, + "step": 34880 + }, + { + "action_loss": 0.0014395791804417968, + "epoch": 31.366906474820144, + "step": 34880 + }, + { + "epoch": 31.366906474820144, + "step": 34880, + "torque_loss": 0.12489987164735794 + }, + { + "epoch": 31.366906474820144, + "force_loss": 0.001917771645821631, + "step": 34880 + }, + { + "epoch": 31.37589928057554, + "grad_norm": 0.38226935267448425, + "learning_rate": 4.071486399187545e-05, + "loss": 0.0164, + "step": 34890 + }, + { + "action_loss": 0.008211738429963589, + "epoch": 31.37589928057554, + "step": 34890 + }, + { + "epoch": 31.37589928057554, + "step": 34890, + "torque_loss": 0.12687841057777405 + }, + { + "epoch": 31.37589928057554, + "force_loss": 0.010581615380942822, + "step": 34890 + }, + { + "epoch": 31.384892086330936, + "grad_norm": 0.2458968311548233, + "learning_rate": 4.0687786914863836e-05, + "loss": 0.0185, + "step": 34900 + }, + { + "action_loss": 0.0017622933955863118, + "epoch": 31.384892086330936, + "step": 34900 + }, + { + "epoch": 31.384892086330936, + "step": 34900, + "torque_loss": 0.10126352310180664 + }, + { + "epoch": 31.384892086330936, + "force_loss": 0.0025832951068878174, + "step": 34900 + }, + { + "epoch": 31.39388489208633, + "grad_norm": 0.272097110748291, + "learning_rate": 4.0660712666656666e-05, + "loss": 0.0157, + "step": 34910 + }, + { + "action_loss": 0.005375581327825785, + "epoch": 31.39388489208633, + "step": 34910 + }, + { + "epoch": 31.39388489208633, + "step": 34910, + "torque_loss": 0.14981211721897125 + }, + { + "epoch": 31.39388489208633, + "force_loss": 0.00587962381541729, + "step": 34910 + }, + { + "epoch": 31.402877697841728, + "grad_norm": 0.18457461893558502, + "learning_rate": 4.0633641255478394e-05, + "loss": 0.0207, + "step": 34920 + }, + { + "action_loss": 0.0011194973485544324, + "epoch": 31.402877697841728, + "step": 34920 + }, + { + "epoch": 31.402877697841728, + "step": 34920, + "torque_loss": 0.09443652629852295 + }, + { + "epoch": 31.402877697841728, + "force_loss": 0.0017360160127282143, + "step": 34920 + }, + { + "epoch": 31.41187050359712, + "grad_norm": 0.1195453479886055, + "learning_rate": 4.0606572689552624e-05, + "loss": 0.0154, + "step": 34930 + }, + { + "action_loss": 0.0014406010741367936, + "epoch": 31.41187050359712, + "step": 34930 + }, + { + "epoch": 31.41187050359712, + "step": 34930, + "torque_loss": 0.13830390572547913 + }, + { + "epoch": 31.41187050359712, + "force_loss": 0.0007887175306677818, + "step": 34930 + }, + { + "epoch": 31.42086330935252, + "grad_norm": 0.270561158657074, + "learning_rate": 4.0579506977102036e-05, + "loss": 0.0187, + "step": 34940 + }, + { + "action_loss": 0.0025075108278542757, + "epoch": 31.42086330935252, + "step": 34940 + }, + { + "epoch": 31.42086330935252, + "step": 34940, + "torque_loss": 0.15471245348453522 + }, + { + "epoch": 31.42086330935252, + "force_loss": 0.004849970806390047, + "step": 34940 + }, + { + "epoch": 31.429856115107913, + "grad_norm": 0.6065011620521545, + "learning_rate": 4.055244412634849e-05, + "loss": 0.0186, + "step": 34950 + }, + { + "action_loss": 0.003092721104621887, + "epoch": 31.429856115107913, + "step": 34950 + }, + { + "epoch": 31.429856115107913, + "step": 34950, + "torque_loss": 0.1049649640917778 + }, + { + "epoch": 31.429856115107913, + "force_loss": 0.0021366889122873545, + "step": 34950 + }, + { + "epoch": 31.43884892086331, + "grad_norm": 0.14073814451694489, + "learning_rate": 4.052538414551298e-05, + "loss": 0.0189, + "step": 34960 + }, + { + "action_loss": 0.012841415591537952, + "epoch": 31.43884892086331, + "step": 34960 + }, + { + "epoch": 31.43884892086331, + "step": 34960, + "torque_loss": 0.15012070536613464 + }, + { + "epoch": 31.43884892086331, + "force_loss": 0.0143703892827034, + "step": 34960 + }, + { + "epoch": 31.447841726618705, + "grad_norm": 0.11514277011156082, + "learning_rate": 4.0498327042815596e-05, + "loss": 0.0169, + "step": 34970 + }, + { + "action_loss": 0.0035906240809708834, + "epoch": 31.447841726618705, + "step": 34970 + }, + { + "epoch": 31.447841726618705, + "step": 34970, + "torque_loss": 0.14721103012561798 + }, + { + "epoch": 31.447841726618705, + "force_loss": 0.005858649965375662, + "step": 34970 + }, + { + "epoch": 31.4568345323741, + "grad_norm": 0.20259389281272888, + "learning_rate": 4.047127282647559e-05, + "loss": 0.0179, + "step": 34980 + }, + { + "action_loss": 0.003659149631857872, + "epoch": 31.4568345323741, + "step": 34980 + }, + { + "epoch": 31.4568345323741, + "step": 34980, + "torque_loss": 0.09909810870885849 + }, + { + "epoch": 31.4568345323741, + "force_loss": 0.0038765103090554476, + "step": 34980 + }, + { + "epoch": 31.465827338129497, + "grad_norm": 0.12645934522151947, + "learning_rate": 4.04442215047113e-05, + "loss": 0.0167, + "step": 34990 + }, + { + "action_loss": 0.0025366703048348427, + "epoch": 31.465827338129497, + "step": 34990 + }, + { + "epoch": 31.465827338129497, + "step": 34990, + "torque_loss": 0.12167666107416153 + }, + { + "epoch": 31.465827338129497, + "force_loss": 0.005248036235570908, + "step": 34990 + }, + { + "epoch": 31.47482014388489, + "grad_norm": 0.15403643250465393, + "learning_rate": 4.041717308574023e-05, + "loss": 0.0153, + "step": 35000 + }, + { + "action_loss": 0.004632141441106796, + "epoch": 31.47482014388489, + "step": 35000 + }, + { + "epoch": 31.47482014388489, + "step": 35000, + "torque_loss": 0.1133936420083046 + }, + { + "epoch": 31.47482014388489, + "force_loss": 0.005289702210575342, + "step": 35000 + }, + { + "epoch": 31.48381294964029, + "grad_norm": 0.1703784316778183, + "learning_rate": 4.039012757777893e-05, + "loss": 0.0177, + "step": 35010 + }, + { + "action_loss": 0.0033064030576497316, + "epoch": 31.48381294964029, + "step": 35010 + }, + { + "epoch": 31.48381294964029, + "step": 35010, + "torque_loss": 0.10487169027328491 + }, + { + "epoch": 31.48381294964029, + "force_loss": 0.0018594572320580482, + "step": 35010 + }, + { + "epoch": 31.492805755395683, + "grad_norm": 0.11044184863567352, + "learning_rate": 4.036308498904314e-05, + "loss": 0.0149, + "step": 35020 + }, + { + "action_loss": 0.0055665522813797, + "epoch": 31.492805755395683, + "step": 35020 + }, + { + "epoch": 31.492805755395683, + "step": 35020, + "torque_loss": 0.1490526795387268 + }, + { + "epoch": 31.492805755395683, + "force_loss": 0.005140155553817749, + "step": 35020 + }, + { + "epoch": 31.50179856115108, + "grad_norm": 0.31837034225463867, + "learning_rate": 4.033604532774771e-05, + "loss": 0.0186, + "step": 35030 + }, + { + "action_loss": 0.003204680746421218, + "epoch": 31.50179856115108, + "step": 35030 + }, + { + "epoch": 31.50179856115108, + "step": 35030, + "torque_loss": 0.15786059200763702 + }, + { + "epoch": 31.50179856115108, + "force_loss": 0.0030774895567446947, + "step": 35030 + }, + { + "epoch": 31.510791366906474, + "grad_norm": 0.20702697336673737, + "learning_rate": 4.030900860210652e-05, + "loss": 0.0161, + "step": 35040 + }, + { + "action_loss": 0.015199624001979828, + "epoch": 31.510791366906474, + "step": 35040 + }, + { + "epoch": 31.510791366906474, + "step": 35040, + "torque_loss": 0.13615868985652924 + }, + { + "epoch": 31.510791366906474, + "force_loss": 0.013585063628852367, + "step": 35040 + }, + { + "epoch": 31.519784172661872, + "grad_norm": 0.1472933441400528, + "learning_rate": 4.028197482033266e-05, + "loss": 0.018, + "step": 35050 + }, + { + "action_loss": 0.002620900049805641, + "epoch": 31.519784172661872, + "step": 35050 + }, + { + "epoch": 31.519784172661872, + "step": 35050, + "torque_loss": 0.14953002333641052 + }, + { + "epoch": 31.519784172661872, + "force_loss": 0.01394414808601141, + "step": 35050 + }, + { + "epoch": 31.528776978417266, + "grad_norm": 0.3911173343658447, + "learning_rate": 4.0254943990638246e-05, + "loss": 0.0189, + "step": 35060 + }, + { + "action_loss": 0.0028132274746894836, + "epoch": 31.528776978417266, + "step": 35060 + }, + { + "epoch": 31.528776978417266, + "step": 35060, + "torque_loss": 0.11279907077550888 + }, + { + "epoch": 31.528776978417266, + "force_loss": 0.003741770749911666, + "step": 35060 + }, + { + "epoch": 31.53776978417266, + "grad_norm": 0.21598149836063385, + "learning_rate": 4.022791612123454e-05, + "loss": 0.0252, + "step": 35070 + }, + { + "action_loss": 0.0020898503717035055, + "epoch": 31.53776978417266, + "step": 35070 + }, + { + "epoch": 31.53776978417266, + "step": 35070, + "torque_loss": 0.13717205822467804 + }, + { + "epoch": 31.53776978417266, + "force_loss": 0.002998681738972664, + "step": 35070 + }, + { + "epoch": 31.546762589928058, + "grad_norm": 0.14628909528255463, + "learning_rate": 4.020089122033192e-05, + "loss": 0.0182, + "step": 35080 + }, + { + "action_loss": 0.007679073140025139, + "epoch": 31.546762589928058, + "step": 35080 + }, + { + "epoch": 31.546762589928058, + "step": 35080, + "torque_loss": 0.09587617963552475 + }, + { + "epoch": 31.546762589928058, + "force_loss": 0.002372180810198188, + "step": 35080 + }, + { + "epoch": 31.555755395683452, + "grad_norm": 0.20146098732948303, + "learning_rate": 4.01738692961398e-05, + "loss": 0.0245, + "step": 35090 + }, + { + "action_loss": 0.00114073243457824, + "epoch": 31.555755395683452, + "step": 35090 + }, + { + "epoch": 31.555755395683452, + "step": 35090, + "torque_loss": 0.0986347496509552 + }, + { + "epoch": 31.555755395683452, + "force_loss": 0.0011868586298078299, + "step": 35090 + }, + { + "epoch": 31.56474820143885, + "grad_norm": 0.42345449328422546, + "learning_rate": 4.014685035686675e-05, + "loss": 0.016, + "step": 35100 + }, + { + "action_loss": 0.004441409837454557, + "epoch": 31.56474820143885, + "step": 35100 + }, + { + "epoch": 31.56474820143885, + "step": 35100, + "torque_loss": 0.10088644176721573 + }, + { + "epoch": 31.56474820143885, + "force_loss": 0.007341319229453802, + "step": 35100 + }, + { + "epoch": 31.573741007194243, + "grad_norm": 0.14468078315258026, + "learning_rate": 4.011983441072039e-05, + "loss": 0.0191, + "step": 35110 + }, + { + "action_loss": 0.004453874658793211, + "epoch": 31.573741007194243, + "step": 35110 + }, + { + "epoch": 31.573741007194243, + "step": 35110, + "torque_loss": 0.12231036275625229 + }, + { + "epoch": 31.573741007194243, + "force_loss": 0.004258755128830671, + "step": 35110 + }, + { + "epoch": 31.58273381294964, + "grad_norm": 0.3526107370853424, + "learning_rate": 4.0092821465907485e-05, + "loss": 0.0212, + "step": 35120 + }, + { + "action_loss": 0.0022445085924118757, + "epoch": 31.58273381294964, + "step": 35120 + }, + { + "epoch": 31.58273381294964, + "step": 35120, + "torque_loss": 0.10221400111913681 + }, + { + "epoch": 31.58273381294964, + "force_loss": 0.006604932714253664, + "step": 35120 + }, + { + "epoch": 31.591726618705035, + "grad_norm": 0.32770583033561707, + "learning_rate": 4.006581153063383e-05, + "loss": 0.0163, + "step": 35130 + }, + { + "action_loss": 0.0018231518333777785, + "epoch": 31.591726618705035, + "step": 35130 + }, + { + "epoch": 31.591726618705035, + "step": 35130, + "torque_loss": 0.1334356814622879 + }, + { + "epoch": 31.591726618705035, + "force_loss": 0.0011773643782362342, + "step": 35130 + }, + { + "epoch": 31.600719424460433, + "grad_norm": 0.3421180844306946, + "learning_rate": 4.003880461310432e-05, + "loss": 0.0196, + "step": 35140 + }, + { + "action_loss": 0.0029285720083862543, + "epoch": 31.600719424460433, + "step": 35140 + }, + { + "epoch": 31.600719424460433, + "step": 35140, + "torque_loss": 0.14738458395004272 + }, + { + "epoch": 31.600719424460433, + "force_loss": 0.0024381878320127726, + "step": 35140 + }, + { + "epoch": 31.609712230215827, + "grad_norm": 0.6190105080604553, + "learning_rate": 4.001180072152298e-05, + "loss": 0.0196, + "step": 35150 + }, + { + "action_loss": 0.0028955843299627304, + "epoch": 31.609712230215827, + "step": 35150 + }, + { + "epoch": 31.609712230215827, + "step": 35150, + "torque_loss": 0.16135770082473755 + }, + { + "epoch": 31.609712230215827, + "force_loss": 0.002871215110644698, + "step": 35150 + }, + { + "epoch": 31.618705035971225, + "grad_norm": 0.2656359076499939, + "learning_rate": 3.998479986409285e-05, + "loss": 0.0192, + "step": 35160 + }, + { + "action_loss": 0.002504608826711774, + "epoch": 31.618705035971225, + "step": 35160 + }, + { + "epoch": 31.618705035971225, + "step": 35160, + "torque_loss": 0.11583981662988663 + }, + { + "epoch": 31.618705035971225, + "force_loss": 0.009636103175580502, + "step": 35160 + }, + { + "epoch": 31.62769784172662, + "grad_norm": 0.10041473060846329, + "learning_rate": 3.995780204901607e-05, + "loss": 0.0152, + "step": 35170 + }, + { + "action_loss": 0.0036317389458417892, + "epoch": 31.62769784172662, + "step": 35170 + }, + { + "epoch": 31.62769784172662, + "step": 35170, + "torque_loss": 0.12671585381031036 + }, + { + "epoch": 31.62769784172662, + "force_loss": 0.006318715866655111, + "step": 35170 + }, + { + "epoch": 31.636690647482013, + "grad_norm": 0.1791400909423828, + "learning_rate": 3.993080728449391e-05, + "loss": 0.0169, + "step": 35180 + }, + { + "action_loss": 0.0016373790567740798, + "epoch": 31.636690647482013, + "step": 35180 + }, + { + "epoch": 31.636690647482013, + "step": 35180, + "torque_loss": 0.1573609709739685 + }, + { + "epoch": 31.636690647482013, + "force_loss": 0.002217449015006423, + "step": 35180 + }, + { + "epoch": 31.64568345323741, + "grad_norm": 0.17255602777004242, + "learning_rate": 3.990381557872661e-05, + "loss": 0.0155, + "step": 35190 + }, + { + "action_loss": 0.006632767617702484, + "epoch": 31.64568345323741, + "step": 35190 + }, + { + "epoch": 31.64568345323741, + "step": 35190, + "torque_loss": 0.1648382991552353 + }, + { + "epoch": 31.64568345323741, + "force_loss": 0.006704200059175491, + "step": 35190 + }, + { + "epoch": 31.654676258992804, + "grad_norm": 0.12096426635980606, + "learning_rate": 3.987682693991359e-05, + "loss": 0.017, + "step": 35200 + }, + { + "action_loss": 0.004801938775926828, + "epoch": 31.654676258992804, + "step": 35200 + }, + { + "epoch": 31.654676258992804, + "step": 35200, + "torque_loss": 0.1270293891429901 + }, + { + "epoch": 31.654676258992804, + "force_loss": 0.005421049892902374, + "step": 35200 + }, + { + "epoch": 31.663669064748202, + "grad_norm": 0.10935506969690323, + "learning_rate": 3.9849841376253226e-05, + "loss": 0.015, + "step": 35210 + }, + { + "action_loss": 0.0019251549383625388, + "epoch": 31.663669064748202, + "step": 35210 + }, + { + "epoch": 31.663669064748202, + "step": 35210, + "torque_loss": 0.14209115505218506 + }, + { + "epoch": 31.663669064748202, + "force_loss": 0.0015115119749680161, + "step": 35210 + }, + { + "epoch": 31.672661870503596, + "grad_norm": 0.15032987296581268, + "learning_rate": 3.982285889594306e-05, + "loss": 0.0191, + "step": 35220 + }, + { + "action_loss": 0.0037692366167902946, + "epoch": 31.672661870503596, + "step": 35220 + }, + { + "epoch": 31.672661870503596, + "step": 35220, + "torque_loss": 0.11902087926864624 + }, + { + "epoch": 31.672661870503596, + "force_loss": 0.0031990210991352797, + "step": 35220 + }, + { + "epoch": 31.681654676258994, + "grad_norm": 0.5698276162147522, + "learning_rate": 3.9795879507179665e-05, + "loss": 0.0174, + "step": 35230 + }, + { + "action_loss": 0.008475713431835175, + "epoch": 31.681654676258994, + "step": 35230 + }, + { + "epoch": 31.681654676258994, + "step": 35230, + "torque_loss": 0.11896976828575134 + }, + { + "epoch": 31.681654676258994, + "force_loss": 0.011556281708180904, + "step": 35230 + }, + { + "epoch": 31.690647482014388, + "grad_norm": 0.07939211279153824, + "learning_rate": 3.9768903218158634e-05, + "loss": 0.0167, + "step": 35240 + }, + { + "action_loss": 0.003014965681359172, + "epoch": 31.690647482014388, + "step": 35240 + }, + { + "epoch": 31.690647482014388, + "step": 35240, + "torque_loss": 0.13165317475795746 + }, + { + "epoch": 31.690647482014388, + "force_loss": 0.0021797276567667723, + "step": 35240 + }, + { + "epoch": 31.699640287769785, + "grad_norm": 0.17850062251091003, + "learning_rate": 3.974193003707468e-05, + "loss": 0.0178, + "step": 35250 + }, + { + "action_loss": 0.0028750200290232897, + "epoch": 31.699640287769785, + "step": 35250 + }, + { + "epoch": 31.699640287769785, + "step": 35250, + "torque_loss": 0.12155556678771973 + }, + { + "epoch": 31.699640287769785, + "force_loss": 0.004737237934023142, + "step": 35250 + }, + { + "epoch": 31.70863309352518, + "grad_norm": 0.17754119634628296, + "learning_rate": 3.971495997212152e-05, + "loss": 0.016, + "step": 35260 + }, + { + "action_loss": 0.0023565383162349463, + "epoch": 31.70863309352518, + "step": 35260 + }, + { + "epoch": 31.70863309352518, + "step": 35260, + "torque_loss": 0.1414964497089386 + }, + { + "epoch": 31.70863309352518, + "force_loss": 0.0033865266013890505, + "step": 35260 + }, + { + "epoch": 31.717625899280577, + "grad_norm": 0.6097290515899658, + "learning_rate": 3.9687993031491985e-05, + "loss": 0.018, + "step": 35270 + }, + { + "action_loss": 0.005043547600507736, + "epoch": 31.717625899280577, + "step": 35270 + }, + { + "epoch": 31.717625899280577, + "step": 35270, + "torque_loss": 0.17120970785617828 + }, + { + "epoch": 31.717625899280577, + "force_loss": 0.007256325799971819, + "step": 35270 + }, + { + "epoch": 31.72661870503597, + "grad_norm": 0.3341626226902008, + "learning_rate": 3.966102922337787e-05, + "loss": 0.0196, + "step": 35280 + }, + { + "action_loss": 0.0037151221185922623, + "epoch": 31.72661870503597, + "step": 35280 + }, + { + "epoch": 31.72661870503597, + "step": 35280, + "torque_loss": 0.1417021006345749 + }, + { + "epoch": 31.72661870503597, + "force_loss": 0.003333625616505742, + "step": 35280 + }, + { + "epoch": 31.735611510791365, + "grad_norm": 0.1533995270729065, + "learning_rate": 3.963406855597009e-05, + "loss": 0.0174, + "step": 35290 + }, + { + "action_loss": 0.00558444345369935, + "epoch": 31.735611510791365, + "step": 35290 + }, + { + "epoch": 31.735611510791365, + "step": 35290, + "torque_loss": 0.16073264181613922 + }, + { + "epoch": 31.735611510791365, + "force_loss": 0.005496786441653967, + "step": 35290 + }, + { + "epoch": 31.744604316546763, + "grad_norm": 0.20038332045078278, + "learning_rate": 3.960711103745861e-05, + "loss": 0.0183, + "step": 35300 + }, + { + "action_loss": 0.005106428172439337, + "epoch": 31.744604316546763, + "step": 35300 + }, + { + "epoch": 31.744604316546763, + "step": 35300, + "torque_loss": 0.12848752737045288 + }, + { + "epoch": 31.744604316546763, + "force_loss": 0.007534404750913382, + "step": 35300 + }, + { + "epoch": 31.753597122302157, + "grad_norm": 0.3179410398006439, + "learning_rate": 3.958015667603237e-05, + "loss": 0.019, + "step": 35310 + }, + { + "action_loss": 0.007186559494584799, + "epoch": 31.753597122302157, + "step": 35310 + }, + { + "epoch": 31.753597122302157, + "step": 35310, + "torque_loss": 0.13391290605068207 + }, + { + "epoch": 31.753597122302157, + "force_loss": 0.013319111429154873, + "step": 35310 + }, + { + "epoch": 31.762589928057555, + "grad_norm": 0.29443496465682983, + "learning_rate": 3.955320547987943e-05, + "loss": 0.018, + "step": 35320 + }, + { + "action_loss": 0.0034029714297503233, + "epoch": 31.762589928057555, + "step": 35320 + }, + { + "epoch": 31.762589928057555, + "step": 35320, + "torque_loss": 0.08257120102643967 + }, + { + "epoch": 31.762589928057555, + "force_loss": 0.005713058169931173, + "step": 35320 + }, + { + "epoch": 31.77158273381295, + "grad_norm": 0.9021161198616028, + "learning_rate": 3.952625745718681e-05, + "loss": 0.025, + "step": 35330 + }, + { + "action_loss": 0.0028725943993777037, + "epoch": 31.77158273381295, + "step": 35330 + }, + { + "epoch": 31.77158273381295, + "step": 35330, + "torque_loss": 0.08668620139360428 + }, + { + "epoch": 31.77158273381295, + "force_loss": 0.00623175548389554, + "step": 35330 + }, + { + "epoch": 31.780575539568346, + "grad_norm": 0.35724562406539917, + "learning_rate": 3.949931261614064e-05, + "loss": 0.0197, + "step": 35340 + }, + { + "action_loss": 0.0032996945083141327, + "epoch": 31.780575539568346, + "step": 35340 + }, + { + "epoch": 31.780575539568346, + "step": 35340, + "torque_loss": 0.10863769799470901 + }, + { + "epoch": 31.780575539568346, + "force_loss": 0.004071838688105345, + "step": 35340 + }, + { + "epoch": 31.78956834532374, + "grad_norm": 0.6367750763893127, + "learning_rate": 3.947237096492605e-05, + "loss": 0.0161, + "step": 35350 + }, + { + "action_loss": 0.008309929631650448, + "epoch": 31.78956834532374, + "step": 35350 + }, + { + "epoch": 31.78956834532374, + "step": 35350, + "torque_loss": 0.10108502954244614 + }, + { + "epoch": 31.78956834532374, + "force_loss": 0.0099371038377285, + "step": 35350 + }, + { + "epoch": 31.798561151079138, + "grad_norm": 0.27515581250190735, + "learning_rate": 3.944543251172719e-05, + "loss": 0.0182, + "step": 35360 + }, + { + "action_loss": 0.0035193238873034716, + "epoch": 31.798561151079138, + "step": 35360 + }, + { + "epoch": 31.798561151079138, + "step": 35360, + "torque_loss": 0.12900541722774506 + }, + { + "epoch": 31.798561151079138, + "force_loss": 0.0037006158381700516, + "step": 35360 + }, + { + "epoch": 31.807553956834532, + "grad_norm": 0.19117578864097595, + "learning_rate": 3.941849726472725e-05, + "loss": 0.0166, + "step": 35370 + }, + { + "action_loss": 0.010736651718616486, + "epoch": 31.807553956834532, + "step": 35370 + }, + { + "epoch": 31.807553956834532, + "step": 35370, + "torque_loss": 0.17377400398254395 + }, + { + "epoch": 31.807553956834532, + "force_loss": 0.007627354934811592, + "step": 35370 + }, + { + "epoch": 31.81654676258993, + "grad_norm": 0.1995638906955719, + "learning_rate": 3.939156523210846e-05, + "loss": 0.0159, + "step": 35380 + }, + { + "action_loss": 0.014721316285431385, + "epoch": 31.81654676258993, + "step": 35380 + }, + { + "epoch": 31.81654676258993, + "step": 35380, + "torque_loss": 0.1905713826417923 + }, + { + "epoch": 31.81654676258993, + "force_loss": 0.015707792714238167, + "step": 35380 + }, + { + "epoch": 31.825539568345324, + "grad_norm": 0.23033098876476288, + "learning_rate": 3.9364636422052046e-05, + "loss": 0.0225, + "step": 35390 + }, + { + "action_loss": 0.0027905143797397614, + "epoch": 31.825539568345324, + "step": 35390 + }, + { + "epoch": 31.825539568345324, + "step": 35390, + "torque_loss": 0.1308002918958664 + }, + { + "epoch": 31.825539568345324, + "force_loss": 0.005041670519858599, + "step": 35390 + }, + { + "epoch": 31.834532374100718, + "grad_norm": 0.1351657509803772, + "learning_rate": 3.933771084273828e-05, + "loss": 0.0233, + "step": 35400 + }, + { + "action_loss": 0.008600800298154354, + "epoch": 31.834532374100718, + "step": 35400 + }, + { + "epoch": 31.834532374100718, + "step": 35400, + "torque_loss": 0.12727564573287964 + }, + { + "epoch": 31.834532374100718, + "force_loss": 0.004926920402795076, + "step": 35400 + }, + { + "epoch": 31.843525179856115, + "grad_norm": 0.5700240135192871, + "learning_rate": 3.931078850234643e-05, + "loss": 0.0196, + "step": 35410 + }, + { + "action_loss": 0.0034690566826611757, + "epoch": 31.843525179856115, + "step": 35410 + }, + { + "epoch": 31.843525179856115, + "step": 35410, + "torque_loss": 0.10629952698945999 + }, + { + "epoch": 31.843525179856115, + "force_loss": 0.0024806184228509665, + "step": 35410 + }, + { + "epoch": 31.85251798561151, + "grad_norm": 0.2264889031648636, + "learning_rate": 3.928386940905483e-05, + "loss": 0.0156, + "step": 35420 + }, + { + "action_loss": 0.002343257423490286, + "epoch": 31.85251798561151, + "step": 35420 + }, + { + "epoch": 31.85251798561151, + "step": 35420, + "torque_loss": 0.17719678580760956 + }, + { + "epoch": 31.85251798561151, + "force_loss": 0.003980493173003197, + "step": 35420 + }, + { + "epoch": 31.861510791366907, + "grad_norm": 0.09385884553194046, + "learning_rate": 3.925695357104073e-05, + "loss": 0.0164, + "step": 35430 + }, + { + "action_loss": 0.00855818297713995, + "epoch": 31.861510791366907, + "step": 35430 + }, + { + "epoch": 31.861510791366907, + "step": 35430, + "torque_loss": 0.13382574915885925 + }, + { + "epoch": 31.861510791366907, + "force_loss": 0.01465965062379837, + "step": 35430 + }, + { + "epoch": 31.8705035971223, + "grad_norm": 0.2396390438079834, + "learning_rate": 3.923004099648049e-05, + "loss": 0.0196, + "step": 35440 + }, + { + "action_loss": 0.0022025706712156534, + "epoch": 31.8705035971223, + "step": 35440 + }, + { + "epoch": 31.8705035971223, + "step": 35440, + "torque_loss": 0.12100708484649658 + }, + { + "epoch": 31.8705035971223, + "force_loss": 0.0025210340972989798, + "step": 35440 + }, + { + "epoch": 31.8794964028777, + "grad_norm": 0.26506224274635315, + "learning_rate": 3.920313169354944e-05, + "loss": 0.0168, + "step": 35450 + }, + { + "action_loss": 0.0011666034115478396, + "epoch": 31.8794964028777, + "step": 35450 + }, + { + "epoch": 31.8794964028777, + "step": 35450, + "torque_loss": 0.10555519908666611 + }, + { + "epoch": 31.8794964028777, + "force_loss": 0.002271487610414624, + "step": 35450 + }, + { + "epoch": 31.888489208633093, + "grad_norm": 0.19065934419631958, + "learning_rate": 3.9176225670421897e-05, + "loss": 0.023, + "step": 35460 + }, + { + "action_loss": 0.002745841396972537, + "epoch": 31.888489208633093, + "step": 35460 + }, + { + "epoch": 31.888489208633093, + "step": 35460, + "torque_loss": 0.13876836001873016 + }, + { + "epoch": 31.888489208633093, + "force_loss": 0.005210071336477995, + "step": 35460 + }, + { + "epoch": 31.89748201438849, + "grad_norm": 0.17335911095142365, + "learning_rate": 3.9149322935271224e-05, + "loss": 0.0176, + "step": 35470 + }, + { + "action_loss": 0.009939249604940414, + "epoch": 31.89748201438849, + "step": 35470 + }, + { + "epoch": 31.89748201438849, + "step": 35470, + "torque_loss": 0.1244107261300087 + }, + { + "epoch": 31.89748201438849, + "force_loss": 0.0119258351624012, + "step": 35470 + }, + { + "epoch": 31.906474820143885, + "grad_norm": 0.3328714072704315, + "learning_rate": 3.9122423496269725e-05, + "loss": 0.0212, + "step": 35480 + }, + { + "action_loss": 0.001553779817186296, + "epoch": 31.906474820143885, + "step": 35480 + }, + { + "epoch": 31.906474820143885, + "step": 35480, + "torque_loss": 0.1323273777961731 + }, + { + "epoch": 31.906474820143885, + "force_loss": 0.0013025113148614764, + "step": 35480 + }, + { + "epoch": 31.915467625899282, + "grad_norm": 0.21877729892730713, + "learning_rate": 3.909552736158877e-05, + "loss": 0.0174, + "step": 35490 + }, + { + "action_loss": 0.003774484619498253, + "epoch": 31.915467625899282, + "step": 35490 + }, + { + "epoch": 31.915467625899282, + "step": 35490, + "torque_loss": 0.18078093230724335 + }, + { + "epoch": 31.915467625899282, + "force_loss": 0.0039477222599089146, + "step": 35490 + }, + { + "epoch": 31.924460431654676, + "grad_norm": 0.10050065070390701, + "learning_rate": 3.90686345393987e-05, + "loss": 0.017, + "step": 35500 + }, + { + "action_loss": 0.0015745697310194373, + "epoch": 31.924460431654676, + "step": 35500 + }, + { + "epoch": 31.924460431654676, + "step": 35500, + "torque_loss": 0.13969404995441437 + }, + { + "epoch": 31.924460431654676, + "force_loss": 0.0036092661321163177, + "step": 35500 + }, + { + "epoch": 31.93345323741007, + "grad_norm": 0.19695894420146942, + "learning_rate": 3.9041745037868816e-05, + "loss": 0.016, + "step": 35510 + }, + { + "action_loss": 0.0019424165366217494, + "epoch": 31.93345323741007, + "step": 35510 + }, + { + "epoch": 31.93345323741007, + "step": 35510, + "torque_loss": 0.1555541306734085 + }, + { + "epoch": 31.93345323741007, + "force_loss": 0.0014627467608079314, + "step": 35510 + }, + { + "epoch": 31.942446043165468, + "grad_norm": 0.3071577250957489, + "learning_rate": 3.9014858865167465e-05, + "loss": 0.0203, + "step": 35520 + }, + { + "action_loss": 0.004073005635291338, + "epoch": 31.942446043165468, + "step": 35520 + }, + { + "epoch": 31.942446043165468, + "step": 35520, + "torque_loss": 0.14763735234737396 + }, + { + "epoch": 31.942446043165468, + "force_loss": 0.002649690257385373, + "step": 35520 + }, + { + "epoch": 31.951438848920862, + "grad_norm": 0.14206725358963013, + "learning_rate": 3.8987976029461935e-05, + "loss": 0.0231, + "step": 35530 + }, + { + "action_loss": 0.002284081419929862, + "epoch": 31.951438848920862, + "step": 35530 + }, + { + "epoch": 31.951438848920862, + "step": 35530, + "torque_loss": 0.12091898173093796 + }, + { + "epoch": 31.951438848920862, + "force_loss": 0.005671159829944372, + "step": 35530 + }, + { + "epoch": 31.96043165467626, + "grad_norm": 0.18670997023582458, + "learning_rate": 3.896109653891853e-05, + "loss": 0.0172, + "step": 35540 + }, + { + "action_loss": 0.0024247097317129374, + "epoch": 31.96043165467626, + "step": 35540 + }, + { + "epoch": 31.96043165467626, + "step": 35540, + "torque_loss": 0.10384371876716614 + }, + { + "epoch": 31.96043165467626, + "force_loss": 0.003963085822761059, + "step": 35540 + }, + { + "epoch": 31.969424460431654, + "grad_norm": 0.21620497107505798, + "learning_rate": 3.893422040170254e-05, + "loss": 0.0159, + "step": 35550 + }, + { + "action_loss": 0.006690721958875656, + "epoch": 31.969424460431654, + "step": 35550 + }, + { + "epoch": 31.969424460431654, + "step": 35550, + "torque_loss": 0.10844894498586655 + }, + { + "epoch": 31.969424460431654, + "force_loss": 0.016566084697842598, + "step": 35550 + }, + { + "epoch": 31.97841726618705, + "grad_norm": 0.16152913868427277, + "learning_rate": 3.8907347625978207e-05, + "loss": 0.0213, + "step": 35560 + }, + { + "action_loss": 0.01430537085980177, + "epoch": 31.97841726618705, + "step": 35560 + }, + { + "epoch": 31.97841726618705, + "step": 35560, + "torque_loss": 0.21666081249713898 + }, + { + "epoch": 31.97841726618705, + "force_loss": 0.018692273646593094, + "step": 35560 + }, + { + "epoch": 31.987410071942445, + "grad_norm": 0.23225396871566772, + "learning_rate": 3.88804782199088e-05, + "loss": 0.0181, + "step": 35570 + }, + { + "action_loss": 0.017697514966130257, + "epoch": 31.987410071942445, + "step": 35570 + }, + { + "epoch": 31.987410071942445, + "step": 35570, + "torque_loss": 0.16339315474033356 + }, + { + "epoch": 31.987410071942445, + "force_loss": 0.026077555492520332, + "step": 35570 + }, + { + "epoch": 31.996402877697843, + "grad_norm": 0.6275593042373657, + "learning_rate": 3.8853612191656495e-05, + "loss": 0.0173, + "step": 35580 + }, + { + "action_loss": 0.002076135715469718, + "epoch": 31.996402877697843, + "step": 35580 + }, + { + "epoch": 31.996402877697843, + "step": 35580, + "torque_loss": 0.15213413536548615 + }, + { + "epoch": 31.996402877697843, + "force_loss": 0.0013605583226308227, + "step": 35580 + }, + { + "epoch": 32.00539568345324, + "grad_norm": 0.31636619567871094, + "learning_rate": 3.88267495493825e-05, + "loss": 0.0179, + "step": 35590 + }, + { + "action_loss": 0.0017146820900961757, + "epoch": 32.00539568345324, + "step": 35590 + }, + { + "epoch": 32.00539568345324, + "step": 35590, + "torque_loss": 0.0939028263092041 + }, + { + "epoch": 32.00539568345324, + "force_loss": 0.0031150151044130325, + "step": 35590 + }, + { + "epoch": 32.014388489208635, + "grad_norm": 0.24996823072433472, + "learning_rate": 3.8799890301247004e-05, + "loss": 0.0211, + "step": 35600 + }, + { + "action_loss": 0.0191951896995306, + "epoch": 32.014388489208635, + "step": 35600 + }, + { + "epoch": 32.014388489208635, + "step": 35600, + "torque_loss": 0.14893077313899994 + }, + { + "epoch": 32.014388489208635, + "force_loss": 0.03253011777997017, + "step": 35600 + }, + { + "epoch": 32.023381294964025, + "grad_norm": 0.21590636670589447, + "learning_rate": 3.8773034455409096e-05, + "loss": 0.0195, + "step": 35610 + }, + { + "action_loss": 0.007882803678512573, + "epoch": 32.023381294964025, + "step": 35610 + }, + { + "epoch": 32.023381294964025, + "step": 35610, + "torque_loss": 0.15608543157577515 + }, + { + "epoch": 32.023381294964025, + "force_loss": 0.007862423546612263, + "step": 35610 + }, + { + "epoch": 32.03237410071942, + "grad_norm": 0.228277787566185, + "learning_rate": 3.8746182020026904e-05, + "loss": 0.0193, + "step": 35620 + }, + { + "action_loss": 0.003483865410089493, + "epoch": 32.03237410071942, + "step": 35620 + }, + { + "epoch": 32.03237410071942, + "step": 35620, + "torque_loss": 0.1525992751121521 + }, + { + "epoch": 32.03237410071942, + "force_loss": 0.002559076063334942, + "step": 35620 + }, + { + "epoch": 32.04136690647482, + "grad_norm": 0.25834381580352783, + "learning_rate": 3.871933300325745e-05, + "loss": 0.0167, + "step": 35630 + }, + { + "action_loss": 0.0043231225572526455, + "epoch": 32.04136690647482, + "step": 35630 + }, + { + "epoch": 32.04136690647482, + "step": 35630, + "torque_loss": 0.13323475420475006 + }, + { + "epoch": 32.04136690647482, + "force_loss": 0.009907946921885014, + "step": 35630 + }, + { + "epoch": 32.05035971223022, + "grad_norm": 0.3000139594078064, + "learning_rate": 3.869248741325679e-05, + "loss": 0.02, + "step": 35640 + }, + { + "action_loss": 0.004427507985383272, + "epoch": 32.05035971223022, + "step": 35640 + }, + { + "epoch": 32.05035971223022, + "step": 35640, + "torque_loss": 0.14015614986419678 + }, + { + "epoch": 32.05035971223022, + "force_loss": 0.005604162812232971, + "step": 35640 + }, + { + "epoch": 32.05935251798561, + "grad_norm": 0.17594636976718903, + "learning_rate": 3.866564525817992e-05, + "loss": 0.0167, + "step": 35650 + }, + { + "action_loss": 0.004938953090459108, + "epoch": 32.05935251798561, + "step": 35650 + }, + { + "epoch": 32.05935251798561, + "step": 35650, + "torque_loss": 0.1780819296836853 + }, + { + "epoch": 32.05935251798561, + "force_loss": 0.002592293079942465, + "step": 35650 + }, + { + "epoch": 32.068345323741006, + "grad_norm": 0.29027655720710754, + "learning_rate": 3.8638806546180725e-05, + "loss": 0.0175, + "step": 35660 + }, + { + "action_loss": 0.0020971298217773438, + "epoch": 32.068345323741006, + "step": 35660 + }, + { + "epoch": 32.068345323741006, + "step": 35660, + "torque_loss": 0.08877771347761154 + }, + { + "epoch": 32.068345323741006, + "force_loss": 0.002040656516328454, + "step": 35660 + }, + { + "epoch": 32.077338129496404, + "grad_norm": 0.18215881288051605, + "learning_rate": 3.861197128541213e-05, + "loss": 0.02, + "step": 35670 + }, + { + "action_loss": 0.00523406034335494, + "epoch": 32.077338129496404, + "step": 35670 + }, + { + "epoch": 32.077338129496404, + "step": 35670, + "torque_loss": 0.13208834826946259 + }, + { + "epoch": 32.077338129496404, + "force_loss": 0.0069520375691354275, + "step": 35670 + }, + { + "epoch": 32.0863309352518, + "grad_norm": 0.11743181198835373, + "learning_rate": 3.858513948402599e-05, + "loss": 0.0191, + "step": 35680 + }, + { + "action_loss": 0.005054831970483065, + "epoch": 32.0863309352518, + "step": 35680 + }, + { + "epoch": 32.0863309352518, + "step": 35680, + "torque_loss": 0.14573615789413452 + }, + { + "epoch": 32.0863309352518, + "force_loss": 0.006392117589712143, + "step": 35680 + }, + { + "epoch": 32.09532374100719, + "grad_norm": 0.32260438799858093, + "learning_rate": 3.8558311150173077e-05, + "loss": 0.0328, + "step": 35690 + }, + { + "action_loss": 0.005089659709483385, + "epoch": 32.09532374100719, + "step": 35690 + }, + { + "epoch": 32.09532374100719, + "step": 35690, + "torque_loss": 0.19206349551677704 + }, + { + "epoch": 32.09532374100719, + "force_loss": 0.0048776851035654545, + "step": 35690 + }, + { + "epoch": 32.10431654676259, + "grad_norm": 0.19433145225048065, + "learning_rate": 3.853148629200312e-05, + "loss": 0.0207, + "step": 35700 + }, + { + "action_loss": 0.0017868741415441036, + "epoch": 32.10431654676259, + "step": 35700 + }, + { + "epoch": 32.10431654676259, + "step": 35700, + "torque_loss": 0.1520131379365921 + }, + { + "epoch": 32.10431654676259, + "force_loss": 0.0017587350448593497, + "step": 35700 + }, + { + "epoch": 32.11330935251799, + "grad_norm": 0.1347096711397171, + "learning_rate": 3.850466491766482e-05, + "loss": 0.0185, + "step": 35710 + }, + { + "action_loss": 0.0031681815162301064, + "epoch": 32.11330935251799, + "step": 35710 + }, + { + "epoch": 32.11330935251799, + "step": 35710, + "torque_loss": 0.16818366944789886 + }, + { + "epoch": 32.11330935251799, + "force_loss": 0.009126334451138973, + "step": 35710 + }, + { + "epoch": 32.12230215827338, + "grad_norm": 0.22764194011688232, + "learning_rate": 3.847784703530583e-05, + "loss": 0.0165, + "step": 35720 + }, + { + "action_loss": 0.003924195189028978, + "epoch": 32.12230215827338, + "step": 35720 + }, + { + "epoch": 32.12230215827338, + "step": 35720, + "torque_loss": 0.09507862478494644 + }, + { + "epoch": 32.12230215827338, + "force_loss": 0.007018100470304489, + "step": 35720 + }, + { + "epoch": 32.131294964028775, + "grad_norm": 0.11621231585741043, + "learning_rate": 3.845103265307266e-05, + "loss": 0.0173, + "step": 35730 + }, + { + "action_loss": 0.002146992366760969, + "epoch": 32.131294964028775, + "step": 35730 + }, + { + "epoch": 32.131294964028775, + "step": 35730, + "torque_loss": 0.146606907248497 + }, + { + "epoch": 32.131294964028775, + "force_loss": 0.0018671402940526605, + "step": 35730 + }, + { + "epoch": 32.14028776978417, + "grad_norm": 0.10835649073123932, + "learning_rate": 3.842422177911086e-05, + "loss": 0.0174, + "step": 35740 + }, + { + "action_loss": 0.008047571405768394, + "epoch": 32.14028776978417, + "step": 35740 + }, + { + "epoch": 32.14028776978417, + "step": 35740, + "torque_loss": 0.11531472206115723 + }, + { + "epoch": 32.14028776978417, + "force_loss": 0.014328345656394958, + "step": 35740 + }, + { + "epoch": 32.14928057553957, + "grad_norm": 0.1983528435230255, + "learning_rate": 3.8397414421564826e-05, + "loss": 0.0191, + "step": 35750 + }, + { + "action_loss": 0.004859645385295153, + "epoch": 32.14928057553957, + "step": 35750 + }, + { + "epoch": 32.14928057553957, + "step": 35750, + "torque_loss": 0.16065071523189545 + }, + { + "epoch": 32.14928057553957, + "force_loss": 0.0056686424650251865, + "step": 35750 + }, + { + "epoch": 32.15827338129496, + "grad_norm": 0.37957534193992615, + "learning_rate": 3.8370610588577935e-05, + "loss": 0.0199, + "step": 35760 + }, + { + "action_loss": 0.004888215567916632, + "epoch": 32.15827338129496, + "step": 35760 + }, + { + "epoch": 32.15827338129496, + "step": 35760, + "torque_loss": 0.15222908556461334 + }, + { + "epoch": 32.15827338129496, + "force_loss": 0.01704634726047516, + "step": 35760 + }, + { + "epoch": 32.16726618705036, + "grad_norm": 0.17114831507205963, + "learning_rate": 3.834381028829251e-05, + "loss": 0.0174, + "step": 35770 + }, + { + "action_loss": 0.003907196223735809, + "epoch": 32.16726618705036, + "step": 35770 + }, + { + "epoch": 32.16726618705036, + "step": 35770, + "torque_loss": 0.15016154944896698 + }, + { + "epoch": 32.16726618705036, + "force_loss": 0.002589662792161107, + "step": 35770 + }, + { + "epoch": 32.17625899280576, + "grad_norm": 0.19417725503444672, + "learning_rate": 3.8317013528849745e-05, + "loss": 0.0158, + "step": 35780 + }, + { + "action_loss": 0.0029246974736452103, + "epoch": 32.17625899280576, + "step": 35780 + }, + { + "epoch": 32.17625899280576, + "step": 35780, + "torque_loss": 0.12482305616140366 + }, + { + "epoch": 32.17625899280576, + "force_loss": 0.004284083843231201, + "step": 35780 + }, + { + "epoch": 32.185251798561154, + "grad_norm": 0.1793966442346573, + "learning_rate": 3.8290220318389815e-05, + "loss": 0.0213, + "step": 35790 + }, + { + "action_loss": 0.0022399509325623512, + "epoch": 32.185251798561154, + "step": 35790 + }, + { + "epoch": 32.185251798561154, + "step": 35790, + "torque_loss": 0.09926352649927139 + }, + { + "epoch": 32.185251798561154, + "force_loss": 0.0037671260070055723, + "step": 35790 + }, + { + "epoch": 32.194244604316545, + "grad_norm": 0.1799623966217041, + "learning_rate": 3.8263430665051746e-05, + "loss": 0.0155, + "step": 35800 + }, + { + "action_loss": 0.0030329227447509766, + "epoch": 32.194244604316545, + "step": 35800 + }, + { + "epoch": 32.194244604316545, + "step": 35800, + "torque_loss": 0.14444126188755035 + }, + { + "epoch": 32.194244604316545, + "force_loss": 0.005087635014206171, + "step": 35800 + }, + { + "epoch": 32.20323741007194, + "grad_norm": 0.24660754203796387, + "learning_rate": 3.8236644576973554e-05, + "loss": 0.0165, + "step": 35810 + }, + { + "action_loss": 0.004632997792214155, + "epoch": 32.20323741007194, + "step": 35810 + }, + { + "epoch": 32.20323741007194, + "step": 35810, + "torque_loss": 0.1333283632993698 + }, + { + "epoch": 32.20323741007194, + "force_loss": 0.006720694247633219, + "step": 35810 + }, + { + "epoch": 32.21223021582734, + "grad_norm": 0.07578231394290924, + "learning_rate": 3.820986206229217e-05, + "loss": 0.0161, + "step": 35820 + }, + { + "action_loss": 0.00815785676240921, + "epoch": 32.21223021582734, + "step": 35820 + }, + { + "epoch": 32.21223021582734, + "step": 35820, + "torque_loss": 0.15545953810214996 + }, + { + "epoch": 32.21223021582734, + "force_loss": 0.0039089578203856945, + "step": 35820 + }, + { + "epoch": 32.22122302158273, + "grad_norm": 0.49912646412849426, + "learning_rate": 3.8183083129143384e-05, + "loss": 0.0172, + "step": 35830 + }, + { + "action_loss": 0.06644818186759949, + "epoch": 32.22122302158273, + "step": 35830 + }, + { + "epoch": 32.22122302158273, + "step": 35830, + "torque_loss": 0.19193215668201447 + }, + { + "epoch": 32.22122302158273, + "force_loss": 0.06332853436470032, + "step": 35830 + }, + { + "epoch": 32.23021582733813, + "grad_norm": 0.22522485256195068, + "learning_rate": 3.815630778566193e-05, + "loss": 0.0273, + "step": 35840 + }, + { + "action_loss": 0.006677614990621805, + "epoch": 32.23021582733813, + "step": 35840 + }, + { + "epoch": 32.23021582733813, + "step": 35840, + "torque_loss": 0.13887529075145721 + }, + { + "epoch": 32.23021582733813, + "force_loss": 0.004647038411349058, + "step": 35840 + }, + { + "epoch": 32.239208633093526, + "grad_norm": 0.15656091272830963, + "learning_rate": 3.812953603998145e-05, + "loss": 0.0175, + "step": 35850 + }, + { + "action_loss": 0.0009108425583690405, + "epoch": 32.239208633093526, + "step": 35850 + }, + { + "epoch": 32.239208633093526, + "step": 35850, + "torque_loss": 0.07493776828050613 + }, + { + "epoch": 32.239208633093526, + "force_loss": 0.0010153655894100666, + "step": 35850 + }, + { + "epoch": 32.24820143884892, + "grad_norm": 0.28867635130882263, + "learning_rate": 3.8102767900234504e-05, + "loss": 0.0169, + "step": 35860 + }, + { + "action_loss": 0.001914641004987061, + "epoch": 32.24820143884892, + "step": 35860 + }, + { + "epoch": 32.24820143884892, + "step": 35860, + "torque_loss": 0.10751917958259583 + }, + { + "epoch": 32.24820143884892, + "force_loss": 0.0030521564185619354, + "step": 35860 + }, + { + "epoch": 32.257194244604314, + "grad_norm": 0.23864106833934784, + "learning_rate": 3.807600337455256e-05, + "loss": 0.0193, + "step": 35870 + }, + { + "action_loss": 0.0036270718555897474, + "epoch": 32.257194244604314, + "step": 35870 + }, + { + "epoch": 32.257194244604314, + "step": 35870, + "torque_loss": 0.09673986583948135 + }, + { + "epoch": 32.257194244604314, + "force_loss": 0.003295250004157424, + "step": 35870 + }, + { + "epoch": 32.26618705035971, + "grad_norm": 0.2653827965259552, + "learning_rate": 3.804924247106593e-05, + "loss": 0.019, + "step": 35880 + }, + { + "action_loss": 0.0020589062478393316, + "epoch": 32.26618705035971, + "step": 35880 + }, + { + "epoch": 32.26618705035971, + "step": 35880, + "torque_loss": 0.06579836457967758 + }, + { + "epoch": 32.26618705035971, + "force_loss": 0.0023911793250590563, + "step": 35880 + }, + { + "epoch": 32.27517985611511, + "grad_norm": 0.18982920050621033, + "learning_rate": 3.8022485197903925e-05, + "loss": 0.0175, + "step": 35890 + }, + { + "action_loss": 0.0015938496217131615, + "epoch": 32.27517985611511, + "step": 35890 + }, + { + "epoch": 32.27517985611511, + "step": 35890, + "torque_loss": 0.10908424109220505 + }, + { + "epoch": 32.27517985611511, + "force_loss": 0.00077058095484972, + "step": 35890 + }, + { + "epoch": 32.28417266187051, + "grad_norm": 0.13052676618099213, + "learning_rate": 3.799573156319464e-05, + "loss": 0.0172, + "step": 35900 + }, + { + "action_loss": 0.014239001087844372, + "epoch": 32.28417266187051, + "step": 35900 + }, + { + "epoch": 32.28417266187051, + "step": 35900, + "torque_loss": 0.10351020842790604 + }, + { + "epoch": 32.28417266187051, + "force_loss": 0.010057463310658932, + "step": 35900 + }, + { + "epoch": 32.2931654676259, + "grad_norm": 0.2521089017391205, + "learning_rate": 3.796898157506515e-05, + "loss": 0.0211, + "step": 35910 + }, + { + "action_loss": 0.002401305129751563, + "epoch": 32.2931654676259, + "step": 35910 + }, + { + "epoch": 32.2931654676259, + "step": 35910, + "torque_loss": 0.133083775639534 + }, + { + "epoch": 32.2931654676259, + "force_loss": 0.00283030909486115, + "step": 35910 + }, + { + "epoch": 32.302158273381295, + "grad_norm": 0.13918083906173706, + "learning_rate": 3.794223524164143e-05, + "loss": 0.0178, + "step": 35920 + }, + { + "action_loss": 0.0015532016986981034, + "epoch": 32.302158273381295, + "step": 35920 + }, + { + "epoch": 32.302158273381295, + "step": 35920, + "torque_loss": 0.08964937925338745 + }, + { + "epoch": 32.302158273381295, + "force_loss": 0.0019377600401639938, + "step": 35920 + }, + { + "epoch": 32.31115107913669, + "grad_norm": 0.1956116110086441, + "learning_rate": 3.7915492571048245e-05, + "loss": 0.0205, + "step": 35930 + }, + { + "action_loss": 0.002844024682417512, + "epoch": 32.31115107913669, + "step": 35930 + }, + { + "epoch": 32.31115107913669, + "step": 35930, + "torque_loss": 0.10727709531784058 + }, + { + "epoch": 32.31115107913669, + "force_loss": 0.004802302923053503, + "step": 35930 + }, + { + "epoch": 32.32014388489208, + "grad_norm": 0.2805778980255127, + "learning_rate": 3.788875357140937e-05, + "loss": 0.0238, + "step": 35940 + }, + { + "action_loss": 0.004543399903923273, + "epoch": 32.32014388489208, + "step": 35940 + }, + { + "epoch": 32.32014388489208, + "step": 35940, + "torque_loss": 0.12029657512903214 + }, + { + "epoch": 32.32014388489208, + "force_loss": 0.00520290806889534, + "step": 35940 + }, + { + "epoch": 32.32913669064748, + "grad_norm": 0.24458202719688416, + "learning_rate": 3.786201825084736e-05, + "loss": 0.0163, + "step": 35950 + }, + { + "action_loss": 0.002751231426373124, + "epoch": 32.32913669064748, + "step": 35950 + }, + { + "epoch": 32.32913669064748, + "step": 35950, + "torque_loss": 0.11642998456954956 + }, + { + "epoch": 32.32913669064748, + "force_loss": 0.0029301319736987352, + "step": 35950 + }, + { + "epoch": 32.33812949640288, + "grad_norm": 0.2637021243572235, + "learning_rate": 3.783528661748372e-05, + "loss": 0.016, + "step": 35960 + }, + { + "action_loss": 0.0029750268440693617, + "epoch": 32.33812949640288, + "step": 35960 + }, + { + "epoch": 32.33812949640288, + "step": 35960, + "torque_loss": 0.11280087381601334 + }, + { + "epoch": 32.33812949640288, + "force_loss": 0.0060738143511116505, + "step": 35960 + }, + { + "epoch": 32.347122302158276, + "grad_norm": 0.3076011538505554, + "learning_rate": 3.780855867943882e-05, + "loss": 0.022, + "step": 35970 + }, + { + "action_loss": 0.001550456159748137, + "epoch": 32.347122302158276, + "step": 35970 + }, + { + "epoch": 32.347122302158276, + "step": 35970, + "torque_loss": 0.10353758931159973 + }, + { + "epoch": 32.347122302158276, + "force_loss": 0.0014127379981800914, + "step": 35970 + }, + { + "epoch": 32.356115107913666, + "grad_norm": 0.2607734501361847, + "learning_rate": 3.778183444483189e-05, + "loss": 0.0177, + "step": 35980 + }, + { + "action_loss": 0.005373851861804724, + "epoch": 32.356115107913666, + "step": 35980 + }, + { + "epoch": 32.356115107913666, + "step": 35980, + "torque_loss": 0.10691497474908829 + }, + { + "epoch": 32.356115107913666, + "force_loss": 0.004640842322260141, + "step": 35980 + }, + { + "epoch": 32.365107913669064, + "grad_norm": 0.2998850345611572, + "learning_rate": 3.775511392178108e-05, + "loss": 0.0181, + "step": 35990 + }, + { + "action_loss": 0.0013369083171710372, + "epoch": 32.365107913669064, + "step": 35990 + }, + { + "epoch": 32.365107913669064, + "step": 35990, + "torque_loss": 0.12341249734163284 + }, + { + "epoch": 32.365107913669064, + "force_loss": 0.0023934622295200825, + "step": 35990 + }, + { + "epoch": 32.37410071942446, + "grad_norm": 0.32738152146339417, + "learning_rate": 3.772839711840332e-05, + "loss": 0.0202, + "step": 36000 + }, + { + "action_loss": 0.007295499090105295, + "epoch": 32.37410071942446, + "step": 36000 + }, + { + "epoch": 32.37410071942446, + "step": 36000, + "torque_loss": 0.15303842723369598 + }, + { + "epoch": 32.37410071942446, + "force_loss": 0.005727540235966444, + "step": 36000 + }, + { + "epoch": 32.38309352517986, + "grad_norm": 0.39588892459869385, + "learning_rate": 3.7701684042814515e-05, + "loss": 0.0211, + "step": 36010 + }, + { + "action_loss": 0.0037621904630213976, + "epoch": 32.38309352517986, + "step": 36010 + }, + { + "epoch": 32.38309352517986, + "step": 36010, + "torque_loss": 0.12148018926382065 + }, + { + "epoch": 32.38309352517986, + "force_loss": 0.004511638078838587, + "step": 36010 + }, + { + "epoch": 32.39208633093525, + "grad_norm": 0.29814431071281433, + "learning_rate": 3.76749747031294e-05, + "loss": 0.0164, + "step": 36020 + }, + { + "action_loss": 0.024477383121848106, + "epoch": 32.39208633093525, + "step": 36020 + }, + { + "epoch": 32.39208633093525, + "step": 36020, + "torque_loss": 0.19140727818012238 + }, + { + "epoch": 32.39208633093525, + "force_loss": 0.040710482746362686, + "step": 36020 + }, + { + "epoch": 32.40107913669065, + "grad_norm": 0.25167617201805115, + "learning_rate": 3.764826910746152e-05, + "loss": 0.0245, + "step": 36030 + }, + { + "action_loss": 0.002894095377996564, + "epoch": 32.40107913669065, + "step": 36030 + }, + { + "epoch": 32.40107913669065, + "step": 36030, + "torque_loss": 0.168232262134552 + }, + { + "epoch": 32.40107913669065, + "force_loss": 0.0017592711374163628, + "step": 36030 + }, + { + "epoch": 32.410071942446045, + "grad_norm": 0.17676064372062683, + "learning_rate": 3.762156726392338e-05, + "loss": 0.0163, + "step": 36040 + }, + { + "action_loss": 0.0027098748832941055, + "epoch": 32.410071942446045, + "step": 36040 + }, + { + "epoch": 32.410071942446045, + "step": 36040, + "torque_loss": 0.09334651380777359 + }, + { + "epoch": 32.410071942446045, + "force_loss": 0.0035172405187040567, + "step": 36040 + }, + { + "epoch": 32.419064748201436, + "grad_norm": 0.19449324905872345, + "learning_rate": 3.759486918062625e-05, + "loss": 0.0222, + "step": 36050 + }, + { + "action_loss": 0.002983634127303958, + "epoch": 32.419064748201436, + "step": 36050 + }, + { + "epoch": 32.419064748201436, + "step": 36050, + "torque_loss": 0.16478627920150757 + }, + { + "epoch": 32.419064748201436, + "force_loss": 0.005507923662662506, + "step": 36050 + }, + { + "epoch": 32.42805755395683, + "grad_norm": 0.2805757522583008, + "learning_rate": 3.756817486568033e-05, + "loss": 0.0197, + "step": 36060 + }, + { + "action_loss": 0.007810443639755249, + "epoch": 32.42805755395683, + "step": 36060 + }, + { + "epoch": 32.42805755395683, + "step": 36060, + "torque_loss": 0.11835313588380814 + }, + { + "epoch": 32.42805755395683, + "force_loss": 0.012032014317810535, + "step": 36060 + }, + { + "epoch": 32.43705035971223, + "grad_norm": 0.29620882868766785, + "learning_rate": 3.7541484327194654e-05, + "loss": 0.0176, + "step": 36070 + }, + { + "action_loss": 0.002350898226723075, + "epoch": 32.43705035971223, + "step": 36070 + }, + { + "epoch": 32.43705035971223, + "step": 36070, + "torque_loss": 0.07381812483072281 + }, + { + "epoch": 32.43705035971223, + "force_loss": 0.0035089829470962286, + "step": 36070 + }, + { + "epoch": 32.44604316546763, + "grad_norm": 0.1146189272403717, + "learning_rate": 3.751479757327707e-05, + "loss": 0.0202, + "step": 36080 + }, + { + "action_loss": 0.003432293189689517, + "epoch": 32.44604316546763, + "step": 36080 + }, + { + "epoch": 32.44604316546763, + "step": 36080, + "torque_loss": 0.10028309375047684 + }, + { + "epoch": 32.44604316546763, + "force_loss": 0.002584822243079543, + "step": 36080 + }, + { + "epoch": 32.45503597122302, + "grad_norm": 0.28914424777030945, + "learning_rate": 3.7488114612034345e-05, + "loss": 0.0184, + "step": 36090 + }, + { + "action_loss": 0.000974094495177269, + "epoch": 32.45503597122302, + "step": 36090 + }, + { + "epoch": 32.45503597122302, + "step": 36090, + "torque_loss": 0.16874247789382935 + }, + { + "epoch": 32.45503597122302, + "force_loss": 0.00147256872151047, + "step": 36090 + }, + { + "epoch": 32.46402877697842, + "grad_norm": 0.25618690252304077, + "learning_rate": 3.7461435451572044e-05, + "loss": 0.0201, + "step": 36100 + }, + { + "action_loss": 0.010891355574131012, + "epoch": 32.46402877697842, + "step": 36100 + }, + { + "epoch": 32.46402877697842, + "step": 36100, + "torque_loss": 0.12587206065654755 + }, + { + "epoch": 32.46402877697842, + "force_loss": 0.008505494333803654, + "step": 36100 + }, + { + "epoch": 32.473021582733814, + "grad_norm": 0.09253151714801788, + "learning_rate": 3.743476009999459e-05, + "loss": 0.0215, + "step": 36110 + }, + { + "action_loss": 0.002712973626330495, + "epoch": 32.473021582733814, + "step": 36110 + }, + { + "epoch": 32.473021582733814, + "step": 36110, + "torque_loss": 0.16960680484771729 + }, + { + "epoch": 32.473021582733814, + "force_loss": 0.0028641382232308388, + "step": 36110 + }, + { + "epoch": 32.48201438848921, + "grad_norm": 0.17254550755023956, + "learning_rate": 3.7408088565405245e-05, + "loss": 0.02, + "step": 36120 + }, + { + "action_loss": 0.0022241489496082067, + "epoch": 32.48201438848921, + "step": 36120 + }, + { + "epoch": 32.48201438848921, + "step": 36120, + "torque_loss": 0.15643279254436493 + }, + { + "epoch": 32.48201438848921, + "force_loss": 0.0014043691335245967, + "step": 36120 + }, + { + "epoch": 32.4910071942446, + "grad_norm": 0.1474131941795349, + "learning_rate": 3.738142085590612e-05, + "loss": 0.0184, + "step": 36130 + }, + { + "action_loss": 0.005370520055294037, + "epoch": 32.4910071942446, + "step": 36130 + }, + { + "epoch": 32.4910071942446, + "step": 36130, + "torque_loss": 0.12520332634449005 + }, + { + "epoch": 32.4910071942446, + "force_loss": 0.004635232966393232, + "step": 36130 + }, + { + "epoch": 32.5, + "grad_norm": 0.2439391314983368, + "learning_rate": 3.7354756979598194e-05, + "loss": 0.0172, + "step": 36140 + }, + { + "action_loss": 0.004163598176091909, + "epoch": 32.5, + "step": 36140 + }, + { + "epoch": 32.5, + "step": 36140, + "torque_loss": 0.13880373537540436 + }, + { + "epoch": 32.5, + "force_loss": 0.011748421005904675, + "step": 36140 + }, + { + "epoch": 32.5089928057554, + "grad_norm": 0.2441454827785492, + "learning_rate": 3.7328096944581187e-05, + "loss": 0.0162, + "step": 36150 + }, + { + "action_loss": 0.0010668698232620955, + "epoch": 32.5089928057554, + "step": 36150 + }, + { + "epoch": 32.5089928057554, + "step": 36150, + "torque_loss": 0.10977080464363098 + }, + { + "epoch": 32.5089928057554, + "force_loss": 0.0009263657848350704, + "step": 36150 + }, + { + "epoch": 32.51798561151079, + "grad_norm": 0.2893800437450409, + "learning_rate": 3.730144075895377e-05, + "loss": 0.0192, + "step": 36160 + }, + { + "action_loss": 0.014249789528548717, + "epoch": 32.51798561151079, + "step": 36160 + }, + { + "epoch": 32.51798561151079, + "step": 36160, + "torque_loss": 0.11918381601572037 + }, + { + "epoch": 32.51798561151079, + "force_loss": 0.02451418526470661, + "step": 36160 + }, + { + "epoch": 32.526978417266186, + "grad_norm": 0.1465277224779129, + "learning_rate": 3.727478843081335e-05, + "loss": 0.0165, + "step": 36170 + }, + { + "action_loss": 0.002034307224676013, + "epoch": 32.526978417266186, + "step": 36170 + }, + { + "epoch": 32.526978417266186, + "step": 36170, + "torque_loss": 0.11345982551574707 + }, + { + "epoch": 32.526978417266186, + "force_loss": 0.00411411514505744, + "step": 36170 + }, + { + "epoch": 32.53597122302158, + "grad_norm": 0.7144786715507507, + "learning_rate": 3.72481399682562e-05, + "loss": 0.0174, + "step": 36180 + }, + { + "action_loss": 0.03424415737390518, + "epoch": 32.53597122302158, + "step": 36180 + }, + { + "epoch": 32.53597122302158, + "step": 36180, + "torque_loss": 0.14526519179344177 + }, + { + "epoch": 32.53597122302158, + "force_loss": 0.0341915637254715, + "step": 36180 + }, + { + "epoch": 32.54496402877698, + "grad_norm": 0.24556899070739746, + "learning_rate": 3.722149537937747e-05, + "loss": 0.0195, + "step": 36190 + }, + { + "action_loss": 0.0016800950979813933, + "epoch": 32.54496402877698, + "step": 36190 + }, + { + "epoch": 32.54496402877698, + "step": 36190, + "torque_loss": 0.07097528129816055 + }, + { + "epoch": 32.54496402877698, + "force_loss": 0.0025095471646636724, + "step": 36190 + }, + { + "epoch": 32.55395683453237, + "grad_norm": 0.12260957062244415, + "learning_rate": 3.7194854672271015e-05, + "loss": 0.0142, + "step": 36200 + }, + { + "action_loss": 0.0065669403411448, + "epoch": 32.55395683453237, + "step": 36200 + }, + { + "epoch": 32.55395683453237, + "step": 36200, + "torque_loss": 0.16885767877101898 + }, + { + "epoch": 32.55395683453237, + "force_loss": 0.010066266171634197, + "step": 36200 + }, + { + "epoch": 32.56294964028777, + "grad_norm": 0.23737619817256927, + "learning_rate": 3.7168217855029644e-05, + "loss": 0.0179, + "step": 36210 + }, + { + "action_loss": 0.0021387652959674597, + "epoch": 32.56294964028777, + "step": 36210 + }, + { + "epoch": 32.56294964028777, + "step": 36210, + "torque_loss": 0.13318897783756256 + }, + { + "epoch": 32.56294964028777, + "force_loss": 0.0055229864083230495, + "step": 36210 + }, + { + "epoch": 32.57194244604317, + "grad_norm": 0.16022609174251556, + "learning_rate": 3.7141584935744856e-05, + "loss": 0.0176, + "step": 36220 + }, + { + "action_loss": 0.0031720492988824844, + "epoch": 32.57194244604317, + "step": 36220 + }, + { + "epoch": 32.57194244604317, + "step": 36220, + "torque_loss": 0.1342240422964096 + }, + { + "epoch": 32.57194244604317, + "force_loss": 0.0026714131236076355, + "step": 36220 + }, + { + "epoch": 32.580935251798564, + "grad_norm": 0.16397269070148468, + "learning_rate": 3.7114955922507055e-05, + "loss": 0.0162, + "step": 36230 + }, + { + "action_loss": 0.0010003969073295593, + "epoch": 32.580935251798564, + "step": 36230 + }, + { + "epoch": 32.580935251798564, + "step": 36230, + "torque_loss": 0.08848855644464493 + }, + { + "epoch": 32.580935251798564, + "force_loss": 0.0011519277468323708, + "step": 36230 + }, + { + "epoch": 32.589928057553955, + "grad_norm": 0.17343898117542267, + "learning_rate": 3.708833082340545e-05, + "loss": 0.0192, + "step": 36240 + }, + { + "action_loss": 0.001305138343013823, + "epoch": 32.589928057553955, + "step": 36240 + }, + { + "epoch": 32.589928057553955, + "step": 36240, + "torque_loss": 0.06593629717826843 + }, + { + "epoch": 32.589928057553955, + "force_loss": 0.0015580812469124794, + "step": 36240 + }, + { + "epoch": 32.59892086330935, + "grad_norm": 0.1266532987356186, + "learning_rate": 3.7061709646528034e-05, + "loss": 0.0141, + "step": 36250 + }, + { + "action_loss": 0.007717927452176809, + "epoch": 32.59892086330935, + "step": 36250 + }, + { + "epoch": 32.59892086330935, + "step": 36250, + "torque_loss": 0.12802881002426147 + }, + { + "epoch": 32.59892086330935, + "force_loss": 0.01575583405792713, + "step": 36250 + }, + { + "epoch": 32.60791366906475, + "grad_norm": 0.1754145324230194, + "learning_rate": 3.7035092399961604e-05, + "loss": 0.0166, + "step": 36260 + }, + { + "action_loss": 0.001969613367691636, + "epoch": 32.60791366906475, + "step": 36260 + }, + { + "epoch": 32.60791366906475, + "step": 36260, + "torque_loss": 0.1301606446504593 + }, + { + "epoch": 32.60791366906475, + "force_loss": 0.004181955009698868, + "step": 36260 + }, + { + "epoch": 32.61690647482014, + "grad_norm": 0.7388187646865845, + "learning_rate": 3.700847909179177e-05, + "loss": 0.0194, + "step": 36270 + }, + { + "action_loss": 0.0048209298402071, + "epoch": 32.61690647482014, + "step": 36270 + }, + { + "epoch": 32.61690647482014, + "step": 36270, + "torque_loss": 0.15288764238357544 + }, + { + "epoch": 32.61690647482014, + "force_loss": 0.003654737723991275, + "step": 36270 + }, + { + "epoch": 32.62589928057554, + "grad_norm": 0.3836144208908081, + "learning_rate": 3.698186973010297e-05, + "loss": 0.0232, + "step": 36280 + }, + { + "action_loss": 0.005664277821779251, + "epoch": 32.62589928057554, + "step": 36280 + }, + { + "epoch": 32.62589928057554, + "step": 36280, + "torque_loss": 0.13004820048809052 + }, + { + "epoch": 32.62589928057554, + "force_loss": 0.013694548048079014, + "step": 36280 + }, + { + "epoch": 32.634892086330936, + "grad_norm": 0.3084618151187897, + "learning_rate": 3.695526432297844e-05, + "loss": 0.0161, + "step": 36290 + }, + { + "action_loss": 0.006037670653313398, + "epoch": 32.634892086330936, + "step": 36290 + }, + { + "epoch": 32.634892086330936, + "step": 36290, + "torque_loss": 0.1703004091978073 + }, + { + "epoch": 32.634892086330936, + "force_loss": 0.007248884532600641, + "step": 36290 + }, + { + "epoch": 32.643884892086334, + "grad_norm": 0.5387348532676697, + "learning_rate": 3.692866287850017e-05, + "loss": 0.0163, + "step": 36300 + }, + { + "action_loss": 0.0015967251965776086, + "epoch": 32.643884892086334, + "step": 36300 + }, + { + "epoch": 32.643884892086334, + "step": 36300, + "torque_loss": 0.12389391660690308 + }, + { + "epoch": 32.643884892086334, + "force_loss": 0.005228487309068441, + "step": 36300 + }, + { + "epoch": 32.652877697841724, + "grad_norm": 0.2087627500295639, + "learning_rate": 3.6902065404749006e-05, + "loss": 0.0171, + "step": 36310 + }, + { + "action_loss": 0.011323176324367523, + "epoch": 32.652877697841724, + "step": 36310 + }, + { + "epoch": 32.652877697841724, + "step": 36310, + "torque_loss": 0.14483921229839325 + }, + { + "epoch": 32.652877697841724, + "force_loss": 0.005349026992917061, + "step": 36310 + }, + { + "epoch": 32.66187050359712, + "grad_norm": 0.41825759410858154, + "learning_rate": 3.6875471909804516e-05, + "loss": 0.0228, + "step": 36320 + }, + { + "action_loss": 0.000972981855738908, + "epoch": 32.66187050359712, + "step": 36320 + }, + { + "epoch": 32.66187050359712, + "step": 36320, + "torque_loss": 0.09398885816335678 + }, + { + "epoch": 32.66187050359712, + "force_loss": 0.000758222013246268, + "step": 36320 + }, + { + "epoch": 32.67086330935252, + "grad_norm": 0.23640744388103485, + "learning_rate": 3.6848882401745135e-05, + "loss": 0.0139, + "step": 36330 + }, + { + "action_loss": 0.002796156331896782, + "epoch": 32.67086330935252, + "step": 36330 + }, + { + "epoch": 32.67086330935252, + "step": 36330, + "torque_loss": 0.127136692404747 + }, + { + "epoch": 32.67086330935252, + "force_loss": 0.004370329901576042, + "step": 36330 + }, + { + "epoch": 32.67985611510792, + "grad_norm": 0.279753714799881, + "learning_rate": 3.682229688864806e-05, + "loss": 0.0169, + "step": 36340 + }, + { + "action_loss": 0.0012949536321684718, + "epoch": 32.67985611510792, + "step": 36340 + }, + { + "epoch": 32.67985611510792, + "step": 36340, + "torque_loss": 0.1081714853644371 + }, + { + "epoch": 32.67985611510792, + "force_loss": 0.0017533120699226856, + "step": 36340 + }, + { + "epoch": 32.68884892086331, + "grad_norm": 0.10611731559038162, + "learning_rate": 3.6795715378589235e-05, + "loss": 0.0159, + "step": 36350 + }, + { + "action_loss": 0.001235096831806004, + "epoch": 32.68884892086331, + "step": 36350 + }, + { + "epoch": 32.68884892086331, + "step": 36350, + "torque_loss": 0.11640948057174683 + }, + { + "epoch": 32.68884892086331, + "force_loss": 0.001461779116652906, + "step": 36350 + }, + { + "epoch": 32.697841726618705, + "grad_norm": 0.34805500507354736, + "learning_rate": 3.676913787964345e-05, + "loss": 0.0217, + "step": 36360 + }, + { + "action_loss": 0.002664159284904599, + "epoch": 32.697841726618705, + "step": 36360 + }, + { + "epoch": 32.697841726618705, + "step": 36360, + "torque_loss": 0.10745004564523697 + }, + { + "epoch": 32.697841726618705, + "force_loss": 0.0024546687491238117, + "step": 36360 + }, + { + "epoch": 32.7068345323741, + "grad_norm": 0.2172185331583023, + "learning_rate": 3.674256439988423e-05, + "loss": 0.0198, + "step": 36370 + }, + { + "action_loss": 0.002869424642995, + "epoch": 32.7068345323741, + "step": 36370 + }, + { + "epoch": 32.7068345323741, + "step": 36370, + "torque_loss": 0.09453406184911728 + }, + { + "epoch": 32.7068345323741, + "force_loss": 0.00511117884889245, + "step": 36370 + }, + { + "epoch": 32.71582733812949, + "grad_norm": 0.4886157512664795, + "learning_rate": 3.6715994947383904e-05, + "loss": 0.0168, + "step": 36380 + }, + { + "action_loss": 0.0028554575983434916, + "epoch": 32.71582733812949, + "step": 36380 + }, + { + "epoch": 32.71582733812949, + "step": 36380, + "torque_loss": 0.08057756721973419 + }, + { + "epoch": 32.71582733812949, + "force_loss": 0.0010780870215967298, + "step": 36380 + }, + { + "epoch": 32.72482014388489, + "grad_norm": 0.1216602623462677, + "learning_rate": 3.668942953021357e-05, + "loss": 0.0159, + "step": 36390 + }, + { + "action_loss": 0.008342002518475056, + "epoch": 32.72482014388489, + "step": 36390 + }, + { + "epoch": 32.72482014388489, + "step": 36390, + "torque_loss": 0.14404691755771637 + }, + { + "epoch": 32.72482014388489, + "force_loss": 0.015245121903717518, + "step": 36390 + }, + { + "epoch": 32.73381294964029, + "grad_norm": 0.11196604371070862, + "learning_rate": 3.66628681564431e-05, + "loss": 0.0156, + "step": 36400 + }, + { + "action_loss": 0.0013187493896111846, + "epoch": 32.73381294964029, + "step": 36400 + }, + { + "epoch": 32.73381294964029, + "step": 36400, + "torque_loss": 0.14906540513038635 + }, + { + "epoch": 32.73381294964029, + "force_loss": 0.004711283836513758, + "step": 36400 + }, + { + "epoch": 32.742805755395686, + "grad_norm": 0.09018264710903168, + "learning_rate": 3.663631083414114e-05, + "loss": 0.0149, + "step": 36410 + }, + { + "action_loss": 0.0025115821044892073, + "epoch": 32.742805755395686, + "step": 36410 + }, + { + "epoch": 32.742805755395686, + "step": 36410, + "torque_loss": 0.1465323567390442 + }, + { + "epoch": 32.742805755395686, + "force_loss": 0.003507686546072364, + "step": 36410 + }, + { + "epoch": 32.75179856115108, + "grad_norm": 0.3198237121105194, + "learning_rate": 3.660975757137509e-05, + "loss": 0.0165, + "step": 36420 + }, + { + "action_loss": 0.0016209982568398118, + "epoch": 32.75179856115108, + "step": 36420 + }, + { + "epoch": 32.75179856115108, + "step": 36420, + "torque_loss": 0.09486324340105057 + }, + { + "epoch": 32.75179856115108, + "force_loss": 0.004021722823381424, + "step": 36420 + }, + { + "epoch": 32.760791366906474, + "grad_norm": 0.27158528566360474, + "learning_rate": 3.658320837621114e-05, + "loss": 0.016, + "step": 36430 + }, + { + "action_loss": 0.0017069907626137137, + "epoch": 32.760791366906474, + "step": 36430 + }, + { + "epoch": 32.760791366906474, + "step": 36430, + "torque_loss": 0.11200741678476334 + }, + { + "epoch": 32.760791366906474, + "force_loss": 0.002729924162849784, + "step": 36430 + }, + { + "epoch": 32.76978417266187, + "grad_norm": 0.19988465309143066, + "learning_rate": 3.655666325671426e-05, + "loss": 0.0182, + "step": 36440 + }, + { + "action_loss": 0.0011555716628208756, + "epoch": 32.76978417266187, + "step": 36440 + }, + { + "epoch": 32.76978417266187, + "step": 36440, + "torque_loss": 0.11958108097314835 + }, + { + "epoch": 32.76978417266187, + "force_loss": 0.0012702671810984612, + "step": 36440 + }, + { + "epoch": 32.77877697841727, + "grad_norm": 0.16070693731307983, + "learning_rate": 3.65301222209481e-05, + "loss": 0.0158, + "step": 36450 + }, + { + "action_loss": 0.00433853454887867, + "epoch": 32.77877697841727, + "step": 36450 + }, + { + "epoch": 32.77877697841727, + "step": 36450, + "torque_loss": 0.12327710539102554 + }, + { + "epoch": 32.77877697841727, + "force_loss": 0.0031252000480890274, + "step": 36450 + }, + { + "epoch": 32.78776978417266, + "grad_norm": 0.36357322335243225, + "learning_rate": 3.650358527697519e-05, + "loss": 0.022, + "step": 36460 + }, + { + "action_loss": 0.004157778341323137, + "epoch": 32.78776978417266, + "step": 36460 + }, + { + "epoch": 32.78776978417266, + "step": 36460, + "torque_loss": 0.10977357625961304 + }, + { + "epoch": 32.78776978417266, + "force_loss": 0.01005170401185751, + "step": 36460 + }, + { + "epoch": 32.79676258992806, + "grad_norm": 0.17972682416439056, + "learning_rate": 3.64770524328567e-05, + "loss": 0.0184, + "step": 36470 + }, + { + "action_loss": 0.00646154535934329, + "epoch": 32.79676258992806, + "step": 36470 + }, + { + "epoch": 32.79676258992806, + "step": 36470, + "torque_loss": 0.09342347830533981 + }, + { + "epoch": 32.79676258992806, + "force_loss": 0.006371751893311739, + "step": 36470 + }, + { + "epoch": 32.805755395683455, + "grad_norm": 0.2528167963027954, + "learning_rate": 3.645052369665265e-05, + "loss": 0.0183, + "step": 36480 + }, + { + "action_loss": 0.005299969110637903, + "epoch": 32.805755395683455, + "step": 36480 + }, + { + "epoch": 32.805755395683455, + "step": 36480, + "torque_loss": 0.12035242468118668 + }, + { + "epoch": 32.805755395683455, + "force_loss": 0.012455065734684467, + "step": 36480 + }, + { + "epoch": 32.814748201438846, + "grad_norm": 0.295187383890152, + "learning_rate": 3.6423999076421724e-05, + "loss": 0.0191, + "step": 36490 + }, + { + "action_loss": 0.002935246331617236, + "epoch": 32.814748201438846, + "step": 36490 + }, + { + "epoch": 32.814748201438846, + "step": 36490, + "torque_loss": 0.17187924683094025 + }, + { + "epoch": 32.814748201438846, + "force_loss": 0.0024404069408774376, + "step": 36490 + }, + { + "epoch": 32.82374100719424, + "grad_norm": 0.3179472088813782, + "learning_rate": 3.639747858022142e-05, + "loss": 0.0209, + "step": 36500 + }, + { + "action_loss": 0.005375315900892019, + "epoch": 32.82374100719424, + "step": 36500 + }, + { + "epoch": 32.82374100719424, + "step": 36500, + "torque_loss": 0.09595170617103577 + }, + { + "epoch": 32.82374100719424, + "force_loss": 0.015310156159102917, + "step": 36500 + }, + { + "epoch": 32.83273381294964, + "grad_norm": 0.12300152331590652, + "learning_rate": 3.637096221610799e-05, + "loss": 0.0179, + "step": 36510 + }, + { + "action_loss": 0.0014070924371480942, + "epoch": 32.83273381294964, + "step": 36510 + }, + { + "epoch": 32.83273381294964, + "step": 36510, + "torque_loss": 0.11820673942565918 + }, + { + "epoch": 32.83273381294964, + "force_loss": 0.0017465772107243538, + "step": 36510 + }, + { + "epoch": 32.84172661870504, + "grad_norm": 0.13506054878234863, + "learning_rate": 3.634444999213638e-05, + "loss": 0.0159, + "step": 36520 + }, + { + "action_loss": 0.0023763671051710844, + "epoch": 32.84172661870504, + "step": 36520 + }, + { + "epoch": 32.84172661870504, + "step": 36520, + "torque_loss": 0.17207233607769012 + }, + { + "epoch": 32.84172661870504, + "force_loss": 0.0017365027451887727, + "step": 36520 + }, + { + "epoch": 32.85071942446043, + "grad_norm": 0.25424519181251526, + "learning_rate": 3.6317941916360296e-05, + "loss": 0.019, + "step": 36530 + }, + { + "action_loss": 0.0013623054837808013, + "epoch": 32.85071942446043, + "step": 36530 + }, + { + "epoch": 32.85071942446043, + "step": 36530, + "torque_loss": 0.12819351255893707 + }, + { + "epoch": 32.85071942446043, + "force_loss": 0.00106936099473387, + "step": 36530 + }, + { + "epoch": 32.85971223021583, + "grad_norm": 0.15003550052642822, + "learning_rate": 3.629143799683221e-05, + "loss": 0.0166, + "step": 36540 + }, + { + "action_loss": 0.0020373777952045202, + "epoch": 32.85971223021583, + "step": 36540 + }, + { + "epoch": 32.85971223021583, + "step": 36540, + "torque_loss": 0.14321736991405487 + }, + { + "epoch": 32.85971223021583, + "force_loss": 0.002855272265151143, + "step": 36540 + }, + { + "epoch": 32.868705035971225, + "grad_norm": 0.24814960360527039, + "learning_rate": 3.626493824160331e-05, + "loss": 0.0184, + "step": 36550 + }, + { + "action_loss": 0.0011365163372829556, + "epoch": 32.868705035971225, + "step": 36550 + }, + { + "epoch": 32.868705035971225, + "step": 36550, + "torque_loss": 0.12206246703863144 + }, + { + "epoch": 32.868705035971225, + "force_loss": 0.002020656829699874, + "step": 36550 + }, + { + "epoch": 32.87769784172662, + "grad_norm": 0.17621426284313202, + "learning_rate": 3.623844265872352e-05, + "loss": 0.0135, + "step": 36560 + }, + { + "action_loss": 0.0018490878865122795, + "epoch": 32.87769784172662, + "step": 36560 + }, + { + "epoch": 32.87769784172662, + "step": 36560, + "torque_loss": 0.11712668091058731 + }, + { + "epoch": 32.87769784172662, + "force_loss": 0.0028901107143610716, + "step": 36560 + }, + { + "epoch": 32.88669064748201, + "grad_norm": 0.2913665175437927, + "learning_rate": 3.621195125624149e-05, + "loss": 0.0191, + "step": 36570 + }, + { + "action_loss": 0.006535492837429047, + "epoch": 32.88669064748201, + "step": 36570 + }, + { + "epoch": 32.88669064748201, + "step": 36570, + "torque_loss": 0.15344056487083435 + }, + { + "epoch": 32.88669064748201, + "force_loss": 0.0064294240437448025, + "step": 36570 + }, + { + "epoch": 32.89568345323741, + "grad_norm": 0.22605034708976746, + "learning_rate": 3.618546404220463e-05, + "loss": 0.0212, + "step": 36580 + }, + { + "action_loss": 0.0054314048029482365, + "epoch": 32.89568345323741, + "step": 36580 + }, + { + "epoch": 32.89568345323741, + "step": 36580, + "torque_loss": 0.12828782200813293 + }, + { + "epoch": 32.89568345323741, + "force_loss": 0.006194684188812971, + "step": 36580 + }, + { + "epoch": 32.90467625899281, + "grad_norm": 0.14173366129398346, + "learning_rate": 3.615898102465903e-05, + "loss": 0.0189, + "step": 36590 + }, + { + "action_loss": 0.012773125432431698, + "epoch": 32.90467625899281, + "step": 36590 + }, + { + "epoch": 32.90467625899281, + "step": 36590, + "torque_loss": 0.19824115931987762 + }, + { + "epoch": 32.90467625899281, + "force_loss": 0.007875685580074787, + "step": 36590 + }, + { + "epoch": 32.9136690647482, + "grad_norm": 0.19740355014801025, + "learning_rate": 3.6132502211649544e-05, + "loss": 0.0203, + "step": 36600 + }, + { + "action_loss": 0.0032737914007157087, + "epoch": 32.9136690647482, + "step": 36600 + }, + { + "epoch": 32.9136690647482, + "step": 36600, + "torque_loss": 0.10628330707550049 + }, + { + "epoch": 32.9136690647482, + "force_loss": 0.0048421830870211124, + "step": 36600 + }, + { + "epoch": 32.922661870503596, + "grad_norm": 0.320834219455719, + "learning_rate": 3.610602761121975e-05, + "loss": 0.0165, + "step": 36610 + }, + { + "action_loss": 0.002689277520403266, + "epoch": 32.922661870503596, + "step": 36610 + }, + { + "epoch": 32.922661870503596, + "step": 36610, + "torque_loss": 0.15329383313655853 + }, + { + "epoch": 32.922661870503596, + "force_loss": 0.003482213243842125, + "step": 36610 + }, + { + "epoch": 32.931654676258994, + "grad_norm": 0.16296826303005219, + "learning_rate": 3.6079557231411897e-05, + "loss": 0.0182, + "step": 36620 + }, + { + "action_loss": 0.002387323882430792, + "epoch": 32.931654676258994, + "step": 36620 + }, + { + "epoch": 32.931654676258994, + "step": 36620, + "torque_loss": 0.1422547698020935 + }, + { + "epoch": 32.931654676258994, + "force_loss": 0.004744958132505417, + "step": 36620 + }, + { + "epoch": 32.94064748201439, + "grad_norm": 0.09802334755659103, + "learning_rate": 3.6053091080267035e-05, + "loss": 0.0156, + "step": 36630 + }, + { + "action_loss": 0.0044053904712200165, + "epoch": 32.94064748201439, + "step": 36630 + }, + { + "epoch": 32.94064748201439, + "step": 36630, + "torque_loss": 0.11917876452207565 + }, + { + "epoch": 32.94064748201439, + "force_loss": 0.003263379679992795, + "step": 36630 + }, + { + "epoch": 32.94964028776978, + "grad_norm": 0.09336693584918976, + "learning_rate": 3.602662916582483e-05, + "loss": 0.0156, + "step": 36640 + }, + { + "action_loss": 0.003043554723262787, + "epoch": 32.94964028776978, + "step": 36640 + }, + { + "epoch": 32.94964028776978, + "step": 36640, + "torque_loss": 0.12353932857513428 + }, + { + "epoch": 32.94964028776978, + "force_loss": 0.0018917560810223222, + "step": 36640 + }, + { + "epoch": 32.95863309352518, + "grad_norm": 0.14380191266536713, + "learning_rate": 3.600017149612375e-05, + "loss": 0.0169, + "step": 36650 + }, + { + "action_loss": 0.0009219881030730903, + "epoch": 32.95863309352518, + "step": 36650 + }, + { + "epoch": 32.95863309352518, + "step": 36650, + "torque_loss": 0.1801210641860962 + }, + { + "epoch": 32.95863309352518, + "force_loss": 0.0009728195145726204, + "step": 36650 + }, + { + "epoch": 32.96762589928058, + "grad_norm": 0.21092064678668976, + "learning_rate": 3.5973718079200935e-05, + "loss": 0.0199, + "step": 36660 + }, + { + "action_loss": 0.0006425574538297951, + "epoch": 32.96762589928058, + "step": 36660 + }, + { + "epoch": 32.96762589928058, + "step": 36660, + "torque_loss": 0.09892163425683975 + }, + { + "epoch": 32.96762589928058, + "force_loss": 0.0011274392018094659, + "step": 36660 + }, + { + "epoch": 32.976618705035975, + "grad_norm": 0.0832480788230896, + "learning_rate": 3.5947268923092216e-05, + "loss": 0.0157, + "step": 36670 + }, + { + "action_loss": 0.00119773019105196, + "epoch": 32.976618705035975, + "step": 36670 + }, + { + "epoch": 32.976618705035975, + "step": 36670, + "torque_loss": 0.1590159684419632 + }, + { + "epoch": 32.976618705035975, + "force_loss": 0.0012387711321935058, + "step": 36670 + }, + { + "epoch": 32.985611510791365, + "grad_norm": 0.13251672685146332, + "learning_rate": 3.592082403583216e-05, + "loss": 0.0166, + "step": 36680 + }, + { + "action_loss": 0.0008663971093483269, + "epoch": 32.985611510791365, + "step": 36680 + }, + { + "epoch": 32.985611510791365, + "step": 36680, + "torque_loss": 0.10590314865112305 + }, + { + "epoch": 32.985611510791365, + "force_loss": 0.0011033309856429696, + "step": 36680 + }, + { + "epoch": 32.99460431654676, + "grad_norm": 0.1287786364555359, + "learning_rate": 3.5894383425454004e-05, + "loss": 0.0166, + "step": 36690 + }, + { + "action_loss": 0.0021171323023736477, + "epoch": 32.99460431654676, + "step": 36690 + }, + { + "epoch": 32.99460431654676, + "step": 36690, + "torque_loss": 0.13737957179546356 + }, + { + "epoch": 32.99460431654676, + "force_loss": 0.0046308995224535465, + "step": 36690 + }, + { + "epoch": 33.00359712230216, + "grad_norm": 0.3398348391056061, + "learning_rate": 3.586794709998975e-05, + "loss": 0.0161, + "step": 36700 + }, + { + "action_loss": 0.006835767533630133, + "epoch": 33.00359712230216, + "step": 36700 + }, + { + "epoch": 33.00359712230216, + "step": 36700, + "torque_loss": 0.10759087651968002 + }, + { + "epoch": 33.00359712230216, + "force_loss": 0.008363459259271622, + "step": 36700 + }, + { + "epoch": 33.01258992805755, + "grad_norm": 0.19697205722332, + "learning_rate": 3.584151506747002e-05, + "loss": 0.0177, + "step": 36710 + }, + { + "action_loss": 0.0015691443113610148, + "epoch": 33.01258992805755, + "step": 36710 + }, + { + "epoch": 33.01258992805755, + "step": 36710, + "torque_loss": 0.11815029382705688 + }, + { + "epoch": 33.01258992805755, + "force_loss": 0.002755112946033478, + "step": 36710 + }, + { + "epoch": 33.02158273381295, + "grad_norm": 0.3054356575012207, + "learning_rate": 3.581508733592418e-05, + "loss": 0.0188, + "step": 36720 + }, + { + "action_loss": 0.010482266545295715, + "epoch": 33.02158273381295, + "step": 36720 + }, + { + "epoch": 33.02158273381295, + "step": 36720, + "torque_loss": 0.12076478451490402 + }, + { + "epoch": 33.02158273381295, + "force_loss": 0.009484922513365746, + "step": 36720 + }, + { + "epoch": 33.030575539568346, + "grad_norm": 0.14727911353111267, + "learning_rate": 3.5788663913380297e-05, + "loss": 0.0177, + "step": 36730 + }, + { + "action_loss": 0.005812238901853561, + "epoch": 33.030575539568346, + "step": 36730 + }, + { + "epoch": 33.030575539568346, + "step": 36730, + "torque_loss": 0.1283504217863083 + }, + { + "epoch": 33.030575539568346, + "force_loss": 0.006615789141505957, + "step": 36730 + }, + { + "epoch": 33.039568345323744, + "grad_norm": 0.13999532163143158, + "learning_rate": 3.576224480786506e-05, + "loss": 0.0233, + "step": 36740 + }, + { + "action_loss": 0.0031441580504179, + "epoch": 33.039568345323744, + "step": 36740 + }, + { + "epoch": 33.039568345323744, + "step": 36740, + "torque_loss": 0.09089095145463943 + }, + { + "epoch": 33.039568345323744, + "force_loss": 0.006952656898647547, + "step": 36740 + }, + { + "epoch": 33.048561151079134, + "grad_norm": 0.1500289887189865, + "learning_rate": 3.573583002740393e-05, + "loss": 0.0181, + "step": 36750 + }, + { + "action_loss": 0.009716532193124294, + "epoch": 33.048561151079134, + "step": 36750 + }, + { + "epoch": 33.048561151079134, + "step": 36750, + "torque_loss": 0.170123890042305 + }, + { + "epoch": 33.048561151079134, + "force_loss": 0.010109642520546913, + "step": 36750 + }, + { + "epoch": 33.05755395683453, + "grad_norm": 0.30803382396698, + "learning_rate": 3.570941958002103e-05, + "loss": 0.0187, + "step": 36760 + }, + { + "action_loss": 0.001115148887038231, + "epoch": 33.05755395683453, + "step": 36760 + }, + { + "epoch": 33.05755395683453, + "step": 36760, + "torque_loss": 0.11222013831138611 + }, + { + "epoch": 33.05755395683453, + "force_loss": 0.002676828531548381, + "step": 36760 + }, + { + "epoch": 33.06654676258993, + "grad_norm": 0.1865822970867157, + "learning_rate": 3.568301347373912e-05, + "loss": 0.0157, + "step": 36770 + }, + { + "action_loss": 0.0011608978966251016, + "epoch": 33.06654676258993, + "step": 36770 + }, + { + "epoch": 33.06654676258993, + "step": 36770, + "torque_loss": 0.1131066158413887 + }, + { + "epoch": 33.06654676258993, + "force_loss": 0.0014423249522224069, + "step": 36770 + }, + { + "epoch": 33.07553956834533, + "grad_norm": 0.11792761087417603, + "learning_rate": 3.5656611716579726e-05, + "loss": 0.017, + "step": 36780 + }, + { + "action_loss": 0.0028036274015903473, + "epoch": 33.07553956834533, + "step": 36780 + }, + { + "epoch": 33.07553956834533, + "step": 36780, + "torque_loss": 0.13723760843276978 + }, + { + "epoch": 33.07553956834533, + "force_loss": 0.004048797767609358, + "step": 36780 + }, + { + "epoch": 33.08453237410072, + "grad_norm": 0.1791292130947113, + "learning_rate": 3.5630214316562946e-05, + "loss": 0.0218, + "step": 36790 + }, + { + "action_loss": 0.001995891099795699, + "epoch": 33.08453237410072, + "step": 36790 + }, + { + "epoch": 33.08453237410072, + "step": 36790, + "torque_loss": 0.11438407748937607 + }, + { + "epoch": 33.08453237410072, + "force_loss": 0.003643640084192157, + "step": 36790 + }, + { + "epoch": 33.093525179856115, + "grad_norm": 0.1332855075597763, + "learning_rate": 3.560382128170766e-05, + "loss": 0.016, + "step": 36800 + }, + { + "action_loss": 0.06602529436349869, + "epoch": 33.093525179856115, + "step": 36800 + }, + { + "epoch": 33.093525179856115, + "step": 36800, + "torque_loss": 0.26105237007141113 + }, + { + "epoch": 33.093525179856115, + "force_loss": 0.05167825147509575, + "step": 36800 + }, + { + "epoch": 33.10251798561151, + "grad_norm": 0.07148727774620056, + "learning_rate": 3.5577432620031374e-05, + "loss": 0.0223, + "step": 36810 + }, + { + "action_loss": 0.002395481104031205, + "epoch": 33.10251798561151, + "step": 36810 + }, + { + "epoch": 33.10251798561151, + "step": 36810, + "torque_loss": 0.10932862758636475 + }, + { + "epoch": 33.10251798561151, + "force_loss": 0.003633117536082864, + "step": 36810 + }, + { + "epoch": 33.111510791366904, + "grad_norm": 0.1899031400680542, + "learning_rate": 3.5551048339550216e-05, + "loss": 0.0174, + "step": 36820 + }, + { + "action_loss": 0.008279651403427124, + "epoch": 33.111510791366904, + "step": 36820 + }, + { + "epoch": 33.111510791366904, + "step": 36820, + "torque_loss": 0.14984314143657684 + }, + { + "epoch": 33.111510791366904, + "force_loss": 0.0064571392722427845, + "step": 36820 + }, + { + "epoch": 33.1205035971223, + "grad_norm": 0.247861847281456, + "learning_rate": 3.55246684482791e-05, + "loss": 0.0195, + "step": 36830 + }, + { + "action_loss": 0.005978280212730169, + "epoch": 33.1205035971223, + "step": 36830 + }, + { + "epoch": 33.1205035971223, + "step": 36830, + "torque_loss": 0.09209439903497696 + }, + { + "epoch": 33.1205035971223, + "force_loss": 0.0068796114064753056, + "step": 36830 + }, + { + "epoch": 33.1294964028777, + "grad_norm": 0.16852621734142303, + "learning_rate": 3.5498292954231496e-05, + "loss": 0.0168, + "step": 36840 + }, + { + "action_loss": 0.0018792202463373542, + "epoch": 33.1294964028777, + "step": 36840 + }, + { + "epoch": 33.1294964028777, + "step": 36840, + "torque_loss": 0.15244042873382568 + }, + { + "epoch": 33.1294964028777, + "force_loss": 0.00158885947894305, + "step": 36840 + }, + { + "epoch": 33.138489208633096, + "grad_norm": 0.357898086309433, + "learning_rate": 3.54719218654196e-05, + "loss": 0.0196, + "step": 36850 + }, + { + "action_loss": 0.016745124012231827, + "epoch": 33.138489208633096, + "step": 36850 + }, + { + "epoch": 33.138489208633096, + "step": 36850, + "torque_loss": 0.09298679977655411 + }, + { + "epoch": 33.138489208633096, + "force_loss": 0.011909134685993195, + "step": 36850 + }, + { + "epoch": 33.14748201438849, + "grad_norm": 0.28340256214141846, + "learning_rate": 3.544555518985425e-05, + "loss": 0.0174, + "step": 36860 + }, + { + "action_loss": 0.002974806586280465, + "epoch": 33.14748201438849, + "step": 36860 + }, + { + "epoch": 33.14748201438849, + "step": 36860, + "torque_loss": 0.13511574268341064 + }, + { + "epoch": 33.14748201438849, + "force_loss": 0.0018620028859004378, + "step": 36860 + }, + { + "epoch": 33.156474820143885, + "grad_norm": 0.2921615540981293, + "learning_rate": 3.541919293554494e-05, + "loss": 0.0165, + "step": 36870 + }, + { + "action_loss": 0.0021552962716668844, + "epoch": 33.156474820143885, + "step": 36870 + }, + { + "epoch": 33.156474820143885, + "step": 36870, + "torque_loss": 0.13561786711215973 + }, + { + "epoch": 33.156474820143885, + "force_loss": 0.001641663839109242, + "step": 36870 + }, + { + "epoch": 33.16546762589928, + "grad_norm": 0.18524503707885742, + "learning_rate": 3.539283511049985e-05, + "loss": 0.0193, + "step": 36880 + }, + { + "action_loss": 0.00771550415083766, + "epoch": 33.16546762589928, + "step": 36880 + }, + { + "epoch": 33.16546762589928, + "step": 36880, + "torque_loss": 0.20462913811206818 + }, + { + "epoch": 33.16546762589928, + "force_loss": 0.006421716418117285, + "step": 36880 + }, + { + "epoch": 33.17446043165467, + "grad_norm": 0.24745787680149078, + "learning_rate": 3.5366481722725755e-05, + "loss": 0.0173, + "step": 36890 + }, + { + "action_loss": 0.01282097864896059, + "epoch": 33.17446043165467, + "step": 36890 + }, + { + "epoch": 33.17446043165467, + "step": 36890, + "torque_loss": 0.10666092485189438 + }, + { + "epoch": 33.17446043165467, + "force_loss": 0.022650517523288727, + "step": 36890 + }, + { + "epoch": 33.18345323741007, + "grad_norm": 0.21280813217163086, + "learning_rate": 3.534013278022816e-05, + "loss": 0.0163, + "step": 36900 + }, + { + "action_loss": 0.01911107450723648, + "epoch": 33.18345323741007, + "step": 36900 + }, + { + "epoch": 33.18345323741007, + "step": 36900, + "torque_loss": 0.16768568754196167 + }, + { + "epoch": 33.18345323741007, + "force_loss": 0.023493608459830284, + "step": 36900 + }, + { + "epoch": 33.19244604316547, + "grad_norm": 0.40644174814224243, + "learning_rate": 3.531378829101113e-05, + "loss": 0.0196, + "step": 36910 + }, + { + "action_loss": 0.004462155979126692, + "epoch": 33.19244604316547, + "step": 36910 + }, + { + "epoch": 33.19244604316547, + "step": 36910, + "torque_loss": 0.10983768850564957 + }, + { + "epoch": 33.19244604316547, + "force_loss": 0.005608848761767149, + "step": 36910 + }, + { + "epoch": 33.201438848920866, + "grad_norm": 0.2414204627275467, + "learning_rate": 3.528744826307746e-05, + "loss": 0.019, + "step": 36920 + }, + { + "action_loss": 0.002865339396521449, + "epoch": 33.201438848920866, + "step": 36920 + }, + { + "epoch": 33.201438848920866, + "step": 36920, + "torque_loss": 0.1108912006020546 + }, + { + "epoch": 33.201438848920866, + "force_loss": 0.005613286048173904, + "step": 36920 + }, + { + "epoch": 33.210431654676256, + "grad_norm": 0.2705686688423157, + "learning_rate": 3.5261112704428554e-05, + "loss": 0.018, + "step": 36930 + }, + { + "action_loss": 0.0017391294240951538, + "epoch": 33.210431654676256, + "step": 36930 + }, + { + "epoch": 33.210431654676256, + "step": 36930, + "torque_loss": 0.15713131427764893 + }, + { + "epoch": 33.210431654676256, + "force_loss": 0.0019209147430956364, + "step": 36930 + }, + { + "epoch": 33.219424460431654, + "grad_norm": 0.2855124771595001, + "learning_rate": 3.523478162306443e-05, + "loss": 0.0169, + "step": 36940 + }, + { + "action_loss": 0.0016624769195914268, + "epoch": 33.219424460431654, + "step": 36940 + }, + { + "epoch": 33.219424460431654, + "step": 36940, + "torque_loss": 0.1696600317955017 + }, + { + "epoch": 33.219424460431654, + "force_loss": 0.0020895209163427353, + "step": 36940 + }, + { + "epoch": 33.22841726618705, + "grad_norm": 0.136759951710701, + "learning_rate": 3.520845502698381e-05, + "loss": 0.0161, + "step": 36950 + }, + { + "action_loss": 0.001486244611442089, + "epoch": 33.22841726618705, + "step": 36950 + }, + { + "epoch": 33.22841726618705, + "step": 36950, + "torque_loss": 0.09340252727270126 + }, + { + "epoch": 33.22841726618705, + "force_loss": 0.0017930116737261415, + "step": 36950 + }, + { + "epoch": 33.23741007194245, + "grad_norm": 0.3832060694694519, + "learning_rate": 3.5182132924184005e-05, + "loss": 0.0169, + "step": 36960 + }, + { + "action_loss": 0.0057553499937057495, + "epoch": 33.23741007194245, + "step": 36960 + }, + { + "epoch": 33.23741007194245, + "step": 36960, + "torque_loss": 0.10780555009841919 + }, + { + "epoch": 33.23741007194245, + "force_loss": 0.007818729616701603, + "step": 36960 + }, + { + "epoch": 33.24640287769784, + "grad_norm": 0.458911269903183, + "learning_rate": 3.5155815322660966e-05, + "loss": 0.0195, + "step": 36970 + }, + { + "action_loss": 0.010337863117456436, + "epoch": 33.24640287769784, + "step": 36970 + }, + { + "epoch": 33.24640287769784, + "step": 36970, + "torque_loss": 0.1338314265012741 + }, + { + "epoch": 33.24640287769784, + "force_loss": 0.0112711600959301, + "step": 36970 + }, + { + "epoch": 33.25539568345324, + "grad_norm": 0.5410187840461731, + "learning_rate": 3.512950223040931e-05, + "loss": 0.0171, + "step": 36980 + }, + { + "action_loss": 0.005152761470526457, + "epoch": 33.25539568345324, + "step": 36980 + }, + { + "epoch": 33.25539568345324, + "step": 36980, + "torque_loss": 0.09719046205282211 + }, + { + "epoch": 33.25539568345324, + "force_loss": 0.004004388581961393, + "step": 36980 + }, + { + "epoch": 33.264388489208635, + "grad_norm": 0.3527100086212158, + "learning_rate": 3.5103193655422216e-05, + "loss": 0.0181, + "step": 36990 + }, + { + "action_loss": 0.0041131447069346905, + "epoch": 33.264388489208635, + "step": 36990 + }, + { + "epoch": 33.264388489208635, + "step": 36990, + "torque_loss": 0.10060495883226395 + }, + { + "epoch": 33.264388489208635, + "force_loss": 0.004139088559895754, + "step": 36990 + }, + { + "epoch": 33.273381294964025, + "grad_norm": 0.16072429716587067, + "learning_rate": 3.5076889605691596e-05, + "loss": 0.0176, + "step": 37000 + }, + { + "action_loss": 0.0035816740710288286, + "epoch": 33.273381294964025, + "step": 37000 + }, + { + "epoch": 33.273381294964025, + "step": 37000, + "torque_loss": 0.10252014547586441 + }, + { + "epoch": 33.273381294964025, + "force_loss": 0.002722412347793579, + "step": 37000 + }, + { + "epoch": 33.28237410071942, + "grad_norm": 0.17735706269741058, + "learning_rate": 3.505059008920787e-05, + "loss": 0.019, + "step": 37010 + }, + { + "action_loss": 0.00379945314489305, + "epoch": 33.28237410071942, + "step": 37010 + }, + { + "epoch": 33.28237410071942, + "step": 37010, + "torque_loss": 0.11861858516931534 + }, + { + "epoch": 33.28237410071942, + "force_loss": 0.0024994960986077785, + "step": 37010 + }, + { + "epoch": 33.29136690647482, + "grad_norm": 0.22336888313293457, + "learning_rate": 3.502429511396016e-05, + "loss": 0.0233, + "step": 37020 + }, + { + "action_loss": 0.0022373658139258623, + "epoch": 33.29136690647482, + "step": 37020 + }, + { + "epoch": 33.29136690647482, + "step": 37020, + "torque_loss": 0.0988149642944336 + }, + { + "epoch": 33.29136690647482, + "force_loss": 0.0013564390828832984, + "step": 37020 + }, + { + "epoch": 33.30035971223022, + "grad_norm": 0.20216836035251617, + "learning_rate": 3.4998004687936196e-05, + "loss": 0.0181, + "step": 37030 + }, + { + "action_loss": 0.0014282582560554147, + "epoch": 33.30035971223022, + "step": 37030 + }, + { + "epoch": 33.30035971223022, + "step": 37030, + "torque_loss": 0.10763565450906754 + }, + { + "epoch": 33.30035971223022, + "force_loss": 0.0018201001221314073, + "step": 37030 + }, + { + "epoch": 33.30935251798561, + "grad_norm": 0.22850529849529266, + "learning_rate": 3.497171881912229e-05, + "loss": 0.017, + "step": 37040 + }, + { + "action_loss": 0.003305848455056548, + "epoch": 33.30935251798561, + "step": 37040 + }, + { + "epoch": 33.30935251798561, + "step": 37040, + "torque_loss": 0.14173872768878937 + }, + { + "epoch": 33.30935251798561, + "force_loss": 0.0017273053526878357, + "step": 37040 + }, + { + "epoch": 33.318345323741006, + "grad_norm": 0.12240440398454666, + "learning_rate": 3.494543751550342e-05, + "loss": 0.0191, + "step": 37050 + }, + { + "action_loss": 0.0035281090531498194, + "epoch": 33.318345323741006, + "step": 37050 + }, + { + "epoch": 33.318345323741006, + "step": 37050, + "torque_loss": 0.10068196058273315 + }, + { + "epoch": 33.318345323741006, + "force_loss": 0.0063293371349573135, + "step": 37050 + }, + { + "epoch": 33.327338129496404, + "grad_norm": 0.3412223160266876, + "learning_rate": 3.491916078506313e-05, + "loss": 0.0195, + "step": 37060 + }, + { + "action_loss": 0.00252143875695765, + "epoch": 33.327338129496404, + "step": 37060 + }, + { + "epoch": 33.327338129496404, + "step": 37060, + "torque_loss": 0.08746298402547836 + }, + { + "epoch": 33.327338129496404, + "force_loss": 0.002379532204940915, + "step": 37060 + }, + { + "epoch": 33.3363309352518, + "grad_norm": 0.2112552970647812, + "learning_rate": 3.489288863578361e-05, + "loss": 0.0219, + "step": 37070 + }, + { + "action_loss": 0.007381019648164511, + "epoch": 33.3363309352518, + "step": 37070 + }, + { + "epoch": 33.3363309352518, + "step": 37070, + "torque_loss": 0.09913697838783264 + }, + { + "epoch": 33.3363309352518, + "force_loss": 0.008412917144596577, + "step": 37070 + }, + { + "epoch": 33.34532374100719, + "grad_norm": 0.20136091113090515, + "learning_rate": 3.4866621075645646e-05, + "loss": 0.0196, + "step": 37080 + }, + { + "action_loss": 0.0018523618346080184, + "epoch": 33.34532374100719, + "step": 37080 + }, + { + "epoch": 33.34532374100719, + "step": 37080, + "torque_loss": 0.12294883280992508 + }, + { + "epoch": 33.34532374100719, + "force_loss": 0.003359299385920167, + "step": 37080 + }, + { + "epoch": 33.35431654676259, + "grad_norm": 0.2694815695285797, + "learning_rate": 3.4840358112628614e-05, + "loss": 0.0189, + "step": 37090 + }, + { + "action_loss": 0.007186661008745432, + "epoch": 33.35431654676259, + "step": 37090 + }, + { + "epoch": 33.35431654676259, + "step": 37090, + "torque_loss": 0.12499719858169556 + }, + { + "epoch": 33.35431654676259, + "force_loss": 0.01146344467997551, + "step": 37090 + }, + { + "epoch": 33.36330935251799, + "grad_norm": 0.17808374762535095, + "learning_rate": 3.481409975471053e-05, + "loss": 0.0151, + "step": 37100 + }, + { + "action_loss": 0.0011240991298109293, + "epoch": 33.36330935251799, + "step": 37100 + }, + { + "epoch": 33.36330935251799, + "step": 37100, + "torque_loss": 0.08603618294000626 + }, + { + "epoch": 33.36330935251799, + "force_loss": 0.0016839904710650444, + "step": 37100 + }, + { + "epoch": 33.37230215827338, + "grad_norm": 0.18893858790397644, + "learning_rate": 3.4787846009867986e-05, + "loss": 0.0204, + "step": 37110 + }, + { + "action_loss": 0.005135738756507635, + "epoch": 33.37230215827338, + "step": 37110 + }, + { + "epoch": 33.37230215827338, + "step": 37110, + "torque_loss": 0.09564564377069473 + }, + { + "epoch": 33.37230215827338, + "force_loss": 0.01340966671705246, + "step": 37110 + }, + { + "epoch": 33.381294964028775, + "grad_norm": 0.38284650444984436, + "learning_rate": 3.476159688607615e-05, + "loss": 0.0196, + "step": 37120 + }, + { + "action_loss": 0.001931976992636919, + "epoch": 33.381294964028775, + "step": 37120 + }, + { + "epoch": 33.381294964028775, + "step": 37120, + "torque_loss": 0.0948227047920227 + }, + { + "epoch": 33.381294964028775, + "force_loss": 0.00308020762167871, + "step": 37120 + }, + { + "epoch": 33.39028776978417, + "grad_norm": 0.2643629014492035, + "learning_rate": 3.4735352391308854e-05, + "loss": 0.0182, + "step": 37130 + }, + { + "action_loss": 0.0031101114582270384, + "epoch": 33.39028776978417, + "step": 37130 + }, + { + "epoch": 33.39028776978417, + "step": 37130, + "torque_loss": 0.10445722192525864 + }, + { + "epoch": 33.39028776978417, + "force_loss": 0.004710142035037279, + "step": 37130 + }, + { + "epoch": 33.39928057553957, + "grad_norm": 0.273506224155426, + "learning_rate": 3.4709112533538446e-05, + "loss": 0.0182, + "step": 37140 + }, + { + "action_loss": 0.001914509921334684, + "epoch": 33.39928057553957, + "step": 37140 + }, + { + "epoch": 33.39928057553957, + "step": 37140, + "torque_loss": 0.11653489619493484 + }, + { + "epoch": 33.39928057553957, + "force_loss": 0.002778188558295369, + "step": 37140 + }, + { + "epoch": 33.40827338129496, + "grad_norm": 0.20317287743091583, + "learning_rate": 3.4682877320735934e-05, + "loss": 0.013, + "step": 37150 + }, + { + "action_loss": 0.0028175953775644302, + "epoch": 33.40827338129496, + "step": 37150 + }, + { + "epoch": 33.40827338129496, + "step": 37150, + "torque_loss": 0.13847455382347107 + }, + { + "epoch": 33.40827338129496, + "force_loss": 0.0015900194412097335, + "step": 37150 + }, + { + "epoch": 33.41726618705036, + "grad_norm": 0.16540081799030304, + "learning_rate": 3.465664676087085e-05, + "loss": 0.0185, + "step": 37160 + }, + { + "action_loss": 0.006076851859688759, + "epoch": 33.41726618705036, + "step": 37160 + }, + { + "epoch": 33.41726618705036, + "step": 37160, + "torque_loss": 0.1168292760848999 + }, + { + "epoch": 33.41726618705036, + "force_loss": 0.004960244055837393, + "step": 37160 + }, + { + "epoch": 33.42625899280576, + "grad_norm": 0.12980490922927856, + "learning_rate": 3.463042086191136e-05, + "loss": 0.0184, + "step": 37170 + }, + { + "action_loss": 0.00509148882701993, + "epoch": 33.42625899280576, + "step": 37170 + }, + { + "epoch": 33.42625899280576, + "step": 37170, + "torque_loss": 0.14884944260120392 + }, + { + "epoch": 33.42625899280576, + "force_loss": 0.006237113382667303, + "step": 37170 + }, + { + "epoch": 33.435251798561154, + "grad_norm": 0.2110956907272339, + "learning_rate": 3.460419963182423e-05, + "loss": 0.0182, + "step": 37180 + }, + { + "action_loss": 0.003542988793924451, + "epoch": 33.435251798561154, + "step": 37180 + }, + { + "epoch": 33.435251798561154, + "step": 37180, + "torque_loss": 0.1327694058418274 + }, + { + "epoch": 33.435251798561154, + "force_loss": 0.007422368507832289, + "step": 37180 + }, + { + "epoch": 33.444244604316545, + "grad_norm": 0.12511028349399567, + "learning_rate": 3.457798307857473e-05, + "loss": 0.017, + "step": 37190 + }, + { + "action_loss": 0.0014243010664358735, + "epoch": 33.444244604316545, + "step": 37190 + }, + { + "epoch": 33.444244604316545, + "step": 37190, + "torque_loss": 0.07623125612735748 + }, + { + "epoch": 33.444244604316545, + "force_loss": 0.001706250011920929, + "step": 37190 + }, + { + "epoch": 33.45323741007194, + "grad_norm": 0.32506227493286133, + "learning_rate": 3.455177121012678e-05, + "loss": 0.0182, + "step": 37200 + }, + { + "action_loss": 0.0025057748425751925, + "epoch": 33.45323741007194, + "step": 37200 + }, + { + "epoch": 33.45323741007194, + "step": 37200, + "torque_loss": 0.0937427505850792 + }, + { + "epoch": 33.45323741007194, + "force_loss": 0.002118638949468732, + "step": 37200 + }, + { + "epoch": 33.46223021582734, + "grad_norm": 0.2868201434612274, + "learning_rate": 3.452556403444285e-05, + "loss": 0.0186, + "step": 37210 + }, + { + "action_loss": 0.0016576891066506505, + "epoch": 33.46223021582734, + "step": 37210 + }, + { + "epoch": 33.46223021582734, + "step": 37210, + "torque_loss": 0.13382239639759064 + }, + { + "epoch": 33.46223021582734, + "force_loss": 0.0017278121085837483, + "step": 37210 + }, + { + "epoch": 33.47122302158273, + "grad_norm": 0.45674651861190796, + "learning_rate": 3.4499361559483975e-05, + "loss": 0.0177, + "step": 37220 + }, + { + "action_loss": 0.0016105164540931582, + "epoch": 33.47122302158273, + "step": 37220 + }, + { + "epoch": 33.47122302158273, + "step": 37220, + "torque_loss": 0.09298282116651535 + }, + { + "epoch": 33.47122302158273, + "force_loss": 0.00311430380679667, + "step": 37220 + }, + { + "epoch": 33.48021582733813, + "grad_norm": 0.11264558881521225, + "learning_rate": 3.44731637932098e-05, + "loss": 0.0154, + "step": 37230 + }, + { + "action_loss": 0.002587829949334264, + "epoch": 33.48021582733813, + "step": 37230 + }, + { + "epoch": 33.48021582733813, + "step": 37230, + "torque_loss": 0.17612791061401367 + }, + { + "epoch": 33.48021582733813, + "force_loss": 0.0030575525015592575, + "step": 37230 + }, + { + "epoch": 33.489208633093526, + "grad_norm": 0.13175669312477112, + "learning_rate": 3.44469707435785e-05, + "loss": 0.0159, + "step": 37240 + }, + { + "action_loss": 0.0034480781760066748, + "epoch": 33.489208633093526, + "step": 37240 + }, + { + "epoch": 33.489208633093526, + "step": 37240, + "torque_loss": 0.12548349797725677 + }, + { + "epoch": 33.489208633093526, + "force_loss": 0.004469641018658876, + "step": 37240 + }, + { + "epoch": 33.49820143884892, + "grad_norm": 0.20179106295108795, + "learning_rate": 3.4420782418546835e-05, + "loss": 0.0169, + "step": 37250 + }, + { + "action_loss": 0.004843270871788263, + "epoch": 33.49820143884892, + "step": 37250 + }, + { + "epoch": 33.49820143884892, + "step": 37250, + "torque_loss": 0.12337341904640198 + }, + { + "epoch": 33.49820143884892, + "force_loss": 0.004069761838763952, + "step": 37250 + }, + { + "epoch": 33.507194244604314, + "grad_norm": 0.15321849286556244, + "learning_rate": 3.439459882607012e-05, + "loss": 0.0159, + "step": 37260 + }, + { + "action_loss": 0.0017042580293491483, + "epoch": 33.507194244604314, + "step": 37260 + }, + { + "epoch": 33.507194244604314, + "step": 37260, + "torque_loss": 0.15580761432647705 + }, + { + "epoch": 33.507194244604314, + "force_loss": 0.0032556771766394377, + "step": 37260 + }, + { + "epoch": 33.51618705035971, + "grad_norm": 0.596222460269928, + "learning_rate": 3.436841997410225e-05, + "loss": 0.0166, + "step": 37270 + }, + { + "action_loss": 0.0015397807583212852, + "epoch": 33.51618705035971, + "step": 37270 + }, + { + "epoch": 33.51618705035971, + "step": 37270, + "torque_loss": 0.08465570211410522 + }, + { + "epoch": 33.51618705035971, + "force_loss": 0.005037181545048952, + "step": 37270 + }, + { + "epoch": 33.52517985611511, + "grad_norm": 0.13629741966724396, + "learning_rate": 3.434224587059567e-05, + "loss": 0.0163, + "step": 37280 + }, + { + "action_loss": 0.00683363014832139, + "epoch": 33.52517985611511, + "step": 37280 + }, + { + "epoch": 33.52517985611511, + "step": 37280, + "torque_loss": 0.1719367504119873 + }, + { + "epoch": 33.52517985611511, + "force_loss": 0.0065572988241910934, + "step": 37280 + }, + { + "epoch": 33.53417266187051, + "grad_norm": 0.18977464735507965, + "learning_rate": 3.431607652350136e-05, + "loss": 0.0165, + "step": 37290 + }, + { + "action_loss": 0.004459542687982321, + "epoch": 33.53417266187051, + "step": 37290 + }, + { + "epoch": 33.53417266187051, + "step": 37290, + "torque_loss": 0.15703324973583221 + }, + { + "epoch": 33.53417266187051, + "force_loss": 0.005290301516652107, + "step": 37290 + }, + { + "epoch": 33.5431654676259, + "grad_norm": 0.16229213774204254, + "learning_rate": 3.428991194076891e-05, + "loss": 0.0167, + "step": 37300 + }, + { + "action_loss": 0.0021986865904182196, + "epoch": 33.5431654676259, + "step": 37300 + }, + { + "epoch": 33.5431654676259, + "step": 37300, + "torque_loss": 0.06772855669260025 + }, + { + "epoch": 33.5431654676259, + "force_loss": 0.0009641043725423515, + "step": 37300 + }, + { + "epoch": 33.552158273381295, + "grad_norm": 0.5432208776473999, + "learning_rate": 3.4263752130346394e-05, + "loss": 0.0154, + "step": 37310 + }, + { + "action_loss": 0.0035273574758321047, + "epoch": 33.552158273381295, + "step": 37310 + }, + { + "epoch": 33.552158273381295, + "step": 37310, + "torque_loss": 0.0995974913239479 + }, + { + "epoch": 33.552158273381295, + "force_loss": 0.002668227069079876, + "step": 37310 + }, + { + "epoch": 33.56115107913669, + "grad_norm": 0.2932492196559906, + "learning_rate": 3.4237597100180515e-05, + "loss": 0.0173, + "step": 37320 + }, + { + "action_loss": 0.0009626168757677078, + "epoch": 33.56115107913669, + "step": 37320 + }, + { + "epoch": 33.56115107913669, + "step": 37320, + "torque_loss": 0.09957525879144669 + }, + { + "epoch": 33.56115107913669, + "force_loss": 0.0013694021617993712, + "step": 37320 + }, + { + "epoch": 33.57014388489208, + "grad_norm": 0.39091816544532776, + "learning_rate": 3.4211446858216427e-05, + "loss": 0.0207, + "step": 37330 + }, + { + "action_loss": 0.00854301918298006, + "epoch": 33.57014388489208, + "step": 37330 + }, + { + "epoch": 33.57014388489208, + "step": 37330, + "torque_loss": 0.09822949022054672 + }, + { + "epoch": 33.57014388489208, + "force_loss": 0.010932791978120804, + "step": 37330 + }, + { + "epoch": 33.57913669064748, + "grad_norm": 0.3789336085319519, + "learning_rate": 3.4185301412397915e-05, + "loss": 0.0281, + "step": 37340 + }, + { + "action_loss": 0.003750154748558998, + "epoch": 33.57913669064748, + "step": 37340 + }, + { + "epoch": 33.57913669064748, + "step": 37340, + "torque_loss": 0.11640580743551254 + }, + { + "epoch": 33.57913669064748, + "force_loss": 0.006120224948972464, + "step": 37340 + }, + { + "epoch": 33.58812949640288, + "grad_norm": 0.125992089509964, + "learning_rate": 3.415916077066729e-05, + "loss": 0.0211, + "step": 37350 + }, + { + "action_loss": 0.0021435495000332594, + "epoch": 33.58812949640288, + "step": 37350 + }, + { + "epoch": 33.58812949640288, + "step": 37350, + "torque_loss": 0.11021417379379272 + }, + { + "epoch": 33.58812949640288, + "force_loss": 0.002189154038205743, + "step": 37350 + }, + { + "epoch": 33.597122302158276, + "grad_norm": 0.2969815135002136, + "learning_rate": 3.413302494096535e-05, + "loss": 0.015, + "step": 37360 + }, + { + "action_loss": 0.005637716967612505, + "epoch": 33.597122302158276, + "step": 37360 + }, + { + "epoch": 33.597122302158276, + "step": 37360, + "torque_loss": 0.15141957998275757 + }, + { + "epoch": 33.597122302158276, + "force_loss": 0.009561696089804173, + "step": 37360 + }, + { + "epoch": 33.606115107913666, + "grad_norm": 0.1678556352853775, + "learning_rate": 3.410689393123151e-05, + "loss": 0.0179, + "step": 37370 + }, + { + "action_loss": 0.003360767848789692, + "epoch": 33.606115107913666, + "step": 37370 + }, + { + "epoch": 33.606115107913666, + "step": 37370, + "torque_loss": 0.1640554666519165 + }, + { + "epoch": 33.606115107913666, + "force_loss": 0.002035049954429269, + "step": 37370 + }, + { + "epoch": 33.615107913669064, + "grad_norm": 0.25882089138031006, + "learning_rate": 3.408076774940364e-05, + "loss": 0.0188, + "step": 37380 + }, + { + "action_loss": 0.004847631324082613, + "epoch": 33.615107913669064, + "step": 37380 + }, + { + "epoch": 33.615107913669064, + "step": 37380, + "torque_loss": 0.09756847470998764 + }, + { + "epoch": 33.615107913669064, + "force_loss": 0.004225565120577812, + "step": 37380 + }, + { + "epoch": 33.62410071942446, + "grad_norm": 0.3377942740917206, + "learning_rate": 3.40546464034182e-05, + "loss": 0.0178, + "step": 37390 + }, + { + "action_loss": 0.0038517692591995, + "epoch": 33.62410071942446, + "step": 37390 + }, + { + "epoch": 33.62410071942446, + "step": 37390, + "torque_loss": 0.12156283110380173 + }, + { + "epoch": 33.62410071942446, + "force_loss": 0.003442132845520973, + "step": 37390 + }, + { + "epoch": 33.63309352517986, + "grad_norm": 0.08597448468208313, + "learning_rate": 3.4028529901210185e-05, + "loss": 0.0227, + "step": 37400 + }, + { + "action_loss": 0.008343946188688278, + "epoch": 33.63309352517986, + "step": 37400 + }, + { + "epoch": 33.63309352517986, + "step": 37400, + "torque_loss": 0.13320598006248474 + }, + { + "epoch": 33.63309352517986, + "force_loss": 0.0168051365762949, + "step": 37400 + }, + { + "epoch": 33.64208633093525, + "grad_norm": 0.17610159516334534, + "learning_rate": 3.4002418250713086e-05, + "loss": 0.0213, + "step": 37410 + }, + { + "action_loss": 0.003728461218997836, + "epoch": 33.64208633093525, + "step": 37410 + }, + { + "epoch": 33.64208633093525, + "step": 37410, + "torque_loss": 0.12109140306711197 + }, + { + "epoch": 33.64208633093525, + "force_loss": 0.00312162097543478, + "step": 37410 + }, + { + "epoch": 33.65107913669065, + "grad_norm": 0.2590765655040741, + "learning_rate": 3.3976311459858936e-05, + "loss": 0.0155, + "step": 37420 + }, + { + "action_loss": 0.0029412468429654837, + "epoch": 33.65107913669065, + "step": 37420 + }, + { + "epoch": 33.65107913669065, + "step": 37420, + "torque_loss": 0.17498338222503662 + }, + { + "epoch": 33.65107913669065, + "force_loss": 0.0039038872346282005, + "step": 37420 + }, + { + "epoch": 33.660071942446045, + "grad_norm": 0.33347541093826294, + "learning_rate": 3.395020953657826e-05, + "loss": 0.0179, + "step": 37430 + }, + { + "action_loss": 0.0020078737288713455, + "epoch": 33.660071942446045, + "step": 37430 + }, + { + "epoch": 33.660071942446045, + "step": 37430, + "torque_loss": 0.14252640306949615 + }, + { + "epoch": 33.660071942446045, + "force_loss": 0.007143103983253241, + "step": 37430 + }, + { + "epoch": 33.669064748201436, + "grad_norm": 0.15101240575313568, + "learning_rate": 3.3924112488800165e-05, + "loss": 0.0161, + "step": 37440 + }, + { + "action_loss": 0.0037033017724752426, + "epoch": 33.669064748201436, + "step": 37440 + }, + { + "epoch": 33.669064748201436, + "step": 37440, + "torque_loss": 0.12488061189651489 + }, + { + "epoch": 33.669064748201436, + "force_loss": 0.002581848530098796, + "step": 37440 + }, + { + "epoch": 33.67805755395683, + "grad_norm": 0.12471426278352737, + "learning_rate": 3.389802032445225e-05, + "loss": 0.0162, + "step": 37450 + }, + { + "action_loss": 0.0028155117761343718, + "epoch": 33.67805755395683, + "step": 37450 + }, + { + "epoch": 33.67805755395683, + "step": 37450, + "torque_loss": 0.09916329383850098 + }, + { + "epoch": 33.67805755395683, + "force_loss": 0.003710748627781868, + "step": 37450 + }, + { + "epoch": 33.68705035971223, + "grad_norm": 0.20393705368041992, + "learning_rate": 3.38719330514606e-05, + "loss": 0.0158, + "step": 37460 + }, + { + "action_loss": 0.0023817417677491903, + "epoch": 33.68705035971223, + "step": 37460 + }, + { + "epoch": 33.68705035971223, + "step": 37460, + "torque_loss": 0.11883729696273804 + }, + { + "epoch": 33.68705035971223, + "force_loss": 0.0021876280661672354, + "step": 37460 + }, + { + "epoch": 33.69604316546763, + "grad_norm": 0.3987010717391968, + "learning_rate": 3.3845850677749866e-05, + "loss": 0.0183, + "step": 37470 + }, + { + "action_loss": 0.010615058243274689, + "epoch": 33.69604316546763, + "step": 37470 + }, + { + "epoch": 33.69604316546763, + "step": 37470, + "torque_loss": 0.15040643513202667 + }, + { + "epoch": 33.69604316546763, + "force_loss": 0.009057716466486454, + "step": 37470 + }, + { + "epoch": 33.70503597122302, + "grad_norm": 0.3651612102985382, + "learning_rate": 3.3819773211243157e-05, + "loss": 0.0172, + "step": 37480 + }, + { + "action_loss": 0.002133977832272649, + "epoch": 33.70503597122302, + "step": 37480 + }, + { + "epoch": 33.70503597122302, + "step": 37480, + "torque_loss": 0.1405663937330246 + }, + { + "epoch": 33.70503597122302, + "force_loss": 0.007784413173794746, + "step": 37480 + }, + { + "epoch": 33.71402877697842, + "grad_norm": 0.10498624294996262, + "learning_rate": 3.379370065986213e-05, + "loss": 0.0167, + "step": 37490 + }, + { + "action_loss": 0.0021034483797848225, + "epoch": 33.71402877697842, + "step": 37490 + }, + { + "epoch": 33.71402877697842, + "step": 37490, + "torque_loss": 0.0771850124001503 + }, + { + "epoch": 33.71402877697842, + "force_loss": 0.004364440683275461, + "step": 37490 + }, + { + "epoch": 33.723021582733814, + "grad_norm": 0.14413706958293915, + "learning_rate": 3.3767633031526955e-05, + "loss": 0.0165, + "step": 37500 + }, + { + "action_loss": 0.00519385002553463, + "epoch": 33.723021582733814, + "step": 37500 + }, + { + "epoch": 33.723021582733814, + "step": 37500, + "torque_loss": 0.1302100270986557 + }, + { + "epoch": 33.723021582733814, + "force_loss": 0.014179304242134094, + "step": 37500 + }, + { + "epoch": 33.73201438848921, + "grad_norm": 0.13210685551166534, + "learning_rate": 3.374157033415626e-05, + "loss": 0.0179, + "step": 37510 + }, + { + "action_loss": 0.007926639169454575, + "epoch": 33.73201438848921, + "step": 37510 + }, + { + "epoch": 33.73201438848921, + "step": 37510, + "torque_loss": 0.1363096535205841 + }, + { + "epoch": 33.73201438848921, + "force_loss": 0.016951395198702812, + "step": 37510 + }, + { + "epoch": 33.7410071942446, + "grad_norm": 0.3256091773509979, + "learning_rate": 3.371551257566723e-05, + "loss": 0.0167, + "step": 37520 + }, + { + "action_loss": 0.007042469456791878, + "epoch": 33.7410071942446, + "step": 37520 + }, + { + "epoch": 33.7410071942446, + "step": 37520, + "torque_loss": 0.14798277616500854 + }, + { + "epoch": 33.7410071942446, + "force_loss": 0.007989879697561264, + "step": 37520 + }, + { + "epoch": 33.75, + "grad_norm": 0.19257792830467224, + "learning_rate": 3.36894597639755e-05, + "loss": 0.0168, + "step": 37530 + }, + { + "action_loss": 0.007201740052551031, + "epoch": 33.75, + "step": 37530 + }, + { + "epoch": 33.75, + "step": 37530, + "torque_loss": 0.16540561616420746 + }, + { + "epoch": 33.75, + "force_loss": 0.012398049235343933, + "step": 37530 + }, + { + "epoch": 33.7589928057554, + "grad_norm": 0.2116713672876358, + "learning_rate": 3.366341190699523e-05, + "loss": 0.0169, + "step": 37540 + }, + { + "action_loss": 0.0036671289708465338, + "epoch": 33.7589928057554, + "step": 37540 + }, + { + "epoch": 33.7589928057554, + "step": 37540, + "torque_loss": 0.14058664441108704 + }, + { + "epoch": 33.7589928057554, + "force_loss": 0.010138331912457943, + "step": 37540 + }, + { + "epoch": 33.76798561151079, + "grad_norm": 0.32120534777641296, + "learning_rate": 3.36373690126391e-05, + "loss": 0.0168, + "step": 37550 + }, + { + "action_loss": 0.0008958268444985151, + "epoch": 33.76798561151079, + "step": 37550 + }, + { + "epoch": 33.76798561151079, + "step": 37550, + "torque_loss": 0.1516445428133011 + }, + { + "epoch": 33.76798561151079, + "force_loss": 0.0012197787873446941, + "step": 37550 + }, + { + "epoch": 33.776978417266186, + "grad_norm": 0.3115787208080292, + "learning_rate": 3.3611331088818234e-05, + "loss": 0.0181, + "step": 37560 + }, + { + "action_loss": 0.002802715403959155, + "epoch": 33.776978417266186, + "step": 37560 + }, + { + "epoch": 33.776978417266186, + "step": 37560, + "torque_loss": 0.11129637807607651 + }, + { + "epoch": 33.776978417266186, + "force_loss": 0.004637276753783226, + "step": 37560 + }, + { + "epoch": 33.78597122302158, + "grad_norm": 0.09075756371021271, + "learning_rate": 3.3585298143442265e-05, + "loss": 0.0178, + "step": 37570 + }, + { + "action_loss": 0.0008286790107376873, + "epoch": 33.78597122302158, + "step": 37570 + }, + { + "epoch": 33.78597122302158, + "step": 37570, + "torque_loss": 0.14661164581775665 + }, + { + "epoch": 33.78597122302158, + "force_loss": 0.0009299348457716405, + "step": 37570 + }, + { + "epoch": 33.79496402877698, + "grad_norm": 0.23820370435714722, + "learning_rate": 3.35592701844193e-05, + "loss": 0.0186, + "step": 37580 + }, + { + "action_loss": 0.0022285699378699064, + "epoch": 33.79496402877698, + "step": 37580 + }, + { + "epoch": 33.79496402877698, + "step": 37580, + "torque_loss": 0.1465781182050705 + }, + { + "epoch": 33.79496402877698, + "force_loss": 0.0012841963907703757, + "step": 37580 + }, + { + "epoch": 33.80395683453237, + "grad_norm": 0.2742636501789093, + "learning_rate": 3.353324721965596e-05, + "loss": 0.0208, + "step": 37590 + }, + { + "action_loss": 0.0014164196327328682, + "epoch": 33.80395683453237, + "step": 37590 + }, + { + "epoch": 33.80395683453237, + "step": 37590, + "torque_loss": 0.15090857446193695 + }, + { + "epoch": 33.80395683453237, + "force_loss": 0.0011573833180591464, + "step": 37590 + }, + { + "epoch": 33.81294964028777, + "grad_norm": 0.39198556542396545, + "learning_rate": 3.350722925705736e-05, + "loss": 0.0151, + "step": 37600 + }, + { + "action_loss": 0.008726746775209904, + "epoch": 33.81294964028777, + "step": 37600 + }, + { + "epoch": 33.81294964028777, + "step": 37600, + "torque_loss": 0.14815856516361237 + }, + { + "epoch": 33.81294964028777, + "force_loss": 0.007682301104068756, + "step": 37600 + }, + { + "epoch": 33.82194244604317, + "grad_norm": 0.15155120193958282, + "learning_rate": 3.348121630452703e-05, + "loss": 0.0201, + "step": 37610 + }, + { + "action_loss": 0.005731402430683374, + "epoch": 33.82194244604317, + "step": 37610 + }, + { + "epoch": 33.82194244604317, + "step": 37610, + "torque_loss": 0.16444124281406403 + }, + { + "epoch": 33.82194244604317, + "force_loss": 0.007580975536257029, + "step": 37610 + }, + { + "epoch": 33.830935251798564, + "grad_norm": 0.1278384029865265, + "learning_rate": 3.3455208369967044e-05, + "loss": 0.0169, + "step": 37620 + }, + { + "action_loss": 0.0018593333661556244, + "epoch": 33.830935251798564, + "step": 37620 + }, + { + "epoch": 33.830935251798564, + "step": 37620, + "torque_loss": 0.19654791057109833 + }, + { + "epoch": 33.830935251798564, + "force_loss": 0.0014461595565080643, + "step": 37620 + }, + { + "epoch": 33.839928057553955, + "grad_norm": 0.22377993166446686, + "learning_rate": 3.34292054612779e-05, + "loss": 0.0176, + "step": 37630 + }, + { + "action_loss": 0.00334533560089767, + "epoch": 33.839928057553955, + "step": 37630 + }, + { + "epoch": 33.839928057553955, + "step": 37630, + "torque_loss": 0.1333996206521988 + }, + { + "epoch": 33.839928057553955, + "force_loss": 0.0028983645606786013, + "step": 37630 + }, + { + "epoch": 33.84892086330935, + "grad_norm": 0.20579461753368378, + "learning_rate": 3.340320758635861e-05, + "loss": 0.0173, + "step": 37640 + }, + { + "action_loss": 0.0016261772252619267, + "epoch": 33.84892086330935, + "step": 37640 + }, + { + "epoch": 33.84892086330935, + "step": 37640, + "torque_loss": 0.10516867786645889 + }, + { + "epoch": 33.84892086330935, + "force_loss": 0.001366125768981874, + "step": 37640 + }, + { + "epoch": 33.85791366906475, + "grad_norm": 0.3685949146747589, + "learning_rate": 3.337721475310666e-05, + "loss": 0.0168, + "step": 37650 + }, + { + "action_loss": 0.002672509290277958, + "epoch": 33.85791366906475, + "step": 37650 + }, + { + "epoch": 33.85791366906475, + "step": 37650, + "torque_loss": 0.12032388895750046 + }, + { + "epoch": 33.85791366906475, + "force_loss": 0.0033669155091047287, + "step": 37650 + }, + { + "epoch": 33.86690647482014, + "grad_norm": 0.15373122692108154, + "learning_rate": 3.335122696941795e-05, + "loss": 0.0156, + "step": 37660 + }, + { + "action_loss": 0.002857193350791931, + "epoch": 33.86690647482014, + "step": 37660 + }, + { + "epoch": 33.86690647482014, + "step": 37660, + "torque_loss": 0.11864816397428513 + }, + { + "epoch": 33.86690647482014, + "force_loss": 0.0032869998831301928, + "step": 37660 + }, + { + "epoch": 33.87589928057554, + "grad_norm": 0.6034438014030457, + "learning_rate": 3.332524424318692e-05, + "loss": 0.017, + "step": 37670 + }, + { + "action_loss": 0.000992569955997169, + "epoch": 33.87589928057554, + "step": 37670 + }, + { + "epoch": 33.87589928057554, + "step": 37670, + "torque_loss": 0.12354931980371475 + }, + { + "epoch": 33.87589928057554, + "force_loss": 0.002155066467821598, + "step": 37670 + }, + { + "epoch": 33.884892086330936, + "grad_norm": 0.18322548270225525, + "learning_rate": 3.32992665823064e-05, + "loss": 0.016, + "step": 37680 + }, + { + "action_loss": 0.006615945603698492, + "epoch": 33.884892086330936, + "step": 37680 + }, + { + "epoch": 33.884892086330936, + "step": 37680, + "torque_loss": 0.08322913199663162 + }, + { + "epoch": 33.884892086330936, + "force_loss": 0.01546612661331892, + "step": 37680 + }, + { + "epoch": 33.893884892086334, + "grad_norm": 0.2370956689119339, + "learning_rate": 3.327329399466774e-05, + "loss": 0.0187, + "step": 37690 + }, + { + "action_loss": 0.0012947594514116645, + "epoch": 33.893884892086334, + "step": 37690 + }, + { + "epoch": 33.893884892086334, + "step": 37690, + "torque_loss": 0.11286928504705429 + }, + { + "epoch": 33.893884892086334, + "force_loss": 0.001266099396161735, + "step": 37690 + }, + { + "epoch": 33.902877697841724, + "grad_norm": 0.3126492500305176, + "learning_rate": 3.324732648816072e-05, + "loss": 0.017, + "step": 37700 + }, + { + "action_loss": 0.002898208796977997, + "epoch": 33.902877697841724, + "step": 37700 + }, + { + "epoch": 33.902877697841724, + "step": 37700, + "torque_loss": 0.1359972506761551 + }, + { + "epoch": 33.902877697841724, + "force_loss": 0.0015772896585986018, + "step": 37700 + }, + { + "epoch": 33.91187050359712, + "grad_norm": 0.29794228076934814, + "learning_rate": 3.322136407067358e-05, + "loss": 0.0151, + "step": 37710 + }, + { + "action_loss": 0.0062476820312440395, + "epoch": 33.91187050359712, + "step": 37710 + }, + { + "epoch": 33.91187050359712, + "step": 37710, + "torque_loss": 0.13420605659484863 + }, + { + "epoch": 33.91187050359712, + "force_loss": 0.02077285759150982, + "step": 37710 + }, + { + "epoch": 33.92086330935252, + "grad_norm": 0.2918078303337097, + "learning_rate": 3.3195406750093036e-05, + "loss": 0.0168, + "step": 37720 + }, + { + "action_loss": 0.0014831653097644448, + "epoch": 33.92086330935252, + "step": 37720 + }, + { + "epoch": 33.92086330935252, + "step": 37720, + "torque_loss": 0.09238388389348984 + }, + { + "epoch": 33.92086330935252, + "force_loss": 0.0015495987609028816, + "step": 37720 + }, + { + "epoch": 33.92985611510792, + "grad_norm": 0.28916600346565247, + "learning_rate": 3.3169454534304205e-05, + "loss": 0.0161, + "step": 37730 + }, + { + "action_loss": 0.0015696684131398797, + "epoch": 33.92985611510792, + "step": 37730 + }, + { + "epoch": 33.92985611510792, + "step": 37730, + "torque_loss": 0.12014540284872055 + }, + { + "epoch": 33.92985611510792, + "force_loss": 0.0015735877677798271, + "step": 37730 + }, + { + "epoch": 33.93884892086331, + "grad_norm": 0.21858280897140503, + "learning_rate": 3.3143507431190725e-05, + "loss": 0.0168, + "step": 37740 + }, + { + "action_loss": 0.004588219802826643, + "epoch": 33.93884892086331, + "step": 37740 + }, + { + "epoch": 33.93884892086331, + "step": 37740, + "torque_loss": 0.11913526803255081 + }, + { + "epoch": 33.93884892086331, + "force_loss": 0.004644671455025673, + "step": 37740 + }, + { + "epoch": 33.947841726618705, + "grad_norm": 0.2118322253227234, + "learning_rate": 3.311756544863459e-05, + "loss": 0.0164, + "step": 37750 + }, + { + "action_loss": 0.0020320925395935774, + "epoch": 33.947841726618705, + "step": 37750 + }, + { + "epoch": 33.947841726618705, + "step": 37750, + "torque_loss": 0.14661842584609985 + }, + { + "epoch": 33.947841726618705, + "force_loss": 0.003945678006857634, + "step": 37750 + }, + { + "epoch": 33.9568345323741, + "grad_norm": 0.1361873596906662, + "learning_rate": 3.309162859451633e-05, + "loss": 0.0196, + "step": 37760 + }, + { + "action_loss": 0.0033473484218120575, + "epoch": 33.9568345323741, + "step": 37760 + }, + { + "epoch": 33.9568345323741, + "step": 37760, + "torque_loss": 0.06191965565085411 + }, + { + "epoch": 33.9568345323741, + "force_loss": 0.002128748456016183, + "step": 37760 + }, + { + "epoch": 33.96582733812949, + "grad_norm": 0.08888915926218033, + "learning_rate": 3.306569687671487e-05, + "loss": 0.0162, + "step": 37770 + }, + { + "action_loss": 0.00863057654350996, + "epoch": 33.96582733812949, + "step": 37770 + }, + { + "epoch": 33.96582733812949, + "step": 37770, + "torque_loss": 0.15201717615127563 + }, + { + "epoch": 33.96582733812949, + "force_loss": 0.010936747305095196, + "step": 37770 + }, + { + "epoch": 33.97482014388489, + "grad_norm": 0.27153369784355164, + "learning_rate": 3.303977030310756e-05, + "loss": 0.017, + "step": 37780 + }, + { + "action_loss": 0.0035764712374657393, + "epoch": 33.97482014388489, + "step": 37780 + }, + { + "epoch": 33.97482014388489, + "step": 37780, + "torque_loss": 0.12162601947784424 + }, + { + "epoch": 33.97482014388489, + "force_loss": 0.002379960147663951, + "step": 37780 + }, + { + "epoch": 33.98381294964029, + "grad_norm": 0.12568314373493195, + "learning_rate": 3.3013848881570245e-05, + "loss": 0.0155, + "step": 37790 + }, + { + "action_loss": 0.0024474619422107935, + "epoch": 33.98381294964029, + "step": 37790 + }, + { + "epoch": 33.98381294964029, + "step": 37790, + "torque_loss": 0.09655970335006714 + }, + { + "epoch": 33.98381294964029, + "force_loss": 0.0016897293971851468, + "step": 37790 + }, + { + "epoch": 33.992805755395686, + "grad_norm": 0.17658117413520813, + "learning_rate": 3.298793261997712e-05, + "loss": 0.0202, + "step": 37800 + }, + { + "action_loss": 0.0010149498702958226, + "epoch": 33.992805755395686, + "step": 37800 + }, + { + "epoch": 33.992805755395686, + "step": 37800, + "torque_loss": 0.11661273241043091 + }, + { + "epoch": 33.992805755395686, + "force_loss": 0.0007563618128187954, + "step": 37800 + }, + { + "epoch": 34.00179856115108, + "grad_norm": 0.12374042719602585, + "learning_rate": 3.2962021526200893e-05, + "loss": 0.0155, + "step": 37810 + }, + { + "action_loss": 0.00854000449180603, + "epoch": 34.00179856115108, + "step": 37810 + }, + { + "epoch": 34.00179856115108, + "step": 37810, + "torque_loss": 0.16836285591125488 + }, + { + "epoch": 34.00179856115108, + "force_loss": 0.017972860485315323, + "step": 37810 + }, + { + "epoch": 34.010791366906474, + "grad_norm": 0.2548501491546631, + "learning_rate": 3.293611560811268e-05, + "loss": 0.019, + "step": 37820 + }, + { + "action_loss": 0.003282478079199791, + "epoch": 34.010791366906474, + "step": 37820 + }, + { + "epoch": 34.010791366906474, + "step": 37820, + "torque_loss": 0.09992154687643051 + }, + { + "epoch": 34.010791366906474, + "force_loss": 0.003299129195511341, + "step": 37820 + }, + { + "epoch": 34.01978417266187, + "grad_norm": 0.2718057930469513, + "learning_rate": 3.291021487358199e-05, + "loss": 0.0197, + "step": 37830 + }, + { + "action_loss": 0.014899407513439655, + "epoch": 34.01978417266187, + "step": 37830 + }, + { + "epoch": 34.01978417266187, + "step": 37830, + "torque_loss": 0.11702007055282593 + }, + { + "epoch": 34.01978417266187, + "force_loss": 0.03293338045477867, + "step": 37830 + }, + { + "epoch": 34.02877697841727, + "grad_norm": 0.3501644730567932, + "learning_rate": 3.28843193304768e-05, + "loss": 0.0209, + "step": 37840 + }, + { + "action_loss": 0.010363319888710976, + "epoch": 34.02877697841727, + "step": 37840 + }, + { + "epoch": 34.02877697841727, + "step": 37840, + "torque_loss": 0.07841602712869644 + }, + { + "epoch": 34.02877697841727, + "force_loss": 0.009520623832941055, + "step": 37840 + }, + { + "epoch": 34.03776978417266, + "grad_norm": 0.17710961401462555, + "learning_rate": 3.2858428986663456e-05, + "loss": 0.0197, + "step": 37850 + }, + { + "action_loss": 0.003229064168408513, + "epoch": 34.03776978417266, + "step": 37850 + }, + { + "epoch": 34.03776978417266, + "step": 37850, + "torque_loss": 0.08983471244573593 + }, + { + "epoch": 34.03776978417266, + "force_loss": 0.008613090962171555, + "step": 37850 + }, + { + "epoch": 34.04676258992806, + "grad_norm": 0.13705997169017792, + "learning_rate": 3.283254385000681e-05, + "loss": 0.0152, + "step": 37860 + }, + { + "action_loss": 0.008400142192840576, + "epoch": 34.04676258992806, + "step": 37860 + }, + { + "epoch": 34.04676258992806, + "step": 37860, + "torque_loss": 0.14736247062683105 + }, + { + "epoch": 34.04676258992806, + "force_loss": 0.01469422597438097, + "step": 37860 + }, + { + "epoch": 34.055755395683455, + "grad_norm": 0.17904780805110931, + "learning_rate": 3.2806663928370076e-05, + "loss": 0.0179, + "step": 37870 + }, + { + "action_loss": 0.00401114160194993, + "epoch": 34.055755395683455, + "step": 37870 + }, + { + "epoch": 34.055755395683455, + "step": 37870, + "torque_loss": 0.14634637534618378 + }, + { + "epoch": 34.055755395683455, + "force_loss": 0.002393824979662895, + "step": 37870 + }, + { + "epoch": 34.064748201438846, + "grad_norm": 0.1617443561553955, + "learning_rate": 3.278078922961485e-05, + "loss": 0.0163, + "step": 37880 + }, + { + "action_loss": 0.0014823386445641518, + "epoch": 34.064748201438846, + "step": 37880 + }, + { + "epoch": 34.064748201438846, + "step": 37880, + "torque_loss": 0.11770256608724594 + }, + { + "epoch": 34.064748201438846, + "force_loss": 0.003905919613316655, + "step": 37880 + }, + { + "epoch": 34.07374100719424, + "grad_norm": 0.09863841533660889, + "learning_rate": 3.275491976160123e-05, + "loss": 0.0155, + "step": 37890 + }, + { + "action_loss": 0.005528856068849564, + "epoch": 34.07374100719424, + "step": 37890 + }, + { + "epoch": 34.07374100719424, + "step": 37890, + "torque_loss": 0.11923859268426895 + }, + { + "epoch": 34.07374100719424, + "force_loss": 0.00419391505420208, + "step": 37890 + }, + { + "epoch": 34.08273381294964, + "grad_norm": 0.18858498334884644, + "learning_rate": 3.2729055532187645e-05, + "loss": 0.0158, + "step": 37900 + }, + { + "action_loss": 0.005414047744125128, + "epoch": 34.08273381294964, + "step": 37900 + }, + { + "epoch": 34.08273381294964, + "step": 37900, + "torque_loss": 0.21118660271167755 + }, + { + "epoch": 34.08273381294964, + "force_loss": 0.0015715138288214803, + "step": 37900 + }, + { + "epoch": 34.09172661870504, + "grad_norm": 0.4234582781791687, + "learning_rate": 3.270319654923097e-05, + "loss": 0.0199, + "step": 37910 + }, + { + "action_loss": 0.015277204103767872, + "epoch": 34.09172661870504, + "step": 37910 + }, + { + "epoch": 34.09172661870504, + "step": 37910, + "torque_loss": 0.12573844194412231 + }, + { + "epoch": 34.09172661870504, + "force_loss": 0.02044801227748394, + "step": 37910 + }, + { + "epoch": 34.10071942446043, + "grad_norm": 0.2833522856235504, + "learning_rate": 3.2677342820586506e-05, + "loss": 0.0177, + "step": 37920 + }, + { + "action_loss": 0.0018413658253848553, + "epoch": 34.10071942446043, + "step": 37920 + }, + { + "epoch": 34.10071942446043, + "step": 37920, + "torque_loss": 0.10017908364534378 + }, + { + "epoch": 34.10071942446043, + "force_loss": 0.0016164866974577308, + "step": 37920 + }, + { + "epoch": 34.10971223021583, + "grad_norm": 0.2937682569026947, + "learning_rate": 3.2651494354107905e-05, + "loss": 0.0178, + "step": 37930 + }, + { + "action_loss": 0.001016736845485866, + "epoch": 34.10971223021583, + "step": 37930 + }, + { + "epoch": 34.10971223021583, + "step": 37930, + "torque_loss": 0.11007937043905258 + }, + { + "epoch": 34.10971223021583, + "force_loss": 0.0016252034110948443, + "step": 37930 + }, + { + "epoch": 34.118705035971225, + "grad_norm": 0.08290261030197144, + "learning_rate": 3.2625651157647266e-05, + "loss": 0.0141, + "step": 37940 + }, + { + "action_loss": 0.0014338326873257756, + "epoch": 34.118705035971225, + "step": 37940 + }, + { + "epoch": 34.118705035971225, + "step": 37940, + "torque_loss": 0.1151399090886116 + }, + { + "epoch": 34.118705035971225, + "force_loss": 0.0014972379431128502, + "step": 37940 + }, + { + "epoch": 34.12769784172662, + "grad_norm": 0.19066950678825378, + "learning_rate": 3.259981323905505e-05, + "loss": 0.0163, + "step": 37950 + }, + { + "action_loss": 0.0012854525120928884, + "epoch": 34.12769784172662, + "step": 37950 + }, + { + "epoch": 34.12769784172662, + "step": 37950, + "torque_loss": 0.10462678223848343 + }, + { + "epoch": 34.12769784172662, + "force_loss": 0.00450166454538703, + "step": 37950 + }, + { + "epoch": 34.13669064748201, + "grad_norm": 0.1492910236120224, + "learning_rate": 3.257398060618014e-05, + "loss": 0.0161, + "step": 37960 + }, + { + "action_loss": 0.0031411610543727875, + "epoch": 34.13669064748201, + "step": 37960 + }, + { + "epoch": 34.13669064748201, + "step": 37960, + "torque_loss": 0.11208828538656235 + }, + { + "epoch": 34.13669064748201, + "force_loss": 0.002766592660918832, + "step": 37960 + }, + { + "epoch": 34.14568345323741, + "grad_norm": 0.1473616361618042, + "learning_rate": 3.254815326686983e-05, + "loss": 0.0147, + "step": 37970 + }, + { + "action_loss": 0.0011684807250276208, + "epoch": 34.14568345323741, + "step": 37970 + }, + { + "epoch": 34.14568345323741, + "step": 37970, + "torque_loss": 0.0926390290260315 + }, + { + "epoch": 34.14568345323741, + "force_loss": 0.0008684794302098453, + "step": 37970 + }, + { + "epoch": 34.15467625899281, + "grad_norm": 0.14073944091796875, + "learning_rate": 3.2522331228969774e-05, + "loss": 0.0138, + "step": 37980 + }, + { + "action_loss": 0.0021744014229625463, + "epoch": 34.15467625899281, + "step": 37980 + }, + { + "epoch": 34.15467625899281, + "step": 37980, + "torque_loss": 0.12354063987731934 + }, + { + "epoch": 34.15467625899281, + "force_loss": 0.007990377955138683, + "step": 37980 + }, + { + "epoch": 34.1636690647482, + "grad_norm": 0.1969049721956253, + "learning_rate": 3.2496514500324006e-05, + "loss": 0.016, + "step": 37990 + }, + { + "action_loss": 0.0008678183075971901, + "epoch": 34.1636690647482, + "step": 37990 + }, + { + "epoch": 34.1636690647482, + "step": 37990, + "torque_loss": 0.10964993387460709 + }, + { + "epoch": 34.1636690647482, + "force_loss": 0.0005905279540456831, + "step": 37990 + }, + { + "epoch": 34.172661870503596, + "grad_norm": 0.09668637067079544, + "learning_rate": 3.247070308877498e-05, + "loss": 0.0138, + "step": 38000 + }, + { + "action_loss": 0.009854384697973728, + "epoch": 34.172661870503596, + "step": 38000 + }, + { + "epoch": 34.172661870503596, + "step": 38000, + "torque_loss": 0.1738036870956421 + }, + { + "epoch": 34.172661870503596, + "force_loss": 0.024181565269827843, + "step": 38000 + }, + { + "epoch": 34.181654676258994, + "grad_norm": 0.1287219077348709, + "learning_rate": 3.2444897002163515e-05, + "loss": 0.0164, + "step": 38010 + }, + { + "action_loss": 0.0009754174388945103, + "epoch": 34.181654676258994, + "step": 38010 + }, + { + "epoch": 34.181654676258994, + "step": 38010, + "torque_loss": 0.07954701781272888 + }, + { + "epoch": 34.181654676258994, + "force_loss": 0.006283354014158249, + "step": 38010 + }, + { + "epoch": 34.19064748201439, + "grad_norm": 0.21000033617019653, + "learning_rate": 3.241909624832885e-05, + "loss": 0.0149, + "step": 38020 + }, + { + "action_loss": 0.002194909146055579, + "epoch": 34.19064748201439, + "step": 38020 + }, + { + "epoch": 34.19064748201439, + "step": 38020, + "torque_loss": 0.08859354257583618 + }, + { + "epoch": 34.19064748201439, + "force_loss": 0.0030823368579149246, + "step": 38020 + }, + { + "epoch": 34.19964028776978, + "grad_norm": 0.34158578515052795, + "learning_rate": 3.239330083510852e-05, + "loss": 0.0185, + "step": 38030 + }, + { + "action_loss": 0.010723414830863476, + "epoch": 34.19964028776978, + "step": 38030 + }, + { + "epoch": 34.19964028776978, + "step": 38030, + "torque_loss": 0.10285502672195435 + }, + { + "epoch": 34.19964028776978, + "force_loss": 0.021538028493523598, + "step": 38030 + }, + { + "epoch": 34.20863309352518, + "grad_norm": 0.20425674319267273, + "learning_rate": 3.236751077033855e-05, + "loss": 0.0187, + "step": 38040 + }, + { + "action_loss": 0.0016714368248358369, + "epoch": 34.20863309352518, + "step": 38040 + }, + { + "epoch": 34.20863309352518, + "step": 38040, + "torque_loss": 0.1403820663690567 + }, + { + "epoch": 34.20863309352518, + "force_loss": 0.0013357937568798661, + "step": 38040 + }, + { + "epoch": 34.21762589928058, + "grad_norm": 0.6160386800765991, + "learning_rate": 3.234172606185322e-05, + "loss": 0.0207, + "step": 38050 + }, + { + "action_loss": 0.0022034465800970793, + "epoch": 34.21762589928058, + "step": 38050 + }, + { + "epoch": 34.21762589928058, + "step": 38050, + "torque_loss": 0.16391442716121674 + }, + { + "epoch": 34.21762589928058, + "force_loss": 0.003691088641062379, + "step": 38050 + }, + { + "epoch": 34.226618705035975, + "grad_norm": 0.42136967182159424, + "learning_rate": 3.231594671748528e-05, + "loss": 0.0183, + "step": 38060 + }, + { + "action_loss": 0.0058631435967981815, + "epoch": 34.226618705035975, + "step": 38060 + }, + { + "epoch": 34.226618705035975, + "step": 38060, + "torque_loss": 0.17233215272426605 + }, + { + "epoch": 34.226618705035975, + "force_loss": 0.0033209084067493677, + "step": 38060 + }, + { + "epoch": 34.235611510791365, + "grad_norm": 0.1460287868976593, + "learning_rate": 3.2290172745065815e-05, + "loss": 0.0162, + "step": 38070 + }, + { + "action_loss": 0.00554612884297967, + "epoch": 34.235611510791365, + "step": 38070 + }, + { + "epoch": 34.235611510791365, + "step": 38070, + "torque_loss": 0.11947265267372131 + }, + { + "epoch": 34.235611510791365, + "force_loss": 0.012024116702377796, + "step": 38070 + }, + { + "epoch": 34.24460431654676, + "grad_norm": 0.31487590074539185, + "learning_rate": 3.226440415242426e-05, + "loss": 0.0162, + "step": 38080 + }, + { + "action_loss": 0.0022396717686206102, + "epoch": 34.24460431654676, + "step": 38080 + }, + { + "epoch": 34.24460431654676, + "step": 38080, + "torque_loss": 0.15118329226970673 + }, + { + "epoch": 34.24460431654676, + "force_loss": 0.0028562508523464203, + "step": 38080 + }, + { + "epoch": 34.25359712230216, + "grad_norm": 0.896686851978302, + "learning_rate": 3.223864094738846e-05, + "loss": 0.0175, + "step": 38090 + }, + { + "action_loss": 0.0029583973810076714, + "epoch": 34.25359712230216, + "step": 38090 + }, + { + "epoch": 34.25359712230216, + "step": 38090, + "torque_loss": 0.16267897188663483 + }, + { + "epoch": 34.25359712230216, + "force_loss": 0.0029362214263528585, + "step": 38090 + }, + { + "epoch": 34.26258992805755, + "grad_norm": 0.13580796122550964, + "learning_rate": 3.221288313778456e-05, + "loss": 0.0151, + "step": 38100 + }, + { + "action_loss": 0.0009770820615813136, + "epoch": 34.26258992805755, + "step": 38100 + }, + { + "epoch": 34.26258992805755, + "step": 38100, + "torque_loss": 0.08829469233751297 + }, + { + "epoch": 34.26258992805755, + "force_loss": 0.0013094767928123474, + "step": 38100 + }, + { + "epoch": 34.27158273381295, + "grad_norm": 0.09662114083766937, + "learning_rate": 3.2187130731437125e-05, + "loss": 0.0142, + "step": 38110 + }, + { + "action_loss": 0.008527331985533237, + "epoch": 34.27158273381295, + "step": 38110 + }, + { + "epoch": 34.27158273381295, + "step": 38110, + "torque_loss": 0.11693272739648819 + }, + { + "epoch": 34.27158273381295, + "force_loss": 0.010731442831456661, + "step": 38110 + }, + { + "epoch": 34.280575539568346, + "grad_norm": 0.11308939754962921, + "learning_rate": 3.216138373616905e-05, + "loss": 0.0172, + "step": 38120 + }, + { + "action_loss": 0.0016659159446135163, + "epoch": 34.280575539568346, + "step": 38120 + }, + { + "epoch": 34.280575539568346, + "step": 38120, + "torque_loss": 0.1553521752357483 + }, + { + "epoch": 34.280575539568346, + "force_loss": 0.005824326071888208, + "step": 38120 + }, + { + "epoch": 34.289568345323744, + "grad_norm": 0.10985405743122101, + "learning_rate": 3.21356421598016e-05, + "loss": 0.0169, + "step": 38130 + }, + { + "action_loss": 0.002218943787738681, + "epoch": 34.289568345323744, + "step": 38130 + }, + { + "epoch": 34.289568345323744, + "step": 38130, + "torque_loss": 0.11694708466529846 + }, + { + "epoch": 34.289568345323744, + "force_loss": 0.003227776614949107, + "step": 38130 + }, + { + "epoch": 34.298561151079134, + "grad_norm": 0.309419184923172, + "learning_rate": 3.210990601015438e-05, + "loss": 0.0167, + "step": 38140 + }, + { + "action_loss": 0.001612002495676279, + "epoch": 34.298561151079134, + "step": 38140 + }, + { + "epoch": 34.298561151079134, + "step": 38140, + "torque_loss": 0.09101120382547379 + }, + { + "epoch": 34.298561151079134, + "force_loss": 0.005857498850673437, + "step": 38140 + }, + { + "epoch": 34.30755395683453, + "grad_norm": 0.08458050340414047, + "learning_rate": 3.208417529504535e-05, + "loss": 0.016, + "step": 38150 + }, + { + "action_loss": 0.004689517430961132, + "epoch": 34.30755395683453, + "step": 38150 + }, + { + "epoch": 34.30755395683453, + "step": 38150, + "torque_loss": 0.14398176968097687 + }, + { + "epoch": 34.30755395683453, + "force_loss": 0.009167391806840897, + "step": 38150 + }, + { + "epoch": 34.31654676258993, + "grad_norm": 0.1201728954911232, + "learning_rate": 3.205845002229084e-05, + "loss": 0.0179, + "step": 38160 + }, + { + "action_loss": 0.002028467832133174, + "epoch": 34.31654676258993, + "step": 38160 + }, + { + "epoch": 34.31654676258993, + "step": 38160, + "torque_loss": 0.12843093276023865 + }, + { + "epoch": 34.31654676258993, + "force_loss": 0.003033468732610345, + "step": 38160 + }, + { + "epoch": 34.32553956834532, + "grad_norm": 0.23771627247333527, + "learning_rate": 3.203273019970547e-05, + "loss": 0.0158, + "step": 38170 + }, + { + "action_loss": 0.01165491621941328, + "epoch": 34.32553956834532, + "step": 38170 + }, + { + "epoch": 34.32553956834532, + "step": 38170, + "torque_loss": 0.10926521569490433 + }, + { + "epoch": 34.32553956834532, + "force_loss": 0.01667793281376362, + "step": 38170 + }, + { + "epoch": 34.33453237410072, + "grad_norm": 0.38787347078323364, + "learning_rate": 3.200701583510227e-05, + "loss": 0.0171, + "step": 38180 + }, + { + "action_loss": 0.0010124507825821638, + "epoch": 34.33453237410072, + "step": 38180 + }, + { + "epoch": 34.33453237410072, + "step": 38180, + "torque_loss": 0.1000855341553688 + }, + { + "epoch": 34.33453237410072, + "force_loss": 0.003066321136429906, + "step": 38180 + }, + { + "epoch": 34.343525179856115, + "grad_norm": 0.15007776021957397, + "learning_rate": 3.198130693629261e-05, + "loss": 0.0169, + "step": 38190 + }, + { + "action_loss": 0.0010897573083639145, + "epoch": 34.343525179856115, + "step": 38190 + }, + { + "epoch": 34.343525179856115, + "step": 38190, + "torque_loss": 0.12579868733882904 + }, + { + "epoch": 34.343525179856115, + "force_loss": 0.001298569142818451, + "step": 38190 + }, + { + "epoch": 34.35251798561151, + "grad_norm": 0.33359494805336, + "learning_rate": 3.195560351108612e-05, + "loss": 0.0175, + "step": 38200 + }, + { + "action_loss": 0.0017828574636951089, + "epoch": 34.35251798561151, + "step": 38200 + }, + { + "epoch": 34.35251798561151, + "step": 38200, + "torque_loss": 0.13059596717357635 + }, + { + "epoch": 34.35251798561151, + "force_loss": 0.0023853592574596405, + "step": 38200 + }, + { + "epoch": 34.361510791366904, + "grad_norm": 0.125379741191864, + "learning_rate": 3.1929905567290865e-05, + "loss": 0.0157, + "step": 38210 + }, + { + "action_loss": 0.009774972684681416, + "epoch": 34.361510791366904, + "step": 38210 + }, + { + "epoch": 34.361510791366904, + "step": 38210, + "torque_loss": 0.11848794668912888 + }, + { + "epoch": 34.361510791366904, + "force_loss": 0.016782671213150024, + "step": 38210 + }, + { + "epoch": 34.3705035971223, + "grad_norm": 0.22163516283035278, + "learning_rate": 3.1904213112713164e-05, + "loss": 0.0178, + "step": 38220 + }, + { + "action_loss": 0.0012675829930230975, + "epoch": 34.3705035971223, + "step": 38220 + }, + { + "epoch": 34.3705035971223, + "step": 38220, + "torque_loss": 0.11763477325439453 + }, + { + "epoch": 34.3705035971223, + "force_loss": 0.002317859558388591, + "step": 38220 + }, + { + "epoch": 34.3794964028777, + "grad_norm": 0.11981499195098877, + "learning_rate": 3.187852615515774e-05, + "loss": 0.018, + "step": 38230 + }, + { + "action_loss": 0.0013185711577534676, + "epoch": 34.3794964028777, + "step": 38230 + }, + { + "epoch": 34.3794964028777, + "step": 38230, + "torque_loss": 0.10611226409673691 + }, + { + "epoch": 34.3794964028777, + "force_loss": 0.0019390900852158666, + "step": 38230 + }, + { + "epoch": 34.388489208633096, + "grad_norm": 0.1892155259847641, + "learning_rate": 3.1852844702427606e-05, + "loss": 0.0157, + "step": 38240 + }, + { + "action_loss": 0.001037661568261683, + "epoch": 34.388489208633096, + "step": 38240 + }, + { + "epoch": 34.388489208633096, + "step": 38240, + "torque_loss": 0.10556551069021225 + }, + { + "epoch": 34.388489208633096, + "force_loss": 0.0007493208977393806, + "step": 38240 + }, + { + "epoch": 34.39748201438849, + "grad_norm": 0.1841997504234314, + "learning_rate": 3.18271687623241e-05, + "loss": 0.0175, + "step": 38250 + }, + { + "action_loss": 0.0012132179690524936, + "epoch": 34.39748201438849, + "step": 38250 + }, + { + "epoch": 34.39748201438849, + "step": 38250, + "torque_loss": 0.12356892973184586 + }, + { + "epoch": 34.39748201438849, + "force_loss": 0.0009360490366816521, + "step": 38250 + }, + { + "epoch": 34.406474820143885, + "grad_norm": 0.24160510301589966, + "learning_rate": 3.1801498342646896e-05, + "loss": 0.0173, + "step": 38260 + }, + { + "action_loss": 0.001780887134373188, + "epoch": 34.406474820143885, + "step": 38260 + }, + { + "epoch": 34.406474820143885, + "step": 38260, + "torque_loss": 0.10922298580408096 + }, + { + "epoch": 34.406474820143885, + "force_loss": 0.0032669713255017996, + "step": 38260 + }, + { + "epoch": 34.41546762589928, + "grad_norm": 0.3818244934082031, + "learning_rate": 3.177583345119398e-05, + "loss": 0.0179, + "step": 38270 + }, + { + "action_loss": 0.0012223607627674937, + "epoch": 34.41546762589928, + "step": 38270 + }, + { + "epoch": 34.41546762589928, + "step": 38270, + "torque_loss": 0.1331145316362381 + }, + { + "epoch": 34.41546762589928, + "force_loss": 0.0014526421437039971, + "step": 38270 + }, + { + "epoch": 34.42446043165468, + "grad_norm": 0.2520965039730072, + "learning_rate": 3.17501740957617e-05, + "loss": 0.0154, + "step": 38280 + }, + { + "action_loss": 0.002908057300373912, + "epoch": 34.42446043165468, + "step": 38280 + }, + { + "epoch": 34.42446043165468, + "step": 38280, + "torque_loss": 0.1257518231868744 + }, + { + "epoch": 34.42446043165468, + "force_loss": 0.003900927258655429, + "step": 38280 + }, + { + "epoch": 34.43345323741007, + "grad_norm": 0.14366383850574493, + "learning_rate": 3.172452028414467e-05, + "loss": 0.0173, + "step": 38290 + }, + { + "action_loss": 0.002626248402521014, + "epoch": 34.43345323741007, + "step": 38290 + }, + { + "epoch": 34.43345323741007, + "step": 38290, + "torque_loss": 0.1364048719406128 + }, + { + "epoch": 34.43345323741007, + "force_loss": 0.004100541118532419, + "step": 38290 + }, + { + "epoch": 34.44244604316547, + "grad_norm": 0.20616415143013, + "learning_rate": 3.169887202413583e-05, + "loss": 0.0181, + "step": 38300 + }, + { + "action_loss": 0.011716465465724468, + "epoch": 34.44244604316547, + "step": 38300 + }, + { + "epoch": 34.44244604316547, + "step": 38300, + "torque_loss": 0.12543442845344543 + }, + { + "epoch": 34.44244604316547, + "force_loss": 0.016384921967983246, + "step": 38300 + }, + { + "epoch": 34.451438848920866, + "grad_norm": 0.2401425689458847, + "learning_rate": 3.167322932352646e-05, + "loss": 0.0178, + "step": 38310 + }, + { + "action_loss": 0.0014225519262254238, + "epoch": 34.451438848920866, + "step": 38310 + }, + { + "epoch": 34.451438848920866, + "step": 38310, + "torque_loss": 0.09976992756128311 + }, + { + "epoch": 34.451438848920866, + "force_loss": 0.0016255304217338562, + "step": 38310 + }, + { + "epoch": 34.460431654676256, + "grad_norm": 0.11276914179325104, + "learning_rate": 3.164759219010613e-05, + "loss": 0.0183, + "step": 38320 + }, + { + "action_loss": 0.002597688464447856, + "epoch": 34.460431654676256, + "step": 38320 + }, + { + "epoch": 34.460431654676256, + "step": 38320, + "torque_loss": 0.12056461721658707 + }, + { + "epoch": 34.460431654676256, + "force_loss": 0.002494994318112731, + "step": 38320 + }, + { + "epoch": 34.469424460431654, + "grad_norm": 0.2172938734292984, + "learning_rate": 3.1621960631662725e-05, + "loss": 0.0168, + "step": 38330 + }, + { + "action_loss": 0.0031617535278201103, + "epoch": 34.469424460431654, + "step": 38330 + }, + { + "epoch": 34.469424460431654, + "step": 38330, + "torque_loss": 0.13513052463531494 + }, + { + "epoch": 34.469424460431654, + "force_loss": 0.004643291234970093, + "step": 38330 + }, + { + "epoch": 34.47841726618705, + "grad_norm": 0.29820922017097473, + "learning_rate": 3.159633465598245e-05, + "loss": 0.0228, + "step": 38340 + }, + { + "action_loss": 0.0019786886405199766, + "epoch": 34.47841726618705, + "step": 38340 + }, + { + "epoch": 34.47841726618705, + "step": 38340, + "torque_loss": 0.16076822578907013 + }, + { + "epoch": 34.47841726618705, + "force_loss": 0.0020407189149409533, + "step": 38340 + }, + { + "epoch": 34.48741007194245, + "grad_norm": 0.2596455216407776, + "learning_rate": 3.1570714270849767e-05, + "loss": 0.0159, + "step": 38350 + }, + { + "action_loss": 0.0019989933352917433, + "epoch": 34.48741007194245, + "step": 38350 + }, + { + "epoch": 34.48741007194245, + "step": 38350, + "torque_loss": 0.16867582499980927 + }, + { + "epoch": 34.48741007194245, + "force_loss": 0.0039395857602357864, + "step": 38350 + }, + { + "epoch": 34.49640287769784, + "grad_norm": 0.20967614650726318, + "learning_rate": 3.1545099484047516e-05, + "loss": 0.0163, + "step": 38360 + }, + { + "action_loss": 0.003609751584008336, + "epoch": 34.49640287769784, + "step": 38360 + }, + { + "epoch": 34.49640287769784, + "step": 38360, + "torque_loss": 0.09196087718009949 + }, + { + "epoch": 34.49640287769784, + "force_loss": 0.00652661407366395, + "step": 38360 + }, + { + "epoch": 34.50539568345324, + "grad_norm": 0.30926424264907837, + "learning_rate": 3.151949030335674e-05, + "loss": 0.015, + "step": 38370 + }, + { + "action_loss": 0.0023019700311124325, + "epoch": 34.50539568345324, + "step": 38370 + }, + { + "epoch": 34.50539568345324, + "step": 38370, + "torque_loss": 0.1672786921262741 + }, + { + "epoch": 34.50539568345324, + "force_loss": 0.003482109634205699, + "step": 38370 + }, + { + "epoch": 34.514388489208635, + "grad_norm": 0.3725399076938629, + "learning_rate": 3.149388673655687e-05, + "loss": 0.018, + "step": 38380 + }, + { + "action_loss": 0.0032252792734652758, + "epoch": 34.514388489208635, + "step": 38380 + }, + { + "epoch": 34.514388489208635, + "step": 38380, + "torque_loss": 0.18382048606872559 + }, + { + "epoch": 34.514388489208635, + "force_loss": 0.009460299275815487, + "step": 38380 + }, + { + "epoch": 34.523381294964025, + "grad_norm": 0.22372767329216003, + "learning_rate": 3.146828879142559e-05, + "loss": 0.0188, + "step": 38390 + }, + { + "action_loss": 0.0013847621157765388, + "epoch": 34.523381294964025, + "step": 38390 + }, + { + "epoch": 34.523381294964025, + "step": 38390, + "torque_loss": 0.11002520471811295 + }, + { + "epoch": 34.523381294964025, + "force_loss": 0.0018376881489530206, + "step": 38390 + }, + { + "epoch": 34.53237410071942, + "grad_norm": 0.07798569649457932, + "learning_rate": 3.1442696475738866e-05, + "loss": 0.0146, + "step": 38400 + }, + { + "action_loss": 0.001473208307288587, + "epoch": 34.53237410071942, + "step": 38400 + }, + { + "epoch": 34.53237410071942, + "step": 38400, + "torque_loss": 0.11487217992544174 + }, + { + "epoch": 34.53237410071942, + "force_loss": 0.000947553024161607, + "step": 38400 + }, + { + "epoch": 34.54136690647482, + "grad_norm": 0.18024161458015442, + "learning_rate": 3.141710979727098e-05, + "loss": 0.0151, + "step": 38410 + }, + { + "action_loss": 0.0012842240976169705, + "epoch": 34.54136690647482, + "step": 38410 + }, + { + "epoch": 34.54136690647482, + "step": 38410, + "torque_loss": 0.14827051758766174 + }, + { + "epoch": 34.54136690647482, + "force_loss": 0.0008141950820572674, + "step": 38410 + }, + { + "epoch": 34.55035971223022, + "grad_norm": 0.14035546779632568, + "learning_rate": 3.139152876379447e-05, + "loss": 0.0164, + "step": 38420 + }, + { + "action_loss": 0.0026300896424800158, + "epoch": 34.55035971223022, + "step": 38420 + }, + { + "epoch": 34.55035971223022, + "step": 38420, + "torque_loss": 0.10661149024963379 + }, + { + "epoch": 34.55035971223022, + "force_loss": 0.0066487849690020084, + "step": 38420 + }, + { + "epoch": 34.55935251798561, + "grad_norm": 0.2546481490135193, + "learning_rate": 3.1365953383080214e-05, + "loss": 0.0182, + "step": 38430 + }, + { + "action_loss": 0.0016450881958007812, + "epoch": 34.55935251798561, + "step": 38430 + }, + { + "epoch": 34.55935251798561, + "step": 38430, + "torque_loss": 0.08318792283535004 + }, + { + "epoch": 34.55935251798561, + "force_loss": 0.0018876157701015472, + "step": 38430 + }, + { + "epoch": 34.568345323741006, + "grad_norm": 0.1375221461057663, + "learning_rate": 3.134038366289731e-05, + "loss": 0.0151, + "step": 38440 + }, + { + "action_loss": 0.0013049558037891984, + "epoch": 34.568345323741006, + "step": 38440 + }, + { + "epoch": 34.568345323741006, + "step": 38440, + "torque_loss": 0.0981418564915657 + }, + { + "epoch": 34.568345323741006, + "force_loss": 0.0027469974011182785, + "step": 38440 + }, + { + "epoch": 34.577338129496404, + "grad_norm": 0.24912779033184052, + "learning_rate": 3.131481961101317e-05, + "loss": 0.019, + "step": 38450 + }, + { + "action_loss": 0.0014133708318695426, + "epoch": 34.577338129496404, + "step": 38450 + }, + { + "epoch": 34.577338129496404, + "step": 38450, + "torque_loss": 0.16744482517242432 + }, + { + "epoch": 34.577338129496404, + "force_loss": 0.002211084822192788, + "step": 38450 + }, + { + "epoch": 34.5863309352518, + "grad_norm": 0.15376850962638855, + "learning_rate": 3.128926123519349e-05, + "loss": 0.0182, + "step": 38460 + }, + { + "action_loss": 0.0012096124701201916, + "epoch": 34.5863309352518, + "step": 38460 + }, + { + "epoch": 34.5863309352518, + "step": 38460, + "torque_loss": 0.13167576491832733 + }, + { + "epoch": 34.5863309352518, + "force_loss": 0.002058493671938777, + "step": 38460 + }, + { + "epoch": 34.59532374100719, + "grad_norm": 0.10142739862203598, + "learning_rate": 3.1263708543202194e-05, + "loss": 0.0135, + "step": 38470 + }, + { + "action_loss": 0.006791311781853437, + "epoch": 34.59532374100719, + "step": 38470 + }, + { + "epoch": 34.59532374100719, + "step": 38470, + "torque_loss": 0.12551186978816986 + }, + { + "epoch": 34.59532374100719, + "force_loss": 0.0074044340290129185, + "step": 38470 + }, + { + "epoch": 34.60431654676259, + "grad_norm": 0.17957811057567596, + "learning_rate": 3.123816154280155e-05, + "loss": 0.0202, + "step": 38480 + }, + { + "action_loss": 0.0025393737014383078, + "epoch": 34.60431654676259, + "step": 38480 + }, + { + "epoch": 34.60431654676259, + "step": 38480, + "torque_loss": 0.14315222203731537 + }, + { + "epoch": 34.60431654676259, + "force_loss": 0.002506350865587592, + "step": 38480 + }, + { + "epoch": 34.61330935251799, + "grad_norm": 0.17599982023239136, + "learning_rate": 3.121262024175207e-05, + "loss": 0.0177, + "step": 38490 + }, + { + "action_loss": 0.005657270550727844, + "epoch": 34.61330935251799, + "step": 38490 + }, + { + "epoch": 34.61330935251799, + "step": 38490, + "torque_loss": 0.10008245706558228 + }, + { + "epoch": 34.61330935251799, + "force_loss": 0.006468428764492273, + "step": 38490 + }, + { + "epoch": 34.62230215827338, + "grad_norm": 0.27707439661026, + "learning_rate": 3.118708464781248e-05, + "loss": 0.0166, + "step": 38500 + }, + { + "action_loss": 0.0019723237492144108, + "epoch": 34.62230215827338, + "step": 38500 + }, + { + "epoch": 34.62230215827338, + "step": 38500, + "torque_loss": 0.12207069993019104 + }, + { + "epoch": 34.62230215827338, + "force_loss": 0.0031738094985485077, + "step": 38500 + }, + { + "epoch": 34.631294964028775, + "grad_norm": 0.3856644034385681, + "learning_rate": 3.116155476873987e-05, + "loss": 0.0174, + "step": 38510 + }, + { + "action_loss": 0.0012843600707128644, + "epoch": 34.631294964028775, + "step": 38510 + }, + { + "epoch": 34.631294964028775, + "step": 38510, + "torque_loss": 0.10053249448537827 + }, + { + "epoch": 34.631294964028775, + "force_loss": 0.0038113368209451437, + "step": 38510 + }, + { + "epoch": 34.64028776978417, + "grad_norm": 0.10629621893167496, + "learning_rate": 3.11360306122895e-05, + "loss": 0.0167, + "step": 38520 + }, + { + "action_loss": 0.0037077355664223433, + "epoch": 34.64028776978417, + "step": 38520 + }, + { + "epoch": 34.64028776978417, + "step": 38520, + "torque_loss": 0.14237217605113983 + }, + { + "epoch": 34.64028776978417, + "force_loss": 0.007574303541332483, + "step": 38520 + }, + { + "epoch": 34.64928057553957, + "grad_norm": 0.21006788313388824, + "learning_rate": 3.1110512186214975e-05, + "loss": 0.017, + "step": 38530 + }, + { + "action_loss": 0.0016152107855305076, + "epoch": 34.64928057553957, + "step": 38530 + }, + { + "epoch": 34.64928057553957, + "step": 38530, + "torque_loss": 0.1222589835524559 + }, + { + "epoch": 34.64928057553957, + "force_loss": 0.0008252934203483164, + "step": 38530 + }, + { + "epoch": 34.65827338129496, + "grad_norm": 0.24139325320720673, + "learning_rate": 3.1084999498268095e-05, + "loss": 0.0183, + "step": 38540 + }, + { + "action_loss": 0.0011581552680581808, + "epoch": 34.65827338129496, + "step": 38540 + }, + { + "epoch": 34.65827338129496, + "step": 38540, + "torque_loss": 0.1371845006942749 + }, + { + "epoch": 34.65827338129496, + "force_loss": 0.0009283709223382175, + "step": 38540 + }, + { + "epoch": 34.66726618705036, + "grad_norm": 0.2640725076198578, + "learning_rate": 3.1059492556198934e-05, + "loss": 0.0168, + "step": 38550 + }, + { + "action_loss": 0.018619609996676445, + "epoch": 34.66726618705036, + "step": 38550 + }, + { + "epoch": 34.66726618705036, + "step": 38550, + "torque_loss": 0.1586941033601761 + }, + { + "epoch": 34.66726618705036, + "force_loss": 0.019095636904239655, + "step": 38550 + }, + { + "epoch": 34.67625899280576, + "grad_norm": 0.39664411544799805, + "learning_rate": 3.103399136775586e-05, + "loss": 0.0183, + "step": 38560 + }, + { + "action_loss": 0.007044801488518715, + "epoch": 34.67625899280576, + "step": 38560 + }, + { + "epoch": 34.67625899280576, + "step": 38560, + "torque_loss": 0.14300976693630219 + }, + { + "epoch": 34.67625899280576, + "force_loss": 0.0095159150660038, + "step": 38560 + }, + { + "epoch": 34.685251798561154, + "grad_norm": 0.4605175256729126, + "learning_rate": 3.100849594068541e-05, + "loss": 0.0194, + "step": 38570 + }, + { + "action_loss": 0.003579928306862712, + "epoch": 34.685251798561154, + "step": 38570 + }, + { + "epoch": 34.685251798561154, + "step": 38570, + "torque_loss": 0.11204127222299576 + }, + { + "epoch": 34.685251798561154, + "force_loss": 0.004976348951458931, + "step": 38570 + }, + { + "epoch": 34.694244604316545, + "grad_norm": 0.2618870437145233, + "learning_rate": 3.0983006282732484e-05, + "loss": 0.0172, + "step": 38580 + }, + { + "action_loss": 0.005463303532451391, + "epoch": 34.694244604316545, + "step": 38580 + }, + { + "epoch": 34.694244604316545, + "step": 38580, + "torque_loss": 0.1450069695711136 + }, + { + "epoch": 34.694244604316545, + "force_loss": 0.004828445613384247, + "step": 38580 + }, + { + "epoch": 34.70323741007194, + "grad_norm": 0.200755313038826, + "learning_rate": 3.0957522401640116e-05, + "loss": 0.0179, + "step": 38590 + }, + { + "action_loss": 0.0009448258206248283, + "epoch": 34.70323741007194, + "step": 38590 + }, + { + "epoch": 34.70323741007194, + "step": 38590, + "torque_loss": 0.11735858768224716 + }, + { + "epoch": 34.70323741007194, + "force_loss": 0.0005795633187517524, + "step": 38590 + }, + { + "epoch": 34.71223021582734, + "grad_norm": 0.10308893769979477, + "learning_rate": 3.0932044305149645e-05, + "loss": 0.0156, + "step": 38600 + }, + { + "action_loss": 0.00312026753090322, + "epoch": 34.71223021582734, + "step": 38600 + }, + { + "epoch": 34.71223021582734, + "step": 38600, + "torque_loss": 0.1819179505109787 + }, + { + "epoch": 34.71223021582734, + "force_loss": 0.0033644726499915123, + "step": 38600 + }, + { + "epoch": 34.72122302158273, + "grad_norm": 0.28257471323013306, + "learning_rate": 3.090657200100068e-05, + "loss": 0.0161, + "step": 38610 + }, + { + "action_loss": 0.005166333634406328, + "epoch": 34.72122302158273, + "step": 38610 + }, + { + "epoch": 34.72122302158273, + "step": 38610, + "torque_loss": 0.15472187101840973 + }, + { + "epoch": 34.72122302158273, + "force_loss": 0.0045619686134159565, + "step": 38610 + }, + { + "epoch": 34.73021582733813, + "grad_norm": 0.10393625497817993, + "learning_rate": 3.088110549693099e-05, + "loss": 0.0166, + "step": 38620 + }, + { + "action_loss": 0.003556174226105213, + "epoch": 34.73021582733813, + "step": 38620 + }, + { + "epoch": 34.73021582733813, + "step": 38620, + "torque_loss": 0.14934581518173218 + }, + { + "epoch": 34.73021582733813, + "force_loss": 0.004354837816208601, + "step": 38620 + }, + { + "epoch": 34.739208633093526, + "grad_norm": 0.17032960057258606, + "learning_rate": 3.085564480067667e-05, + "loss": 0.0163, + "step": 38630 + }, + { + "action_loss": 0.0020976655650883913, + "epoch": 34.739208633093526, + "step": 38630 + }, + { + "epoch": 34.739208633093526, + "step": 38630, + "torque_loss": 0.13000796735286713 + }, + { + "epoch": 34.739208633093526, + "force_loss": 0.0012645056704059243, + "step": 38630 + }, + { + "epoch": 34.74820143884892, + "grad_norm": 0.0839029997587204, + "learning_rate": 3.0830189919971955e-05, + "loss": 0.0147, + "step": 38640 + }, + { + "action_loss": 0.0012977635487914085, + "epoch": 34.74820143884892, + "step": 38640 + }, + { + "epoch": 34.74820143884892, + "step": 38640, + "torque_loss": 0.12684597074985504 + }, + { + "epoch": 34.74820143884892, + "force_loss": 0.004534391686320305, + "step": 38640 + }, + { + "epoch": 34.757194244604314, + "grad_norm": 0.4317879378795624, + "learning_rate": 3.080474086254939e-05, + "loss": 0.0168, + "step": 38650 + }, + { + "action_loss": 0.0022805833723396063, + "epoch": 34.757194244604314, + "step": 38650 + }, + { + "epoch": 34.757194244604314, + "step": 38650, + "torque_loss": 0.10338791459798813 + }, + { + "epoch": 34.757194244604314, + "force_loss": 0.0035688721109181643, + "step": 38650 + }, + { + "epoch": 34.76618705035971, + "grad_norm": 0.11154210567474365, + "learning_rate": 3.077929763613975e-05, + "loss": 0.0162, + "step": 38660 + }, + { + "action_loss": 0.003112196223810315, + "epoch": 34.76618705035971, + "step": 38660 + }, + { + "epoch": 34.76618705035971, + "step": 38660, + "torque_loss": 0.13109798729419708 + }, + { + "epoch": 34.76618705035971, + "force_loss": 0.004263064358383417, + "step": 38660 + }, + { + "epoch": 34.77517985611511, + "grad_norm": 0.10955115407705307, + "learning_rate": 3.075386024847198e-05, + "loss": 0.0188, + "step": 38670 + }, + { + "action_loss": 0.00954432599246502, + "epoch": 34.77517985611511, + "step": 38670 + }, + { + "epoch": 34.77517985611511, + "step": 38670, + "torque_loss": 0.12226337194442749 + }, + { + "epoch": 34.77517985611511, + "force_loss": 0.013184207491576672, + "step": 38670 + }, + { + "epoch": 34.78417266187051, + "grad_norm": 0.12276234477758408, + "learning_rate": 3.072842870727331e-05, + "loss": 0.0166, + "step": 38680 + }, + { + "action_loss": 0.0012173851719126105, + "epoch": 34.78417266187051, + "step": 38680 + }, + { + "epoch": 34.78417266187051, + "step": 38680, + "torque_loss": 0.1090126633644104 + }, + { + "epoch": 34.78417266187051, + "force_loss": 0.001330745406448841, + "step": 38680 + }, + { + "epoch": 34.7931654676259, + "grad_norm": 0.2330036610364914, + "learning_rate": 3.070300302026916e-05, + "loss": 0.015, + "step": 38690 + }, + { + "action_loss": 0.00153315847273916, + "epoch": 34.7931654676259, + "step": 38690 + }, + { + "epoch": 34.7931654676259, + "step": 38690, + "torque_loss": 0.11496663093566895 + }, + { + "epoch": 34.7931654676259, + "force_loss": 0.0012268581194803119, + "step": 38690 + }, + { + "epoch": 34.802158273381295, + "grad_norm": 0.3656274974346161, + "learning_rate": 3.067758319518318e-05, + "loss": 0.0203, + "step": 38700 + }, + { + "action_loss": 0.002117444993928075, + "epoch": 34.802158273381295, + "step": 38700 + }, + { + "epoch": 34.802158273381295, + "step": 38700, + "torque_loss": 0.07872550934553146 + }, + { + "epoch": 34.802158273381295, + "force_loss": 0.0019782257732003927, + "step": 38700 + }, + { + "epoch": 34.81115107913669, + "grad_norm": 0.17873837053775787, + "learning_rate": 3.065216923973725e-05, + "loss": 0.0146, + "step": 38710 + }, + { + "action_loss": 0.0021198915783315897, + "epoch": 34.81115107913669, + "step": 38710 + }, + { + "epoch": 34.81115107913669, + "step": 38710, + "torque_loss": 0.14273861050605774 + }, + { + "epoch": 34.81115107913669, + "force_loss": 0.007720018271356821, + "step": 38710 + }, + { + "epoch": 34.82014388489208, + "grad_norm": 0.19084160029888153, + "learning_rate": 3.062676116165145e-05, + "loss": 0.0161, + "step": 38720 + }, + { + "action_loss": 0.003642566502094269, + "epoch": 34.82014388489208, + "step": 38720 + }, + { + "epoch": 34.82014388489208, + "step": 38720, + "torque_loss": 0.1195937916636467 + }, + { + "epoch": 34.82014388489208, + "force_loss": 0.008990968577563763, + "step": 38720 + }, + { + "epoch": 34.82913669064748, + "grad_norm": 0.3843153715133667, + "learning_rate": 3.06013589686441e-05, + "loss": 0.0165, + "step": 38730 + }, + { + "action_loss": 0.0009784154826775193, + "epoch": 34.82913669064748, + "step": 38730 + }, + { + "epoch": 34.82913669064748, + "step": 38730, + "torque_loss": 0.08457992225885391 + }, + { + "epoch": 34.82913669064748, + "force_loss": 0.001979834632948041, + "step": 38730 + }, + { + "epoch": 34.83812949640288, + "grad_norm": 0.39605802297592163, + "learning_rate": 3.05759626684317e-05, + "loss": 0.0179, + "step": 38740 + }, + { + "action_loss": 0.0043804761953651905, + "epoch": 34.83812949640288, + "step": 38740 + }, + { + "epoch": 34.83812949640288, + "step": 38740, + "torque_loss": 0.14209119975566864 + }, + { + "epoch": 34.83812949640288, + "force_loss": 0.005905414465814829, + "step": 38740 + }, + { + "epoch": 34.847122302158276, + "grad_norm": 0.284212201833725, + "learning_rate": 3.055057226872896e-05, + "loss": 0.0164, + "step": 38750 + }, + { + "action_loss": 0.004344114568084478, + "epoch": 34.847122302158276, + "step": 38750 + }, + { + "epoch": 34.847122302158276, + "step": 38750, + "torque_loss": 0.06664899736642838 + }, + { + "epoch": 34.847122302158276, + "force_loss": 0.003713706275448203, + "step": 38750 + }, + { + "epoch": 34.856115107913666, + "grad_norm": 0.1520061492919922, + "learning_rate": 3.052518777724887e-05, + "loss": 0.0161, + "step": 38760 + }, + { + "action_loss": 0.004284176509827375, + "epoch": 34.856115107913666, + "step": 38760 + }, + { + "epoch": 34.856115107913666, + "step": 38760, + "torque_loss": 0.13820405304431915 + }, + { + "epoch": 34.856115107913666, + "force_loss": 0.001813484006561339, + "step": 38760 + }, + { + "epoch": 34.865107913669064, + "grad_norm": 0.1246003657579422, + "learning_rate": 3.04998092017025e-05, + "loss": 0.016, + "step": 38770 + }, + { + "action_loss": 0.009232689626514912, + "epoch": 34.865107913669064, + "step": 38770 + }, + { + "epoch": 34.865107913669064, + "step": 38770, + "torque_loss": 0.18753080070018768 + }, + { + "epoch": 34.865107913669064, + "force_loss": 0.00506631750613451, + "step": 38770 + }, + { + "epoch": 34.87410071942446, + "grad_norm": 0.4100170433521271, + "learning_rate": 3.0474436549799246e-05, + "loss": 0.02, + "step": 38780 + }, + { + "action_loss": 0.0022725574672222137, + "epoch": 34.87410071942446, + "step": 38780 + }, + { + "epoch": 34.87410071942446, + "step": 38780, + "torque_loss": 0.18606919050216675 + }, + { + "epoch": 34.87410071942446, + "force_loss": 0.002096575917676091, + "step": 38780 + }, + { + "epoch": 34.88309352517986, + "grad_norm": 0.3019394278526306, + "learning_rate": 3.044906982924661e-05, + "loss": 0.0177, + "step": 38790 + }, + { + "action_loss": 0.0009394208900630474, + "epoch": 34.88309352517986, + "step": 38790 + }, + { + "epoch": 34.88309352517986, + "step": 38790, + "torque_loss": 0.10499713569879532 + }, + { + "epoch": 34.88309352517986, + "force_loss": 0.0010895855957642198, + "step": 38790 + }, + { + "epoch": 34.89208633093525, + "grad_norm": 0.23774023354053497, + "learning_rate": 3.0423709047750337e-05, + "loss": 0.0155, + "step": 38800 + }, + { + "action_loss": 0.003401062684133649, + "epoch": 34.89208633093525, + "step": 38800 + }, + { + "epoch": 34.89208633093525, + "step": 38800, + "torque_loss": 0.08398023992776871 + }, + { + "epoch": 34.89208633093525, + "force_loss": 0.005365829914808273, + "step": 38800 + }, + { + "epoch": 34.90107913669065, + "grad_norm": 0.14938440918922424, + "learning_rate": 3.03983542130144e-05, + "loss": 0.0168, + "step": 38810 + }, + { + "action_loss": 0.0028705305885523558, + "epoch": 34.90107913669065, + "step": 38810 + }, + { + "epoch": 34.90107913669065, + "step": 38810, + "torque_loss": 0.08145985007286072 + }, + { + "epoch": 34.90107913669065, + "force_loss": 0.003623089985921979, + "step": 38810 + }, + { + "epoch": 34.910071942446045, + "grad_norm": 0.3361145853996277, + "learning_rate": 3.0373005332740877e-05, + "loss": 0.0207, + "step": 38820 + }, + { + "action_loss": 0.006614338140934706, + "epoch": 34.910071942446045, + "step": 38820 + }, + { + "epoch": 34.910071942446045, + "step": 38820, + "torque_loss": 0.114862360060215 + }, + { + "epoch": 34.910071942446045, + "force_loss": 0.00877420324832201, + "step": 38820 + }, + { + "epoch": 34.919064748201436, + "grad_norm": 0.19874995946884155, + "learning_rate": 3.034766241463013e-05, + "loss": 0.0165, + "step": 38830 + }, + { + "action_loss": 0.001063765143044293, + "epoch": 34.919064748201436, + "step": 38830 + }, + { + "epoch": 34.919064748201436, + "step": 38830, + "torque_loss": 0.11082597821950912 + }, + { + "epoch": 34.919064748201436, + "force_loss": 0.0019464752404019237, + "step": 38830 + }, + { + "epoch": 34.92805755395683, + "grad_norm": 0.35252317786216736, + "learning_rate": 3.032232546638064e-05, + "loss": 0.0163, + "step": 38840 + }, + { + "action_loss": 0.004800503142178059, + "epoch": 34.92805755395683, + "step": 38840 + }, + { + "epoch": 34.92805755395683, + "step": 38840, + "torque_loss": 0.11431223899126053 + }, + { + "epoch": 34.92805755395683, + "force_loss": 0.004633750766515732, + "step": 38840 + }, + { + "epoch": 34.93705035971223, + "grad_norm": 0.36122679710388184, + "learning_rate": 3.0296994495689114e-05, + "loss": 0.0163, + "step": 38850 + }, + { + "action_loss": 0.007631280925124884, + "epoch": 34.93705035971223, + "step": 38850 + }, + { + "epoch": 34.93705035971223, + "step": 38850, + "torque_loss": 0.13991303741931915 + }, + { + "epoch": 34.93705035971223, + "force_loss": 0.003343901364132762, + "step": 38850 + }, + { + "epoch": 34.94604316546763, + "grad_norm": 0.13534283638000488, + "learning_rate": 3.0271669510250444e-05, + "loss": 0.0166, + "step": 38860 + }, + { + "action_loss": 0.002593817189335823, + "epoch": 34.94604316546763, + "step": 38860 + }, + { + "epoch": 34.94604316546763, + "step": 38860, + "torque_loss": 0.0812850072979927 + }, + { + "epoch": 34.94604316546763, + "force_loss": 0.0023991798516362906, + "step": 38860 + }, + { + "epoch": 34.95503597122302, + "grad_norm": 0.19206443428993225, + "learning_rate": 3.024635051775766e-05, + "loss": 0.0167, + "step": 38870 + }, + { + "action_loss": 0.0023118644021451473, + "epoch": 34.95503597122302, + "step": 38870 + }, + { + "epoch": 34.95503597122302, + "step": 38870, + "torque_loss": 0.1302252858877182 + }, + { + "epoch": 34.95503597122302, + "force_loss": 0.0034537147730588913, + "step": 38870 + }, + { + "epoch": 34.96402877697842, + "grad_norm": 0.10967405885457993, + "learning_rate": 3.022103752590205e-05, + "loss": 0.0196, + "step": 38880 + }, + { + "action_loss": 0.002898571314290166, + "epoch": 34.96402877697842, + "step": 38880 + }, + { + "epoch": 34.96402877697842, + "step": 38880, + "torque_loss": 0.1515694558620453 + }, + { + "epoch": 34.96402877697842, + "force_loss": 0.00570349395275116, + "step": 38880 + }, + { + "epoch": 34.973021582733814, + "grad_norm": 0.3706991672515869, + "learning_rate": 3.0195730542372992e-05, + "loss": 0.0189, + "step": 38890 + }, + { + "action_loss": 0.00310687068849802, + "epoch": 34.973021582733814, + "step": 38890 + }, + { + "epoch": 34.973021582733814, + "step": 38890, + "torque_loss": 0.21740974485874176 + }, + { + "epoch": 34.973021582733814, + "force_loss": 0.004394970368593931, + "step": 38890 + }, + { + "epoch": 34.98201438848921, + "grad_norm": 0.32958418130874634, + "learning_rate": 3.0170429574858084e-05, + "loss": 0.0168, + "step": 38900 + }, + { + "action_loss": 0.002011655131354928, + "epoch": 34.98201438848921, + "step": 38900 + }, + { + "epoch": 34.98201438848921, + "step": 38900, + "torque_loss": 0.09887155145406723 + }, + { + "epoch": 34.98201438848921, + "force_loss": 0.006040406879037619, + "step": 38900 + }, + { + "epoch": 34.9910071942446, + "grad_norm": 0.09772446751594543, + "learning_rate": 3.0145134631043127e-05, + "loss": 0.0149, + "step": 38910 + }, + { + "action_loss": 0.0020546845626085997, + "epoch": 34.9910071942446, + "step": 38910 + }, + { + "epoch": 34.9910071942446, + "step": 38910, + "torque_loss": 0.14445792138576508 + }, + { + "epoch": 34.9910071942446, + "force_loss": 0.0013244961155578494, + "step": 38910 + }, + { + "epoch": 35.0, + "grad_norm": 0.11525759845972061, + "learning_rate": 3.0119845718612018e-05, + "loss": 0.016, + "step": 38920 + }, + { + "action_loss": 0.002357466844841838, + "epoch": 35.0, + "step": 38920 + }, + { + "epoch": 35.0, + "step": 38920, + "torque_loss": 0.15787339210510254 + }, + { + "epoch": 35.0, + "force_loss": 0.002151248510926962, + "step": 38920 + }, + { + "epoch": 35.0089928057554, + "grad_norm": 0.5724800825119019, + "learning_rate": 3.009456284524688e-05, + "loss": 0.0204, + "step": 38930 + }, + { + "action_loss": 0.008969360962510109, + "epoch": 35.0089928057554, + "step": 38930 + }, + { + "epoch": 35.0089928057554, + "step": 38930, + "torque_loss": 0.11319664865732193 + }, + { + "epoch": 35.0089928057554, + "force_loss": 0.012574061751365662, + "step": 38930 + }, + { + "epoch": 35.01798561151079, + "grad_norm": 0.11487343907356262, + "learning_rate": 3.0069286018627967e-05, + "loss": 0.0181, + "step": 38940 + }, + { + "action_loss": 0.0036686931271106005, + "epoch": 35.01798561151079, + "step": 38940 + }, + { + "epoch": 35.01798561151079, + "step": 38940, + "torque_loss": 0.13395781815052032 + }, + { + "epoch": 35.01798561151079, + "force_loss": 0.013992962427437305, + "step": 38940 + }, + { + "epoch": 35.026978417266186, + "grad_norm": 0.3809404671192169, + "learning_rate": 3.0044015246433743e-05, + "loss": 0.021, + "step": 38950 + }, + { + "action_loss": 0.013758336193859577, + "epoch": 35.026978417266186, + "step": 38950 + }, + { + "epoch": 35.026978417266186, + "step": 38950, + "torque_loss": 0.14249445497989655 + }, + { + "epoch": 35.026978417266186, + "force_loss": 0.01452005747705698, + "step": 38950 + }, + { + "epoch": 35.03597122302158, + "grad_norm": 0.12666234374046326, + "learning_rate": 3.0018750536340755e-05, + "loss": 0.0201, + "step": 38960 + }, + { + "action_loss": 0.03742603957653046, + "epoch": 35.03597122302158, + "step": 38960 + }, + { + "epoch": 35.03597122302158, + "step": 38960, + "torque_loss": 0.22465144097805023 + }, + { + "epoch": 35.03597122302158, + "force_loss": 0.02060875855386257, + "step": 38960 + }, + { + "epoch": 35.04496402877698, + "grad_norm": 0.16837553679943085, + "learning_rate": 2.999349189602378e-05, + "loss": 0.0251, + "step": 38970 + }, + { + "action_loss": 0.0012964889174327254, + "epoch": 35.04496402877698, + "step": 38970 + }, + { + "epoch": 35.04496402877698, + "step": 38970, + "torque_loss": 0.10754083842039108 + }, + { + "epoch": 35.04496402877698, + "force_loss": 0.0011754450388252735, + "step": 38970 + }, + { + "epoch": 35.05395683453237, + "grad_norm": 0.24246415495872498, + "learning_rate": 2.9968239333155733e-05, + "loss": 0.0189, + "step": 38980 + }, + { + "action_loss": 0.002997683361172676, + "epoch": 35.05395683453237, + "step": 38980 + }, + { + "epoch": 35.05395683453237, + "step": 38980, + "torque_loss": 0.14538516104221344 + }, + { + "epoch": 35.05395683453237, + "force_loss": 0.006573410239070654, + "step": 38980 + }, + { + "epoch": 35.06294964028777, + "grad_norm": 0.11037357151508331, + "learning_rate": 2.994299285540767e-05, + "loss": 0.0151, + "step": 38990 + }, + { + "action_loss": 0.0011510731419548392, + "epoch": 35.06294964028777, + "step": 38990 + }, + { + "epoch": 35.06294964028777, + "step": 38990, + "torque_loss": 0.10932561010122299 + }, + { + "epoch": 35.06294964028777, + "force_loss": 0.0018023927696049213, + "step": 38990 + }, + { + "epoch": 35.07194244604317, + "grad_norm": 0.2527114748954773, + "learning_rate": 2.9917752470448813e-05, + "loss": 0.017, + "step": 39000 + }, + { + "action_loss": 0.0013888214016333222, + "epoch": 35.07194244604317, + "step": 39000 + }, + { + "epoch": 35.07194244604317, + "step": 39000, + "torque_loss": 0.09995990991592407 + }, + { + "epoch": 35.07194244604317, + "force_loss": 0.0023165512830018997, + "step": 39000 + }, + { + "epoch": 35.080935251798564, + "grad_norm": 0.13498377799987793, + "learning_rate": 2.9892518185946495e-05, + "loss": 0.0172, + "step": 39010 + }, + { + "action_loss": 0.004496482200920582, + "epoch": 35.080935251798564, + "step": 39010 + }, + { + "epoch": 35.080935251798564, + "step": 39010, + "torque_loss": 0.12354707717895508 + }, + { + "epoch": 35.080935251798564, + "force_loss": 0.0062436000443995, + "step": 39010 + }, + { + "epoch": 35.089928057553955, + "grad_norm": 0.10693199932575226, + "learning_rate": 2.986729000956624e-05, + "loss": 0.0215, + "step": 39020 + }, + { + "action_loss": 0.0035669002681970596, + "epoch": 35.089928057553955, + "step": 39020 + }, + { + "epoch": 35.089928057553955, + "step": 39020, + "torque_loss": 0.14371724426746368 + }, + { + "epoch": 35.089928057553955, + "force_loss": 0.010771863162517548, + "step": 39020 + }, + { + "epoch": 35.09892086330935, + "grad_norm": 0.2423935979604721, + "learning_rate": 2.9842067948971736e-05, + "loss": 0.0174, + "step": 39030 + }, + { + "action_loss": 0.0022365062031894922, + "epoch": 35.09892086330935, + "step": 39030 + }, + { + "epoch": 35.09892086330935, + "step": 39030, + "torque_loss": 0.10635358095169067 + }, + { + "epoch": 35.09892086330935, + "force_loss": 0.0015481151640415192, + "step": 39030 + }, + { + "epoch": 35.10791366906475, + "grad_norm": 0.1286407858133316, + "learning_rate": 2.9816852011824727e-05, + "loss": 0.0147, + "step": 39040 + }, + { + "action_loss": 0.002132033696398139, + "epoch": 35.10791366906475, + "step": 39040 + }, + { + "epoch": 35.10791366906475, + "step": 39040, + "torque_loss": 0.09622766822576523 + }, + { + "epoch": 35.10791366906475, + "force_loss": 0.001295006019063294, + "step": 39040 + }, + { + "epoch": 35.11690647482014, + "grad_norm": 0.08749210089445114, + "learning_rate": 2.979164220578519e-05, + "loss": 0.0165, + "step": 39050 + }, + { + "action_loss": 0.01790345273911953, + "epoch": 35.11690647482014, + "step": 39050 + }, + { + "epoch": 35.11690647482014, + "step": 39050, + "torque_loss": 0.11699700355529785 + }, + { + "epoch": 35.11690647482014, + "force_loss": 0.02455134503543377, + "step": 39050 + }, + { + "epoch": 35.12589928057554, + "grad_norm": 0.19494834542274475, + "learning_rate": 2.9766438538511165e-05, + "loss": 0.017, + "step": 39060 + }, + { + "action_loss": 0.002067138673737645, + "epoch": 35.12589928057554, + "step": 39060 + }, + { + "epoch": 35.12589928057554, + "step": 39060, + "torque_loss": 0.10579248517751694 + }, + { + "epoch": 35.12589928057554, + "force_loss": 0.004045093897730112, + "step": 39060 + }, + { + "epoch": 35.134892086330936, + "grad_norm": 0.31693965196609497, + "learning_rate": 2.9741241017658873e-05, + "loss": 0.0162, + "step": 39070 + }, + { + "action_loss": 0.003981070127338171, + "epoch": 35.134892086330936, + "step": 39070 + }, + { + "epoch": 35.134892086330936, + "step": 39070, + "torque_loss": 0.1272837519645691 + }, + { + "epoch": 35.134892086330936, + "force_loss": 0.00482361251488328, + "step": 39070 + }, + { + "epoch": 35.143884892086334, + "grad_norm": 0.3081154525279999, + "learning_rate": 2.971604965088267e-05, + "loss": 0.0163, + "step": 39080 + }, + { + "action_loss": 0.0040407381020486355, + "epoch": 35.143884892086334, + "step": 39080 + }, + { + "epoch": 35.143884892086334, + "step": 39080, + "torque_loss": 0.12722699344158173 + }, + { + "epoch": 35.143884892086334, + "force_loss": 0.00501455832272768, + "step": 39080 + }, + { + "epoch": 35.152877697841724, + "grad_norm": 0.17428624629974365, + "learning_rate": 2.9690864445835008e-05, + "loss": 0.0167, + "step": 39090 + }, + { + "action_loss": 0.004586245398968458, + "epoch": 35.152877697841724, + "step": 39090 + }, + { + "epoch": 35.152877697841724, + "step": 39090, + "torque_loss": 0.1103062704205513 + }, + { + "epoch": 35.152877697841724, + "force_loss": 0.010136381722986698, + "step": 39090 + }, + { + "epoch": 35.16187050359712, + "grad_norm": 0.2923608422279358, + "learning_rate": 2.966568541016651e-05, + "loss": 0.0255, + "step": 39100 + }, + { + "action_loss": 0.0013203794369474053, + "epoch": 35.16187050359712, + "step": 39100 + }, + { + "epoch": 35.16187050359712, + "step": 39100, + "torque_loss": 0.14165626466274261 + }, + { + "epoch": 35.16187050359712, + "force_loss": 0.0013694696826860309, + "step": 39100 + }, + { + "epoch": 35.17086330935252, + "grad_norm": 0.25528061389923096, + "learning_rate": 2.9640512551525867e-05, + "loss": 0.0201, + "step": 39110 + }, + { + "action_loss": 0.0019578454084694386, + "epoch": 35.17086330935252, + "step": 39110 + }, + { + "epoch": 35.17086330935252, + "step": 39110, + "torque_loss": 0.17274141311645508 + }, + { + "epoch": 35.17086330935252, + "force_loss": 0.0021876245737075806, + "step": 39110 + }, + { + "epoch": 35.17985611510792, + "grad_norm": 0.24375079572200775, + "learning_rate": 2.961534587755995e-05, + "loss": 0.0163, + "step": 39120 + }, + { + "action_loss": 0.0041252062655985355, + "epoch": 35.17985611510792, + "step": 39120 + }, + { + "epoch": 35.17985611510792, + "step": 39120, + "torque_loss": 0.09842798113822937 + }, + { + "epoch": 35.17985611510792, + "force_loss": 0.0029616865795105696, + "step": 39120 + }, + { + "epoch": 35.18884892086331, + "grad_norm": 0.2040484994649887, + "learning_rate": 2.959018539591375e-05, + "loss": 0.0188, + "step": 39130 + }, + { + "action_loss": 0.00480501726269722, + "epoch": 35.18884892086331, + "step": 39130 + }, + { + "epoch": 35.18884892086331, + "step": 39130, + "torque_loss": 0.1627064347267151 + }, + { + "epoch": 35.18884892086331, + "force_loss": 0.013547913171350956, + "step": 39130 + }, + { + "epoch": 35.197841726618705, + "grad_norm": 0.18873806297779083, + "learning_rate": 2.9565031114230325e-05, + "loss": 0.0199, + "step": 39140 + }, + { + "action_loss": 0.0026393691077828407, + "epoch": 35.197841726618705, + "step": 39140 + }, + { + "epoch": 35.197841726618705, + "step": 39140, + "torque_loss": 0.13031871616840363 + }, + { + "epoch": 35.197841726618705, + "force_loss": 0.0018990668468177319, + "step": 39140 + }, + { + "epoch": 35.2068345323741, + "grad_norm": 0.3732083737850189, + "learning_rate": 2.9539883040150895e-05, + "loss": 0.0178, + "step": 39150 + }, + { + "action_loss": 0.0017531081102788448, + "epoch": 35.2068345323741, + "step": 39150 + }, + { + "epoch": 35.2068345323741, + "step": 39150, + "torque_loss": 0.1356305032968521 + }, + { + "epoch": 35.2068345323741, + "force_loss": 0.0031208631116896868, + "step": 39150 + }, + { + "epoch": 35.21582733812949, + "grad_norm": 0.10988090187311172, + "learning_rate": 2.9514741181314774e-05, + "loss": 0.0181, + "step": 39160 + }, + { + "action_loss": 0.0022965031675994396, + "epoch": 35.21582733812949, + "step": 39160 + }, + { + "epoch": 35.21582733812949, + "step": 39160, + "torque_loss": 0.172601580619812 + }, + { + "epoch": 35.21582733812949, + "force_loss": 0.008354817517101765, + "step": 39160 + }, + { + "epoch": 35.22482014388489, + "grad_norm": 0.26232537627220154, + "learning_rate": 2.94896055453594e-05, + "loss": 0.015, + "step": 39170 + }, + { + "action_loss": 0.0013383496552705765, + "epoch": 35.22482014388489, + "step": 39170 + }, + { + "epoch": 35.22482014388489, + "step": 39170, + "torque_loss": 0.12118307501077652 + }, + { + "epoch": 35.22482014388489, + "force_loss": 0.000979963573627174, + "step": 39170 + }, + { + "epoch": 35.23381294964029, + "grad_norm": 0.20352475345134735, + "learning_rate": 2.9464476139920332e-05, + "loss": 0.0173, + "step": 39180 + }, + { + "action_loss": 0.002695555565878749, + "epoch": 35.23381294964029, + "step": 39180 + }, + { + "epoch": 35.23381294964029, + "step": 39180, + "torque_loss": 0.11937006562948227 + }, + { + "epoch": 35.23381294964029, + "force_loss": 0.002426528139039874, + "step": 39180 + }, + { + "epoch": 35.242805755395686, + "grad_norm": 0.17224377393722534, + "learning_rate": 2.9439352972631186e-05, + "loss": 0.0131, + "step": 39190 + }, + { + "action_loss": 0.0031537513714283705, + "epoch": 35.242805755395686, + "step": 39190 + }, + { + "epoch": 35.242805755395686, + "step": 39190, + "torque_loss": 0.15531350672245026 + }, + { + "epoch": 35.242805755395686, + "force_loss": 0.006157737225294113, + "step": 39190 + }, + { + "epoch": 35.25179856115108, + "grad_norm": 0.12478018552064896, + "learning_rate": 2.9414236051123757e-05, + "loss": 0.0173, + "step": 39200 + }, + { + "action_loss": 0.0021453325171023607, + "epoch": 35.25179856115108, + "step": 39200 + }, + { + "epoch": 35.25179856115108, + "step": 39200, + "torque_loss": 0.1252780556678772 + }, + { + "epoch": 35.25179856115108, + "force_loss": 0.0020287532825022936, + "step": 39200 + }, + { + "epoch": 35.260791366906474, + "grad_norm": 0.09150382876396179, + "learning_rate": 2.938912538302785e-05, + "loss": 0.0155, + "step": 39210 + }, + { + "action_loss": 0.0031896408181637526, + "epoch": 35.260791366906474, + "step": 39210 + }, + { + "epoch": 35.260791366906474, + "step": 39210, + "torque_loss": 0.14173874258995056 + }, + { + "epoch": 35.260791366906474, + "force_loss": 0.0050001805648207664, + "step": 39210 + }, + { + "epoch": 35.26978417266187, + "grad_norm": 0.23232616484165192, + "learning_rate": 2.9364020975971464e-05, + "loss": 0.0138, + "step": 39220 + }, + { + "action_loss": 0.0010937395272776484, + "epoch": 35.26978417266187, + "step": 39220 + }, + { + "epoch": 35.26978417266187, + "step": 39220, + "torque_loss": 0.10851342231035233 + }, + { + "epoch": 35.26978417266187, + "force_loss": 0.0009604405495338142, + "step": 39220 + }, + { + "epoch": 35.27877697841727, + "grad_norm": 0.08099891245365143, + "learning_rate": 2.9338922837580657e-05, + "loss": 0.016, + "step": 39230 + }, + { + "action_loss": 0.004228802863508463, + "epoch": 35.27877697841727, + "step": 39230 + }, + { + "epoch": 35.27877697841727, + "step": 39230, + "torque_loss": 0.11079681664705276 + }, + { + "epoch": 35.27877697841727, + "force_loss": 0.006905920337885618, + "step": 39230 + }, + { + "epoch": 35.28776978417266, + "grad_norm": 0.22082647681236267, + "learning_rate": 2.931383097547955e-05, + "loss": 0.0186, + "step": 39240 + }, + { + "action_loss": 0.006835064385086298, + "epoch": 35.28776978417266, + "step": 39240 + }, + { + "epoch": 35.28776978417266, + "step": 39240, + "torque_loss": 0.11697503924369812 + }, + { + "epoch": 35.28776978417266, + "force_loss": 0.01195115689188242, + "step": 39240 + }, + { + "epoch": 35.29676258992806, + "grad_norm": 0.298631489276886, + "learning_rate": 2.928874539729043e-05, + "loss": 0.0169, + "step": 39250 + }, + { + "action_loss": 0.0017788062104955316, + "epoch": 35.29676258992806, + "step": 39250 + }, + { + "epoch": 35.29676258992806, + "step": 39250, + "torque_loss": 0.0846819281578064 + }, + { + "epoch": 35.29676258992806, + "force_loss": 0.001323180622421205, + "step": 39250 + }, + { + "epoch": 35.305755395683455, + "grad_norm": 0.4005330204963684, + "learning_rate": 2.926366611063358e-05, + "loss": 0.0157, + "step": 39260 + }, + { + "action_loss": 0.002382399281486869, + "epoch": 35.305755395683455, + "step": 39260 + }, + { + "epoch": 35.305755395683455, + "step": 39260, + "torque_loss": 0.19659322500228882 + }, + { + "epoch": 35.305755395683455, + "force_loss": 0.0021546969655901194, + "step": 39260 + }, + { + "epoch": 35.314748201438846, + "grad_norm": 0.11796467006206512, + "learning_rate": 2.9238593123127463e-05, + "loss": 0.0168, + "step": 39270 + }, + { + "action_loss": 0.0013092047302052379, + "epoch": 35.314748201438846, + "step": 39270 + }, + { + "epoch": 35.314748201438846, + "step": 39270, + "torque_loss": 0.07257264107465744 + }, + { + "epoch": 35.314748201438846, + "force_loss": 0.003968419507145882, + "step": 39270 + }, + { + "epoch": 35.32374100719424, + "grad_norm": 0.08360259234905243, + "learning_rate": 2.9213526442388583e-05, + "loss": 0.0143, + "step": 39280 + }, + { + "action_loss": 0.0014142096042633057, + "epoch": 35.32374100719424, + "step": 39280 + }, + { + "epoch": 35.32374100719424, + "step": 39280, + "torque_loss": 0.18021027743816376 + }, + { + "epoch": 35.32374100719424, + "force_loss": 0.0019014537101611495, + "step": 39280 + }, + { + "epoch": 35.33273381294964, + "grad_norm": 0.08111292868852615, + "learning_rate": 2.9188466076031545e-05, + "loss": 0.0141, + "step": 39290 + }, + { + "action_loss": 0.0012782718986272812, + "epoch": 35.33273381294964, + "step": 39290 + }, + { + "epoch": 35.33273381294964, + "step": 39290, + "torque_loss": 0.14475487172603607 + }, + { + "epoch": 35.33273381294964, + "force_loss": 0.0027069367934018373, + "step": 39290 + }, + { + "epoch": 35.34172661870504, + "grad_norm": 0.19811029732227325, + "learning_rate": 2.9163412031669012e-05, + "loss": 0.0158, + "step": 39300 + }, + { + "action_loss": 0.0013705870369449258, + "epoch": 35.34172661870504, + "step": 39300 + }, + { + "epoch": 35.34172661870504, + "step": 39300, + "torque_loss": 0.11132822185754776 + }, + { + "epoch": 35.34172661870504, + "force_loss": 0.0021050074137747288, + "step": 39300 + }, + { + "epoch": 35.35071942446043, + "grad_norm": 0.08124122768640518, + "learning_rate": 2.913836431691175e-05, + "loss": 0.0181, + "step": 39310 + }, + { + "action_loss": 0.0010235168738290668, + "epoch": 35.35071942446043, + "step": 39310 + }, + { + "epoch": 35.35071942446043, + "step": 39310, + "torque_loss": 0.1073768138885498 + }, + { + "epoch": 35.35071942446043, + "force_loss": 0.0006691438029520214, + "step": 39310 + }, + { + "epoch": 35.35971223021583, + "grad_norm": 0.09625593572854996, + "learning_rate": 2.9113322939368583e-05, + "loss": 0.0179, + "step": 39320 + }, + { + "action_loss": 0.001894653425551951, + "epoch": 35.35971223021583, + "step": 39320 + }, + { + "epoch": 35.35971223021583, + "step": 39320, + "torque_loss": 0.1327088177204132 + }, + { + "epoch": 35.35971223021583, + "force_loss": 0.0035764549393206835, + "step": 39320 + }, + { + "epoch": 35.368705035971225, + "grad_norm": 0.13951651751995087, + "learning_rate": 2.9088287906646427e-05, + "loss": 0.0185, + "step": 39330 + }, + { + "action_loss": 0.005528716370463371, + "epoch": 35.368705035971225, + "step": 39330 + }, + { + "epoch": 35.368705035971225, + "step": 39330, + "torque_loss": 0.12613056600093842 + }, + { + "epoch": 35.368705035971225, + "force_loss": 0.006761878728866577, + "step": 39330 + }, + { + "epoch": 35.37769784172662, + "grad_norm": 0.2915889322757721, + "learning_rate": 2.906325922635024e-05, + "loss": 0.0169, + "step": 39340 + }, + { + "action_loss": 0.0013506049290299416, + "epoch": 35.37769784172662, + "step": 39340 + }, + { + "epoch": 35.37769784172662, + "step": 39340, + "torque_loss": 0.0970362201333046 + }, + { + "epoch": 35.37769784172662, + "force_loss": 0.0008510045590810478, + "step": 39340 + }, + { + "epoch": 35.38669064748201, + "grad_norm": 0.2827681601047516, + "learning_rate": 2.903823690608313e-05, + "loss": 0.0153, + "step": 39350 + }, + { + "action_loss": 0.004065061453729868, + "epoch": 35.38669064748201, + "step": 39350 + }, + { + "epoch": 35.38669064748201, + "step": 39350, + "torque_loss": 0.1831979751586914 + }, + { + "epoch": 35.38669064748201, + "force_loss": 0.005742497276514769, + "step": 39350 + }, + { + "epoch": 35.39568345323741, + "grad_norm": 0.127257838845253, + "learning_rate": 2.9013220953446174e-05, + "loss": 0.0171, + "step": 39360 + }, + { + "action_loss": 0.0010706715984269977, + "epoch": 35.39568345323741, + "step": 39360 + }, + { + "epoch": 35.39568345323741, + "step": 39360, + "torque_loss": 0.12138820439577103 + }, + { + "epoch": 35.39568345323741, + "force_loss": 0.0009163037757389247, + "step": 39360 + }, + { + "epoch": 35.40467625899281, + "grad_norm": 0.144105464220047, + "learning_rate": 2.8988211376038564e-05, + "loss": 0.0147, + "step": 39370 + }, + { + "action_loss": 0.0012387748574838042, + "epoch": 35.40467625899281, + "step": 39370 + }, + { + "epoch": 35.40467625899281, + "step": 39370, + "torque_loss": 0.12555968761444092 + }, + { + "epoch": 35.40467625899281, + "force_loss": 0.000849484873469919, + "step": 39370 + }, + { + "epoch": 35.4136690647482, + "grad_norm": 0.31135427951812744, + "learning_rate": 2.8963208181457564e-05, + "loss": 0.0155, + "step": 39380 + }, + { + "action_loss": 0.014255337417125702, + "epoch": 35.4136690647482, + "step": 39380 + }, + { + "epoch": 35.4136690647482, + "step": 39380, + "torque_loss": 0.15915845334529877 + }, + { + "epoch": 35.4136690647482, + "force_loss": 0.017163896933197975, + "step": 39380 + }, + { + "epoch": 35.422661870503596, + "grad_norm": 0.14365169405937195, + "learning_rate": 2.8938211377298453e-05, + "loss": 0.023, + "step": 39390 + }, + { + "action_loss": 0.004171127453446388, + "epoch": 35.422661870503596, + "step": 39390 + }, + { + "epoch": 35.422661870503596, + "step": 39390, + "torque_loss": 0.09644972532987595 + }, + { + "epoch": 35.422661870503596, + "force_loss": 0.0029431155417114496, + "step": 39390 + }, + { + "epoch": 35.431654676258994, + "grad_norm": 0.1570999026298523, + "learning_rate": 2.8913220971154652e-05, + "loss": 0.0232, + "step": 39400 + }, + { + "action_loss": 0.004454169888049364, + "epoch": 35.431654676258994, + "step": 39400 + }, + { + "epoch": 35.431654676258994, + "step": 39400, + "torque_loss": 0.12635324895381927 + }, + { + "epoch": 35.431654676258994, + "force_loss": 0.002220805501565337, + "step": 39400 + }, + { + "epoch": 35.44064748201439, + "grad_norm": 0.11610624939203262, + "learning_rate": 2.888823697061753e-05, + "loss": 0.0159, + "step": 39410 + }, + { + "action_loss": 0.0035125084687024355, + "epoch": 35.44064748201439, + "step": 39410 + }, + { + "epoch": 35.44064748201439, + "step": 39410, + "torque_loss": 0.1324479579925537 + }, + { + "epoch": 35.44064748201439, + "force_loss": 0.005658428650349379, + "step": 39410 + }, + { + "epoch": 35.44964028776978, + "grad_norm": 0.19316492974758148, + "learning_rate": 2.8863259383276618e-05, + "loss": 0.0178, + "step": 39420 + }, + { + "action_loss": 0.00290547008626163, + "epoch": 35.44964028776978, + "step": 39420 + }, + { + "epoch": 35.44964028776978, + "step": 39420, + "torque_loss": 0.12777863442897797 + }, + { + "epoch": 35.44964028776978, + "force_loss": 0.017759613692760468, + "step": 39420 + }, + { + "epoch": 35.45863309352518, + "grad_norm": 0.27461129426956177, + "learning_rate": 2.8838288216719395e-05, + "loss": 0.0166, + "step": 39430 + }, + { + "action_loss": 0.003124079667031765, + "epoch": 35.45863309352518, + "step": 39430 + }, + { + "epoch": 35.45863309352518, + "step": 39430, + "torque_loss": 0.15821687877178192 + }, + { + "epoch": 35.45863309352518, + "force_loss": 0.0033759765792638063, + "step": 39430 + }, + { + "epoch": 35.46762589928058, + "grad_norm": 0.17730678617954254, + "learning_rate": 2.8813323478531484e-05, + "loss": 0.0158, + "step": 39440 + }, + { + "action_loss": 0.0015664267120882869, + "epoch": 35.46762589928058, + "step": 39440 + }, + { + "epoch": 35.46762589928058, + "step": 39440, + "torque_loss": 0.1286115199327469 + }, + { + "epoch": 35.46762589928058, + "force_loss": 0.0024287817068398, + "step": 39440 + }, + { + "epoch": 35.476618705035975, + "grad_norm": 0.49196091294288635, + "learning_rate": 2.8788365176296496e-05, + "loss": 0.0231, + "step": 39450 + }, + { + "action_loss": 0.00135201180819422, + "epoch": 35.476618705035975, + "step": 39450 + }, + { + "epoch": 35.476618705035975, + "step": 39450, + "torque_loss": 0.12456002086400986 + }, + { + "epoch": 35.476618705035975, + "force_loss": 0.001781140803359449, + "step": 39450 + }, + { + "epoch": 35.485611510791365, + "grad_norm": 0.13427764177322388, + "learning_rate": 2.876341331759611e-05, + "loss": 0.0143, + "step": 39460 + }, + { + "action_loss": 0.0027898463886231184, + "epoch": 35.485611510791365, + "step": 39460 + }, + { + "epoch": 35.485611510791365, + "step": 39460, + "torque_loss": 0.13061822950839996 + }, + { + "epoch": 35.485611510791365, + "force_loss": 0.0048857200890779495, + "step": 39460 + }, + { + "epoch": 35.49460431654676, + "grad_norm": 0.07692616432905197, + "learning_rate": 2.8738467910010036e-05, + "loss": 0.0194, + "step": 39470 + }, + { + "action_loss": 0.0025288949254900217, + "epoch": 35.49460431654676, + "step": 39470 + }, + { + "epoch": 35.49460431654676, + "step": 39470, + "torque_loss": 0.10948216915130615 + }, + { + "epoch": 35.49460431654676, + "force_loss": 0.004075657110661268, + "step": 39470 + }, + { + "epoch": 35.50359712230216, + "grad_norm": 0.3425906300544739, + "learning_rate": 2.8713528961116032e-05, + "loss": 0.0158, + "step": 39480 + }, + { + "action_loss": 0.014046314172446728, + "epoch": 35.50359712230216, + "step": 39480 + }, + { + "epoch": 35.50359712230216, + "step": 39480, + "torque_loss": 0.15999191999435425 + }, + { + "epoch": 35.50359712230216, + "force_loss": 0.012685430236160755, + "step": 39480 + }, + { + "epoch": 35.51258992805755, + "grad_norm": 0.1947794407606125, + "learning_rate": 2.8688596478489875e-05, + "loss": 0.0192, + "step": 39490 + }, + { + "action_loss": 0.004564117640256882, + "epoch": 35.51258992805755, + "step": 39490 + }, + { + "epoch": 35.51258992805755, + "step": 39490, + "torque_loss": 0.1416541188955307 + }, + { + "epoch": 35.51258992805755, + "force_loss": 0.007333502173423767, + "step": 39490 + }, + { + "epoch": 35.52158273381295, + "grad_norm": 0.2513328492641449, + "learning_rate": 2.8663670469705434e-05, + "loss": 0.0172, + "step": 39500 + }, + { + "action_loss": 0.0026504509150981903, + "epoch": 35.52158273381295, + "step": 39500 + }, + { + "epoch": 35.52158273381295, + "step": 39500, + "torque_loss": 0.10963200777769089 + }, + { + "epoch": 35.52158273381295, + "force_loss": 0.004098037723451853, + "step": 39500 + }, + { + "epoch": 35.530575539568346, + "grad_norm": 0.11168992519378662, + "learning_rate": 2.8638750942334546e-05, + "loss": 0.0187, + "step": 39510 + }, + { + "action_loss": 0.002348459092900157, + "epoch": 35.530575539568346, + "step": 39510 + }, + { + "epoch": 35.530575539568346, + "step": 39510, + "torque_loss": 0.13284127414226532 + }, + { + "epoch": 35.530575539568346, + "force_loss": 0.002375292591750622, + "step": 39510 + }, + { + "epoch": 35.539568345323744, + "grad_norm": 0.282320499420166, + "learning_rate": 2.8613837903947115e-05, + "loss": 0.0151, + "step": 39520 + }, + { + "action_loss": 0.01882605440914631, + "epoch": 35.539568345323744, + "step": 39520 + }, + { + "epoch": 35.539568345323744, + "step": 39520, + "torque_loss": 0.14823205769062042 + }, + { + "epoch": 35.539568345323744, + "force_loss": 0.032957300543785095, + "step": 39520 + }, + { + "epoch": 35.548561151079134, + "grad_norm": 0.23333409428596497, + "learning_rate": 2.858893136211106e-05, + "loss": 0.0182, + "step": 39530 + }, + { + "action_loss": 0.007244527339935303, + "epoch": 35.548561151079134, + "step": 39530 + }, + { + "epoch": 35.548561151079134, + "step": 39530, + "torque_loss": 0.1048051193356514 + }, + { + "epoch": 35.548561151079134, + "force_loss": 0.006691901013255119, + "step": 39530 + }, + { + "epoch": 35.55755395683453, + "grad_norm": 0.1614960879087448, + "learning_rate": 2.8564031324392315e-05, + "loss": 0.0162, + "step": 39540 + }, + { + "action_loss": 0.0025056616868823767, + "epoch": 35.55755395683453, + "step": 39540 + }, + { + "epoch": 35.55755395683453, + "step": 39540, + "torque_loss": 0.12144249677658081 + }, + { + "epoch": 35.55755395683453, + "force_loss": 0.003249605419114232, + "step": 39540 + }, + { + "epoch": 35.56654676258993, + "grad_norm": 0.3132919669151306, + "learning_rate": 2.85391377983549e-05, + "loss": 0.0176, + "step": 39550 + }, + { + "action_loss": 0.00451565720140934, + "epoch": 35.56654676258993, + "step": 39550 + }, + { + "epoch": 35.56654676258993, + "step": 39550, + "torque_loss": 0.14093726873397827 + }, + { + "epoch": 35.56654676258993, + "force_loss": 0.008190018124878407, + "step": 39550 + }, + { + "epoch": 35.57553956834532, + "grad_norm": 0.3187926709651947, + "learning_rate": 2.851425079156075e-05, + "loss": 0.0211, + "step": 39560 + }, + { + "action_loss": 0.0034284184221178293, + "epoch": 35.57553956834532, + "step": 39560 + }, + { + "epoch": 35.57553956834532, + "step": 39560, + "torque_loss": 0.11422654241323471 + }, + { + "epoch": 35.57553956834532, + "force_loss": 0.004590218886733055, + "step": 39560 + }, + { + "epoch": 35.58453237410072, + "grad_norm": 0.42136937379837036, + "learning_rate": 2.848937031156994e-05, + "loss": 0.0156, + "step": 39570 + }, + { + "action_loss": 0.001175474957562983, + "epoch": 35.58453237410072, + "step": 39570 + }, + { + "epoch": 35.58453237410072, + "step": 39570, + "torque_loss": 0.14840276539325714 + }, + { + "epoch": 35.58453237410072, + "force_loss": 0.0011653221445158124, + "step": 39570 + }, + { + "epoch": 35.593525179856115, + "grad_norm": 0.22646832466125488, + "learning_rate": 2.846449636594044e-05, + "loss": 0.0217, + "step": 39580 + }, + { + "action_loss": 0.008363465778529644, + "epoch": 35.593525179856115, + "step": 39580 + }, + { + "epoch": 35.593525179856115, + "step": 39580, + "torque_loss": 0.1058172956109047 + }, + { + "epoch": 35.593525179856115, + "force_loss": 0.014881676994264126, + "step": 39580 + }, + { + "epoch": 35.60251798561151, + "grad_norm": 0.1466566026210785, + "learning_rate": 2.843962896222836e-05, + "loss": 0.0165, + "step": 39590 + }, + { + "action_loss": 0.008684664964675903, + "epoch": 35.60251798561151, + "step": 39590 + }, + { + "epoch": 35.60251798561151, + "step": 39590, + "torque_loss": 0.14061520993709564 + }, + { + "epoch": 35.60251798561151, + "force_loss": 0.016985170543193817, + "step": 39590 + }, + { + "epoch": 35.611510791366904, + "grad_norm": 0.17727145552635193, + "learning_rate": 2.8414768107987722e-05, + "loss": 0.0179, + "step": 39600 + }, + { + "action_loss": 0.0039934213273227215, + "epoch": 35.611510791366904, + "step": 39600 + }, + { + "epoch": 35.611510791366904, + "step": 39600, + "torque_loss": 0.11750852316617966 + }, + { + "epoch": 35.611510791366904, + "force_loss": 0.004186486825346947, + "step": 39600 + }, + { + "epoch": 35.6205035971223, + "grad_norm": 0.2128559947013855, + "learning_rate": 2.838991381077061e-05, + "loss": 0.0185, + "step": 39610 + }, + { + "action_loss": 0.003089507343247533, + "epoch": 35.6205035971223, + "step": 39610 + }, + { + "epoch": 35.6205035971223, + "step": 39610, + "torque_loss": 0.1126134991645813 + }, + { + "epoch": 35.6205035971223, + "force_loss": 0.006907335016876459, + "step": 39610 + }, + { + "epoch": 35.6294964028777, + "grad_norm": 0.15802417695522308, + "learning_rate": 2.83650660781271e-05, + "loss": 0.0154, + "step": 39620 + }, + { + "action_loss": 0.001239847973920405, + "epoch": 35.6294964028777, + "step": 39620 + }, + { + "epoch": 35.6294964028777, + "step": 39620, + "torque_loss": 0.09498241543769836 + }, + { + "epoch": 35.6294964028777, + "force_loss": 0.001859107636846602, + "step": 39620 + }, + { + "epoch": 35.638489208633096, + "grad_norm": 0.1919955164194107, + "learning_rate": 2.8340224917605285e-05, + "loss": 0.0145, + "step": 39630 + }, + { + "action_loss": 0.003541032550856471, + "epoch": 35.638489208633096, + "step": 39630 + }, + { + "epoch": 35.638489208633096, + "step": 39630, + "torque_loss": 0.13486574590206146 + }, + { + "epoch": 35.638489208633096, + "force_loss": 0.003753454191610217, + "step": 39630 + }, + { + "epoch": 35.64748201438849, + "grad_norm": 0.20450246334075928, + "learning_rate": 2.831539033675122e-05, + "loss": 0.0187, + "step": 39640 + }, + { + "action_loss": 0.003315453650429845, + "epoch": 35.64748201438849, + "step": 39640 + }, + { + "epoch": 35.64748201438849, + "step": 39640, + "torque_loss": 0.09184668213129044 + }, + { + "epoch": 35.64748201438849, + "force_loss": 0.001953155966475606, + "step": 39640 + }, + { + "epoch": 35.656474820143885, + "grad_norm": 0.1740974634885788, + "learning_rate": 2.8290562343109038e-05, + "loss": 0.0209, + "step": 39650 + }, + { + "action_loss": 0.004921962972730398, + "epoch": 35.656474820143885, + "step": 39650 + }, + { + "epoch": 35.656474820143885, + "step": 39650, + "torque_loss": 0.08533027768135071 + }, + { + "epoch": 35.656474820143885, + "force_loss": 0.006142523139715195, + "step": 39650 + }, + { + "epoch": 35.66546762589928, + "grad_norm": 0.11232845485210419, + "learning_rate": 2.826574094422082e-05, + "loss": 0.0154, + "step": 39660 + }, + { + "action_loss": 0.00197235937230289, + "epoch": 35.66546762589928, + "step": 39660 + }, + { + "epoch": 35.66546762589928, + "step": 39660, + "torque_loss": 0.11520601063966751 + }, + { + "epoch": 35.66546762589928, + "force_loss": 0.004631725139915943, + "step": 39660 + }, + { + "epoch": 35.67446043165468, + "grad_norm": 0.20036110281944275, + "learning_rate": 2.8240926147626645e-05, + "loss": 0.0193, + "step": 39670 + }, + { + "action_loss": 0.0008053396013565361, + "epoch": 35.67446043165468, + "step": 39670 + }, + { + "epoch": 35.67446043165468, + "step": 39670, + "torque_loss": 0.09567391872406006 + }, + { + "epoch": 35.67446043165468, + "force_loss": 0.0011568082263693213, + "step": 39670 + }, + { + "epoch": 35.68345323741007, + "grad_norm": 0.3572350740432739, + "learning_rate": 2.8216117960864586e-05, + "loss": 0.0192, + "step": 39680 + }, + { + "action_loss": 0.002147337654605508, + "epoch": 35.68345323741007, + "step": 39680 + }, + { + "epoch": 35.68345323741007, + "step": 39680, + "torque_loss": 0.1557895392179489 + }, + { + "epoch": 35.68345323741007, + "force_loss": 0.013895433396100998, + "step": 39680 + }, + { + "epoch": 35.69244604316547, + "grad_norm": 0.18563728034496307, + "learning_rate": 2.8191316391470703e-05, + "loss": 0.0164, + "step": 39690 + }, + { + "action_loss": 0.00875378493219614, + "epoch": 35.69244604316547, + "step": 39690 + }, + { + "epoch": 35.69244604316547, + "step": 39690, + "torque_loss": 0.13940010964870453 + }, + { + "epoch": 35.69244604316547, + "force_loss": 0.011029067449271679, + "step": 39690 + }, + { + "epoch": 35.701438848920866, + "grad_norm": 0.10199469327926636, + "learning_rate": 2.816652144697911e-05, + "loss": 0.0211, + "step": 39700 + }, + { + "action_loss": 0.0029193172231316566, + "epoch": 35.701438848920866, + "step": 39700 + }, + { + "epoch": 35.701438848920866, + "step": 39700, + "torque_loss": 0.1119891107082367 + }, + { + "epoch": 35.701438848920866, + "force_loss": 0.0015033232048153877, + "step": 39700 + }, + { + "epoch": 35.710431654676256, + "grad_norm": 0.1253720074892044, + "learning_rate": 2.8141733134921783e-05, + "loss": 0.0204, + "step": 39710 + }, + { + "action_loss": 0.0032067298889160156, + "epoch": 35.710431654676256, + "step": 39710 + }, + { + "epoch": 35.710431654676256, + "step": 39710, + "torque_loss": 0.11965510994195938 + }, + { + "epoch": 35.710431654676256, + "force_loss": 0.0026536390651017427, + "step": 39710 + }, + { + "epoch": 35.719424460431654, + "grad_norm": 0.41262295842170715, + "learning_rate": 2.811695146282884e-05, + "loss": 0.0179, + "step": 39720 + }, + { + "action_loss": 0.0026037513744086027, + "epoch": 35.719424460431654, + "step": 39720 + }, + { + "epoch": 35.719424460431654, + "step": 39720, + "torque_loss": 0.10272317379713058 + }, + { + "epoch": 35.719424460431654, + "force_loss": 0.0028136586770415306, + "step": 39720 + }, + { + "epoch": 35.72841726618705, + "grad_norm": 0.20123039186000824, + "learning_rate": 2.8092176438228212e-05, + "loss": 0.0161, + "step": 39730 + }, + { + "action_loss": 0.0025930143892765045, + "epoch": 35.72841726618705, + "step": 39730 + }, + { + "epoch": 35.72841726618705, + "step": 39730, + "torque_loss": 0.11339656263589859 + }, + { + "epoch": 35.72841726618705, + "force_loss": 0.005493659991770983, + "step": 39730 + }, + { + "epoch": 35.73741007194245, + "grad_norm": 0.31548067927360535, + "learning_rate": 2.806740806864598e-05, + "loss": 0.0194, + "step": 39740 + }, + { + "action_loss": 0.0027863343711942434, + "epoch": 35.73741007194245, + "step": 39740 + }, + { + "epoch": 35.73741007194245, + "step": 39740, + "torque_loss": 0.14005906879901886 + }, + { + "epoch": 35.73741007194245, + "force_loss": 0.0059461998753249645, + "step": 39740 + }, + { + "epoch": 35.74640287769784, + "grad_norm": 0.291875958442688, + "learning_rate": 2.804264636160604e-05, + "loss": 0.0161, + "step": 39750 + }, + { + "action_loss": 0.008138484321534634, + "epoch": 35.74640287769784, + "step": 39750 + }, + { + "epoch": 35.74640287769784, + "step": 39750, + "torque_loss": 0.1472003012895584 + }, + { + "epoch": 35.74640287769784, + "force_loss": 0.004367370158433914, + "step": 39750 + }, + { + "epoch": 35.75539568345324, + "grad_norm": 0.10295046120882034, + "learning_rate": 2.8017891324630402e-05, + "loss": 0.0152, + "step": 39760 + }, + { + "action_loss": 0.003908615559339523, + "epoch": 35.75539568345324, + "step": 39760 + }, + { + "epoch": 35.75539568345324, + "step": 39760, + "torque_loss": 0.16097819805145264 + }, + { + "epoch": 35.75539568345324, + "force_loss": 0.0038034298922866583, + "step": 39760 + }, + { + "epoch": 35.764388489208635, + "grad_norm": 0.13721863925457, + "learning_rate": 2.7993142965238976e-05, + "loss": 0.0193, + "step": 39770 + }, + { + "action_loss": 0.00265670008957386, + "epoch": 35.764388489208635, + "step": 39770 + }, + { + "epoch": 35.764388489208635, + "step": 39770, + "torque_loss": 0.10878171771764755 + }, + { + "epoch": 35.764388489208635, + "force_loss": 0.0066906786523759365, + "step": 39770 + }, + { + "epoch": 35.773381294964025, + "grad_norm": 0.12275765836238861, + "learning_rate": 2.7968401290949665e-05, + "loss": 0.0167, + "step": 39780 + }, + { + "action_loss": 0.004790095612406731, + "epoch": 35.773381294964025, + "step": 39780 + }, + { + "epoch": 35.773381294964025, + "step": 39780, + "torque_loss": 0.15864305198192596 + }, + { + "epoch": 35.773381294964025, + "force_loss": 0.006495725363492966, + "step": 39780 + }, + { + "epoch": 35.78237410071942, + "grad_norm": 0.11805963516235352, + "learning_rate": 2.7943666309278328e-05, + "loss": 0.0164, + "step": 39790 + }, + { + "action_loss": 0.0025398877914994955, + "epoch": 35.78237410071942, + "step": 39790 + }, + { + "epoch": 35.78237410071942, + "step": 39790, + "torque_loss": 0.08587618917226791 + }, + { + "epoch": 35.78237410071942, + "force_loss": 0.004442976322025061, + "step": 39790 + }, + { + "epoch": 35.79136690647482, + "grad_norm": 0.1223180741071701, + "learning_rate": 2.7918938027738783e-05, + "loss": 0.017, + "step": 39800 + }, + { + "action_loss": 0.0015568555099889636, + "epoch": 35.79136690647482, + "step": 39800 + }, + { + "epoch": 35.79136690647482, + "step": 39800, + "torque_loss": 0.11426212638616562 + }, + { + "epoch": 35.79136690647482, + "force_loss": 0.0010734944371506572, + "step": 39800 + }, + { + "epoch": 35.80035971223022, + "grad_norm": 0.12311927229166031, + "learning_rate": 2.789421645384287e-05, + "loss": 0.0144, + "step": 39810 + }, + { + "action_loss": 0.024767877534031868, + "epoch": 35.80035971223022, + "step": 39810 + }, + { + "epoch": 35.80035971223022, + "step": 39810, + "torque_loss": 0.12275147438049316 + }, + { + "epoch": 35.80035971223022, + "force_loss": 0.028525101020932198, + "step": 39810 + }, + { + "epoch": 35.80935251798561, + "grad_norm": 0.17425116896629333, + "learning_rate": 2.786950159510032e-05, + "loss": 0.0185, + "step": 39820 + }, + { + "action_loss": 0.0011173957027494907, + "epoch": 35.80935251798561, + "step": 39820 + }, + { + "epoch": 35.80935251798561, + "step": 39820, + "torque_loss": 0.1357005089521408 + }, + { + "epoch": 35.80935251798561, + "force_loss": 0.004809817299246788, + "step": 39820 + }, + { + "epoch": 35.818345323741006, + "grad_norm": 0.3158116936683655, + "learning_rate": 2.7844793459018876e-05, + "loss": 0.0171, + "step": 39830 + }, + { + "action_loss": 0.0019987565465271473, + "epoch": 35.818345323741006, + "step": 39830 + }, + { + "epoch": 35.818345323741006, + "step": 39830, + "torque_loss": 0.10832720994949341 + }, + { + "epoch": 35.818345323741006, + "force_loss": 0.0037875166162848473, + "step": 39830 + }, + { + "epoch": 35.827338129496404, + "grad_norm": 0.2635257840156555, + "learning_rate": 2.7820092053104195e-05, + "loss": 0.0158, + "step": 39840 + }, + { + "action_loss": 0.0046249027363955975, + "epoch": 35.827338129496404, + "step": 39840 + }, + { + "epoch": 35.827338129496404, + "step": 39840, + "torque_loss": 0.14352811872959137 + }, + { + "epoch": 35.827338129496404, + "force_loss": 0.005319828633219004, + "step": 39840 + }, + { + "epoch": 35.8363309352518, + "grad_norm": 0.0906476303935051, + "learning_rate": 2.7795397384859933e-05, + "loss": 0.0148, + "step": 39850 + }, + { + "action_loss": 0.007267903536558151, + "epoch": 35.8363309352518, + "step": 39850 + }, + { + "epoch": 35.8363309352518, + "step": 39850, + "torque_loss": 0.11004098504781723 + }, + { + "epoch": 35.8363309352518, + "force_loss": 0.014775082468986511, + "step": 39850 + }, + { + "epoch": 35.84532374100719, + "grad_norm": 0.1165788546204567, + "learning_rate": 2.7770709461787638e-05, + "loss": 0.0159, + "step": 39860 + }, + { + "action_loss": 0.0020856165792793036, + "epoch": 35.84532374100719, + "step": 39860 + }, + { + "epoch": 35.84532374100719, + "step": 39860, + "torque_loss": 0.1354133039712906 + }, + { + "epoch": 35.84532374100719, + "force_loss": 0.001596722169779241, + "step": 39860 + }, + { + "epoch": 35.85431654676259, + "grad_norm": 0.13126079738140106, + "learning_rate": 2.7746028291386915e-05, + "loss": 0.0153, + "step": 39870 + }, + { + "action_loss": 0.002711870474740863, + "epoch": 35.85431654676259, + "step": 39870 + }, + { + "epoch": 35.85431654676259, + "step": 39870, + "torque_loss": 0.13184605538845062 + }, + { + "epoch": 35.85431654676259, + "force_loss": 0.002248078351840377, + "step": 39870 + }, + { + "epoch": 35.86330935251799, + "grad_norm": 0.5663595795631409, + "learning_rate": 2.772135388115519e-05, + "loss": 0.0175, + "step": 39880 + }, + { + "action_loss": 0.002428376814350486, + "epoch": 35.86330935251799, + "step": 39880 + }, + { + "epoch": 35.86330935251799, + "step": 39880, + "torque_loss": 0.16598446667194366 + }, + { + "epoch": 35.86330935251799, + "force_loss": 0.0022639110684394836, + "step": 39880 + }, + { + "epoch": 35.87230215827338, + "grad_norm": 0.29061752557754517, + "learning_rate": 2.7696686238587945e-05, + "loss": 0.017, + "step": 39890 + }, + { + "action_loss": 0.005739705171436071, + "epoch": 35.87230215827338, + "step": 39890 + }, + { + "epoch": 35.87230215827338, + "step": 39890, + "torque_loss": 0.1431030035018921 + }, + { + "epoch": 35.87230215827338, + "force_loss": 0.009874660521745682, + "step": 39890 + }, + { + "epoch": 35.881294964028775, + "grad_norm": 0.11623810976743698, + "learning_rate": 2.7672025371178505e-05, + "loss": 0.0201, + "step": 39900 + }, + { + "action_loss": 0.0011906855506822467, + "epoch": 35.881294964028775, + "step": 39900 + }, + { + "epoch": 35.881294964028775, + "step": 39900, + "torque_loss": 0.0976327657699585 + }, + { + "epoch": 35.881294964028775, + "force_loss": 0.0008170453947968781, + "step": 39900 + }, + { + "epoch": 35.89028776978417, + "grad_norm": 0.5750964283943176, + "learning_rate": 2.7647371286418238e-05, + "loss": 0.0166, + "step": 39910 + }, + { + "action_loss": 0.0016528418054804206, + "epoch": 35.89028776978417, + "step": 39910 + }, + { + "epoch": 35.89028776978417, + "step": 39910, + "torque_loss": 0.09385880082845688 + }, + { + "epoch": 35.89028776978417, + "force_loss": 0.0024199041072279215, + "step": 39910 + }, + { + "epoch": 35.89928057553957, + "grad_norm": 0.2214822769165039, + "learning_rate": 2.762272399179639e-05, + "loss": 0.0154, + "step": 39920 + }, + { + "action_loss": 0.006354435812681913, + "epoch": 35.89928057553957, + "step": 39920 + }, + { + "epoch": 35.89928057553957, + "step": 39920, + "torque_loss": 0.14036713540554047 + }, + { + "epoch": 35.89928057553957, + "force_loss": 0.007456177845597267, + "step": 39920 + }, + { + "epoch": 35.90827338129496, + "grad_norm": 0.2445472776889801, + "learning_rate": 2.7598083494800154e-05, + "loss": 0.0155, + "step": 39930 + }, + { + "action_loss": 0.007851445116102695, + "epoch": 35.90827338129496, + "step": 39930 + }, + { + "epoch": 35.90827338129496, + "step": 39930, + "torque_loss": 0.19601039588451385 + }, + { + "epoch": 35.90827338129496, + "force_loss": 0.0154436519369483, + "step": 39930 + }, + { + "epoch": 35.91726618705036, + "grad_norm": 0.5414363741874695, + "learning_rate": 2.7573449802914664e-05, + "loss": 0.0186, + "step": 39940 + }, + { + "action_loss": 0.0016132229939103127, + "epoch": 35.91726618705036, + "step": 39940 + }, + { + "epoch": 35.91726618705036, + "step": 39940, + "torque_loss": 0.10824790596961975 + }, + { + "epoch": 35.91726618705036, + "force_loss": 0.001795265474356711, + "step": 39940 + }, + { + "epoch": 35.92625899280576, + "grad_norm": 0.31710657477378845, + "learning_rate": 2.7548822923622964e-05, + "loss": 0.0155, + "step": 39950 + }, + { + "action_loss": 0.0017026454443112016, + "epoch": 35.92625899280576, + "step": 39950 + }, + { + "epoch": 35.92625899280576, + "step": 39950, + "torque_loss": 0.0875890925526619 + }, + { + "epoch": 35.92625899280576, + "force_loss": 0.006363121792674065, + "step": 39950 + }, + { + "epoch": 35.935251798561154, + "grad_norm": 0.09589733928442001, + "learning_rate": 2.752420286440609e-05, + "loss": 0.0157, + "step": 39960 + }, + { + "action_loss": 0.0011475059436634183, + "epoch": 35.935251798561154, + "step": 39960 + }, + { + "epoch": 35.935251798561154, + "step": 39960, + "torque_loss": 0.1054466962814331 + }, + { + "epoch": 35.935251798561154, + "force_loss": 0.0019890100229531527, + "step": 39960 + }, + { + "epoch": 35.944244604316545, + "grad_norm": 0.09964091330766678, + "learning_rate": 2.749958963274295e-05, + "loss": 0.0169, + "step": 39970 + }, + { + "action_loss": 0.00537843769416213, + "epoch": 35.944244604316545, + "step": 39970 + }, + { + "epoch": 35.944244604316545, + "step": 39970, + "torque_loss": 0.1167064681649208 + }, + { + "epoch": 35.944244604316545, + "force_loss": 0.012418483383953571, + "step": 39970 + }, + { + "epoch": 35.95323741007194, + "grad_norm": 0.2967495024204254, + "learning_rate": 2.747498323611039e-05, + "loss": 0.0178, + "step": 39980 + }, + { + "action_loss": 0.0007548278081230819, + "epoch": 35.95323741007194, + "step": 39980 + }, + { + "epoch": 35.95323741007194, + "step": 39980, + "torque_loss": 0.09171827882528305 + }, + { + "epoch": 35.95323741007194, + "force_loss": 0.0007295326213352382, + "step": 39980 + }, + { + "epoch": 35.96223021582734, + "grad_norm": 0.14533080160617828, + "learning_rate": 2.7450383681983184e-05, + "loss": 0.0152, + "step": 39990 + }, + { + "action_loss": 0.0024219213519245386, + "epoch": 35.96223021582734, + "step": 39990 + }, + { + "epoch": 35.96223021582734, + "step": 39990, + "torque_loss": 0.17060492932796478 + }, + { + "epoch": 35.96223021582734, + "force_loss": 0.0016523669473826885, + "step": 39990 + }, + { + "epoch": 35.97122302158273, + "grad_norm": 0.31310293078422546, + "learning_rate": 2.742579097783403e-05, + "loss": 0.0163, + "step": 40000 + }, + { + "action_loss": 0.0009725075215101242, + "epoch": 35.97122302158273, + "step": 40000 + }, + { + "epoch": 35.97122302158273, + "step": 40000, + "torque_loss": 0.09078284353017807 + }, + { + "epoch": 35.97122302158273, + "force_loss": 0.0020989449694752693, + "step": 40000 + }, + { + "epoch": 35.98021582733813, + "grad_norm": 0.13676171004772186, + "learning_rate": 2.7401205131133512e-05, + "loss": 0.0155, + "step": 40010 + }, + { + "action_loss": 0.001372501370497048, + "epoch": 35.98021582733813, + "step": 40010 + }, + { + "epoch": 35.98021582733813, + "step": 40010, + "torque_loss": 0.11363661289215088 + }, + { + "epoch": 35.98021582733813, + "force_loss": 0.00143437460064888, + "step": 40010 + }, + { + "epoch": 35.989208633093526, + "grad_norm": 0.29066896438598633, + "learning_rate": 2.7376626149350238e-05, + "loss": 0.0176, + "step": 40020 + }, + { + "action_loss": 0.003074265317991376, + "epoch": 35.989208633093526, + "step": 40020 + }, + { + "epoch": 35.989208633093526, + "step": 40020, + "torque_loss": 0.1350867748260498 + }, + { + "epoch": 35.989208633093526, + "force_loss": 0.00463194539770484, + "step": 40020 + }, + { + "epoch": 35.99820143884892, + "grad_norm": 0.1654406487941742, + "learning_rate": 2.735205403995056e-05, + "loss": 0.0168, + "step": 40030 + }, + { + "action_loss": 0.0019327859627082944, + "epoch": 35.99820143884892, + "step": 40030 + }, + { + "epoch": 35.99820143884892, + "step": 40030, + "torque_loss": 0.1391221284866333 + }, + { + "epoch": 35.99820143884892, + "force_loss": 0.0014563221484422684, + "step": 40030 + }, + { + "epoch": 36.007194244604314, + "grad_norm": 0.156697615981102, + "learning_rate": 2.7327488810398917e-05, + "loss": 0.0148, + "step": 40040 + }, + { + "action_loss": 0.0022141123190522194, + "epoch": 36.007194244604314, + "step": 40040 + }, + { + "epoch": 36.007194244604314, + "step": 40040, + "torque_loss": 0.1411222219467163 + }, + { + "epoch": 36.007194244604314, + "force_loss": 0.011481490917503834, + "step": 40040 + }, + { + "epoch": 36.01618705035971, + "grad_norm": 0.168813094496727, + "learning_rate": 2.7302930468157507e-05, + "loss": 0.0168, + "step": 40050 + }, + { + "action_loss": 0.004035875666886568, + "epoch": 36.01618705035971, + "step": 40050 + }, + { + "epoch": 36.01618705035971, + "step": 40050, + "torque_loss": 0.09257396310567856 + }, + { + "epoch": 36.01618705035971, + "force_loss": 0.009550203569233418, + "step": 40050 + }, + { + "epoch": 36.02517985611511, + "grad_norm": 0.41819918155670166, + "learning_rate": 2.727837902068655e-05, + "loss": 0.017, + "step": 40060 + }, + { + "action_loss": 0.0009752670302987099, + "epoch": 36.02517985611511, + "step": 40060 + }, + { + "epoch": 36.02517985611511, + "step": 40060, + "torque_loss": 0.11066470295190811 + }, + { + "epoch": 36.02517985611511, + "force_loss": 0.0011010640300810337, + "step": 40060 + }, + { + "epoch": 36.03417266187051, + "grad_norm": 0.15850377082824707, + "learning_rate": 2.7253834475444123e-05, + "loss": 0.016, + "step": 40070 + }, + { + "action_loss": 0.0011355552123859525, + "epoch": 36.03417266187051, + "step": 40070 + }, + { + "epoch": 36.03417266187051, + "step": 40070, + "torque_loss": 0.14427512884140015 + }, + { + "epoch": 36.03417266187051, + "force_loss": 0.0008650435484014452, + "step": 40070 + }, + { + "epoch": 36.0431654676259, + "grad_norm": 0.23034508526325226, + "learning_rate": 2.7229296839886204e-05, + "loss": 0.0189, + "step": 40080 + }, + { + "action_loss": 0.001957319676876068, + "epoch": 36.0431654676259, + "step": 40080 + }, + { + "epoch": 36.0431654676259, + "step": 40080, + "torque_loss": 0.12914668023586273 + }, + { + "epoch": 36.0431654676259, + "force_loss": 0.0027458462864160538, + "step": 40080 + }, + { + "epoch": 36.052158273381295, + "grad_norm": 0.14659057557582855, + "learning_rate": 2.720476612146668e-05, + "loss": 0.0149, + "step": 40090 + }, + { + "action_loss": 0.0009491958771832287, + "epoch": 36.052158273381295, + "step": 40090 + }, + { + "epoch": 36.052158273381295, + "step": 40090, + "torque_loss": 0.11084743589162827 + }, + { + "epoch": 36.052158273381295, + "force_loss": 0.0010933068115264177, + "step": 40090 + }, + { + "epoch": 36.06115107913669, + "grad_norm": 0.49214720726013184, + "learning_rate": 2.7180242327637317e-05, + "loss": 0.0167, + "step": 40100 + }, + { + "action_loss": 0.0015685424441471696, + "epoch": 36.06115107913669, + "step": 40100 + }, + { + "epoch": 36.06115107913669, + "step": 40100, + "torque_loss": 0.13770383596420288 + }, + { + "epoch": 36.06115107913669, + "force_loss": 0.001006570179015398, + "step": 40100 + }, + { + "epoch": 36.07014388489208, + "grad_norm": 0.18703696131706238, + "learning_rate": 2.7155725465847826e-05, + "loss": 0.0204, + "step": 40110 + }, + { + "action_loss": 0.0051618232391774654, + "epoch": 36.07014388489208, + "step": 40110 + }, + { + "epoch": 36.07014388489208, + "step": 40110, + "torque_loss": 0.13218457996845245 + }, + { + "epoch": 36.07014388489208, + "force_loss": 0.004874333273619413, + "step": 40110 + }, + { + "epoch": 36.07913669064748, + "grad_norm": 0.14812196791172028, + "learning_rate": 2.713121554354578e-05, + "loss": 0.0157, + "step": 40120 + }, + { + "action_loss": 0.0013802592875435948, + "epoch": 36.07913669064748, + "step": 40120 + }, + { + "epoch": 36.07913669064748, + "step": 40120, + "torque_loss": 0.07893958687782288 + }, + { + "epoch": 36.07913669064748, + "force_loss": 0.0012137488229200244, + "step": 40120 + }, + { + "epoch": 36.08812949640288, + "grad_norm": 0.1326134204864502, + "learning_rate": 2.7106712568176628e-05, + "loss": 0.0173, + "step": 40130 + }, + { + "action_loss": 0.002022692235186696, + "epoch": 36.08812949640288, + "step": 40130 + }, + { + "epoch": 36.08812949640288, + "step": 40130, + "torque_loss": 0.11246875673532486 + }, + { + "epoch": 36.08812949640288, + "force_loss": 0.006079592276364565, + "step": 40130 + }, + { + "epoch": 36.097122302158276, + "grad_norm": 0.3330748975276947, + "learning_rate": 2.708221654718374e-05, + "loss": 0.0169, + "step": 40140 + }, + { + "action_loss": 0.007756896782666445, + "epoch": 36.097122302158276, + "step": 40140 + }, + { + "epoch": 36.097122302158276, + "step": 40140, + "torque_loss": 0.12113672494888306 + }, + { + "epoch": 36.097122302158276, + "force_loss": 0.008284587413072586, + "step": 40140 + }, + { + "epoch": 36.106115107913666, + "grad_norm": 0.1462162584066391, + "learning_rate": 2.7057727488008357e-05, + "loss": 0.0166, + "step": 40150 + }, + { + "action_loss": 0.0011988849146291614, + "epoch": 36.106115107913666, + "step": 40150 + }, + { + "epoch": 36.106115107913666, + "step": 40150, + "torque_loss": 0.1106100007891655 + }, + { + "epoch": 36.106115107913666, + "force_loss": 0.00159476010594517, + "step": 40150 + }, + { + "epoch": 36.115107913669064, + "grad_norm": 0.10286112874746323, + "learning_rate": 2.703324539808961e-05, + "loss": 0.0152, + "step": 40160 + }, + { + "action_loss": 0.0011922478443011642, + "epoch": 36.115107913669064, + "step": 40160 + }, + { + "epoch": 36.115107913669064, + "step": 40160, + "torque_loss": 0.09055280685424805 + }, + { + "epoch": 36.115107913669064, + "force_loss": 0.0021607305388897657, + "step": 40160 + }, + { + "epoch": 36.12410071942446, + "grad_norm": 0.14139248430728912, + "learning_rate": 2.7008770284864505e-05, + "loss": 0.0149, + "step": 40170 + }, + { + "action_loss": 0.0053743403404951096, + "epoch": 36.12410071942446, + "step": 40170 + }, + { + "epoch": 36.12410071942446, + "step": 40170, + "torque_loss": 0.16901345551013947 + }, + { + "epoch": 36.12410071942446, + "force_loss": 0.005718277767300606, + "step": 40170 + }, + { + "epoch": 36.13309352517986, + "grad_norm": 0.19509971141815186, + "learning_rate": 2.6984302155767916e-05, + "loss": 0.0171, + "step": 40180 + }, + { + "action_loss": 0.0008323395741172135, + "epoch": 36.13309352517986, + "step": 40180 + }, + { + "epoch": 36.13309352517986, + "step": 40180, + "torque_loss": 0.12096711248159409 + }, + { + "epoch": 36.13309352517986, + "force_loss": 0.0006870224024169147, + "step": 40180 + }, + { + "epoch": 36.14208633093525, + "grad_norm": 0.26619499921798706, + "learning_rate": 2.6959841018232683e-05, + "loss": 0.0163, + "step": 40190 + }, + { + "action_loss": 0.0011973791988566518, + "epoch": 36.14208633093525, + "step": 40190 + }, + { + "epoch": 36.14208633093525, + "step": 40190, + "torque_loss": 0.08380764722824097 + }, + { + "epoch": 36.14208633093525, + "force_loss": 0.003565125400200486, + "step": 40190 + }, + { + "epoch": 36.15107913669065, + "grad_norm": 0.35528653860092163, + "learning_rate": 2.693538687968937e-05, + "loss": 0.0159, + "step": 40200 + }, + { + "action_loss": 0.002721647033467889, + "epoch": 36.15107913669065, + "step": 40200 + }, + { + "epoch": 36.15107913669065, + "step": 40200, + "torque_loss": 0.13448883593082428 + }, + { + "epoch": 36.15107913669065, + "force_loss": 0.0056694429367780685, + "step": 40200 + }, + { + "epoch": 36.160071942446045, + "grad_norm": 0.16108831763267517, + "learning_rate": 2.6910939747566556e-05, + "loss": 0.0172, + "step": 40210 + }, + { + "action_loss": 0.0010758339194580913, + "epoch": 36.160071942446045, + "step": 40210 + }, + { + "epoch": 36.160071942446045, + "step": 40210, + "torque_loss": 0.12400656938552856 + }, + { + "epoch": 36.160071942446045, + "force_loss": 0.0024678672198206186, + "step": 40210 + }, + { + "epoch": 36.169064748201436, + "grad_norm": 0.078067846596241, + "learning_rate": 2.6886499629290607e-05, + "loss": 0.0129, + "step": 40220 + }, + { + "action_loss": 0.0013312241062521935, + "epoch": 36.169064748201436, + "step": 40220 + }, + { + "epoch": 36.169064748201436, + "step": 40220, + "torque_loss": 0.11408732086420059 + }, + { + "epoch": 36.169064748201436, + "force_loss": 0.002038830192759633, + "step": 40220 + }, + { + "epoch": 36.17805755395683, + "grad_norm": 0.061363305896520615, + "learning_rate": 2.6862066532285802e-05, + "loss": 0.0151, + "step": 40230 + }, + { + "action_loss": 0.0034331511706113815, + "epoch": 36.17805755395683, + "step": 40230 + }, + { + "epoch": 36.17805755395683, + "step": 40230, + "torque_loss": 0.14008234441280365 + }, + { + "epoch": 36.17805755395683, + "force_loss": 0.005968259181827307, + "step": 40230 + }, + { + "epoch": 36.18705035971223, + "grad_norm": 0.3427886664867401, + "learning_rate": 2.6837640463974262e-05, + "loss": 0.0157, + "step": 40240 + }, + { + "action_loss": 0.001401070854626596, + "epoch": 36.18705035971223, + "step": 40240 + }, + { + "epoch": 36.18705035971223, + "step": 40240, + "torque_loss": 0.13309124112129211 + }, + { + "epoch": 36.18705035971223, + "force_loss": 0.004553420003503561, + "step": 40240 + }, + { + "epoch": 36.19604316546763, + "grad_norm": 0.26022714376449585, + "learning_rate": 2.681322143177596e-05, + "loss": 0.0151, + "step": 40250 + }, + { + "action_loss": 0.0013003213098272681, + "epoch": 36.19604316546763, + "step": 40250 + }, + { + "epoch": 36.19604316546763, + "step": 40250, + "torque_loss": 0.09454932063817978 + }, + { + "epoch": 36.19604316546763, + "force_loss": 0.0035277120769023895, + "step": 40250 + }, + { + "epoch": 36.20503597122302, + "grad_norm": 0.20755928754806519, + "learning_rate": 2.678880944310882e-05, + "loss": 0.0166, + "step": 40260 + }, + { + "action_loss": 0.0049186162650585175, + "epoch": 36.20503597122302, + "step": 40260 + }, + { + "epoch": 36.20503597122302, + "step": 40260, + "torque_loss": 0.1464814394712448 + }, + { + "epoch": 36.20503597122302, + "force_loss": 0.0049349102191627026, + "step": 40260 + }, + { + "epoch": 36.21402877697842, + "grad_norm": 0.47558560967445374, + "learning_rate": 2.6764404505388474e-05, + "loss": 0.0188, + "step": 40270 + }, + { + "action_loss": 0.0011392353335395455, + "epoch": 36.21402877697842, + "step": 40270 + }, + { + "epoch": 36.21402877697842, + "step": 40270, + "torque_loss": 0.12501515448093414 + }, + { + "epoch": 36.21402877697842, + "force_loss": 0.003926285542547703, + "step": 40270 + }, + { + "epoch": 36.223021582733814, + "grad_norm": 0.27003738284111023, + "learning_rate": 2.6740006626028558e-05, + "loss": 0.0177, + "step": 40280 + }, + { + "action_loss": 0.0029524012934416533, + "epoch": 36.223021582733814, + "step": 40280 + }, + { + "epoch": 36.223021582733814, + "step": 40280, + "torque_loss": 0.13837145268917084 + }, + { + "epoch": 36.223021582733814, + "force_loss": 0.003736709477379918, + "step": 40280 + }, + { + "epoch": 36.23201438848921, + "grad_norm": 0.22201573848724365, + "learning_rate": 2.671561581244048e-05, + "loss": 0.0185, + "step": 40290 + }, + { + "action_loss": 0.0021585607901215553, + "epoch": 36.23201438848921, + "step": 40290 + }, + { + "epoch": 36.23201438848921, + "step": 40290, + "torque_loss": 0.0984804555773735 + }, + { + "epoch": 36.23201438848921, + "force_loss": 0.0020127766765654087, + "step": 40290 + }, + { + "epoch": 36.2410071942446, + "grad_norm": 0.2025788426399231, + "learning_rate": 2.6691232072033536e-05, + "loss": 0.0164, + "step": 40300 + }, + { + "action_loss": 0.0029947690200060606, + "epoch": 36.2410071942446, + "step": 40300 + }, + { + "epoch": 36.2410071942446, + "step": 40300, + "torque_loss": 0.12133028358221054 + }, + { + "epoch": 36.2410071942446, + "force_loss": 0.0017820410430431366, + "step": 40300 + }, + { + "epoch": 36.25, + "grad_norm": 0.22762535512447357, + "learning_rate": 2.6666855412214852e-05, + "loss": 0.0193, + "step": 40310 + }, + { + "action_loss": 0.004297464620321989, + "epoch": 36.25, + "step": 40310 + }, + { + "epoch": 36.25, + "step": 40310, + "torque_loss": 0.15519928932189941 + }, + { + "epoch": 36.25, + "force_loss": 0.003369540674611926, + "step": 40310 + }, + { + "epoch": 36.2589928057554, + "grad_norm": 0.11644995957612991, + "learning_rate": 2.664248584038942e-05, + "loss": 0.0183, + "step": 40320 + }, + { + "action_loss": 0.002315806457772851, + "epoch": 36.2589928057554, + "step": 40320 + }, + { + "epoch": 36.2589928057554, + "step": 40320, + "torque_loss": 0.12423001974821091 + }, + { + "epoch": 36.2589928057554, + "force_loss": 0.003705536248162389, + "step": 40320 + }, + { + "epoch": 36.26798561151079, + "grad_norm": 0.5651867985725403, + "learning_rate": 2.6618123363960047e-05, + "loss": 0.0149, + "step": 40330 + }, + { + "action_loss": 0.004449928645044565, + "epoch": 36.26798561151079, + "step": 40330 + }, + { + "epoch": 36.26798561151079, + "step": 40330, + "torque_loss": 0.12324311584234238 + }, + { + "epoch": 36.26798561151079, + "force_loss": 0.005421905312687159, + "step": 40330 + }, + { + "epoch": 36.276978417266186, + "grad_norm": 0.49224138259887695, + "learning_rate": 2.659376799032748e-05, + "loss": 0.0164, + "step": 40340 + }, + { + "action_loss": 0.0017632081871852279, + "epoch": 36.276978417266186, + "step": 40340 + }, + { + "epoch": 36.276978417266186, + "step": 40340, + "torque_loss": 0.11182568222284317 + }, + { + "epoch": 36.276978417266186, + "force_loss": 0.001757893362082541, + "step": 40340 + }, + { + "epoch": 36.28597122302158, + "grad_norm": 0.2579120099544525, + "learning_rate": 2.6569419726890145e-05, + "loss": 0.0204, + "step": 40350 + }, + { + "action_loss": 0.0043951706029474735, + "epoch": 36.28597122302158, + "step": 40350 + }, + { + "epoch": 36.28597122302158, + "step": 40350, + "torque_loss": 0.12350539118051529 + }, + { + "epoch": 36.28597122302158, + "force_loss": 0.003934036009013653, + "step": 40350 + }, + { + "epoch": 36.29496402877698, + "grad_norm": 0.2595391571521759, + "learning_rate": 2.654507858104447e-05, + "loss": 0.0191, + "step": 40360 + }, + { + "action_loss": 0.003790018381550908, + "epoch": 36.29496402877698, + "step": 40360 + }, + { + "epoch": 36.29496402877698, + "step": 40360, + "torque_loss": 0.13111385703086853 + }, + { + "epoch": 36.29496402877698, + "force_loss": 0.00280355685390532, + "step": 40360 + }, + { + "epoch": 36.30395683453237, + "grad_norm": 0.07278679311275482, + "learning_rate": 2.652074456018463e-05, + "loss": 0.0155, + "step": 40370 + }, + { + "action_loss": 0.0023485682904720306, + "epoch": 36.30395683453237, + "step": 40370 + }, + { + "epoch": 36.30395683453237, + "step": 40370, + "torque_loss": 0.22213876247406006 + }, + { + "epoch": 36.30395683453237, + "force_loss": 0.0021585547365248203, + "step": 40370 + }, + { + "epoch": 36.31294964028777, + "grad_norm": 0.27411389350891113, + "learning_rate": 2.6496417671702646e-05, + "loss": 0.017, + "step": 40380 + }, + { + "action_loss": 0.0031113314907997847, + "epoch": 36.31294964028777, + "step": 40380 + }, + { + "epoch": 36.31294964028777, + "step": 40380, + "torque_loss": 0.11473411321640015 + }, + { + "epoch": 36.31294964028777, + "force_loss": 0.003830675734207034, + "step": 40380 + }, + { + "epoch": 36.32194244604317, + "grad_norm": 0.1259770393371582, + "learning_rate": 2.6472097922988427e-05, + "loss": 0.0152, + "step": 40390 + }, + { + "action_loss": 0.014899145811796188, + "epoch": 36.32194244604317, + "step": 40390 + }, + { + "epoch": 36.32194244604317, + "step": 40390, + "torque_loss": 0.14831231534481049 + }, + { + "epoch": 36.32194244604317, + "force_loss": 0.028943797573447227, + "step": 40390 + }, + { + "epoch": 36.330935251798564, + "grad_norm": 0.10508344322443008, + "learning_rate": 2.6447785321429607e-05, + "loss": 0.0306, + "step": 40400 + }, + { + "action_loss": 0.0018155038123950362, + "epoch": 36.330935251798564, + "step": 40400 + }, + { + "epoch": 36.330935251798564, + "step": 40400, + "torque_loss": 0.1340511590242386 + }, + { + "epoch": 36.330935251798564, + "force_loss": 0.0014119663974270225, + "step": 40400 + }, + { + "epoch": 36.339928057553955, + "grad_norm": 0.14072927832603455, + "learning_rate": 2.6423479874411784e-05, + "loss": 0.0186, + "step": 40410 + }, + { + "action_loss": 0.002754220739006996, + "epoch": 36.339928057553955, + "step": 40410 + }, + { + "epoch": 36.339928057553955, + "step": 40410, + "torque_loss": 0.13490955531597137 + }, + { + "epoch": 36.339928057553955, + "force_loss": 0.007847055792808533, + "step": 40410 + }, + { + "epoch": 36.34892086330935, + "grad_norm": 0.27140411734580994, + "learning_rate": 2.6399181589318234e-05, + "loss": 0.0149, + "step": 40420 + }, + { + "action_loss": 0.0016590593149885535, + "epoch": 36.34892086330935, + "step": 40420 + }, + { + "epoch": 36.34892086330935, + "step": 40420, + "torque_loss": 0.1119220182299614 + }, + { + "epoch": 36.34892086330935, + "force_loss": 0.00261204713024199, + "step": 40420 + }, + { + "epoch": 36.35791366906475, + "grad_norm": 0.1714809089899063, + "learning_rate": 2.6374890473530188e-05, + "loss": 0.015, + "step": 40430 + }, + { + "action_loss": 0.002275634091347456, + "epoch": 36.35791366906475, + "step": 40430 + }, + { + "epoch": 36.35791366906475, + "step": 40430, + "torque_loss": 0.1370101273059845 + }, + { + "epoch": 36.35791366906475, + "force_loss": 0.007402322720736265, + "step": 40430 + }, + { + "epoch": 36.36690647482014, + "grad_norm": 0.07405562698841095, + "learning_rate": 2.635060653442664e-05, + "loss": 0.0183, + "step": 40440 + }, + { + "action_loss": 0.0008310635457746685, + "epoch": 36.36690647482014, + "step": 40440 + }, + { + "epoch": 36.36690647482014, + "step": 40440, + "torque_loss": 0.11837742477655411 + }, + { + "epoch": 36.36690647482014, + "force_loss": 0.0009524838533252478, + "step": 40440 + }, + { + "epoch": 36.37589928057554, + "grad_norm": 0.1030859500169754, + "learning_rate": 2.6326329779384395e-05, + "loss": 0.0165, + "step": 40450 + }, + { + "action_loss": 0.004299999680370092, + "epoch": 36.37589928057554, + "step": 40450 + }, + { + "epoch": 36.37589928057554, + "step": 40450, + "torque_loss": 0.11847180873155594 + }, + { + "epoch": 36.37589928057554, + "force_loss": 0.00947362557053566, + "step": 40450 + }, + { + "epoch": 36.384892086330936, + "grad_norm": 0.15667304396629333, + "learning_rate": 2.63020602157781e-05, + "loss": 0.0152, + "step": 40460 + }, + { + "action_loss": 0.0012257518246769905, + "epoch": 36.384892086330936, + "step": 40460 + }, + { + "epoch": 36.384892086330936, + "step": 40460, + "torque_loss": 0.1009514257311821 + }, + { + "epoch": 36.384892086330936, + "force_loss": 0.0029828825499862432, + "step": 40460 + }, + { + "epoch": 36.393884892086334, + "grad_norm": 0.11764544248580933, + "learning_rate": 2.62777978509802e-05, + "loss": 0.0169, + "step": 40470 + }, + { + "action_loss": 0.0022348554339259863, + "epoch": 36.393884892086334, + "step": 40470 + }, + { + "epoch": 36.393884892086334, + "step": 40470, + "torque_loss": 0.11265590041875839 + }, + { + "epoch": 36.393884892086334, + "force_loss": 0.003278246382251382, + "step": 40470 + }, + { + "epoch": 36.402877697841724, + "grad_norm": 0.10085401684045792, + "learning_rate": 2.6253542692360954e-05, + "loss": 0.0178, + "step": 40480 + }, + { + "action_loss": 0.0021885985042899847, + "epoch": 36.402877697841724, + "step": 40480 + }, + { + "epoch": 36.402877697841724, + "step": 40480, + "torque_loss": 0.09116091579198837 + }, + { + "epoch": 36.402877697841724, + "force_loss": 0.003922689240425825, + "step": 40480 + }, + { + "epoch": 36.41187050359712, + "grad_norm": 0.3292659521102905, + "learning_rate": 2.6229294747288458e-05, + "loss": 0.0139, + "step": 40490 + }, + { + "action_loss": 0.0016092568403109908, + "epoch": 36.41187050359712, + "step": 40490 + }, + { + "epoch": 36.41187050359712, + "step": 40490, + "torque_loss": 0.10183060169219971 + }, + { + "epoch": 36.41187050359712, + "force_loss": 0.0010820940369740129, + "step": 40490 + }, + { + "epoch": 36.42086330935252, + "grad_norm": 0.06739940494298935, + "learning_rate": 2.6205054023128596e-05, + "loss": 0.0138, + "step": 40500 + }, + { + "action_loss": 0.0036087885964661837, + "epoch": 36.42086330935252, + "step": 40500 + }, + { + "epoch": 36.42086330935252, + "step": 40500, + "torque_loss": 0.10821110755205154 + }, + { + "epoch": 36.42086330935252, + "force_loss": 0.004196793772280216, + "step": 40500 + }, + { + "epoch": 36.42985611510792, + "grad_norm": 0.25225770473480225, + "learning_rate": 2.6180820527245043e-05, + "loss": 0.0191, + "step": 40510 + }, + { + "action_loss": 0.0015113676199689507, + "epoch": 36.42985611510792, + "step": 40510 + }, + { + "epoch": 36.42985611510792, + "step": 40510, + "torque_loss": 0.1375342458486557 + }, + { + "epoch": 36.42985611510792, + "force_loss": 0.0015819409163668752, + "step": 40510 + }, + { + "epoch": 36.43884892086331, + "grad_norm": 0.23361550271511078, + "learning_rate": 2.6156594266999313e-05, + "loss": 0.0168, + "step": 40520 + }, + { + "action_loss": 0.004073174670338631, + "epoch": 36.43884892086331, + "step": 40520 + }, + { + "epoch": 36.43884892086331, + "step": 40520, + "torque_loss": 0.10116708278656006 + }, + { + "epoch": 36.43884892086331, + "force_loss": 0.006961511448025703, + "step": 40520 + }, + { + "epoch": 36.447841726618705, + "grad_norm": 0.2864074110984802, + "learning_rate": 2.6132375249750672e-05, + "loss": 0.0172, + "step": 40530 + }, + { + "action_loss": 0.005145722068846226, + "epoch": 36.447841726618705, + "step": 40530 + }, + { + "epoch": 36.447841726618705, + "step": 40530, + "torque_loss": 0.07767868041992188 + }, + { + "epoch": 36.447841726618705, + "force_loss": 0.005683968309313059, + "step": 40530 + }, + { + "epoch": 36.4568345323741, + "grad_norm": 0.11944551765918732, + "learning_rate": 2.6108163482856286e-05, + "loss": 0.0148, + "step": 40540 + }, + { + "action_loss": 0.00349957006983459, + "epoch": 36.4568345323741, + "step": 40540 + }, + { + "epoch": 36.4568345323741, + "step": 40540, + "torque_loss": 0.082356758415699 + }, + { + "epoch": 36.4568345323741, + "force_loss": 0.003870716318488121, + "step": 40540 + }, + { + "epoch": 36.46582733812949, + "grad_norm": 0.12395649403333664, + "learning_rate": 2.6083958973670964e-05, + "loss": 0.0158, + "step": 40550 + }, + { + "action_loss": 0.0013828134397044778, + "epoch": 36.46582733812949, + "step": 40550 + }, + { + "epoch": 36.46582733812949, + "step": 40550, + "torque_loss": 0.09643980115652084 + }, + { + "epoch": 36.46582733812949, + "force_loss": 0.003292340785264969, + "step": 40550 + }, + { + "epoch": 36.47482014388489, + "grad_norm": 0.14932620525360107, + "learning_rate": 2.6059761729547483e-05, + "loss": 0.0174, + "step": 40560 + }, + { + "action_loss": 0.0006636045873165131, + "epoch": 36.47482014388489, + "step": 40560 + }, + { + "epoch": 36.47482014388489, + "step": 40560, + "torque_loss": 0.11179139465093613 + }, + { + "epoch": 36.47482014388489, + "force_loss": 0.0016114813042804599, + "step": 40560 + }, + { + "epoch": 36.48381294964029, + "grad_norm": 0.09086962789297104, + "learning_rate": 2.603557175783624e-05, + "loss": 0.0147, + "step": 40570 + }, + { + "action_loss": 0.0006282440153881907, + "epoch": 36.48381294964029, + "step": 40570 + }, + { + "epoch": 36.48381294964029, + "step": 40570, + "torque_loss": 0.10010053962469101 + }, + { + "epoch": 36.48381294964029, + "force_loss": 0.001065872493200004, + "step": 40570 + }, + { + "epoch": 36.492805755395686, + "grad_norm": 0.06441736221313477, + "learning_rate": 2.601138906588559e-05, + "loss": 0.0153, + "step": 40580 + }, + { + "action_loss": 0.0014755738666281104, + "epoch": 36.492805755395686, + "step": 40580 + }, + { + "epoch": 36.492805755395686, + "step": 40580, + "torque_loss": 0.130422905087471 + }, + { + "epoch": 36.492805755395686, + "force_loss": 0.0017960546538233757, + "step": 40580 + }, + { + "epoch": 36.50179856115108, + "grad_norm": 0.3446483612060547, + "learning_rate": 2.598721366104152e-05, + "loss": 0.0163, + "step": 40590 + }, + { + "action_loss": 0.001830587163567543, + "epoch": 36.50179856115108, + "step": 40590 + }, + { + "epoch": 36.50179856115108, + "step": 40590, + "torque_loss": 0.13736669719219208 + }, + { + "epoch": 36.50179856115108, + "force_loss": 0.0017593413358554244, + "step": 40590 + }, + { + "epoch": 36.510791366906474, + "grad_norm": 0.09236304461956024, + "learning_rate": 2.5963045550647945e-05, + "loss": 0.0134, + "step": 40600 + }, + { + "action_loss": 0.0018125405767932534, + "epoch": 36.510791366906474, + "step": 40600 + }, + { + "epoch": 36.510791366906474, + "step": 40600, + "torque_loss": 0.14467743039131165 + }, + { + "epoch": 36.510791366906474, + "force_loss": 0.0026651702355593443, + "step": 40600 + }, + { + "epoch": 36.51978417266187, + "grad_norm": 0.27811768651008606, + "learning_rate": 2.5938884742046466e-05, + "loss": 0.015, + "step": 40610 + }, + { + "action_loss": 0.005231405142694712, + "epoch": 36.51978417266187, + "step": 40610 + }, + { + "epoch": 36.51978417266187, + "step": 40610, + "torque_loss": 0.1340021938085556 + }, + { + "epoch": 36.51978417266187, + "force_loss": 0.006940803956240416, + "step": 40610 + }, + { + "epoch": 36.52877697841727, + "grad_norm": 0.16698452830314636, + "learning_rate": 2.5914731242576507e-05, + "loss": 0.0168, + "step": 40620 + }, + { + "action_loss": 0.00139683170709759, + "epoch": 36.52877697841727, + "step": 40620 + }, + { + "epoch": 36.52877697841727, + "step": 40620, + "torque_loss": 0.0994524359703064 + }, + { + "epoch": 36.52877697841727, + "force_loss": 0.002018711995333433, + "step": 40620 + }, + { + "epoch": 36.53776978417266, + "grad_norm": 0.1333102583885193, + "learning_rate": 2.5890585059575268e-05, + "loss": 0.0164, + "step": 40630 + }, + { + "action_loss": 0.0010444204090163112, + "epoch": 36.53776978417266, + "step": 40630 + }, + { + "epoch": 36.53776978417266, + "step": 40630, + "torque_loss": 0.15608525276184082 + }, + { + "epoch": 36.53776978417266, + "force_loss": 0.0033776445779949427, + "step": 40630 + }, + { + "epoch": 36.54676258992806, + "grad_norm": 0.13561423122882843, + "learning_rate": 2.5866446200377688e-05, + "loss": 0.0159, + "step": 40640 + }, + { + "action_loss": 0.0017424138495698571, + "epoch": 36.54676258992806, + "step": 40640 + }, + { + "epoch": 36.54676258992806, + "step": 40640, + "torque_loss": 0.1100485697388649 + }, + { + "epoch": 36.54676258992806, + "force_loss": 0.0015539852902293205, + "step": 40640 + }, + { + "epoch": 36.555755395683455, + "grad_norm": 0.33544790744781494, + "learning_rate": 2.5842314672316566e-05, + "loss": 0.017, + "step": 40650 + }, + { + "action_loss": 0.0023810788989067078, + "epoch": 36.555755395683455, + "step": 40650 + }, + { + "epoch": 36.555755395683455, + "step": 40650, + "torque_loss": 0.10753890126943588 + }, + { + "epoch": 36.555755395683455, + "force_loss": 0.005487387999892235, + "step": 40650 + }, + { + "epoch": 36.564748201438846, + "grad_norm": 0.20060555636882782, + "learning_rate": 2.581819048272239e-05, + "loss": 0.0161, + "step": 40660 + }, + { + "action_loss": 0.013823983259499073, + "epoch": 36.564748201438846, + "step": 40660 + }, + { + "epoch": 36.564748201438846, + "step": 40660, + "torque_loss": 0.1418100744485855 + }, + { + "epoch": 36.564748201438846, + "force_loss": 0.013399017043411732, + "step": 40660 + }, + { + "epoch": 36.57374100719424, + "grad_norm": 0.21801358461380005, + "learning_rate": 2.5794073638923478e-05, + "loss": 0.0199, + "step": 40670 + }, + { + "action_loss": 0.005062831100076437, + "epoch": 36.57374100719424, + "step": 40670 + }, + { + "epoch": 36.57374100719424, + "step": 40670, + "torque_loss": 0.13114379346370697 + }, + { + "epoch": 36.57374100719424, + "force_loss": 0.003218039171770215, + "step": 40670 + }, + { + "epoch": 36.58273381294964, + "grad_norm": 0.4264189898967743, + "learning_rate": 2.576996414824586e-05, + "loss": 0.0228, + "step": 40680 + }, + { + "action_loss": 0.0015105524798855186, + "epoch": 36.58273381294964, + "step": 40680 + }, + { + "epoch": 36.58273381294964, + "step": 40680, + "torque_loss": 0.10605400800704956 + }, + { + "epoch": 36.58273381294964, + "force_loss": 0.002417312702164054, + "step": 40680 + }, + { + "epoch": 36.59172661870504, + "grad_norm": 0.20488204061985016, + "learning_rate": 2.574586201801339e-05, + "loss": 0.0163, + "step": 40690 + }, + { + "action_loss": 0.0009720135130919516, + "epoch": 36.59172661870504, + "step": 40690 + }, + { + "epoch": 36.59172661870504, + "step": 40690, + "torque_loss": 0.1611345410346985 + }, + { + "epoch": 36.59172661870504, + "force_loss": 0.0007408817764371634, + "step": 40690 + }, + { + "epoch": 36.60071942446043, + "grad_norm": 0.20045778155326843, + "learning_rate": 2.572176725554762e-05, + "loss": 0.0153, + "step": 40700 + }, + { + "action_loss": 0.007397270295768976, + "epoch": 36.60071942446043, + "step": 40700 + }, + { + "epoch": 36.60071942446043, + "step": 40700, + "torque_loss": 0.11989260464906693 + }, + { + "epoch": 36.60071942446043, + "force_loss": 0.012663266621530056, + "step": 40700 + }, + { + "epoch": 36.60971223021583, + "grad_norm": 0.5078071355819702, + "learning_rate": 2.5697679868167966e-05, + "loss": 0.0179, + "step": 40710 + }, + { + "action_loss": 0.001644335687160492, + "epoch": 36.60971223021583, + "step": 40710 + }, + { + "epoch": 36.60971223021583, + "step": 40710, + "torque_loss": 0.13805948197841644 + }, + { + "epoch": 36.60971223021583, + "force_loss": 0.0016886420780792832, + "step": 40710 + }, + { + "epoch": 36.618705035971225, + "grad_norm": 0.13483642041683197, + "learning_rate": 2.5673599863191468e-05, + "loss": 0.0166, + "step": 40720 + }, + { + "action_loss": 0.005367635283619165, + "epoch": 36.618705035971225, + "step": 40720 + }, + { + "epoch": 36.618705035971225, + "step": 40720, + "torque_loss": 0.09979171305894852 + }, + { + "epoch": 36.618705035971225, + "force_loss": 0.006961230654269457, + "step": 40720 + }, + { + "epoch": 36.62769784172662, + "grad_norm": 0.22946494817733765, + "learning_rate": 2.564952724793306e-05, + "loss": 0.0143, + "step": 40730 + }, + { + "action_loss": 0.0017493563937023282, + "epoch": 36.62769784172662, + "step": 40730 + }, + { + "epoch": 36.62769784172662, + "step": 40730, + "torque_loss": 0.140501007437706 + }, + { + "epoch": 36.62769784172662, + "force_loss": 0.0019741205032914877, + "step": 40730 + }, + { + "epoch": 36.63669064748201, + "grad_norm": 0.14705729484558105, + "learning_rate": 2.5625462029705306e-05, + "loss": 0.017, + "step": 40740 + }, + { + "action_loss": 0.00583826145157218, + "epoch": 36.63669064748201, + "step": 40740 + }, + { + "epoch": 36.63669064748201, + "step": 40740, + "torque_loss": 0.11922410130500793 + }, + { + "epoch": 36.63669064748201, + "force_loss": 0.004548977129161358, + "step": 40740 + }, + { + "epoch": 36.64568345323741, + "grad_norm": 0.10758621990680695, + "learning_rate": 2.5601404215818624e-05, + "loss": 0.0157, + "step": 40750 + }, + { + "action_loss": 0.0021942760795354843, + "epoch": 36.64568345323741, + "step": 40750 + }, + { + "epoch": 36.64568345323741, + "step": 40750, + "torque_loss": 0.09720811992883682 + }, + { + "epoch": 36.64568345323741, + "force_loss": 0.0034751601051539183, + "step": 40750 + }, + { + "epoch": 36.65467625899281, + "grad_norm": 0.17540010809898376, + "learning_rate": 2.5577353813581144e-05, + "loss": 0.0158, + "step": 40760 + }, + { + "action_loss": 0.003847033716738224, + "epoch": 36.65467625899281, + "step": 40760 + }, + { + "epoch": 36.65467625899281, + "step": 40760, + "torque_loss": 0.12497133016586304 + }, + { + "epoch": 36.65467625899281, + "force_loss": 0.005627034697681665, + "step": 40760 + }, + { + "epoch": 36.6636690647482, + "grad_norm": 0.3054220378398895, + "learning_rate": 2.5553310830298733e-05, + "loss": 0.0198, + "step": 40770 + }, + { + "action_loss": 0.012451942078769207, + "epoch": 36.6636690647482, + "step": 40770 + }, + { + "epoch": 36.6636690647482, + "step": 40770, + "torque_loss": 0.1235266700387001 + }, + { + "epoch": 36.6636690647482, + "force_loss": 0.016896799206733704, + "step": 40770 + }, + { + "epoch": 36.672661870503596, + "grad_norm": 0.16211777925491333, + "learning_rate": 2.5529275273275012e-05, + "loss": 0.015, + "step": 40780 + }, + { + "action_loss": 0.009844637475907803, + "epoch": 36.672661870503596, + "step": 40780 + }, + { + "epoch": 36.672661870503596, + "step": 40780, + "torque_loss": 0.10241729021072388 + }, + { + "epoch": 36.672661870503596, + "force_loss": 0.007657181471586227, + "step": 40780 + }, + { + "epoch": 36.681654676258994, + "grad_norm": 0.2554575800895691, + "learning_rate": 2.550524714981133e-05, + "loss": 0.0145, + "step": 40790 + }, + { + "action_loss": 0.0008564687450416386, + "epoch": 36.681654676258994, + "step": 40790 + }, + { + "epoch": 36.681654676258994, + "step": 40790, + "torque_loss": 0.12012094259262085 + }, + { + "epoch": 36.681654676258994, + "force_loss": 0.001241430756635964, + "step": 40790 + }, + { + "epoch": 36.69064748201439, + "grad_norm": 0.07543785870075226, + "learning_rate": 2.5481226467206837e-05, + "loss": 0.0155, + "step": 40800 + }, + { + "action_loss": 0.0014604610623791814, + "epoch": 36.69064748201439, + "step": 40800 + }, + { + "epoch": 36.69064748201439, + "step": 40800, + "torque_loss": 0.14680027961730957 + }, + { + "epoch": 36.69064748201439, + "force_loss": 0.0021571898832917213, + "step": 40800 + }, + { + "epoch": 36.69964028776978, + "grad_norm": 0.09962771087884903, + "learning_rate": 2.5457213232758365e-05, + "loss": 0.0146, + "step": 40810 + }, + { + "action_loss": 0.004125829786062241, + "epoch": 36.69964028776978, + "step": 40810 + }, + { + "epoch": 36.69964028776978, + "step": 40810, + "torque_loss": 0.1400660276412964 + }, + { + "epoch": 36.69964028776978, + "force_loss": 0.009941665455698967, + "step": 40810 + }, + { + "epoch": 36.70863309352518, + "grad_norm": 0.12375088781118393, + "learning_rate": 2.5433207453760498e-05, + "loss": 0.0206, + "step": 40820 + }, + { + "action_loss": 0.0012120419414713979, + "epoch": 36.70863309352518, + "step": 40820 + }, + { + "epoch": 36.70863309352518, + "step": 40820, + "torque_loss": 0.1398182511329651 + }, + { + "epoch": 36.70863309352518, + "force_loss": 0.0014342637732625008, + "step": 40820 + }, + { + "epoch": 36.71762589928058, + "grad_norm": 0.22068969905376434, + "learning_rate": 2.5409209137505552e-05, + "loss": 0.0167, + "step": 40830 + }, + { + "action_loss": 0.0047895521856844425, + "epoch": 36.71762589928058, + "step": 40830 + }, + { + "epoch": 36.71762589928058, + "step": 40830, + "torque_loss": 0.1456289291381836 + }, + { + "epoch": 36.71762589928058, + "force_loss": 0.008335483260452747, + "step": 40830 + }, + { + "epoch": 36.726618705035975, + "grad_norm": 0.19960246980190277, + "learning_rate": 2.5385218291283597e-05, + "loss": 0.015, + "step": 40840 + }, + { + "action_loss": 0.00313937128521502, + "epoch": 36.726618705035975, + "step": 40840 + }, + { + "epoch": 36.726618705035975, + "step": 40840, + "torque_loss": 0.19083677232265472 + }, + { + "epoch": 36.726618705035975, + "force_loss": 0.002062875544652343, + "step": 40840 + }, + { + "epoch": 36.735611510791365, + "grad_norm": 0.11217894405126572, + "learning_rate": 2.5361234922382383e-05, + "loss": 0.0184, + "step": 40850 + }, + { + "action_loss": 0.0013308579800650477, + "epoch": 36.735611510791365, + "step": 40850 + }, + { + "epoch": 36.735611510791365, + "step": 40850, + "torque_loss": 0.11408674716949463 + }, + { + "epoch": 36.735611510791365, + "force_loss": 0.002995135262608528, + "step": 40850 + }, + { + "epoch": 36.74460431654676, + "grad_norm": 0.15857085585594177, + "learning_rate": 2.533725903808749e-05, + "loss": 0.0162, + "step": 40860 + }, + { + "action_loss": 0.0016767798224464059, + "epoch": 36.74460431654676, + "step": 40860 + }, + { + "epoch": 36.74460431654676, + "step": 40860, + "torque_loss": 0.1243806704878807 + }, + { + "epoch": 36.74460431654676, + "force_loss": 0.0010130471782758832, + "step": 40860 + }, + { + "epoch": 36.75359712230216, + "grad_norm": 0.17636162042617798, + "learning_rate": 2.5313290645682085e-05, + "loss": 0.0178, + "step": 40870 + }, + { + "action_loss": 0.006714515388011932, + "epoch": 36.75359712230216, + "step": 40870 + }, + { + "epoch": 36.75359712230216, + "step": 40870, + "torque_loss": 0.13070610165596008 + }, + { + "epoch": 36.75359712230216, + "force_loss": 0.011135063134133816, + "step": 40870 + }, + { + "epoch": 36.76258992805755, + "grad_norm": 0.16088257730007172, + "learning_rate": 2.52893297524472e-05, + "loss": 0.0183, + "step": 40880 + }, + { + "action_loss": 0.006263503339141607, + "epoch": 36.76258992805755, + "step": 40880 + }, + { + "epoch": 36.76258992805755, + "step": 40880, + "torque_loss": 0.07483164221048355 + }, + { + "epoch": 36.76258992805755, + "force_loss": 0.007079185452312231, + "step": 40880 + }, + { + "epoch": 36.77158273381295, + "grad_norm": 0.31436511874198914, + "learning_rate": 2.526537636566145e-05, + "loss": 0.0137, + "step": 40890 + }, + { + "action_loss": 0.0019651756156235933, + "epoch": 36.77158273381295, + "step": 40890 + }, + { + "epoch": 36.77158273381295, + "step": 40890, + "torque_loss": 0.1034235954284668 + }, + { + "epoch": 36.77158273381295, + "force_loss": 0.005370249506086111, + "step": 40890 + }, + { + "epoch": 36.780575539568346, + "grad_norm": 0.13597896695137024, + "learning_rate": 2.5241430492601305e-05, + "loss": 0.0169, + "step": 40900 + }, + { + "action_loss": 0.0029434890020638704, + "epoch": 36.780575539568346, + "step": 40900 + }, + { + "epoch": 36.780575539568346, + "step": 40900, + "torque_loss": 0.12355612963438034 + }, + { + "epoch": 36.780575539568346, + "force_loss": 0.0021419788245111704, + "step": 40900 + }, + { + "epoch": 36.789568345323744, + "grad_norm": 0.11905980110168457, + "learning_rate": 2.5217492140540867e-05, + "loss": 0.0163, + "step": 40910 + }, + { + "action_loss": 0.0007639930699951947, + "epoch": 36.789568345323744, + "step": 40910 + }, + { + "epoch": 36.789568345323744, + "step": 40910, + "torque_loss": 0.11611571162939072 + }, + { + "epoch": 36.789568345323744, + "force_loss": 0.000702633464243263, + "step": 40910 + }, + { + "epoch": 36.798561151079134, + "grad_norm": 0.06944193691015244, + "learning_rate": 2.5193561316751967e-05, + "loss": 0.0164, + "step": 40920 + }, + { + "action_loss": 0.0014789592241868377, + "epoch": 36.798561151079134, + "step": 40920 + }, + { + "epoch": 36.798561151079134, + "step": 40920, + "torque_loss": 0.13382190465927124 + }, + { + "epoch": 36.798561151079134, + "force_loss": 0.00314895436167717, + "step": 40920 + }, + { + "epoch": 36.80755395683453, + "grad_norm": 0.09905727207660675, + "learning_rate": 2.516963802850416e-05, + "loss": 0.0181, + "step": 40930 + }, + { + "action_loss": 0.0007742529851384461, + "epoch": 36.80755395683453, + "step": 40930 + }, + { + "epoch": 36.80755395683453, + "step": 40930, + "torque_loss": 0.08615850657224655 + }, + { + "epoch": 36.80755395683453, + "force_loss": 0.000837352592498064, + "step": 40930 + }, + { + "epoch": 36.81654676258993, + "grad_norm": 0.1636144071817398, + "learning_rate": 2.5145722283064698e-05, + "loss": 0.0175, + "step": 40940 + }, + { + "action_loss": 0.0014748625690117478, + "epoch": 36.81654676258993, + "step": 40940 + }, + { + "epoch": 36.81654676258993, + "step": 40940, + "torque_loss": 0.12413579225540161 + }, + { + "epoch": 36.81654676258993, + "force_loss": 0.0024982497561722994, + "step": 40940 + }, + { + "epoch": 36.82553956834532, + "grad_norm": 0.24281755089759827, + "learning_rate": 2.5121814087698602e-05, + "loss": 0.0156, + "step": 40950 + }, + { + "action_loss": 0.0018437281250953674, + "epoch": 36.82553956834532, + "step": 40950 + }, + { + "epoch": 36.82553956834532, + "step": 40950, + "torque_loss": 0.13107097148895264 + }, + { + "epoch": 36.82553956834532, + "force_loss": 0.003890153719112277, + "step": 40950 + }, + { + "epoch": 36.83453237410072, + "grad_norm": 0.1569495052099228, + "learning_rate": 2.509791344966848e-05, + "loss": 0.0153, + "step": 40960 + }, + { + "action_loss": 0.00205861940048635, + "epoch": 36.83453237410072, + "step": 40960 + }, + { + "epoch": 36.83453237410072, + "step": 40960, + "torque_loss": 0.14420723915100098 + }, + { + "epoch": 36.83453237410072, + "force_loss": 0.005641359835863113, + "step": 40960 + }, + { + "epoch": 36.843525179856115, + "grad_norm": 0.09009571373462677, + "learning_rate": 2.5074020376234768e-05, + "loss": 0.0167, + "step": 40970 + }, + { + "action_loss": 0.0010952440788969398, + "epoch": 36.843525179856115, + "step": 40970 + }, + { + "epoch": 36.843525179856115, + "step": 40970, + "torque_loss": 0.12368500232696533 + }, + { + "epoch": 36.843525179856115, + "force_loss": 0.0014682303881272674, + "step": 40970 + }, + { + "epoch": 36.85251798561151, + "grad_norm": 0.12156790494918823, + "learning_rate": 2.5050134874655534e-05, + "loss": 0.0169, + "step": 40980 + }, + { + "action_loss": 0.0015372349880635738, + "epoch": 36.85251798561151, + "step": 40980 + }, + { + "epoch": 36.85251798561151, + "step": 40980, + "torque_loss": 0.16442887485027313 + }, + { + "epoch": 36.85251798561151, + "force_loss": 0.005887440871447325, + "step": 40980 + }, + { + "epoch": 36.861510791366904, + "grad_norm": 0.2620079517364502, + "learning_rate": 2.5026256952186566e-05, + "loss": 0.0135, + "step": 40990 + }, + { + "action_loss": 0.0025781861040741205, + "epoch": 36.861510791366904, + "step": 40990 + }, + { + "epoch": 36.861510791366904, + "step": 40990, + "torque_loss": 0.16907234489917755 + }, + { + "epoch": 36.861510791366904, + "force_loss": 0.006407263223081827, + "step": 40990 + }, + { + "epoch": 36.8705035971223, + "grad_norm": 0.1035318449139595, + "learning_rate": 2.5002386616081335e-05, + "loss": 0.0163, + "step": 41000 + }, + { + "action_loss": 0.0011775917373597622, + "epoch": 36.8705035971223, + "step": 41000 + }, + { + "epoch": 36.8705035971223, + "step": 41000, + "torque_loss": 0.10542478412389755 + }, + { + "epoch": 36.8705035971223, + "force_loss": 0.0017100091790780425, + "step": 41000 + }, + { + "epoch": 36.8794964028777, + "grad_norm": 0.09911273419857025, + "learning_rate": 2.497852387359103e-05, + "loss": 0.026, + "step": 41010 + }, + { + "action_loss": 0.0013963145902380347, + "epoch": 36.8794964028777, + "step": 41010 + }, + { + "epoch": 36.8794964028777, + "step": 41010, + "torque_loss": 0.10430901497602463 + }, + { + "epoch": 36.8794964028777, + "force_loss": 0.0026781449560076, + "step": 41010 + }, + { + "epoch": 36.888489208633096, + "grad_norm": 0.11811426281929016, + "learning_rate": 2.4954668731964496e-05, + "loss": 0.0163, + "step": 41020 + }, + { + "action_loss": 0.005028447601944208, + "epoch": 36.888489208633096, + "step": 41020 + }, + { + "epoch": 36.888489208633096, + "step": 41020, + "torque_loss": 0.12395540624856949 + }, + { + "epoch": 36.888489208633096, + "force_loss": 0.005967164412140846, + "step": 41020 + }, + { + "epoch": 36.89748201438849, + "grad_norm": 0.07472461462020874, + "learning_rate": 2.4930821198448364e-05, + "loss": 0.0175, + "step": 41030 + }, + { + "action_loss": 0.009155981242656708, + "epoch": 36.89748201438849, + "step": 41030 + }, + { + "epoch": 36.89748201438849, + "step": 41030, + "torque_loss": 0.16561651229858398 + }, + { + "epoch": 36.89748201438849, + "force_loss": 0.019507234916090965, + "step": 41030 + }, + { + "epoch": 36.906474820143885, + "grad_norm": 0.1970684975385666, + "learning_rate": 2.4906981280286796e-05, + "loss": 0.0172, + "step": 41040 + }, + { + "action_loss": 0.003002721117809415, + "epoch": 36.906474820143885, + "step": 41040 + }, + { + "epoch": 36.906474820143885, + "step": 41040, + "torque_loss": 0.19472616910934448 + }, + { + "epoch": 36.906474820143885, + "force_loss": 0.002908949973061681, + "step": 41040 + }, + { + "epoch": 36.91546762589928, + "grad_norm": 0.16730277240276337, + "learning_rate": 2.488314898472179e-05, + "loss": 0.017, + "step": 41050 + }, + { + "action_loss": 0.0017800036584958434, + "epoch": 36.91546762589928, + "step": 41050 + }, + { + "epoch": 36.91546762589928, + "step": 41050, + "torque_loss": 0.1606191098690033 + }, + { + "epoch": 36.91546762589928, + "force_loss": 0.0013810022501274943, + "step": 41050 + }, + { + "epoch": 36.92446043165468, + "grad_norm": 0.2171097993850708, + "learning_rate": 2.485932431899295e-05, + "loss": 0.0155, + "step": 41060 + }, + { + "action_loss": 0.0010319054126739502, + "epoch": 36.92446043165468, + "step": 41060 + }, + { + "epoch": 36.92446043165468, + "step": 41060, + "torque_loss": 0.14204587042331696 + }, + { + "epoch": 36.92446043165468, + "force_loss": 0.0026959876995533705, + "step": 41060 + }, + { + "epoch": 36.93345323741007, + "grad_norm": 0.13802260160446167, + "learning_rate": 2.4835507290337584e-05, + "loss": 0.0205, + "step": 41070 + }, + { + "action_loss": 0.004782230127602816, + "epoch": 36.93345323741007, + "step": 41070 + }, + { + "epoch": 36.93345323741007, + "step": 41070, + "torque_loss": 0.13187959790229797 + }, + { + "epoch": 36.93345323741007, + "force_loss": 0.002835092833265662, + "step": 41070 + }, + { + "epoch": 36.94244604316547, + "grad_norm": 0.24833816289901733, + "learning_rate": 2.4811697905990672e-05, + "loss": 0.015, + "step": 41080 + }, + { + "action_loss": 0.007749114651232958, + "epoch": 36.94244604316547, + "step": 41080 + }, + { + "epoch": 36.94244604316547, + "step": 41080, + "torque_loss": 0.16751426458358765 + }, + { + "epoch": 36.94244604316547, + "force_loss": 0.011407208628952503, + "step": 41080 + }, + { + "epoch": 36.951438848920866, + "grad_norm": 0.055707551538944244, + "learning_rate": 2.4787896173184854e-05, + "loss": 0.0156, + "step": 41090 + }, + { + "action_loss": 0.004178546369075775, + "epoch": 36.951438848920866, + "step": 41090 + }, + { + "epoch": 36.951438848920866, + "step": 41090, + "torque_loss": 0.11315649747848511 + }, + { + "epoch": 36.951438848920866, + "force_loss": 0.005426496267318726, + "step": 41090 + }, + { + "epoch": 36.960431654676256, + "grad_norm": 0.11946913599967957, + "learning_rate": 2.4764102099150534e-05, + "loss": 0.0199, + "step": 41100 + }, + { + "action_loss": 0.002004031091928482, + "epoch": 36.960431654676256, + "step": 41100 + }, + { + "epoch": 36.960431654676256, + "step": 41100, + "torque_loss": 0.08728880435228348 + }, + { + "epoch": 36.960431654676256, + "force_loss": 0.0021931633818894625, + "step": 41100 + }, + { + "epoch": 36.969424460431654, + "grad_norm": 0.11880341172218323, + "learning_rate": 2.4740315691115644e-05, + "loss": 0.0177, + "step": 41110 + }, + { + "action_loss": 0.01130113285034895, + "epoch": 36.969424460431654, + "step": 41110 + }, + { + "epoch": 36.969424460431654, + "step": 41110, + "torque_loss": 0.12452418357133865 + }, + { + "epoch": 36.969424460431654, + "force_loss": 0.021689504384994507, + "step": 41110 + }, + { + "epoch": 36.97841726618705, + "grad_norm": 0.12031841278076172, + "learning_rate": 2.4716536956305918e-05, + "loss": 0.0196, + "step": 41120 + }, + { + "action_loss": 0.010829283855855465, + "epoch": 36.97841726618705, + "step": 41120 + }, + { + "epoch": 36.97841726618705, + "step": 41120, + "torque_loss": 0.1601242870092392 + }, + { + "epoch": 36.97841726618705, + "force_loss": 0.005428771022707224, + "step": 41120 + }, + { + "epoch": 36.98741007194245, + "grad_norm": 0.1415182650089264, + "learning_rate": 2.4692765901944697e-05, + "loss": 0.0162, + "step": 41130 + }, + { + "action_loss": 0.001498539000749588, + "epoch": 36.98741007194245, + "step": 41130 + }, + { + "epoch": 36.98741007194245, + "step": 41130, + "torque_loss": 0.11062075942754745 + }, + { + "epoch": 36.98741007194245, + "force_loss": 0.0020529981702566147, + "step": 41130 + }, + { + "epoch": 36.99640287769784, + "grad_norm": 0.19059258699417114, + "learning_rate": 2.4669002535253e-05, + "loss": 0.0207, + "step": 41140 + }, + { + "action_loss": 0.0025275133084505796, + "epoch": 36.99640287769784, + "step": 41140 + }, + { + "epoch": 36.99640287769784, + "step": 41140, + "torque_loss": 0.167414590716362 + }, + { + "epoch": 36.99640287769784, + "force_loss": 0.0019015618599951267, + "step": 41140 + }, + { + "epoch": 37.00539568345324, + "grad_norm": 0.15231658518314362, + "learning_rate": 2.46452468634495e-05, + "loss": 0.017, + "step": 41150 + }, + { + "action_loss": 0.0024368467275053263, + "epoch": 37.00539568345324, + "step": 41150 + }, + { + "epoch": 37.00539568345324, + "step": 41150, + "torque_loss": 0.17986814677715302 + }, + { + "epoch": 37.00539568345324, + "force_loss": 0.004945660009980202, + "step": 41150 + }, + { + "epoch": 37.014388489208635, + "grad_norm": 0.07546962797641754, + "learning_rate": 2.462149889375055e-05, + "loss": 0.0193, + "step": 41160 + }, + { + "action_loss": 0.002899347571656108, + "epoch": 37.014388489208635, + "step": 41160 + }, + { + "epoch": 37.014388489208635, + "step": 41160, + "torque_loss": 0.14161519706249237 + }, + { + "epoch": 37.014388489208635, + "force_loss": 0.005148437339812517, + "step": 41160 + }, + { + "epoch": 37.023381294964025, + "grad_norm": 0.1663338989019394, + "learning_rate": 2.459775863337014e-05, + "loss": 0.0167, + "step": 41170 + }, + { + "action_loss": 0.0018307944992557168, + "epoch": 37.023381294964025, + "step": 41170 + }, + { + "epoch": 37.023381294964025, + "step": 41170, + "torque_loss": 0.12471288442611694 + }, + { + "epoch": 37.023381294964025, + "force_loss": 0.00840574037283659, + "step": 41170 + }, + { + "epoch": 37.03237410071942, + "grad_norm": 0.18716450035572052, + "learning_rate": 2.4574026089519985e-05, + "loss": 0.0157, + "step": 41180 + }, + { + "action_loss": 0.0017526658484712243, + "epoch": 37.03237410071942, + "step": 41180 + }, + { + "epoch": 37.03237410071942, + "step": 41180, + "torque_loss": 0.10260826349258423 + }, + { + "epoch": 37.03237410071942, + "force_loss": 0.0017096939263865352, + "step": 41180 + }, + { + "epoch": 37.04136690647482, + "grad_norm": 0.24679633975028992, + "learning_rate": 2.4550301269409333e-05, + "loss": 0.0174, + "step": 41190 + }, + { + "action_loss": 0.007488483563065529, + "epoch": 37.04136690647482, + "step": 41190 + }, + { + "epoch": 37.04136690647482, + "step": 41190, + "torque_loss": 0.10798462480306625 + }, + { + "epoch": 37.04136690647482, + "force_loss": 0.004979473073035479, + "step": 41190 + }, + { + "epoch": 37.05035971223022, + "grad_norm": 0.17777372896671295, + "learning_rate": 2.4526584180245216e-05, + "loss": 0.0181, + "step": 41200 + }, + { + "action_loss": 0.0021048991475254297, + "epoch": 37.05035971223022, + "step": 41200 + }, + { + "epoch": 37.05035971223022, + "step": 41200, + "torque_loss": 0.12938641011714935 + }, + { + "epoch": 37.05035971223022, + "force_loss": 0.001535317045636475, + "step": 41200 + }, + { + "epoch": 37.05935251798561, + "grad_norm": 0.17514784634113312, + "learning_rate": 2.4502874829232236e-05, + "loss": 0.0197, + "step": 41210 + }, + { + "action_loss": 0.001937543973326683, + "epoch": 37.05935251798561, + "step": 41210 + }, + { + "epoch": 37.05935251798561, + "step": 41210, + "torque_loss": 0.17843842506408691 + }, + { + "epoch": 37.05935251798561, + "force_loss": 0.003016873961314559, + "step": 41210 + }, + { + "epoch": 37.068345323741006, + "grad_norm": 0.3838598430156708, + "learning_rate": 2.447917322357267e-05, + "loss": 0.0163, + "step": 41220 + }, + { + "action_loss": 0.0018005785532295704, + "epoch": 37.068345323741006, + "step": 41220 + }, + { + "epoch": 37.068345323741006, + "step": 41220, + "torque_loss": 0.1490955501794815 + }, + { + "epoch": 37.068345323741006, + "force_loss": 0.004971009213477373, + "step": 41220 + }, + { + "epoch": 37.077338129496404, + "grad_norm": 0.14843182265758514, + "learning_rate": 2.4455479370466443e-05, + "loss": 0.017, + "step": 41230 + }, + { + "action_loss": 0.0018570529064163566, + "epoch": 37.077338129496404, + "step": 41230 + }, + { + "epoch": 37.077338129496404, + "step": 41230, + "torque_loss": 0.12969480454921722 + }, + { + "epoch": 37.077338129496404, + "force_loss": 0.003479142440482974, + "step": 41230 + }, + { + "epoch": 37.0863309352518, + "grad_norm": 0.3520604968070984, + "learning_rate": 2.4431793277111097e-05, + "loss": 0.0154, + "step": 41240 + }, + { + "action_loss": 0.0029263596516102552, + "epoch": 37.0863309352518, + "step": 41240 + }, + { + "epoch": 37.0863309352518, + "step": 41240, + "torque_loss": 0.1147025004029274 + }, + { + "epoch": 37.0863309352518, + "force_loss": 0.003374862251803279, + "step": 41240 + }, + { + "epoch": 37.09532374100719, + "grad_norm": 0.16331171989440918, + "learning_rate": 2.4408114950701905e-05, + "loss": 0.0145, + "step": 41250 + }, + { + "action_loss": 0.001004036865197122, + "epoch": 37.09532374100719, + "step": 41250 + }, + { + "epoch": 37.09532374100719, + "step": 41250, + "torque_loss": 0.10240895301103592 + }, + { + "epoch": 37.09532374100719, + "force_loss": 0.002176305279135704, + "step": 41250 + }, + { + "epoch": 37.10431654676259, + "grad_norm": 0.1545088291168213, + "learning_rate": 2.4384444398431634e-05, + "loss": 0.0147, + "step": 41260 + }, + { + "action_loss": 0.0008223604527302086, + "epoch": 37.10431654676259, + "step": 41260 + }, + { + "epoch": 37.10431654676259, + "step": 41260, + "torque_loss": 0.14022932946681976 + }, + { + "epoch": 37.10431654676259, + "force_loss": 0.0010246924357488751, + "step": 41260 + }, + { + "epoch": 37.11330935251799, + "grad_norm": 0.19908730685710907, + "learning_rate": 2.4360781627490837e-05, + "loss": 0.016, + "step": 41270 + }, + { + "action_loss": 0.001409512129612267, + "epoch": 37.11330935251799, + "step": 41270 + }, + { + "epoch": 37.11330935251799, + "step": 41270, + "torque_loss": 0.10714276880025864 + }, + { + "epoch": 37.11330935251799, + "force_loss": 0.0017570602940395474, + "step": 41270 + }, + { + "epoch": 37.12230215827338, + "grad_norm": 0.13742773234844208, + "learning_rate": 2.433712664506762e-05, + "loss": 0.015, + "step": 41280 + }, + { + "action_loss": 0.0016693522920832038, + "epoch": 37.12230215827338, + "step": 41280 + }, + { + "epoch": 37.12230215827338, + "step": 41280, + "torque_loss": 0.10153579711914062 + }, + { + "epoch": 37.12230215827338, + "force_loss": 0.005407848861068487, + "step": 41280 + }, + { + "epoch": 37.131294964028775, + "grad_norm": 0.341092973947525, + "learning_rate": 2.431347945834774e-05, + "loss": 0.0151, + "step": 41290 + }, + { + "action_loss": 0.0009617149480618536, + "epoch": 37.131294964028775, + "step": 41290 + }, + { + "epoch": 37.131294964028775, + "step": 41290, + "torque_loss": 0.15237867832183838 + }, + { + "epoch": 37.131294964028775, + "force_loss": 0.0007011820562183857, + "step": 41290 + }, + { + "epoch": 37.14028776978417, + "grad_norm": 0.10262230038642883, + "learning_rate": 2.428984007451458e-05, + "loss": 0.018, + "step": 41300 + }, + { + "action_loss": 0.0047562080435454845, + "epoch": 37.14028776978417, + "step": 41300 + }, + { + "epoch": 37.14028776978417, + "step": 41300, + "torque_loss": 0.12761260569095612 + }, + { + "epoch": 37.14028776978417, + "force_loss": 0.006924936082214117, + "step": 41300 + }, + { + "epoch": 37.14928057553957, + "grad_norm": 0.2014538049697876, + "learning_rate": 2.426620850074917e-05, + "loss": 0.0183, + "step": 41310 + }, + { + "action_loss": 0.002989858388900757, + "epoch": 37.14928057553957, + "step": 41310 + }, + { + "epoch": 37.14928057553957, + "step": 41310, + "torque_loss": 0.10058669000864029 + }, + { + "epoch": 37.14928057553957, + "force_loss": 0.0022126834373921156, + "step": 41310 + }, + { + "epoch": 37.15827338129496, + "grad_norm": 0.23876667022705078, + "learning_rate": 2.424258474423014e-05, + "loss": 0.0207, + "step": 41320 + }, + { + "action_loss": 0.0018437696853652596, + "epoch": 37.15827338129496, + "step": 41320 + }, + { + "epoch": 37.15827338129496, + "step": 41320, + "torque_loss": 0.10081741958856583 + }, + { + "epoch": 37.15827338129496, + "force_loss": 0.0060493722558021545, + "step": 41320 + }, + { + "epoch": 37.16726618705036, + "grad_norm": 0.20577901601791382, + "learning_rate": 2.421896881213382e-05, + "loss": 0.0167, + "step": 41330 + }, + { + "action_loss": 0.004117974545806646, + "epoch": 37.16726618705036, + "step": 41330 + }, + { + "epoch": 37.16726618705036, + "step": 41330, + "torque_loss": 0.14380398392677307 + }, + { + "epoch": 37.16726618705036, + "force_loss": 0.003966405522078276, + "step": 41330 + }, + { + "epoch": 37.17625899280576, + "grad_norm": 0.23002475500106812, + "learning_rate": 2.419536071163402e-05, + "loss": 0.0163, + "step": 41340 + }, + { + "action_loss": 0.0014744462678208947, + "epoch": 37.17625899280576, + "step": 41340 + }, + { + "epoch": 37.17625899280576, + "step": 41340, + "torque_loss": 0.09775781631469727 + }, + { + "epoch": 37.17625899280576, + "force_loss": 0.0056115384213626385, + "step": 41340 + }, + { + "epoch": 37.185251798561154, + "grad_norm": 0.2878395915031433, + "learning_rate": 2.417176044990233e-05, + "loss": 0.0152, + "step": 41350 + }, + { + "action_loss": 0.0076812454499304295, + "epoch": 37.185251798561154, + "step": 41350 + }, + { + "epoch": 37.185251798561154, + "step": 41350, + "torque_loss": 0.09224791079759598 + }, + { + "epoch": 37.185251798561154, + "force_loss": 0.017508333548903465, + "step": 41350 + }, + { + "epoch": 37.194244604316545, + "grad_norm": 0.2531919777393341, + "learning_rate": 2.4148168034107855e-05, + "loss": 0.0175, + "step": 41360 + }, + { + "action_loss": 0.0008937583188526332, + "epoch": 37.194244604316545, + "step": 41360 + }, + { + "epoch": 37.194244604316545, + "step": 41360, + "torque_loss": 0.15019066631793976 + }, + { + "epoch": 37.194244604316545, + "force_loss": 0.0014468939043581486, + "step": 41360 + }, + { + "epoch": 37.20323741007194, + "grad_norm": 0.19757236540317535, + "learning_rate": 2.4124583471417355e-05, + "loss": 0.0157, + "step": 41370 + }, + { + "action_loss": 0.0034667998552322388, + "epoch": 37.20323741007194, + "step": 41370 + }, + { + "epoch": 37.20323741007194, + "step": 41370, + "torque_loss": 0.1455114185810089 + }, + { + "epoch": 37.20323741007194, + "force_loss": 0.002801511436700821, + "step": 41370 + }, + { + "epoch": 37.21223021582734, + "grad_norm": 0.09754537791013718, + "learning_rate": 2.41010067689952e-05, + "loss": 0.0174, + "step": 41380 + }, + { + "action_loss": 0.006356456782668829, + "epoch": 37.21223021582734, + "step": 41380 + }, + { + "epoch": 37.21223021582734, + "step": 41380, + "torque_loss": 0.1255526840686798 + }, + { + "epoch": 37.21223021582734, + "force_loss": 0.0030926025938242674, + "step": 41380 + }, + { + "epoch": 37.22122302158273, + "grad_norm": 0.2362031191587448, + "learning_rate": 2.4077437934003338e-05, + "loss": 0.0168, + "step": 41390 + }, + { + "action_loss": 0.0017143426230177283, + "epoch": 37.22122302158273, + "step": 41390 + }, + { + "epoch": 37.22122302158273, + "step": 41390, + "torque_loss": 0.13515441119670868 + }, + { + "epoch": 37.22122302158273, + "force_loss": 0.0025035792496055365, + "step": 41390 + }, + { + "epoch": 37.23021582733813, + "grad_norm": 0.15564242005348206, + "learning_rate": 2.405387697360143e-05, + "loss": 0.0179, + "step": 41400 + }, + { + "action_loss": 0.002296850783750415, + "epoch": 37.23021582733813, + "step": 41400 + }, + { + "epoch": 37.23021582733813, + "step": 41400, + "torque_loss": 0.14443393051624298 + }, + { + "epoch": 37.23021582733813, + "force_loss": 0.005019243340939283, + "step": 41400 + }, + { + "epoch": 37.239208633093526, + "grad_norm": 0.17784880101680756, + "learning_rate": 2.4030323894946595e-05, + "loss": 0.015, + "step": 41410 + }, + { + "action_loss": 0.003928730264306068, + "epoch": 37.239208633093526, + "step": 41410 + }, + { + "epoch": 37.239208633093526, + "step": 41410, + "torque_loss": 0.13540036976337433 + }, + { + "epoch": 37.239208633093526, + "force_loss": 0.003075113520026207, + "step": 41410 + }, + { + "epoch": 37.24820143884892, + "grad_norm": 0.2360670268535614, + "learning_rate": 2.40067787051937e-05, + "loss": 0.0174, + "step": 41420 + }, + { + "action_loss": 0.0012221961515024304, + "epoch": 37.24820143884892, + "step": 41420 + }, + { + "epoch": 37.24820143884892, + "step": 41420, + "torque_loss": 0.13722175359725952 + }, + { + "epoch": 37.24820143884892, + "force_loss": 0.004681184887886047, + "step": 41420 + }, + { + "epoch": 37.257194244604314, + "grad_norm": 0.1011488288640976, + "learning_rate": 2.3983241411495087e-05, + "loss": 0.0168, + "step": 41430 + }, + { + "action_loss": 0.0016275119269266725, + "epoch": 37.257194244604314, + "step": 41430 + }, + { + "epoch": 37.257194244604314, + "step": 41430, + "torque_loss": 0.1198020949959755 + }, + { + "epoch": 37.257194244604314, + "force_loss": 0.0024211483541876078, + "step": 41430 + }, + { + "epoch": 37.26618705035971, + "grad_norm": 0.2071520984172821, + "learning_rate": 2.3959712021000823e-05, + "loss": 0.0169, + "step": 41440 + }, + { + "action_loss": 0.0016327359480783343, + "epoch": 37.26618705035971, + "step": 41440 + }, + { + "epoch": 37.26618705035971, + "step": 41440, + "torque_loss": 0.07270107418298721 + }, + { + "epoch": 37.26618705035971, + "force_loss": 0.004087215755134821, + "step": 41440 + }, + { + "epoch": 37.27517985611511, + "grad_norm": 0.4247923195362091, + "learning_rate": 2.3936190540858495e-05, + "loss": 0.016, + "step": 41450 + }, + { + "action_loss": 0.008242207579314709, + "epoch": 37.27517985611511, + "step": 41450 + }, + { + "epoch": 37.27517985611511, + "step": 41450, + "torque_loss": 0.1607363075017929 + }, + { + "epoch": 37.27517985611511, + "force_loss": 0.005622658878564835, + "step": 41450 + }, + { + "epoch": 37.28417266187051, + "grad_norm": 0.14170235395431519, + "learning_rate": 2.39126769782133e-05, + "loss": 0.018, + "step": 41460 + }, + { + "action_loss": 0.0017281798645853996, + "epoch": 37.28417266187051, + "step": 41460 + }, + { + "epoch": 37.28417266187051, + "step": 41460, + "torque_loss": 0.09212587028741837 + }, + { + "epoch": 37.28417266187051, + "force_loss": 0.004253492224961519, + "step": 41460 + }, + { + "epoch": 37.2931654676259, + "grad_norm": 0.06294012814760208, + "learning_rate": 2.388917134020805e-05, + "loss": 0.0155, + "step": 41470 + }, + { + "action_loss": 0.005167144350707531, + "epoch": 37.2931654676259, + "step": 41470 + }, + { + "epoch": 37.2931654676259, + "step": 41470, + "torque_loss": 0.12978367507457733 + }, + { + "epoch": 37.2931654676259, + "force_loss": 0.006586526986211538, + "step": 41470 + }, + { + "epoch": 37.302158273381295, + "grad_norm": 0.10713452845811844, + "learning_rate": 2.3865673633983128e-05, + "loss": 0.0171, + "step": 41480 + }, + { + "action_loss": 0.001562502351589501, + "epoch": 37.302158273381295, + "step": 41480 + }, + { + "epoch": 37.302158273381295, + "step": 41480, + "torque_loss": 0.10428479313850403 + }, + { + "epoch": 37.302158273381295, + "force_loss": 0.0011037611402571201, + "step": 41480 + }, + { + "epoch": 37.31115107913669, + "grad_norm": 0.16244597733020782, + "learning_rate": 2.3842183866676492e-05, + "loss": 0.0158, + "step": 41490 + }, + { + "action_loss": 0.00414640735834837, + "epoch": 37.31115107913669, + "step": 41490 + }, + { + "epoch": 37.31115107913669, + "step": 41490, + "torque_loss": 0.09372931718826294 + }, + { + "epoch": 37.31115107913669, + "force_loss": 0.013103918172419071, + "step": 41490 + }, + { + "epoch": 37.32014388489208, + "grad_norm": 0.10922183841466904, + "learning_rate": 2.381870204542377e-05, + "loss": 0.0155, + "step": 41500 + }, + { + "action_loss": 0.0007616126094944775, + "epoch": 37.32014388489208, + "step": 41500 + }, + { + "epoch": 37.32014388489208, + "step": 41500, + "torque_loss": 0.12131894379854202 + }, + { + "epoch": 37.32014388489208, + "force_loss": 0.0007598371594212949, + "step": 41500 + }, + { + "epoch": 37.32913669064748, + "grad_norm": 0.12311369180679321, + "learning_rate": 2.379522817735808e-05, + "loss": 0.0143, + "step": 41510 + }, + { + "action_loss": 0.0022795626427978277, + "epoch": 37.32913669064748, + "step": 41510 + }, + { + "epoch": 37.32913669064748, + "step": 41510, + "torque_loss": 0.16554562747478485 + }, + { + "epoch": 37.32913669064748, + "force_loss": 0.008074014447629452, + "step": 41510 + }, + { + "epoch": 37.33812949640288, + "grad_norm": 0.1777009814977646, + "learning_rate": 2.377176226961018e-05, + "loss": 0.0161, + "step": 41520 + }, + { + "action_loss": 0.003735143691301346, + "epoch": 37.33812949640288, + "step": 41520 + }, + { + "epoch": 37.33812949640288, + "step": 41520, + "torque_loss": 0.10782458633184433 + }, + { + "epoch": 37.33812949640288, + "force_loss": 0.01307226438075304, + "step": 41520 + }, + { + "epoch": 37.347122302158276, + "grad_norm": 0.1115955039858818, + "learning_rate": 2.3748304329308384e-05, + "loss": 0.0159, + "step": 41530 + }, + { + "action_loss": 0.0013150578597560525, + "epoch": 37.347122302158276, + "step": 41530 + }, + { + "epoch": 37.347122302158276, + "step": 41530, + "torque_loss": 0.10661900043487549 + }, + { + "epoch": 37.347122302158276, + "force_loss": 0.0032155411317944527, + "step": 41530 + }, + { + "epoch": 37.356115107913666, + "grad_norm": 0.46261322498321533, + "learning_rate": 2.372485436357858e-05, + "loss": 0.0168, + "step": 41540 + }, + { + "action_loss": 0.0012545754434540868, + "epoch": 37.356115107913666, + "step": 41540 + }, + { + "epoch": 37.356115107913666, + "step": 41540, + "torque_loss": 0.11633291095495224 + }, + { + "epoch": 37.356115107913666, + "force_loss": 0.0012813982320949435, + "step": 41540 + }, + { + "epoch": 37.365107913669064, + "grad_norm": 0.36423543095588684, + "learning_rate": 2.3701412379544296e-05, + "loss": 0.0187, + "step": 41550 + }, + { + "action_loss": 0.005754852667450905, + "epoch": 37.365107913669064, + "step": 41550 + }, + { + "epoch": 37.365107913669064, + "step": 41550, + "torque_loss": 0.14603643119335175 + }, + { + "epoch": 37.365107913669064, + "force_loss": 0.007697058375924826, + "step": 41550 + }, + { + "epoch": 37.37410071942446, + "grad_norm": 0.09964298456907272, + "learning_rate": 2.367797838432653e-05, + "loss": 0.0182, + "step": 41560 + }, + { + "action_loss": 0.01978965662419796, + "epoch": 37.37410071942446, + "step": 41560 + }, + { + "epoch": 37.37410071942446, + "step": 41560, + "torque_loss": 0.1627604216337204 + }, + { + "epoch": 37.37410071942446, + "force_loss": 0.019091719761490822, + "step": 41560 + }, + { + "epoch": 37.38309352517986, + "grad_norm": 0.1869172304868698, + "learning_rate": 2.3654552385043967e-05, + "loss": 0.0194, + "step": 41570 + }, + { + "action_loss": 0.0024029752239584923, + "epoch": 37.38309352517986, + "step": 41570 + }, + { + "epoch": 37.38309352517986, + "step": 41570, + "torque_loss": 0.15013819932937622 + }, + { + "epoch": 37.38309352517986, + "force_loss": 0.003192966105416417, + "step": 41570 + }, + { + "epoch": 37.39208633093525, + "grad_norm": 0.23554450273513794, + "learning_rate": 2.3631134388812742e-05, + "loss": 0.0194, + "step": 41580 + }, + { + "action_loss": 0.010989122092723846, + "epoch": 37.39208633093525, + "step": 41580 + }, + { + "epoch": 37.39208633093525, + "step": 41580, + "torque_loss": 0.13955427706241608 + }, + { + "epoch": 37.39208633093525, + "force_loss": 0.024112893268465996, + "step": 41580 + }, + { + "epoch": 37.40107913669065, + "grad_norm": 0.41500040888786316, + "learning_rate": 2.3607724402746684e-05, + "loss": 0.0187, + "step": 41590 + }, + { + "action_loss": 0.0011135967215523124, + "epoch": 37.40107913669065, + "step": 41590 + }, + { + "epoch": 37.40107913669065, + "step": 41590, + "torque_loss": 0.1296481043100357 + }, + { + "epoch": 37.40107913669065, + "force_loss": 0.000798785884398967, + "step": 41590 + }, + { + "epoch": 37.410071942446045, + "grad_norm": 0.08141325414180756, + "learning_rate": 2.35843224339571e-05, + "loss": 0.0148, + "step": 41600 + }, + { + "action_loss": 0.0013470901176333427, + "epoch": 37.410071942446045, + "step": 41600 + }, + { + "epoch": 37.410071942446045, + "step": 41600, + "torque_loss": 0.09661503881216049 + }, + { + "epoch": 37.410071942446045, + "force_loss": 0.0021243628580123186, + "step": 41600 + }, + { + "epoch": 37.419064748201436, + "grad_norm": 0.12963275611400604, + "learning_rate": 2.3560928489552897e-05, + "loss": 0.0133, + "step": 41610 + }, + { + "action_loss": 0.004545900505036116, + "epoch": 37.419064748201436, + "step": 41610 + }, + { + "epoch": 37.419064748201436, + "step": 41610, + "torque_loss": 0.1700814813375473 + }, + { + "epoch": 37.419064748201436, + "force_loss": 0.0026790162082761526, + "step": 41610 + }, + { + "epoch": 37.42805755395683, + "grad_norm": 0.1503637582063675, + "learning_rate": 2.353754257664053e-05, + "loss": 0.0183, + "step": 41620 + }, + { + "action_loss": 0.007140960078686476, + "epoch": 37.42805755395683, + "step": 41620 + }, + { + "epoch": 37.42805755395683, + "step": 41620, + "torque_loss": 0.13710756599903107 + }, + { + "epoch": 37.42805755395683, + "force_loss": 0.0037944645155221224, + "step": 41620 + }, + { + "epoch": 37.43705035971223, + "grad_norm": 0.08360598981380463, + "learning_rate": 2.3514164702324037e-05, + "loss": 0.0166, + "step": 41630 + }, + { + "action_loss": 0.004188325721770525, + "epoch": 37.43705035971223, + "step": 41630 + }, + { + "epoch": 37.43705035971223, + "step": 41630, + "torque_loss": 0.15347853302955627 + }, + { + "epoch": 37.43705035971223, + "force_loss": 0.009718392044305801, + "step": 41630 + }, + { + "epoch": 37.44604316546763, + "grad_norm": 0.14037662744522095, + "learning_rate": 2.3490794873704963e-05, + "loss": 0.0161, + "step": 41640 + }, + { + "action_loss": 0.0008292978745885193, + "epoch": 37.44604316546763, + "step": 41640 + }, + { + "epoch": 37.44604316546763, + "step": 41640, + "torque_loss": 0.1254401057958603 + }, + { + "epoch": 37.44604316546763, + "force_loss": 0.0013599743833765388, + "step": 41640 + }, + { + "epoch": 37.45503597122302, + "grad_norm": 0.25255194306373596, + "learning_rate": 2.3467433097882496e-05, + "loss": 0.0151, + "step": 41650 + }, + { + "action_loss": 0.0012644171947613358, + "epoch": 37.45503597122302, + "step": 41650 + }, + { + "epoch": 37.45503597122302, + "step": 41650, + "torque_loss": 0.11599936336278915 + }, + { + "epoch": 37.45503597122302, + "force_loss": 0.0030962014570832253, + "step": 41650 + }, + { + "epoch": 37.46402877697842, + "grad_norm": 0.1710033416748047, + "learning_rate": 2.34440793819533e-05, + "loss": 0.0165, + "step": 41660 + }, + { + "action_loss": 0.0017680843593552709, + "epoch": 37.46402877697842, + "step": 41660 + }, + { + "epoch": 37.46402877697842, + "step": 41660, + "torque_loss": 0.10299190133810043 + }, + { + "epoch": 37.46402877697842, + "force_loss": 0.001538500771857798, + "step": 41660 + }, + { + "epoch": 37.473021582733814, + "grad_norm": 0.15772037208080292, + "learning_rate": 2.3420733733011617e-05, + "loss": 0.0157, + "step": 41670 + }, + { + "action_loss": 0.005712547805160284, + "epoch": 37.473021582733814, + "step": 41670 + }, + { + "epoch": 37.473021582733814, + "step": 41670, + "torque_loss": 0.10994597524404526 + }, + { + "epoch": 37.473021582733814, + "force_loss": 0.007984291762113571, + "step": 41670 + }, + { + "epoch": 37.48201438848921, + "grad_norm": 0.29070183634757996, + "learning_rate": 2.3397396158149243e-05, + "loss": 0.0184, + "step": 41680 + }, + { + "action_loss": 0.005757478531450033, + "epoch": 37.48201438848921, + "step": 41680 + }, + { + "epoch": 37.48201438848921, + "step": 41680, + "torque_loss": 0.10696472972631454 + }, + { + "epoch": 37.48201438848921, + "force_loss": 0.009240470826625824, + "step": 41680 + }, + { + "epoch": 37.4910071942446, + "grad_norm": 0.13802073895931244, + "learning_rate": 2.3374066664455498e-05, + "loss": 0.0172, + "step": 41690 + }, + { + "action_loss": 0.0016419509192928672, + "epoch": 37.4910071942446, + "step": 41690 + }, + { + "epoch": 37.4910071942446, + "step": 41690, + "torque_loss": 0.11248960345983505 + }, + { + "epoch": 37.4910071942446, + "force_loss": 0.002011026255786419, + "step": 41690 + }, + { + "epoch": 37.5, + "grad_norm": 0.1483779400587082, + "learning_rate": 2.3350745259017315e-05, + "loss": 0.0146, + "step": 41700 + }, + { + "action_loss": 0.00665512727573514, + "epoch": 37.5, + "step": 41700 + }, + { + "epoch": 37.5, + "step": 41700, + "torque_loss": 0.11320441961288452 + }, + { + "epoch": 37.5, + "force_loss": 0.005524365231394768, + "step": 41700 + }, + { + "epoch": 37.5089928057554, + "grad_norm": 0.5056332349777222, + "learning_rate": 2.332743194891906e-05, + "loss": 0.0166, + "step": 41710 + }, + { + "action_loss": 0.0007786910864524543, + "epoch": 37.5089928057554, + "step": 41710 + }, + { + "epoch": 37.5089928057554, + "step": 41710, + "torque_loss": 0.09663679450750351 + }, + { + "epoch": 37.5089928057554, + "force_loss": 0.0010359091684222221, + "step": 41710 + }, + { + "epoch": 37.51798561151079, + "grad_norm": 0.09813045710325241, + "learning_rate": 2.330412674124276e-05, + "loss": 0.0138, + "step": 41720 + }, + { + "action_loss": 0.0011931395856663585, + "epoch": 37.51798561151079, + "step": 41720 + }, + { + "epoch": 37.51798561151079, + "step": 41720, + "torque_loss": 0.11519008129835129 + }, + { + "epoch": 37.51798561151079, + "force_loss": 0.004237674176692963, + "step": 41720 + }, + { + "epoch": 37.526978417266186, + "grad_norm": 0.19659537076950073, + "learning_rate": 2.328082964306786e-05, + "loss": 0.0148, + "step": 41730 + }, + { + "action_loss": 0.0006921563763171434, + "epoch": 37.526978417266186, + "step": 41730 + }, + { + "epoch": 37.526978417266186, + "step": 41730, + "torque_loss": 0.09513994306325912 + }, + { + "epoch": 37.526978417266186, + "force_loss": 0.002250265097245574, + "step": 41730 + }, + { + "epoch": 37.53597122302158, + "grad_norm": 0.08896561712026596, + "learning_rate": 2.325754066147145e-05, + "loss": 0.0145, + "step": 41740 + }, + { + "action_loss": 0.0012034709798172116, + "epoch": 37.53597122302158, + "step": 41740 + }, + { + "epoch": 37.53597122302158, + "step": 41740, + "torque_loss": 0.1256457418203354 + }, + { + "epoch": 37.53597122302158, + "force_loss": 0.0028368141502141953, + "step": 41740 + }, + { + "epoch": 37.54496402877698, + "grad_norm": 0.11042187362909317, + "learning_rate": 2.32342598035281e-05, + "loss": 0.0157, + "step": 41750 + }, + { + "action_loss": 0.002094205701723695, + "epoch": 37.54496402877698, + "step": 41750 + }, + { + "epoch": 37.54496402877698, + "step": 41750, + "torque_loss": 0.13694559037685394 + }, + { + "epoch": 37.54496402877698, + "force_loss": 0.008897403255105019, + "step": 41750 + }, + { + "epoch": 37.55395683453237, + "grad_norm": 0.22857968509197235, + "learning_rate": 2.321098707630991e-05, + "loss": 0.0163, + "step": 41760 + }, + { + "action_loss": 0.0010560353985056281, + "epoch": 37.55395683453237, + "step": 41760 + }, + { + "epoch": 37.55395683453237, + "step": 41760, + "torque_loss": 0.1850542575120926 + }, + { + "epoch": 37.55395683453237, + "force_loss": 0.0032666318584233522, + "step": 41760 + }, + { + "epoch": 37.56294964028777, + "grad_norm": 0.16816456615924835, + "learning_rate": 2.318772248688652e-05, + "loss": 0.0153, + "step": 41770 + }, + { + "action_loss": 0.0026116278022527695, + "epoch": 37.56294964028777, + "step": 41770 + }, + { + "epoch": 37.56294964028777, + "step": 41770, + "torque_loss": 0.16919898986816406 + }, + { + "epoch": 37.56294964028777, + "force_loss": 0.0057323165237903595, + "step": 41770 + }, + { + "epoch": 37.57194244604317, + "grad_norm": 0.49228811264038086, + "learning_rate": 2.3164466042325107e-05, + "loss": 0.0167, + "step": 41780 + }, + { + "action_loss": 0.0015910168876871467, + "epoch": 37.57194244604317, + "step": 41780 + }, + { + "epoch": 37.57194244604317, + "step": 41780, + "torque_loss": 0.1640763133764267 + }, + { + "epoch": 37.57194244604317, + "force_loss": 0.002082190243527293, + "step": 41780 + }, + { + "epoch": 37.580935251798564, + "grad_norm": 0.17482158541679382, + "learning_rate": 2.3141217749690353e-05, + "loss": 0.0174, + "step": 41790 + }, + { + "action_loss": 0.006881105247884989, + "epoch": 37.580935251798564, + "step": 41790 + }, + { + "epoch": 37.580935251798564, + "step": 41790, + "torque_loss": 0.15449021756649017 + }, + { + "epoch": 37.580935251798564, + "force_loss": 0.011124883778393269, + "step": 41790 + }, + { + "epoch": 37.589928057553955, + "grad_norm": 0.11784825474023819, + "learning_rate": 2.3117977616044466e-05, + "loss": 0.0153, + "step": 41800 + }, + { + "action_loss": 0.002115255920216441, + "epoch": 37.589928057553955, + "step": 41800 + }, + { + "epoch": 37.589928057553955, + "step": 41800, + "torque_loss": 0.0910719633102417 + }, + { + "epoch": 37.589928057553955, + "force_loss": 0.0017877034842967987, + "step": 41800 + }, + { + "epoch": 37.59892086330935, + "grad_norm": 0.5862287282943726, + "learning_rate": 2.309474564844722e-05, + "loss": 0.0177, + "step": 41810 + }, + { + "action_loss": 0.0015137457521632314, + "epoch": 37.59892086330935, + "step": 41810 + }, + { + "epoch": 37.59892086330935, + "step": 41810, + "torque_loss": 0.12903255224227905 + }, + { + "epoch": 37.59892086330935, + "force_loss": 0.002219780581071973, + "step": 41810 + }, + { + "epoch": 37.60791366906475, + "grad_norm": 0.0833015888929367, + "learning_rate": 2.307152185395585e-05, + "loss": 0.0167, + "step": 41820 + }, + { + "action_loss": 0.0014900037785992026, + "epoch": 37.60791366906475, + "step": 41820 + }, + { + "epoch": 37.60791366906475, + "step": 41820, + "torque_loss": 0.13498614728450775 + }, + { + "epoch": 37.60791366906475, + "force_loss": 0.0035768242087215185, + "step": 41820 + }, + { + "epoch": 37.61690647482014, + "grad_norm": 0.45619750022888184, + "learning_rate": 2.3048306239625144e-05, + "loss": 0.016, + "step": 41830 + }, + { + "action_loss": 0.002615357981994748, + "epoch": 37.61690647482014, + "step": 41830 + }, + { + "epoch": 37.61690647482014, + "step": 41830, + "torque_loss": 0.1370583325624466 + }, + { + "epoch": 37.61690647482014, + "force_loss": 0.004041640553623438, + "step": 41830 + }, + { + "epoch": 37.62589928057554, + "grad_norm": 0.2658224105834961, + "learning_rate": 2.3025098812507378e-05, + "loss": 0.0206, + "step": 41840 + }, + { + "action_loss": 0.0019492382416501641, + "epoch": 37.62589928057554, + "step": 41840 + }, + { + "epoch": 37.62589928057554, + "step": 41840, + "torque_loss": 0.1327981799840927 + }, + { + "epoch": 37.62589928057554, + "force_loss": 0.00436962116509676, + "step": 41840 + }, + { + "epoch": 37.634892086330936, + "grad_norm": 0.273560494184494, + "learning_rate": 2.3001899579652366e-05, + "loss": 0.015, + "step": 41850 + }, + { + "action_loss": 0.0014214725233614445, + "epoch": 37.634892086330936, + "step": 41850 + }, + { + "epoch": 37.634892086330936, + "step": 41850, + "torque_loss": 0.15702326595783234 + }, + { + "epoch": 37.634892086330936, + "force_loss": 0.0036117613781243563, + "step": 41850 + }, + { + "epoch": 37.643884892086334, + "grad_norm": 0.08339393138885498, + "learning_rate": 2.2978708548107393e-05, + "loss": 0.0156, + "step": 41860 + }, + { + "action_loss": 0.001197000965476036, + "epoch": 37.643884892086334, + "step": 41860 + }, + { + "epoch": 37.643884892086334, + "step": 41860, + "torque_loss": 0.16042494773864746 + }, + { + "epoch": 37.643884892086334, + "force_loss": 0.002590571530163288, + "step": 41860 + }, + { + "epoch": 37.652877697841724, + "grad_norm": 0.14148195087909698, + "learning_rate": 2.2955525724917348e-05, + "loss": 0.0156, + "step": 41870 + }, + { + "action_loss": 0.001695584156550467, + "epoch": 37.652877697841724, + "step": 41870 + }, + { + "epoch": 37.652877697841724, + "step": 41870, + "torque_loss": 0.1457749903202057 + }, + { + "epoch": 37.652877697841724, + "force_loss": 0.0023889620788395405, + "step": 41870 + }, + { + "epoch": 37.66187050359712, + "grad_norm": 0.31962788105010986, + "learning_rate": 2.2932351117124477e-05, + "loss": 0.0189, + "step": 41880 + }, + { + "action_loss": 0.002825948176905513, + "epoch": 37.66187050359712, + "step": 41880 + }, + { + "epoch": 37.66187050359712, + "step": 41880, + "torque_loss": 0.12647639214992523 + }, + { + "epoch": 37.66187050359712, + "force_loss": 0.005641849245876074, + "step": 41880 + }, + { + "epoch": 37.67086330935252, + "grad_norm": 0.09548363834619522, + "learning_rate": 2.29091847317687e-05, + "loss": 0.0155, + "step": 41890 + }, + { + "action_loss": 0.002940859878435731, + "epoch": 37.67086330935252, + "step": 41890 + }, + { + "epoch": 37.67086330935252, + "step": 41890, + "torque_loss": 0.11217710375785828 + }, + { + "epoch": 37.67086330935252, + "force_loss": 0.0021758966613560915, + "step": 41890 + }, + { + "epoch": 37.67985611510792, + "grad_norm": 0.20715530216693878, + "learning_rate": 2.2886026575887277e-05, + "loss": 0.0169, + "step": 41900 + }, + { + "action_loss": 0.0019089201232418418, + "epoch": 37.67985611510792, + "step": 41900 + }, + { + "epoch": 37.67985611510792, + "step": 41900, + "torque_loss": 0.1353653371334076 + }, + { + "epoch": 37.67985611510792, + "force_loss": 0.002920748433098197, + "step": 41900 + }, + { + "epoch": 37.68884892086331, + "grad_norm": 0.2596253752708435, + "learning_rate": 2.2862876656515094e-05, + "loss": 0.0166, + "step": 41910 + }, + { + "action_loss": 0.001412943354807794, + "epoch": 37.68884892086331, + "step": 41910 + }, + { + "epoch": 37.68884892086331, + "step": 41910, + "torque_loss": 0.1325875222682953 + }, + { + "epoch": 37.68884892086331, + "force_loss": 0.0016223323764279485, + "step": 41910 + }, + { + "epoch": 37.697841726618705, + "grad_norm": 0.2145116627216339, + "learning_rate": 2.2839734980684464e-05, + "loss": 0.0161, + "step": 41920 + }, + { + "action_loss": 0.007583460304886103, + "epoch": 37.697841726618705, + "step": 41920 + }, + { + "epoch": 37.697841726618705, + "step": 41920, + "torque_loss": 0.13984991610050201 + }, + { + "epoch": 37.697841726618705, + "force_loss": 0.012093673460185528, + "step": 41920 + }, + { + "epoch": 37.7068345323741, + "grad_norm": 0.1525728553533554, + "learning_rate": 2.281660155542522e-05, + "loss": 0.0186, + "step": 41930 + }, + { + "action_loss": 0.025422945618629456, + "epoch": 37.7068345323741, + "step": 41930 + }, + { + "epoch": 37.7068345323741, + "step": 41930, + "torque_loss": 0.10879912972450256 + }, + { + "epoch": 37.7068345323741, + "force_loss": 0.030627774074673653, + "step": 41930 + }, + { + "epoch": 37.71582733812949, + "grad_norm": 0.1916947066783905, + "learning_rate": 2.279347638776469e-05, + "loss": 0.0169, + "step": 41940 + }, + { + "action_loss": 0.004034934099763632, + "epoch": 37.71582733812949, + "step": 41940 + }, + { + "epoch": 37.71582733812949, + "step": 41940, + "torque_loss": 0.10615388303995132 + }, + { + "epoch": 37.71582733812949, + "force_loss": 0.004340244457125664, + "step": 41940 + }, + { + "epoch": 37.72482014388489, + "grad_norm": 0.4816558361053467, + "learning_rate": 2.2770359484727665e-05, + "loss": 0.0167, + "step": 41950 + }, + { + "action_loss": 0.0015975426649674773, + "epoch": 37.72482014388489, + "step": 41950 + }, + { + "epoch": 37.72482014388489, + "step": 41950, + "torque_loss": 0.09869460016489029 + }, + { + "epoch": 37.72482014388489, + "force_loss": 0.0016976045444607735, + "step": 41950 + }, + { + "epoch": 37.73381294964029, + "grad_norm": 0.11220408231019974, + "learning_rate": 2.27472508533365e-05, + "loss": 0.0169, + "step": 41960 + }, + { + "action_loss": 0.002514922758564353, + "epoch": 37.73381294964029, + "step": 41960 + }, + { + "epoch": 37.73381294964029, + "step": 41960, + "torque_loss": 0.1607474833726883 + }, + { + "epoch": 37.73381294964029, + "force_loss": 0.014331807382404804, + "step": 41960 + }, + { + "epoch": 37.742805755395686, + "grad_norm": 0.21857528388500214, + "learning_rate": 2.2724150500610948e-05, + "loss": 0.0202, + "step": 41970 + }, + { + "action_loss": 0.001497595920227468, + "epoch": 37.742805755395686, + "step": 41970 + }, + { + "epoch": 37.742805755395686, + "step": 41970, + "torque_loss": 0.12348630279302597 + }, + { + "epoch": 37.742805755395686, + "force_loss": 0.0011046187719330192, + "step": 41970 + }, + { + "epoch": 37.75179856115108, + "grad_norm": 0.3066596984863281, + "learning_rate": 2.2701058433568302e-05, + "loss": 0.0188, + "step": 41980 + }, + { + "action_loss": 0.0010938197374343872, + "epoch": 37.75179856115108, + "step": 41980 + }, + { + "epoch": 37.75179856115108, + "step": 41980, + "torque_loss": 0.09929013252258301 + }, + { + "epoch": 37.75179856115108, + "force_loss": 0.0019019526662304997, + "step": 41980 + }, + { + "epoch": 37.760791366906474, + "grad_norm": 0.13061729073524475, + "learning_rate": 2.2677974659223318e-05, + "loss": 0.0213, + "step": 41990 + }, + { + "action_loss": 0.007037907838821411, + "epoch": 37.760791366906474, + "step": 41990 + }, + { + "epoch": 37.760791366906474, + "step": 41990, + "torque_loss": 0.13844385743141174 + }, + { + "epoch": 37.760791366906474, + "force_loss": 0.00899954978376627, + "step": 41990 + }, + { + "epoch": 37.76978417266187, + "grad_norm": 0.29756227135658264, + "learning_rate": 2.2654899184588235e-05, + "loss": 0.0175, + "step": 42000 + }, + { + "action_loss": 0.0016529156127944589, + "epoch": 37.76978417266187, + "step": 42000 + }, + { + "epoch": 37.76978417266187, + "step": 42000, + "torque_loss": 0.17188672721385956 + }, + { + "epoch": 37.76978417266187, + "force_loss": 0.0018125292845070362, + "step": 42000 + }, + { + "epoch": 37.77877697841727, + "grad_norm": 0.1905078887939453, + "learning_rate": 2.2631832016672756e-05, + "loss": 0.0155, + "step": 42010 + }, + { + "action_loss": 0.0019595816265791655, + "epoch": 37.77877697841727, + "step": 42010 + }, + { + "epoch": 37.77877697841727, + "step": 42010, + "torque_loss": 0.14393800497055054 + }, + { + "epoch": 37.77877697841727, + "force_loss": 0.0009099827148020267, + "step": 42010 + }, + { + "epoch": 37.78776978417266, + "grad_norm": 0.3305841088294983, + "learning_rate": 2.2608773162484127e-05, + "loss": 0.0158, + "step": 42020 + }, + { + "action_loss": 0.005438648164272308, + "epoch": 37.78776978417266, + "step": 42020 + }, + { + "epoch": 37.78776978417266, + "step": 42020, + "torque_loss": 0.12238804250955582 + }, + { + "epoch": 37.78776978417266, + "force_loss": 0.009795799851417542, + "step": 42020 + }, + { + "epoch": 37.79676258992806, + "grad_norm": 0.30900031328201294, + "learning_rate": 2.2585722629026958e-05, + "loss": 0.0164, + "step": 42030 + }, + { + "action_loss": 0.0017492446349933743, + "epoch": 37.79676258992806, + "step": 42030 + }, + { + "epoch": 37.79676258992806, + "step": 42030, + "torque_loss": 0.1076723113656044 + }, + { + "epoch": 37.79676258992806, + "force_loss": 0.0033653213176876307, + "step": 42030 + }, + { + "epoch": 37.805755395683455, + "grad_norm": 0.07750316709280014, + "learning_rate": 2.2562680423303457e-05, + "loss": 0.0156, + "step": 42040 + }, + { + "action_loss": 0.031596552580595016, + "epoch": 37.805755395683455, + "step": 42040 + }, + { + "epoch": 37.805755395683455, + "step": 42040, + "torque_loss": 0.13096915185451508 + }, + { + "epoch": 37.805755395683455, + "force_loss": 0.045666519552469254, + "step": 42040 + }, + { + "epoch": 37.814748201438846, + "grad_norm": 0.08575436472892761, + "learning_rate": 2.2539646552313165e-05, + "loss": 0.0201, + "step": 42050 + }, + { + "action_loss": 0.01117153000086546, + "epoch": 37.814748201438846, + "step": 42050 + }, + { + "epoch": 37.814748201438846, + "step": 42050, + "torque_loss": 0.18546605110168457 + }, + { + "epoch": 37.814748201438846, + "force_loss": 0.015107501298189163, + "step": 42050 + }, + { + "epoch": 37.82374100719424, + "grad_norm": 0.1526290327310562, + "learning_rate": 2.251662102305322e-05, + "loss": 0.0163, + "step": 42060 + }, + { + "action_loss": 0.0016643124399706721, + "epoch": 37.82374100719424, + "step": 42060 + }, + { + "epoch": 37.82374100719424, + "step": 42060, + "torque_loss": 0.1225610077381134 + }, + { + "epoch": 37.82374100719424, + "force_loss": 0.005740236956626177, + "step": 42060 + }, + { + "epoch": 37.83273381294964, + "grad_norm": 0.10917676240205765, + "learning_rate": 2.2493603842518152e-05, + "loss": 0.0143, + "step": 42070 + }, + { + "action_loss": 0.003165190340951085, + "epoch": 37.83273381294964, + "step": 42070 + }, + { + "epoch": 37.83273381294964, + "step": 42070, + "torque_loss": 0.1288745254278183 + }, + { + "epoch": 37.83273381294964, + "force_loss": 0.0036003708373755217, + "step": 42070 + }, + { + "epoch": 37.84172661870504, + "grad_norm": 0.18314294517040253, + "learning_rate": 2.2470595017699974e-05, + "loss": 0.0155, + "step": 42080 + }, + { + "action_loss": 0.0015404910081997514, + "epoch": 37.84172661870504, + "step": 42080 + }, + { + "epoch": 37.84172661870504, + "step": 42080, + "torque_loss": 0.13327224552631378 + }, + { + "epoch": 37.84172661870504, + "force_loss": 0.0014931490877643228, + "step": 42080 + }, + { + "epoch": 37.85071942446043, + "grad_norm": 0.08783494681119919, + "learning_rate": 2.244759455558816e-05, + "loss": 0.0167, + "step": 42090 + }, + { + "action_loss": 0.0007188048330135643, + "epoch": 37.85071942446043, + "step": 42090 + }, + { + "epoch": 37.85071942446043, + "step": 42090, + "torque_loss": 0.11901036649942398 + }, + { + "epoch": 37.85071942446043, + "force_loss": 0.0007325056940317154, + "step": 42090 + }, + { + "epoch": 37.85971223021583, + "grad_norm": 0.11911371350288391, + "learning_rate": 2.2424602463169614e-05, + "loss": 0.0162, + "step": 42100 + }, + { + "action_loss": 0.0037666999269276857, + "epoch": 37.85971223021583, + "step": 42100 + }, + { + "epoch": 37.85971223021583, + "step": 42100, + "torque_loss": 0.09223818778991699 + }, + { + "epoch": 37.85971223021583, + "force_loss": 0.001488087815232575, + "step": 42100 + }, + { + "epoch": 37.868705035971225, + "grad_norm": 0.10478778183460236, + "learning_rate": 2.2401618747428776e-05, + "loss": 0.0159, + "step": 42110 + }, + { + "action_loss": 0.001334990723989904, + "epoch": 37.868705035971225, + "step": 42110 + }, + { + "epoch": 37.868705035971225, + "step": 42110, + "torque_loss": 0.1115121841430664 + }, + { + "epoch": 37.868705035971225, + "force_loss": 0.002197865629568696, + "step": 42110 + }, + { + "epoch": 37.87769784172662, + "grad_norm": 0.052730903029441833, + "learning_rate": 2.237864341534747e-05, + "loss": 0.0171, + "step": 42120 + }, + { + "action_loss": 0.0028694781940430403, + "epoch": 37.87769784172662, + "step": 42120 + }, + { + "epoch": 37.87769784172662, + "step": 42120, + "torque_loss": 0.1752682477235794 + }, + { + "epoch": 37.87769784172662, + "force_loss": 0.009043095633387566, + "step": 42120 + }, + { + "epoch": 37.88669064748201, + "grad_norm": 0.05406825989484787, + "learning_rate": 2.2355676473904998e-05, + "loss": 0.0148, + "step": 42130 + }, + { + "action_loss": 0.0006184595404192805, + "epoch": 37.88669064748201, + "step": 42130 + }, + { + "epoch": 37.88669064748201, + "step": 42130, + "torque_loss": 0.12273558974266052 + }, + { + "epoch": 37.88669064748201, + "force_loss": 0.0016919871559366584, + "step": 42130 + }, + { + "epoch": 37.89568345323741, + "grad_norm": 0.15935902297496796, + "learning_rate": 2.2332717930078108e-05, + "loss": 0.0171, + "step": 42140 + }, + { + "action_loss": 0.0024945142213255167, + "epoch": 37.89568345323741, + "step": 42140 + }, + { + "epoch": 37.89568345323741, + "step": 42140, + "torque_loss": 0.10992572456598282 + }, + { + "epoch": 37.89568345323741, + "force_loss": 0.007920256815850735, + "step": 42140 + }, + { + "epoch": 37.90467625899281, + "grad_norm": 0.19391345977783203, + "learning_rate": 2.2309767790840992e-05, + "loss": 0.0212, + "step": 42150 + }, + { + "action_loss": 0.016867203637957573, + "epoch": 37.90467625899281, + "step": 42150 + }, + { + "epoch": 37.90467625899281, + "step": 42150, + "torque_loss": 0.1651969999074936 + }, + { + "epoch": 37.90467625899281, + "force_loss": 0.02321917749941349, + "step": 42150 + }, + { + "epoch": 37.9136690647482, + "grad_norm": 0.08190567046403885, + "learning_rate": 2.228682606316529e-05, + "loss": 0.019, + "step": 42160 + }, + { + "action_loss": 0.0016315663233399391, + "epoch": 37.9136690647482, + "step": 42160 + }, + { + "epoch": 37.9136690647482, + "step": 42160, + "torque_loss": 0.12805941700935364 + }, + { + "epoch": 37.9136690647482, + "force_loss": 0.0020304303616285324, + "step": 42160 + }, + { + "epoch": 37.922661870503596, + "grad_norm": 0.2823522686958313, + "learning_rate": 2.2263892754020138e-05, + "loss": 0.0168, + "step": 42170 + }, + { + "action_loss": 0.0018545013153925538, + "epoch": 37.922661870503596, + "step": 42170 + }, + { + "epoch": 37.922661870503596, + "step": 42170, + "torque_loss": 0.11755432933568954 + }, + { + "epoch": 37.922661870503596, + "force_loss": 0.0014761259080842137, + "step": 42170 + }, + { + "epoch": 37.931654676258994, + "grad_norm": 0.11751024425029755, + "learning_rate": 2.2240967870372004e-05, + "loss": 0.0161, + "step": 42180 + }, + { + "action_loss": 0.01340011041611433, + "epoch": 37.931654676258994, + "step": 42180 + }, + { + "epoch": 37.931654676258994, + "step": 42180, + "torque_loss": 0.11241209506988525 + }, + { + "epoch": 37.931654676258994, + "force_loss": 0.013013034127652645, + "step": 42180 + }, + { + "epoch": 37.94064748201439, + "grad_norm": 0.22758176922798157, + "learning_rate": 2.2218051419184933e-05, + "loss": 0.0175, + "step": 42190 + }, + { + "action_loss": 0.0016600164817646146, + "epoch": 37.94064748201439, + "step": 42190 + }, + { + "epoch": 37.94064748201439, + "step": 42190, + "torque_loss": 0.11962270736694336 + }, + { + "epoch": 37.94064748201439, + "force_loss": 0.0010911537101492286, + "step": 42190 + }, + { + "epoch": 37.94964028776978, + "grad_norm": 0.283041775226593, + "learning_rate": 2.219514340742026e-05, + "loss": 0.0188, + "step": 42200 + }, + { + "action_loss": 0.004071592353284359, + "epoch": 37.94964028776978, + "step": 42200 + }, + { + "epoch": 37.94964028776978, + "step": 42200, + "torque_loss": 0.17521530389785767 + }, + { + "epoch": 37.94964028776978, + "force_loss": 0.004676694516092539, + "step": 42200 + }, + { + "epoch": 37.95863309352518, + "grad_norm": 0.3204297423362732, + "learning_rate": 2.2172243842036898e-05, + "loss": 0.0159, + "step": 42210 + }, + { + "action_loss": 0.0008400164660997689, + "epoch": 37.95863309352518, + "step": 42210 + }, + { + "epoch": 37.95863309352518, + "step": 42210, + "torque_loss": 0.14027972519397736 + }, + { + "epoch": 37.95863309352518, + "force_loss": 0.0009440624271519482, + "step": 42210 + }, + { + "epoch": 37.96762589928058, + "grad_norm": 0.2375713735818863, + "learning_rate": 2.2149352729991107e-05, + "loss": 0.0157, + "step": 42220 + }, + { + "action_loss": 0.003954483196139336, + "epoch": 37.96762589928058, + "step": 42220 + }, + { + "epoch": 37.96762589928058, + "step": 42220, + "torque_loss": 0.08916950225830078 + }, + { + "epoch": 37.96762589928058, + "force_loss": 0.0017460359958931804, + "step": 42220 + }, + { + "epoch": 37.976618705035975, + "grad_norm": 0.08957651257514954, + "learning_rate": 2.2126470078236605e-05, + "loss": 0.0166, + "step": 42230 + }, + { + "action_loss": 0.0007148857112042606, + "epoch": 37.976618705035975, + "step": 42230 + }, + { + "epoch": 37.976618705035975, + "step": 42230, + "torque_loss": 0.1047196015715599 + }, + { + "epoch": 37.976618705035975, + "force_loss": 0.0006302558467723429, + "step": 42230 + }, + { + "epoch": 37.985611510791365, + "grad_norm": 0.13261087238788605, + "learning_rate": 2.2103595893724533e-05, + "loss": 0.0154, + "step": 42240 + }, + { + "action_loss": 0.0010683628497645259, + "epoch": 37.985611510791365, + "step": 42240 + }, + { + "epoch": 37.985611510791365, + "step": 42240, + "torque_loss": 0.1169266328215599 + }, + { + "epoch": 37.985611510791365, + "force_loss": 0.0010190032189711928, + "step": 42240 + }, + { + "epoch": 37.99460431654676, + "grad_norm": 0.1813839077949524, + "learning_rate": 2.208073018340345e-05, + "loss": 0.0156, + "step": 42250 + }, + { + "action_loss": 0.0008676950819790363, + "epoch": 37.99460431654676, + "step": 42250 + }, + { + "epoch": 37.99460431654676, + "step": 42250, + "torque_loss": 0.13542599976062775 + }, + { + "epoch": 37.99460431654676, + "force_loss": 0.0010492483852431178, + "step": 42250 + }, + { + "epoch": 38.00359712230216, + "grad_norm": 0.26701563596725464, + "learning_rate": 2.2057872954219405e-05, + "loss": 0.0186, + "step": 42260 + }, + { + "action_loss": 0.0012737991055473685, + "epoch": 38.00359712230216, + "step": 42260 + }, + { + "epoch": 38.00359712230216, + "step": 42260, + "torque_loss": 0.11293216794729233 + }, + { + "epoch": 38.00359712230216, + "force_loss": 0.0013094739988446236, + "step": 42260 + }, + { + "epoch": 38.01258992805755, + "grad_norm": 0.3566124439239502, + "learning_rate": 2.203502421311575e-05, + "loss": 0.018, + "step": 42270 + }, + { + "action_loss": 0.0052148811519145966, + "epoch": 38.01258992805755, + "step": 42270 + }, + { + "epoch": 38.01258992805755, + "step": 42270, + "torque_loss": 0.13218361139297485 + }, + { + "epoch": 38.01258992805755, + "force_loss": 0.005130449775606394, + "step": 42270 + }, + { + "epoch": 38.02158273381295, + "grad_norm": 0.28244003653526306, + "learning_rate": 2.2012183967033388e-05, + "loss": 0.015, + "step": 42280 + }, + { + "action_loss": 0.0017035113414749503, + "epoch": 38.02158273381295, + "step": 42280 + }, + { + "epoch": 38.02158273381295, + "step": 42280, + "torque_loss": 0.12922503054141998 + }, + { + "epoch": 38.02158273381295, + "force_loss": 0.0009031973895616829, + "step": 42280 + }, + { + "epoch": 38.030575539568346, + "grad_norm": 0.08576733618974686, + "learning_rate": 2.198935222291056e-05, + "loss": 0.0177, + "step": 42290 + }, + { + "action_loss": 0.0012579961912706494, + "epoch": 38.030575539568346, + "step": 42290 + }, + { + "epoch": 38.030575539568346, + "step": 42290, + "torque_loss": 0.12923459708690643 + }, + { + "epoch": 38.030575539568346, + "force_loss": 0.002322385786101222, + "step": 42290 + }, + { + "epoch": 38.039568345323744, + "grad_norm": 0.15510354936122894, + "learning_rate": 2.1966528987682948e-05, + "loss": 0.0163, + "step": 42300 + }, + { + "action_loss": 0.0021190308034420013, + "epoch": 38.039568345323744, + "step": 42300 + }, + { + "epoch": 38.039568345323744, + "step": 42300, + "torque_loss": 0.1178090050816536 + }, + { + "epoch": 38.039568345323744, + "force_loss": 0.00510313780978322, + "step": 42300 + }, + { + "epoch": 38.048561151079134, + "grad_norm": 0.37736085057258606, + "learning_rate": 2.194371426828365e-05, + "loss": 0.0169, + "step": 42310 + }, + { + "action_loss": 0.008922112174332142, + "epoch": 38.048561151079134, + "step": 42310 + }, + { + "epoch": 38.048561151079134, + "step": 42310, + "torque_loss": 0.1116734966635704 + }, + { + "epoch": 38.048561151079134, + "force_loss": 0.00866603385657072, + "step": 42310 + }, + { + "epoch": 38.05755395683453, + "grad_norm": 0.2952435314655304, + "learning_rate": 2.192090807164317e-05, + "loss": 0.0146, + "step": 42320 + }, + { + "action_loss": 0.006244678515940905, + "epoch": 38.05755395683453, + "step": 42320 + }, + { + "epoch": 38.05755395683453, + "step": 42320, + "torque_loss": 0.20723174512386322 + }, + { + "epoch": 38.05755395683453, + "force_loss": 0.0156722255051136, + "step": 42320 + }, + { + "epoch": 38.06654676258993, + "grad_norm": 0.16199855506420135, + "learning_rate": 2.1898110404689422e-05, + "loss": 0.0165, + "step": 42330 + }, + { + "action_loss": 0.012500171549618244, + "epoch": 38.06654676258993, + "step": 42330 + }, + { + "epoch": 38.06654676258993, + "step": 42330, + "torque_loss": 0.14990507066249847 + }, + { + "epoch": 38.06654676258993, + "force_loss": 0.010013852268457413, + "step": 42330 + }, + { + "epoch": 38.07553956834533, + "grad_norm": 0.2425270676612854, + "learning_rate": 2.1875321274347776e-05, + "loss": 0.0168, + "step": 42340 + }, + { + "action_loss": 0.0013319241115823388, + "epoch": 38.07553956834533, + "step": 42340 + }, + { + "epoch": 38.07553956834533, + "step": 42340, + "torque_loss": 0.15097080171108246 + }, + { + "epoch": 38.07553956834533, + "force_loss": 0.003812998766079545, + "step": 42340 + }, + { + "epoch": 38.08453237410072, + "grad_norm": 0.09177496284246445, + "learning_rate": 2.18525406875409e-05, + "loss": 0.0172, + "step": 42350 + }, + { + "action_loss": 0.001440504565834999, + "epoch": 38.08453237410072, + "step": 42350 + }, + { + "epoch": 38.08453237410072, + "step": 42350, + "torque_loss": 0.11144503206014633 + }, + { + "epoch": 38.08453237410072, + "force_loss": 0.002569103380665183, + "step": 42350 + }, + { + "epoch": 38.093525179856115, + "grad_norm": 0.4311312735080719, + "learning_rate": 2.1829768651188997e-05, + "loss": 0.014, + "step": 42360 + }, + { + "action_loss": 0.0008007041178643703, + "epoch": 38.093525179856115, + "step": 42360 + }, + { + "epoch": 38.093525179856115, + "step": 42360, + "torque_loss": 0.1781158447265625 + }, + { + "epoch": 38.093525179856115, + "force_loss": 0.0013596760109066963, + "step": 42360 + }, + { + "epoch": 38.10251798561151, + "grad_norm": 0.3303205668926239, + "learning_rate": 2.180700517220958e-05, + "loss": 0.0128, + "step": 42370 + }, + { + "action_loss": 0.002289427211508155, + "epoch": 38.10251798561151, + "step": 42370 + }, + { + "epoch": 38.10251798561151, + "step": 42370, + "torque_loss": 0.10773161798715591 + }, + { + "epoch": 38.10251798561151, + "force_loss": 0.005581540521234274, + "step": 42370 + }, + { + "epoch": 38.111510791366904, + "grad_norm": 0.15102706849575043, + "learning_rate": 2.1784250257517603e-05, + "loss": 0.0162, + "step": 42380 + }, + { + "action_loss": 0.003619803348556161, + "epoch": 38.111510791366904, + "step": 42380 + }, + { + "epoch": 38.111510791366904, + "step": 42380, + "torque_loss": 0.1511116772890091 + }, + { + "epoch": 38.111510791366904, + "force_loss": 0.0023690329398959875, + "step": 42380 + }, + { + "epoch": 38.1205035971223, + "grad_norm": 0.366584450006485, + "learning_rate": 2.1761503914025406e-05, + "loss": 0.0163, + "step": 42390 + }, + { + "action_loss": 0.002153784967958927, + "epoch": 38.1205035971223, + "step": 42390 + }, + { + "epoch": 38.1205035971223, + "step": 42390, + "torque_loss": 0.11411242932081223 + }, + { + "epoch": 38.1205035971223, + "force_loss": 0.002840594155713916, + "step": 42390 + }, + { + "epoch": 38.1294964028777, + "grad_norm": 0.14787189662456512, + "learning_rate": 2.1738766148642705e-05, + "loss": 0.0144, + "step": 42400 + }, + { + "action_loss": 0.004105289001017809, + "epoch": 38.1294964028777, + "step": 42400 + }, + { + "epoch": 38.1294964028777, + "step": 42400, + "torque_loss": 0.14026354253292084 + }, + { + "epoch": 38.1294964028777, + "force_loss": 0.00389136653393507, + "step": 42400 + }, + { + "epoch": 38.138489208633096, + "grad_norm": 0.15205298364162445, + "learning_rate": 2.1716036968276683e-05, + "loss": 0.017, + "step": 42410 + }, + { + "action_loss": 0.0013198625529184937, + "epoch": 38.138489208633096, + "step": 42410 + }, + { + "epoch": 38.138489208633096, + "step": 42410, + "torque_loss": 0.07007986307144165 + }, + { + "epoch": 38.138489208633096, + "force_loss": 0.005232428666204214, + "step": 42410 + }, + { + "epoch": 38.14748201438849, + "grad_norm": 0.12271703034639359, + "learning_rate": 2.1693316379831808e-05, + "loss": 0.0137, + "step": 42420 + }, + { + "action_loss": 0.0018609925173223019, + "epoch": 38.14748201438849, + "step": 42420 + }, + { + "epoch": 38.14748201438849, + "step": 42420, + "torque_loss": 0.16388960182666779 + }, + { + "epoch": 38.14748201438849, + "force_loss": 0.0015514730475842953, + "step": 42420 + }, + { + "epoch": 38.156474820143885, + "grad_norm": 0.14147064089775085, + "learning_rate": 2.1670604390210037e-05, + "loss": 0.0169, + "step": 42430 + }, + { + "action_loss": 0.0010573114268481731, + "epoch": 38.156474820143885, + "step": 42430 + }, + { + "epoch": 38.156474820143885, + "step": 42430, + "torque_loss": 0.15909452736377716 + }, + { + "epoch": 38.156474820143885, + "force_loss": 0.0011442030081525445, + "step": 42430 + }, + { + "epoch": 38.16546762589928, + "grad_norm": 0.3083512783050537, + "learning_rate": 2.1647901006310656e-05, + "loss": 0.0158, + "step": 42440 + }, + { + "action_loss": 0.0033844977151602507, + "epoch": 38.16546762589928, + "step": 42440 + }, + { + "epoch": 38.16546762589928, + "step": 42440, + "torque_loss": 0.09742780774831772 + }, + { + "epoch": 38.16546762589928, + "force_loss": 0.010735255666077137, + "step": 42440 + }, + { + "epoch": 38.17446043165467, + "grad_norm": 0.21156315505504608, + "learning_rate": 2.1625206235030353e-05, + "loss": 0.0175, + "step": 42450 + }, + { + "action_loss": 0.014417861588299274, + "epoch": 38.17446043165467, + "step": 42450 + }, + { + "epoch": 38.17446043165467, + "step": 42450, + "torque_loss": 0.12278424948453903 + }, + { + "epoch": 38.17446043165467, + "force_loss": 0.02201397716999054, + "step": 42450 + }, + { + "epoch": 38.18345323741007, + "grad_norm": 0.2523946464061737, + "learning_rate": 2.160252008326321e-05, + "loss": 0.0196, + "step": 42460 + }, + { + "action_loss": 0.005058374721556902, + "epoch": 38.18345323741007, + "step": 42460 + }, + { + "epoch": 38.18345323741007, + "step": 42460, + "torque_loss": 0.10087931156158447 + }, + { + "epoch": 38.18345323741007, + "force_loss": 0.0036174114793539047, + "step": 42460 + }, + { + "epoch": 38.19244604316547, + "grad_norm": 0.14239241182804108, + "learning_rate": 2.157984255790067e-05, + "loss": 0.0168, + "step": 42470 + }, + { + "action_loss": 0.0024516042321920395, + "epoch": 38.19244604316547, + "step": 42470 + }, + { + "epoch": 38.19244604316547, + "step": 42470, + "torque_loss": 0.13193939626216888 + }, + { + "epoch": 38.19244604316547, + "force_loss": 0.0028035396244376898, + "step": 42470 + }, + { + "epoch": 38.201438848920866, + "grad_norm": 0.22383785247802734, + "learning_rate": 2.1557173665831553e-05, + "loss": 0.0151, + "step": 42480 + }, + { + "action_loss": 0.0018066968768835068, + "epoch": 38.201438848920866, + "step": 42480 + }, + { + "epoch": 38.201438848920866, + "step": 42480, + "torque_loss": 0.16234518587589264 + }, + { + "epoch": 38.201438848920866, + "force_loss": 0.006594979669898748, + "step": 42480 + }, + { + "epoch": 38.210431654676256, + "grad_norm": 0.23351898789405823, + "learning_rate": 2.153451341394212e-05, + "loss": 0.0194, + "step": 42490 + }, + { + "action_loss": 0.001507127657532692, + "epoch": 38.210431654676256, + "step": 42490 + }, + { + "epoch": 38.210431654676256, + "step": 42490, + "torque_loss": 0.20244701206684113 + }, + { + "epoch": 38.210431654676256, + "force_loss": 0.0035984080750495195, + "step": 42490 + }, + { + "epoch": 38.219424460431654, + "grad_norm": 0.08941373229026794, + "learning_rate": 2.151186180911589e-05, + "loss": 0.0155, + "step": 42500 + }, + { + "action_loss": 0.0013980502262711525, + "epoch": 38.219424460431654, + "step": 42500 + }, + { + "epoch": 38.219424460431654, + "step": 42500, + "torque_loss": 0.13525958359241486 + }, + { + "epoch": 38.219424460431654, + "force_loss": 0.0053792609833180904, + "step": 42500 + }, + { + "epoch": 38.22841726618705, + "grad_norm": 0.1562460958957672, + "learning_rate": 2.1489218858233877e-05, + "loss": 0.0174, + "step": 42510 + }, + { + "action_loss": 0.001466710469685495, + "epoch": 38.22841726618705, + "step": 42510 + }, + { + "epoch": 38.22841726618705, + "step": 42510, + "torque_loss": 0.10757160931825638 + }, + { + "epoch": 38.22841726618705, + "force_loss": 0.0019725810270756483, + "step": 42510 + }, + { + "epoch": 38.23741007194245, + "grad_norm": 0.1342063993215561, + "learning_rate": 2.1466584568174392e-05, + "loss": 0.0195, + "step": 42520 + }, + { + "action_loss": 0.007308927830308676, + "epoch": 38.23741007194245, + "step": 42520 + }, + { + "epoch": 38.23741007194245, + "step": 42520, + "torque_loss": 0.10933581739664078 + }, + { + "epoch": 38.23741007194245, + "force_loss": 0.0039587477222085, + "step": 42520 + }, + { + "epoch": 38.24640287769784, + "grad_norm": 0.399356871843338, + "learning_rate": 2.1443958945813132e-05, + "loss": 0.0183, + "step": 42530 + }, + { + "action_loss": 0.0010282915318384767, + "epoch": 38.24640287769784, + "step": 42530 + }, + { + "epoch": 38.24640287769784, + "step": 42530, + "torque_loss": 0.13052575290203094 + }, + { + "epoch": 38.24640287769784, + "force_loss": 0.0025030726101249456, + "step": 42530 + }, + { + "epoch": 38.25539568345324, + "grad_norm": 0.1908128410577774, + "learning_rate": 2.1421341998023163e-05, + "loss": 0.0194, + "step": 42540 + }, + { + "action_loss": 0.021642101928591728, + "epoch": 38.25539568345324, + "step": 42540 + }, + { + "epoch": 38.25539568345324, + "step": 42540, + "torque_loss": 0.10879906266927719 + }, + { + "epoch": 38.25539568345324, + "force_loss": 0.02443387545645237, + "step": 42540 + }, + { + "epoch": 38.264388489208635, + "grad_norm": 0.3333670198917389, + "learning_rate": 2.139873373167491e-05, + "loss": 0.0178, + "step": 42550 + }, + { + "action_loss": 0.011166143231093884, + "epoch": 38.264388489208635, + "step": 42550 + }, + { + "epoch": 38.264388489208635, + "step": 42550, + "torque_loss": 0.09375491738319397 + }, + { + "epoch": 38.264388489208635, + "force_loss": 0.01609027199447155, + "step": 42550 + }, + { + "epoch": 38.273381294964025, + "grad_norm": 0.1628560870885849, + "learning_rate": 2.13761341536362e-05, + "loss": 0.0163, + "step": 42560 + }, + { + "action_loss": 0.0015759443631395698, + "epoch": 38.273381294964025, + "step": 42560 + }, + { + "epoch": 38.273381294964025, + "step": 42560, + "torque_loss": 0.14005199074745178 + }, + { + "epoch": 38.273381294964025, + "force_loss": 0.0020526631269603968, + "step": 42560 + }, + { + "epoch": 38.28237410071942, + "grad_norm": 0.16305041313171387, + "learning_rate": 2.1353543270772136e-05, + "loss": 0.0153, + "step": 42570 + }, + { + "action_loss": 0.0032874986063688993, + "epoch": 38.28237410071942, + "step": 42570 + }, + { + "epoch": 38.28237410071942, + "step": 42570, + "torque_loss": 0.19163213670253754 + }, + { + "epoch": 38.28237410071942, + "force_loss": 0.00328506319783628, + "step": 42570 + }, + { + "epoch": 38.29136690647482, + "grad_norm": 0.08315874636173248, + "learning_rate": 2.1330961089945297e-05, + "loss": 0.0174, + "step": 42580 + }, + { + "action_loss": 0.0031259332317858934, + "epoch": 38.29136690647482, + "step": 42580 + }, + { + "epoch": 38.29136690647482, + "step": 42580, + "torque_loss": 0.11001918464899063 + }, + { + "epoch": 38.29136690647482, + "force_loss": 0.003332960419356823, + "step": 42580 + }, + { + "epoch": 38.30035971223022, + "grad_norm": 0.20289459824562073, + "learning_rate": 2.130838761801548e-05, + "loss": 0.0198, + "step": 42590 + }, + { + "action_loss": 0.0016095106257125735, + "epoch": 38.30035971223022, + "step": 42590 + }, + { + "epoch": 38.30035971223022, + "step": 42590, + "torque_loss": 0.13450567424297333 + }, + { + "epoch": 38.30035971223022, + "force_loss": 0.0033097604755312204, + "step": 42590 + }, + { + "epoch": 38.30935251798561, + "grad_norm": 0.11926200985908508, + "learning_rate": 2.1285822861839966e-05, + "loss": 0.0165, + "step": 42600 + }, + { + "action_loss": 0.0022899971809238195, + "epoch": 38.30935251798561, + "step": 42600 + }, + { + "epoch": 38.30935251798561, + "step": 42600, + "torque_loss": 0.15427549183368683 + }, + { + "epoch": 38.30935251798561, + "force_loss": 0.0017726510995998979, + "step": 42600 + }, + { + "epoch": 38.318345323741006, + "grad_norm": 0.0767514556646347, + "learning_rate": 2.126326682827331e-05, + "loss": 0.0142, + "step": 42610 + }, + { + "action_loss": 0.0012972141848877072, + "epoch": 38.318345323741006, + "step": 42610 + }, + { + "epoch": 38.318345323741006, + "step": 42610, + "torque_loss": 0.12797673046588898 + }, + { + "epoch": 38.318345323741006, + "force_loss": 0.003716723993420601, + "step": 42610 + }, + { + "epoch": 38.327338129496404, + "grad_norm": 0.09448380023241043, + "learning_rate": 2.124071952416744e-05, + "loss": 0.0149, + "step": 42620 + }, + { + "action_loss": 0.0014604199677705765, + "epoch": 38.327338129496404, + "step": 42620 + }, + { + "epoch": 38.327338129496404, + "step": 42620, + "torque_loss": 0.12701942026615143 + }, + { + "epoch": 38.327338129496404, + "force_loss": 0.002675536321476102, + "step": 42620 + }, + { + "epoch": 38.3363309352518, + "grad_norm": 0.11596908420324326, + "learning_rate": 2.1218180956371634e-05, + "loss": 0.0136, + "step": 42630 + }, + { + "action_loss": 0.005087161436676979, + "epoch": 38.3363309352518, + "step": 42630 + }, + { + "epoch": 38.3363309352518, + "step": 42630, + "torque_loss": 0.07203080505132675 + }, + { + "epoch": 38.3363309352518, + "force_loss": 0.005233215633779764, + "step": 42630 + }, + { + "epoch": 38.34532374100719, + "grad_norm": 0.15473967790603638, + "learning_rate": 2.119565113173252e-05, + "loss": 0.0173, + "step": 42640 + }, + { + "action_loss": 0.012108213268220425, + "epoch": 38.34532374100719, + "step": 42640 + }, + { + "epoch": 38.34532374100719, + "step": 42640, + "torque_loss": 0.08756273239850998 + }, + { + "epoch": 38.34532374100719, + "force_loss": 0.018123289570212364, + "step": 42640 + }, + { + "epoch": 38.35431654676259, + "grad_norm": 0.1036149337887764, + "learning_rate": 2.1173130057094033e-05, + "loss": 0.0155, + "step": 42650 + }, + { + "action_loss": 0.0009599531185813248, + "epoch": 38.35431654676259, + "step": 42650 + }, + { + "epoch": 38.35431654676259, + "step": 42650, + "torque_loss": 0.11805254966020584 + }, + { + "epoch": 38.35431654676259, + "force_loss": 0.0018283972749486566, + "step": 42650 + }, + { + "epoch": 38.36330935251799, + "grad_norm": 0.1567688286304474, + "learning_rate": 2.115061773929753e-05, + "loss": 0.0147, + "step": 42660 + }, + { + "action_loss": 0.0014010765589773655, + "epoch": 38.36330935251799, + "step": 42660 + }, + { + "epoch": 38.36330935251799, + "step": 42660, + "torque_loss": 0.11579396575689316 + }, + { + "epoch": 38.36330935251799, + "force_loss": 0.0018234135350212455, + "step": 42660 + }, + { + "epoch": 38.37230215827338, + "grad_norm": 0.2620141804218292, + "learning_rate": 2.1128114185181623e-05, + "loss": 0.0172, + "step": 42670 + }, + { + "action_loss": 0.003817712888121605, + "epoch": 38.37230215827338, + "step": 42670 + }, + { + "epoch": 38.37230215827338, + "step": 42670, + "torque_loss": 0.10588070750236511 + }, + { + "epoch": 38.37230215827338, + "force_loss": 0.007507646456360817, + "step": 42670 + }, + { + "epoch": 38.381294964028775, + "grad_norm": 0.22709479928016663, + "learning_rate": 2.1105619401582317e-05, + "loss": 0.016, + "step": 42680 + }, + { + "action_loss": 0.0008732354617677629, + "epoch": 38.381294964028775, + "step": 42680 + }, + { + "epoch": 38.381294964028775, + "step": 42680, + "torque_loss": 0.08710754662752151 + }, + { + "epoch": 38.381294964028775, + "force_loss": 0.0022411842364817858, + "step": 42680 + }, + { + "epoch": 38.39028776978417, + "grad_norm": 0.21565333008766174, + "learning_rate": 2.1083133395332928e-05, + "loss": 0.0162, + "step": 42690 + }, + { + "action_loss": 0.003400401910766959, + "epoch": 38.39028776978417, + "step": 42690 + }, + { + "epoch": 38.39028776978417, + "step": 42690, + "torque_loss": 0.1361086517572403 + }, + { + "epoch": 38.39028776978417, + "force_loss": 0.0055230301804840565, + "step": 42690 + }, + { + "epoch": 38.39928057553957, + "grad_norm": 0.1529228836297989, + "learning_rate": 2.1060656173264082e-05, + "loss": 0.0151, + "step": 42700 + }, + { + "action_loss": 0.005311049986630678, + "epoch": 38.39928057553957, + "step": 42700 + }, + { + "epoch": 38.39928057553957, + "step": 42700, + "torque_loss": 0.14421053230762482 + }, + { + "epoch": 38.39928057553957, + "force_loss": 0.006098801735788584, + "step": 42700 + }, + { + "epoch": 38.40827338129496, + "grad_norm": 0.275186151266098, + "learning_rate": 2.103818774220383e-05, + "loss": 0.0226, + "step": 42710 + }, + { + "action_loss": 0.0014118897961452603, + "epoch": 38.40827338129496, + "step": 42710 + }, + { + "epoch": 38.40827338129496, + "step": 42710, + "torque_loss": 0.09845157712697983 + }, + { + "epoch": 38.40827338129496, + "force_loss": 0.0006217022310011089, + "step": 42710 + }, + { + "epoch": 38.41726618705036, + "grad_norm": 0.19079558551311493, + "learning_rate": 2.1015728108977412e-05, + "loss": 0.016, + "step": 42720 + }, + { + "action_loss": 0.017542317509651184, + "epoch": 38.41726618705036, + "step": 42720 + }, + { + "epoch": 38.41726618705036, + "step": 42720, + "torque_loss": 0.12345749139785767 + }, + { + "epoch": 38.41726618705036, + "force_loss": 0.017251184210181236, + "step": 42720 + }, + { + "epoch": 38.42625899280576, + "grad_norm": 0.23277685046195984, + "learning_rate": 2.0993277280407548e-05, + "loss": 0.0183, + "step": 42730 + }, + { + "action_loss": 0.007395673543214798, + "epoch": 38.42625899280576, + "step": 42730 + }, + { + "epoch": 38.42625899280576, + "step": 42730, + "torque_loss": 0.11387022584676743 + }, + { + "epoch": 38.42625899280576, + "force_loss": 0.011070760898292065, + "step": 42730 + }, + { + "epoch": 38.435251798561154, + "grad_norm": 0.06706535816192627, + "learning_rate": 2.0970835263314132e-05, + "loss": 0.0163, + "step": 42740 + }, + { + "action_loss": 0.0020951805636286736, + "epoch": 38.435251798561154, + "step": 42740 + }, + { + "epoch": 38.435251798561154, + "step": 42740, + "torque_loss": 0.1587638109922409 + }, + { + "epoch": 38.435251798561154, + "force_loss": 0.004596231505274773, + "step": 42740 + }, + { + "epoch": 38.444244604316545, + "grad_norm": 0.1208859458565712, + "learning_rate": 2.094840206451451e-05, + "loss": 0.0156, + "step": 42750 + }, + { + "action_loss": 0.0021530913654714823, + "epoch": 38.444244604316545, + "step": 42750 + }, + { + "epoch": 38.444244604316545, + "step": 42750, + "torque_loss": 0.14189133048057556 + }, + { + "epoch": 38.444244604316545, + "force_loss": 0.0074575818143785, + "step": 42750 + }, + { + "epoch": 38.45323741007194, + "grad_norm": 0.15104053914546967, + "learning_rate": 2.0925977690823273e-05, + "loss": 0.0159, + "step": 42760 + }, + { + "action_loss": 0.010436457581818104, + "epoch": 38.45323741007194, + "step": 42760 + }, + { + "epoch": 38.45323741007194, + "step": 42760, + "torque_loss": 0.11887097358703613 + }, + { + "epoch": 38.45323741007194, + "force_loss": 0.015443484298884869, + "step": 42760 + }, + { + "epoch": 38.46223021582734, + "grad_norm": 0.12667535245418549, + "learning_rate": 2.0903562149052364e-05, + "loss": 0.0147, + "step": 42770 + }, + { + "action_loss": 0.0013711719075217843, + "epoch": 38.46223021582734, + "step": 42770 + }, + { + "epoch": 38.46223021582734, + "step": 42770, + "torque_loss": 0.13282687962055206 + }, + { + "epoch": 38.46223021582734, + "force_loss": 0.0016530733555555344, + "step": 42770 + }, + { + "epoch": 38.47122302158273, + "grad_norm": 0.2508116662502289, + "learning_rate": 2.0881155446011025e-05, + "loss": 0.0161, + "step": 42780 + }, + { + "action_loss": 0.002459365176036954, + "epoch": 38.47122302158273, + "step": 42780 + }, + { + "epoch": 38.47122302158273, + "step": 42780, + "torque_loss": 0.11887223273515701 + }, + { + "epoch": 38.47122302158273, + "force_loss": 0.010309889912605286, + "step": 42780 + }, + { + "epoch": 38.48021582733813, + "grad_norm": 0.14695261418819427, + "learning_rate": 2.0858757588505823e-05, + "loss": 0.0183, + "step": 42790 + }, + { + "action_loss": 0.003857190487906337, + "epoch": 38.48021582733813, + "step": 42790 + }, + { + "epoch": 38.48021582733813, + "step": 42790, + "torque_loss": 0.16643297672271729 + }, + { + "epoch": 38.48021582733813, + "force_loss": 0.005898170173168182, + "step": 42790 + }, + { + "epoch": 38.489208633093526, + "grad_norm": 0.09625194221735, + "learning_rate": 2.0836368583340622e-05, + "loss": 0.0144, + "step": 42800 + }, + { + "action_loss": 0.003638644702732563, + "epoch": 38.489208633093526, + "step": 42800 + }, + { + "epoch": 38.489208633093526, + "step": 42800, + "torque_loss": 0.12514102458953857 + }, + { + "epoch": 38.489208633093526, + "force_loss": 0.0020719580352306366, + "step": 42800 + }, + { + "epoch": 38.49820143884892, + "grad_norm": 0.15096129477024078, + "learning_rate": 2.081398843731664e-05, + "loss": 0.0155, + "step": 42810 + }, + { + "action_loss": 0.001300158561207354, + "epoch": 38.49820143884892, + "step": 42810 + }, + { + "epoch": 38.49820143884892, + "step": 42810, + "torque_loss": 0.11797508597373962 + }, + { + "epoch": 38.49820143884892, + "force_loss": 0.00276996660977602, + "step": 42810 + }, + { + "epoch": 38.507194244604314, + "grad_norm": 0.15623484551906586, + "learning_rate": 2.0791617157232357e-05, + "loss": 0.0153, + "step": 42820 + }, + { + "action_loss": 0.0023197324480861425, + "epoch": 38.507194244604314, + "step": 42820 + }, + { + "epoch": 38.507194244604314, + "step": 42820, + "torque_loss": 0.1320878118276596 + }, + { + "epoch": 38.507194244604314, + "force_loss": 0.005294546484947205, + "step": 42820 + }, + { + "epoch": 38.51618705035971, + "grad_norm": 0.23853468894958496, + "learning_rate": 2.0769254749883576e-05, + "loss": 0.0168, + "step": 42830 + }, + { + "action_loss": 0.0017935411306098104, + "epoch": 38.51618705035971, + "step": 42830 + }, + { + "epoch": 38.51618705035971, + "step": 42830, + "torque_loss": 0.14692510664463043 + }, + { + "epoch": 38.51618705035971, + "force_loss": 0.012311834841966629, + "step": 42830 + }, + { + "epoch": 38.52517985611511, + "grad_norm": 0.1728084683418274, + "learning_rate": 2.0746901222063415e-05, + "loss": 0.0157, + "step": 42840 + }, + { + "action_loss": 0.0020139499101787806, + "epoch": 38.52517985611511, + "step": 42840 + }, + { + "epoch": 38.52517985611511, + "step": 42840, + "torque_loss": 0.13630957901477814 + }, + { + "epoch": 38.52517985611511, + "force_loss": 0.005008505191653967, + "step": 42840 + }, + { + "epoch": 38.53417266187051, + "grad_norm": 0.08252532035112381, + "learning_rate": 2.072455658056226e-05, + "loss": 0.0162, + "step": 42850 + }, + { + "action_loss": 0.0016188727458938956, + "epoch": 38.53417266187051, + "step": 42850 + }, + { + "epoch": 38.53417266187051, + "step": 42850, + "torque_loss": 0.17332512140274048 + }, + { + "epoch": 38.53417266187051, + "force_loss": 0.0020309491083025932, + "step": 42850 + }, + { + "epoch": 38.5431654676259, + "grad_norm": 0.20099502801895142, + "learning_rate": 2.0702220832167873e-05, + "loss": 0.0177, + "step": 42860 + }, + { + "action_loss": 0.0013149403966963291, + "epoch": 38.5431654676259, + "step": 42860 + }, + { + "epoch": 38.5431654676259, + "step": 42860, + "torque_loss": 0.07513218373060226 + }, + { + "epoch": 38.5431654676259, + "force_loss": 0.0021678388584405184, + "step": 42860 + }, + { + "epoch": 38.552158273381295, + "grad_norm": 0.145574688911438, + "learning_rate": 2.0679893983665205e-05, + "loss": 0.0156, + "step": 42870 + }, + { + "action_loss": 0.0014957472449168563, + "epoch": 38.552158273381295, + "step": 42870 + }, + { + "epoch": 38.552158273381295, + "step": 42870, + "torque_loss": 0.1247599795460701 + }, + { + "epoch": 38.552158273381295, + "force_loss": 0.0020375109743326902, + "step": 42870 + }, + { + "epoch": 38.56115107913669, + "grad_norm": 0.2791518270969391, + "learning_rate": 2.0657576041836622e-05, + "loss": 0.0208, + "step": 42880 + }, + { + "action_loss": 0.006603449583053589, + "epoch": 38.56115107913669, + "step": 42880 + }, + { + "epoch": 38.56115107913669, + "step": 42880, + "torque_loss": 0.14387445151805878 + }, + { + "epoch": 38.56115107913669, + "force_loss": 0.0029947077855467796, + "step": 42880 + }, + { + "epoch": 38.57014388489208, + "grad_norm": 0.12982387840747833, + "learning_rate": 2.0635267013461666e-05, + "loss": 0.0217, + "step": 42890 + }, + { + "action_loss": 0.0017544155707582831, + "epoch": 38.57014388489208, + "step": 42890 + }, + { + "epoch": 38.57014388489208, + "step": 42890, + "torque_loss": 0.10584614425897598 + }, + { + "epoch": 38.57014388489208, + "force_loss": 0.0037342163268476725, + "step": 42890 + }, + { + "epoch": 38.57913669064748, + "grad_norm": 0.12017540633678436, + "learning_rate": 2.061296690531728e-05, + "loss": 0.0166, + "step": 42900 + }, + { + "action_loss": 0.0021294003818184137, + "epoch": 38.57913669064748, + "step": 42900 + }, + { + "epoch": 38.57913669064748, + "step": 42900, + "torque_loss": 0.09758982807397842 + }, + { + "epoch": 38.57913669064748, + "force_loss": 0.004263147246092558, + "step": 42900 + }, + { + "epoch": 38.58812949640288, + "grad_norm": 0.1537683755159378, + "learning_rate": 2.0590675724177622e-05, + "loss": 0.015, + "step": 42910 + }, + { + "action_loss": 0.00441939989104867, + "epoch": 38.58812949640288, + "step": 42910 + }, + { + "epoch": 38.58812949640288, + "step": 42910, + "torque_loss": 0.15077464282512665 + }, + { + "epoch": 38.58812949640288, + "force_loss": 0.006337263155728579, + "step": 42910 + }, + { + "epoch": 38.597122302158276, + "grad_norm": 0.12387017905712128, + "learning_rate": 2.0568393476814167e-05, + "loss": 0.0153, + "step": 42920 + }, + { + "action_loss": 0.005837309639900923, + "epoch": 38.597122302158276, + "step": 42920 + }, + { + "epoch": 38.597122302158276, + "step": 42920, + "torque_loss": 0.1131918802857399 + }, + { + "epoch": 38.597122302158276, + "force_loss": 0.005272162612527609, + "step": 42920 + }, + { + "epoch": 38.606115107913666, + "grad_norm": 0.33255335688591003, + "learning_rate": 2.0546120169995685e-05, + "loss": 0.0175, + "step": 42930 + }, + { + "action_loss": 0.0009794255020096898, + "epoch": 38.606115107913666, + "step": 42930 + }, + { + "epoch": 38.606115107913666, + "step": 42930, + "torque_loss": 0.16273508965969086 + }, + { + "epoch": 38.606115107913666, + "force_loss": 0.0017201714217662811, + "step": 42930 + }, + { + "epoch": 38.615107913669064, + "grad_norm": 0.24979008734226227, + "learning_rate": 2.0523855810488214e-05, + "loss": 0.0173, + "step": 42940 + }, + { + "action_loss": 0.0019623811822384596, + "epoch": 38.615107913669064, + "step": 42940 + }, + { + "epoch": 38.615107913669064, + "step": 42940, + "torque_loss": 0.08859298378229141 + }, + { + "epoch": 38.615107913669064, + "force_loss": 0.004715843591839075, + "step": 42940 + }, + { + "epoch": 38.62410071942446, + "grad_norm": 0.18740585446357727, + "learning_rate": 2.050160040505505e-05, + "loss": 0.016, + "step": 42950 + }, + { + "action_loss": 0.003015686059370637, + "epoch": 38.62410071942446, + "step": 42950 + }, + { + "epoch": 38.62410071942446, + "step": 42950, + "torque_loss": 0.11927451938390732 + }, + { + "epoch": 38.62410071942446, + "force_loss": 0.008128714747726917, + "step": 42950 + }, + { + "epoch": 38.63309352517986, + "grad_norm": 0.210119366645813, + "learning_rate": 2.0479353960456843e-05, + "loss": 0.0167, + "step": 42960 + }, + { + "action_loss": 0.004030993673950434, + "epoch": 38.63309352517986, + "step": 42960 + }, + { + "epoch": 38.63309352517986, + "step": 42960, + "torque_loss": 0.12204185873270035 + }, + { + "epoch": 38.63309352517986, + "force_loss": 0.004106556531041861, + "step": 42960 + }, + { + "epoch": 38.64208633093525, + "grad_norm": 0.15033361315727234, + "learning_rate": 2.0457116483451456e-05, + "loss": 0.0165, + "step": 42970 + }, + { + "action_loss": 0.0024442195426672697, + "epoch": 38.64208633093525, + "step": 42970 + }, + { + "epoch": 38.64208633093525, + "step": 42970, + "torque_loss": 0.09865373373031616 + }, + { + "epoch": 38.64208633093525, + "force_loss": 0.001946457545273006, + "step": 42970 + }, + { + "epoch": 38.65107913669065, + "grad_norm": 0.21015511453151703, + "learning_rate": 2.0434887980794043e-05, + "loss": 0.0155, + "step": 42980 + }, + { + "action_loss": 0.0006661896477453411, + "epoch": 38.65107913669065, + "step": 42980 + }, + { + "epoch": 38.65107913669065, + "step": 42980, + "torque_loss": 0.11808957904577255 + }, + { + "epoch": 38.65107913669065, + "force_loss": 0.0008906096336431801, + "step": 42980 + }, + { + "epoch": 38.660071942446045, + "grad_norm": 0.3225348889827728, + "learning_rate": 2.0412668459237043e-05, + "loss": 0.0189, + "step": 42990 + }, + { + "action_loss": 0.0025322488509118557, + "epoch": 38.660071942446045, + "step": 42990 + }, + { + "epoch": 38.660071942446045, + "step": 42990, + "torque_loss": 0.17096595466136932 + }, + { + "epoch": 38.660071942446045, + "force_loss": 0.012234543450176716, + "step": 42990 + }, + { + "epoch": 38.669064748201436, + "grad_norm": 0.1543445885181427, + "learning_rate": 2.039045792553016e-05, + "loss": 0.0153, + "step": 43000 + }, + { + "action_loss": 0.0023059502709656954, + "epoch": 38.669064748201436, + "step": 43000 + }, + { + "epoch": 38.669064748201436, + "step": 43000, + "torque_loss": 0.10220726579427719 + }, + { + "epoch": 38.669064748201436, + "force_loss": 0.0031755883246660233, + "step": 43000 + }, + { + "epoch": 38.67805755395683, + "grad_norm": 0.1288253217935562, + "learning_rate": 2.036825638642036e-05, + "loss": 0.0173, + "step": 43010 + }, + { + "action_loss": 0.004959840327501297, + "epoch": 38.67805755395683, + "step": 43010 + }, + { + "epoch": 38.67805755395683, + "step": 43010, + "torque_loss": 0.12836463749408722 + }, + { + "epoch": 38.67805755395683, + "force_loss": 0.009414522908627987, + "step": 43010 + }, + { + "epoch": 38.68705035971223, + "grad_norm": 0.32297179102897644, + "learning_rate": 2.0346063848651868e-05, + "loss": 0.0146, + "step": 43020 + }, + { + "action_loss": 0.0065900362096726894, + "epoch": 38.68705035971223, + "step": 43020 + }, + { + "epoch": 38.68705035971223, + "step": 43020, + "torque_loss": 0.13217408955097198 + }, + { + "epoch": 38.68705035971223, + "force_loss": 0.006156821269541979, + "step": 43020 + }, + { + "epoch": 38.69604316546763, + "grad_norm": 0.19051147997379303, + "learning_rate": 2.0323880318966254e-05, + "loss": 0.0172, + "step": 43030 + }, + { + "action_loss": 0.0032602306455373764, + "epoch": 38.69604316546763, + "step": 43030 + }, + { + "epoch": 38.69604316546763, + "step": 43030, + "torque_loss": 0.1282208412885666 + }, + { + "epoch": 38.69604316546763, + "force_loss": 0.0020867667626589537, + "step": 43030 + }, + { + "epoch": 38.70503597122302, + "grad_norm": 0.13722969591617584, + "learning_rate": 2.030170580410221e-05, + "loss": 0.0182, + "step": 43040 + }, + { + "action_loss": 0.0012555635767057538, + "epoch": 38.70503597122302, + "step": 43040 + }, + { + "epoch": 38.70503597122302, + "step": 43040, + "torque_loss": 0.12337436527013779 + }, + { + "epoch": 38.70503597122302, + "force_loss": 0.0032531581819057465, + "step": 43040 + }, + { + "epoch": 38.71402877697842, + "grad_norm": 0.25402286648750305, + "learning_rate": 2.0279540310795837e-05, + "loss": 0.0145, + "step": 43050 + }, + { + "action_loss": 0.004971138201653957, + "epoch": 38.71402877697842, + "step": 43050 + }, + { + "epoch": 38.71402877697842, + "step": 43050, + "torque_loss": 0.08613301068544388 + }, + { + "epoch": 38.71402877697842, + "force_loss": 0.006066437810659409, + "step": 43050 + }, + { + "epoch": 38.723021582733814, + "grad_norm": 0.07191108167171478, + "learning_rate": 2.0257383845780365e-05, + "loss": 0.0152, + "step": 43060 + }, + { + "action_loss": 0.007087436970323324, + "epoch": 38.723021582733814, + "step": 43060 + }, + { + "epoch": 38.723021582733814, + "step": 43060, + "torque_loss": 0.11101161688566208 + }, + { + "epoch": 38.723021582733814, + "force_loss": 0.017721662297844887, + "step": 43060 + }, + { + "epoch": 38.73201438848921, + "grad_norm": 0.1300228387117386, + "learning_rate": 2.0235236415786384e-05, + "loss": 0.0187, + "step": 43070 + }, + { + "action_loss": 0.001134268706664443, + "epoch": 38.73201438848921, + "step": 43070 + }, + { + "epoch": 38.73201438848921, + "step": 43070, + "torque_loss": 0.13750235736370087 + }, + { + "epoch": 38.73201438848921, + "force_loss": 0.0009366504964418709, + "step": 43070 + }, + { + "epoch": 38.7410071942446, + "grad_norm": 0.08991255611181259, + "learning_rate": 2.021309802754169e-05, + "loss": 0.0154, + "step": 43080 + }, + { + "action_loss": 0.01060511264950037, + "epoch": 38.7410071942446, + "step": 43080 + }, + { + "epoch": 38.7410071942446, + "step": 43080, + "torque_loss": 0.11946281790733337 + }, + { + "epoch": 38.7410071942446, + "force_loss": 0.010707574896514416, + "step": 43080 + }, + { + "epoch": 38.75, + "grad_norm": 0.08264023810625076, + "learning_rate": 2.0190968687771332e-05, + "loss": 0.0133, + "step": 43090 + }, + { + "action_loss": 0.0007571717142127454, + "epoch": 38.75, + "step": 43090 + }, + { + "epoch": 38.75, + "step": 43090, + "torque_loss": 0.11499225348234177 + }, + { + "epoch": 38.75, + "force_loss": 0.0019671835470944643, + "step": 43090 + }, + { + "epoch": 38.7589928057554, + "grad_norm": 0.2653161287307739, + "learning_rate": 2.016884840319763e-05, + "loss": 0.0147, + "step": 43100 + }, + { + "action_loss": 0.0028366928454488516, + "epoch": 38.7589928057554, + "step": 43100 + }, + { + "epoch": 38.7589928057554, + "step": 43100, + "torque_loss": 0.1616523563861847 + }, + { + "epoch": 38.7589928057554, + "force_loss": 0.00493226433172822, + "step": 43100 + }, + { + "epoch": 38.76798561151079, + "grad_norm": 0.3793729841709137, + "learning_rate": 2.0146737180540122e-05, + "loss": 0.0162, + "step": 43110 + }, + { + "action_loss": 0.0036913391668349504, + "epoch": 38.76798561151079, + "step": 43110 + }, + { + "epoch": 38.76798561151079, + "step": 43110, + "torque_loss": 0.154439315199852 + }, + { + "epoch": 38.76798561151079, + "force_loss": 0.007478731218725443, + "step": 43110 + }, + { + "epoch": 38.776978417266186, + "grad_norm": 0.1540844589471817, + "learning_rate": 2.012463502651564e-05, + "loss": 0.0142, + "step": 43120 + }, + { + "action_loss": 0.007521331310272217, + "epoch": 38.776978417266186, + "step": 43120 + }, + { + "epoch": 38.776978417266186, + "step": 43120, + "torque_loss": 0.12096823006868362 + }, + { + "epoch": 38.776978417266186, + "force_loss": 0.018969228491187096, + "step": 43120 + }, + { + "epoch": 38.78597122302158, + "grad_norm": 0.1410590261220932, + "learning_rate": 2.0102541947838228e-05, + "loss": 0.017, + "step": 43130 + }, + { + "action_loss": 0.000987159670330584, + "epoch": 38.78597122302158, + "step": 43130 + }, + { + "epoch": 38.78597122302158, + "step": 43130, + "torque_loss": 0.13239984214305878 + }, + { + "epoch": 38.78597122302158, + "force_loss": 0.0014093982754275203, + "step": 43130 + }, + { + "epoch": 38.79496402877698, + "grad_norm": 0.12232816219329834, + "learning_rate": 2.0080457951219173e-05, + "loss": 0.0147, + "step": 43140 + }, + { + "action_loss": 0.0008844216354191303, + "epoch": 38.79496402877698, + "step": 43140 + }, + { + "epoch": 38.79496402877698, + "step": 43140, + "torque_loss": 0.14423374831676483 + }, + { + "epoch": 38.79496402877698, + "force_loss": 0.0007429769611917436, + "step": 43140 + }, + { + "epoch": 38.80395683453237, + "grad_norm": 0.15267446637153625, + "learning_rate": 2.0058383043367017e-05, + "loss": 0.0199, + "step": 43150 + }, + { + "action_loss": 0.0012336460640653968, + "epoch": 38.80395683453237, + "step": 43150 + }, + { + "epoch": 38.80395683453237, + "step": 43150, + "torque_loss": 0.1527603715658188 + }, + { + "epoch": 38.80395683453237, + "force_loss": 0.002973536029458046, + "step": 43150 + }, + { + "epoch": 38.81294964028777, + "grad_norm": 0.1822865754365921, + "learning_rate": 2.0036317230987528e-05, + "loss": 0.0133, + "step": 43160 + }, + { + "action_loss": 0.0008694579009898007, + "epoch": 38.81294964028777, + "step": 43160 + }, + { + "epoch": 38.81294964028777, + "step": 43160, + "torque_loss": 0.11263538151979446 + }, + { + "epoch": 38.81294964028777, + "force_loss": 0.0010804197518154979, + "step": 43160 + }, + { + "epoch": 38.82194244604317, + "grad_norm": 0.1791848987340927, + "learning_rate": 2.0014260520783696e-05, + "loss": 0.0196, + "step": 43170 + }, + { + "action_loss": 0.004213029518723488, + "epoch": 38.82194244604317, + "step": 43170 + }, + { + "epoch": 38.82194244604317, + "step": 43170, + "torque_loss": 0.12668021023273468 + }, + { + "epoch": 38.82194244604317, + "force_loss": 0.008832802064716816, + "step": 43170 + }, + { + "epoch": 38.830935251798564, + "grad_norm": 0.5062022805213928, + "learning_rate": 1.9992212919455834e-05, + "loss": 0.0184, + "step": 43180 + }, + { + "action_loss": 0.011418166570365429, + "epoch": 38.830935251798564, + "step": 43180 + }, + { + "epoch": 38.830935251798564, + "step": 43180, + "torque_loss": 0.1398744434118271 + }, + { + "epoch": 38.830935251798564, + "force_loss": 0.02036232315003872, + "step": 43180 + }, + { + "epoch": 38.839928057553955, + "grad_norm": 0.27486929297447205, + "learning_rate": 1.9970174433701333e-05, + "loss": 0.017, + "step": 43190 + }, + { + "action_loss": 0.0007710832287557423, + "epoch": 38.839928057553955, + "step": 43190 + }, + { + "epoch": 38.839928057553955, + "step": 43190, + "torque_loss": 0.09834077209234238 + }, + { + "epoch": 38.839928057553955, + "force_loss": 0.0009948507649824023, + "step": 43190 + }, + { + "epoch": 38.84892086330935, + "grad_norm": 0.33841633796691895, + "learning_rate": 1.9948145070214992e-05, + "loss": 0.0143, + "step": 43200 + }, + { + "action_loss": 0.0026311948895454407, + "epoch": 38.84892086330935, + "step": 43200 + }, + { + "epoch": 38.84892086330935, + "step": 43200, + "torque_loss": 0.1537993997335434 + }, + { + "epoch": 38.84892086330935, + "force_loss": 0.002414965769276023, + "step": 43200 + }, + { + "epoch": 38.85791366906475, + "grad_norm": 0.089418426156044, + "learning_rate": 1.9926124835688663e-05, + "loss": 0.0181, + "step": 43210 + }, + { + "action_loss": 0.004569074604660273, + "epoch": 38.85791366906475, + "step": 43210 + }, + { + "epoch": 38.85791366906475, + "step": 43210, + "torque_loss": 0.1313643455505371 + }, + { + "epoch": 38.85791366906475, + "force_loss": 0.0030327008571475744, + "step": 43210 + }, + { + "epoch": 38.86690647482014, + "grad_norm": 0.36725449562072754, + "learning_rate": 1.9904113736811576e-05, + "loss": 0.0175, + "step": 43220 + }, + { + "action_loss": 0.01263834536075592, + "epoch": 38.86690647482014, + "step": 43220 + }, + { + "epoch": 38.86690647482014, + "step": 43220, + "torque_loss": 0.10542973130941391 + }, + { + "epoch": 38.86690647482014, + "force_loss": 0.006002683658152819, + "step": 43220 + }, + { + "epoch": 38.87589928057554, + "grad_norm": 0.23696266114711761, + "learning_rate": 1.9882111780270096e-05, + "loss": 0.0177, + "step": 43230 + }, + { + "action_loss": 0.000753029016777873, + "epoch": 38.87589928057554, + "step": 43230 + }, + { + "epoch": 38.87589928057554, + "step": 43230, + "torque_loss": 0.11790001392364502 + }, + { + "epoch": 38.87589928057554, + "force_loss": 0.0018389280885457993, + "step": 43230 + }, + { + "epoch": 38.884892086330936, + "grad_norm": 0.1683730036020279, + "learning_rate": 1.986011897274784e-05, + "loss": 0.0152, + "step": 43240 + }, + { + "action_loss": 0.0014560552081093192, + "epoch": 38.884892086330936, + "step": 43240 + }, + { + "epoch": 38.884892086330936, + "step": 43240, + "torque_loss": 0.09332650899887085 + }, + { + "epoch": 38.884892086330936, + "force_loss": 0.005439750384539366, + "step": 43240 + }, + { + "epoch": 38.893884892086334, + "grad_norm": 0.20827648043632507, + "learning_rate": 1.983813532092565e-05, + "loss": 0.021, + "step": 43250 + }, + { + "action_loss": 0.002569853560999036, + "epoch": 38.893884892086334, + "step": 43250 + }, + { + "epoch": 38.893884892086334, + "step": 43250, + "torque_loss": 0.11992853879928589 + }, + { + "epoch": 38.893884892086334, + "force_loss": 0.001781979575753212, + "step": 43250 + }, + { + "epoch": 38.902877697841724, + "grad_norm": 0.2119198888540268, + "learning_rate": 1.981616083148155e-05, + "loss": 0.0157, + "step": 43260 + }, + { + "action_loss": 0.0006633300217799842, + "epoch": 38.902877697841724, + "step": 43260 + }, + { + "epoch": 38.902877697841724, + "step": 43260, + "torque_loss": 0.12181542068719864 + }, + { + "epoch": 38.902877697841724, + "force_loss": 0.0007153118494898081, + "step": 43260 + }, + { + "epoch": 38.91187050359712, + "grad_norm": 0.1701694279909134, + "learning_rate": 1.9794195511090845e-05, + "loss": 0.0153, + "step": 43270 + }, + { + "action_loss": 0.0020223597530275583, + "epoch": 38.91187050359712, + "step": 43270 + }, + { + "epoch": 38.91187050359712, + "step": 43270, + "torque_loss": 0.09244181960821152 + }, + { + "epoch": 38.91187050359712, + "force_loss": 0.010265180841088295, + "step": 43270 + }, + { + "epoch": 38.92086330935252, + "grad_norm": 0.40680307149887085, + "learning_rate": 1.977223936642601e-05, + "loss": 0.0156, + "step": 43280 + }, + { + "action_loss": 0.0019339105347171426, + "epoch": 38.92086330935252, + "step": 43280 + }, + { + "epoch": 38.92086330935252, + "step": 43280, + "torque_loss": 0.13848191499710083 + }, + { + "epoch": 38.92086330935252, + "force_loss": 0.007765419315546751, + "step": 43280 + }, + { + "epoch": 38.92985611510792, + "grad_norm": 0.10724852234125137, + "learning_rate": 1.975029240415674e-05, + "loss": 0.0151, + "step": 43290 + }, + { + "action_loss": 0.004592752084136009, + "epoch": 38.92985611510792, + "step": 43290 + }, + { + "epoch": 38.92985611510792, + "step": 43290, + "torque_loss": 0.1376928687095642 + }, + { + "epoch": 38.92985611510792, + "force_loss": 0.006820193957537413, + "step": 43290 + }, + { + "epoch": 38.93884892086331, + "grad_norm": 0.09468401223421097, + "learning_rate": 1.9728354630949936e-05, + "loss": 0.0163, + "step": 43300 + }, + { + "action_loss": 0.002079724334180355, + "epoch": 38.93884892086331, + "step": 43300 + }, + { + "epoch": 38.93884892086331, + "step": 43300, + "torque_loss": 0.11005637794733047 + }, + { + "epoch": 38.93884892086331, + "force_loss": 0.0040276930667459965, + "step": 43300 + }, + { + "epoch": 38.947841726618705, + "grad_norm": 0.10339222103357315, + "learning_rate": 1.9706426053469716e-05, + "loss": 0.0151, + "step": 43310 + }, + { + "action_loss": 0.008520000614225864, + "epoch": 38.947841726618705, + "step": 43310 + }, + { + "epoch": 38.947841726618705, + "step": 43310, + "torque_loss": 0.11181195825338364 + }, + { + "epoch": 38.947841726618705, + "force_loss": 0.015329674817621708, + "step": 43310 + }, + { + "epoch": 38.9568345323741, + "grad_norm": 0.07079189270734787, + "learning_rate": 1.9684506678377396e-05, + "loss": 0.0158, + "step": 43320 + }, + { + "action_loss": 0.000937213480938226, + "epoch": 38.9568345323741, + "step": 43320 + }, + { + "epoch": 38.9568345323741, + "step": 43320, + "torque_loss": 0.08863457292318344 + }, + { + "epoch": 38.9568345323741, + "force_loss": 0.0016422374173998833, + "step": 43320 + }, + { + "epoch": 38.96582733812949, + "grad_norm": 0.5183575749397278, + "learning_rate": 1.9662596512331544e-05, + "loss": 0.0157, + "step": 43330 + }, + { + "action_loss": 0.0006735995411872864, + "epoch": 38.96582733812949, + "step": 43330 + }, + { + "epoch": 38.96582733812949, + "step": 43330, + "torque_loss": 0.10106196254491806 + }, + { + "epoch": 38.96582733812949, + "force_loss": 0.0006834732485003769, + "step": 43330 + }, + { + "epoch": 38.97482014388489, + "grad_norm": 0.19415691494941711, + "learning_rate": 1.964069556198782e-05, + "loss": 0.0182, + "step": 43340 + }, + { + "action_loss": 0.002119858283549547, + "epoch": 38.97482014388489, + "step": 43340 + }, + { + "epoch": 38.97482014388489, + "step": 43340, + "torque_loss": 0.14544929563999176 + }, + { + "epoch": 38.97482014388489, + "force_loss": 0.004749286454170942, + "step": 43340 + }, + { + "epoch": 38.98381294964029, + "grad_norm": 0.12878499925136566, + "learning_rate": 1.9618803833999232e-05, + "loss": 0.0165, + "step": 43350 + }, + { + "action_loss": 0.0027808435261249542, + "epoch": 38.98381294964029, + "step": 43350 + }, + { + "epoch": 38.98381294964029, + "step": 43350, + "torque_loss": 0.09459487348794937 + }, + { + "epoch": 38.98381294964029, + "force_loss": 0.0014990046620368958, + "step": 43350 + }, + { + "epoch": 38.992805755395686, + "grad_norm": 0.18176470696926117, + "learning_rate": 1.9596921335015838e-05, + "loss": 0.016, + "step": 43360 + }, + { + "action_loss": 0.0027199166361242533, + "epoch": 38.992805755395686, + "step": 43360 + }, + { + "epoch": 38.992805755395686, + "step": 43360, + "torque_loss": 0.1447519063949585 + }, + { + "epoch": 38.992805755395686, + "force_loss": 0.0030312202870845795, + "step": 43360 + }, + { + "epoch": 39.00179856115108, + "grad_norm": 0.07352231442928314, + "learning_rate": 1.957504807168501e-05, + "loss": 0.0167, + "step": 43370 + }, + { + "action_loss": 0.0007470918935723603, + "epoch": 39.00179856115108, + "step": 43370 + }, + { + "epoch": 39.00179856115108, + "step": 43370, + "torque_loss": 0.15074753761291504 + }, + { + "epoch": 39.00179856115108, + "force_loss": 0.0018721274100244045, + "step": 43370 + }, + { + "epoch": 39.010791366906474, + "grad_norm": 0.270116925239563, + "learning_rate": 1.9553184050651253e-05, + "loss": 0.0194, + "step": 43380 + }, + { + "action_loss": 0.001262398436665535, + "epoch": 39.010791366906474, + "step": 43380 + }, + { + "epoch": 39.010791366906474, + "step": 43380, + "torque_loss": 0.12718075513839722 + }, + { + "epoch": 39.010791366906474, + "force_loss": 0.0016740221763029695, + "step": 43380 + }, + { + "epoch": 39.01978417266187, + "grad_norm": 0.09478301554918289, + "learning_rate": 1.953132927855628e-05, + "loss": 0.0193, + "step": 43390 + }, + { + "action_loss": 0.0025347634218633175, + "epoch": 39.01978417266187, + "step": 43390 + }, + { + "epoch": 39.01978417266187, + "step": 43390, + "torque_loss": 0.07287508249282837 + }, + { + "epoch": 39.01978417266187, + "force_loss": 0.010764396749436855, + "step": 43390 + }, + { + "epoch": 39.02877697841727, + "grad_norm": 0.2662072777748108, + "learning_rate": 1.9509483762038995e-05, + "loss": 0.0168, + "step": 43400 + }, + { + "action_loss": 0.002846834482625127, + "epoch": 39.02877697841727, + "step": 43400 + }, + { + "epoch": 39.02877697841727, + "step": 43400, + "torque_loss": 0.11030423641204834 + }, + { + "epoch": 39.02877697841727, + "force_loss": 0.006497098598629236, + "step": 43400 + }, + { + "epoch": 39.03776978417266, + "grad_norm": 0.24567677080631256, + "learning_rate": 1.9487647507735467e-05, + "loss": 0.015, + "step": 43410 + }, + { + "action_loss": 0.004797985777258873, + "epoch": 39.03776978417266, + "step": 43410 + }, + { + "epoch": 39.03776978417266, + "step": 43410, + "torque_loss": 0.11644471436738968 + }, + { + "epoch": 39.03776978417266, + "force_loss": 0.01078809890896082, + "step": 43410 + }, + { + "epoch": 39.04676258992806, + "grad_norm": 0.15378287434577942, + "learning_rate": 1.9465820522279032e-05, + "loss": 0.0152, + "step": 43420 + }, + { + "action_loss": 0.004549337085336447, + "epoch": 39.04676258992806, + "step": 43420 + }, + { + "epoch": 39.04676258992806, + "step": 43420, + "torque_loss": 0.1682567596435547 + }, + { + "epoch": 39.04676258992806, + "force_loss": 0.014509479515254498, + "step": 43420 + }, + { + "epoch": 39.055755395683455, + "grad_norm": 0.29289907217025757, + "learning_rate": 1.9444002812300078e-05, + "loss": 0.0178, + "step": 43430 + }, + { + "action_loss": 0.0018622586503624916, + "epoch": 39.055755395683455, + "step": 43430 + }, + { + "epoch": 39.055755395683455, + "step": 43430, + "torque_loss": 0.08266764134168625 + }, + { + "epoch": 39.055755395683455, + "force_loss": 0.0013394304551184177, + "step": 43430 + }, + { + "epoch": 39.064748201438846, + "grad_norm": 0.34031981229782104, + "learning_rate": 1.94221943844263e-05, + "loss": 0.014, + "step": 43440 + }, + { + "action_loss": 0.0012294509215280414, + "epoch": 39.064748201438846, + "step": 43440 + }, + { + "epoch": 39.064748201438846, + "step": 43440, + "torque_loss": 0.12060848623514175 + }, + { + "epoch": 39.064748201438846, + "force_loss": 0.00680111488327384, + "step": 43440 + }, + { + "epoch": 39.07374100719424, + "grad_norm": 0.25598829984664917, + "learning_rate": 1.9400395245282515e-05, + "loss": 0.0163, + "step": 43450 + }, + { + "action_loss": 0.0032631580252200365, + "epoch": 39.07374100719424, + "step": 43450 + }, + { + "epoch": 39.07374100719424, + "step": 43450, + "torque_loss": 0.14373677968978882 + }, + { + "epoch": 39.07374100719424, + "force_loss": 0.00581319211050868, + "step": 43450 + }, + { + "epoch": 39.08273381294964, + "grad_norm": 0.3662920892238617, + "learning_rate": 1.937860540149071e-05, + "loss": 0.0178, + "step": 43460 + }, + { + "action_loss": 0.0008543722215108573, + "epoch": 39.08273381294964, + "step": 43460 + }, + { + "epoch": 39.08273381294964, + "step": 43460, + "torque_loss": 0.1530778557062149 + }, + { + "epoch": 39.08273381294964, + "force_loss": 0.0006587388925254345, + "step": 43460 + }, + { + "epoch": 39.09172661870504, + "grad_norm": 0.13331736624240875, + "learning_rate": 1.9356824859670082e-05, + "loss": 0.0162, + "step": 43470 + }, + { + "action_loss": 0.001381900510750711, + "epoch": 39.09172661870504, + "step": 43470 + }, + { + "epoch": 39.09172661870504, + "step": 43470, + "torque_loss": 0.13231825828552246 + }, + { + "epoch": 39.09172661870504, + "force_loss": 0.005654952023178339, + "step": 43470 + }, + { + "epoch": 39.10071942446043, + "grad_norm": 0.09411843866109848, + "learning_rate": 1.9335053626436967e-05, + "loss": 0.0141, + "step": 43480 + }, + { + "action_loss": 0.0007644129800610244, + "epoch": 39.10071942446043, + "step": 43480 + }, + { + "epoch": 39.10071942446043, + "step": 43480, + "torque_loss": 0.10469192266464233 + }, + { + "epoch": 39.10071942446043, + "force_loss": 0.0006866563926450908, + "step": 43480 + }, + { + "epoch": 39.10971223021583, + "grad_norm": 0.20438140630722046, + "learning_rate": 1.9313291708404885e-05, + "loss": 0.0171, + "step": 43490 + }, + { + "action_loss": 0.0017541605047881603, + "epoch": 39.10971223021583, + "step": 43490 + }, + { + "epoch": 39.10971223021583, + "step": 43490, + "torque_loss": 0.19136440753936768 + }, + { + "epoch": 39.10971223021583, + "force_loss": 0.0028786181937903166, + "step": 43490 + }, + { + "epoch": 39.118705035971225, + "grad_norm": 0.06751357018947601, + "learning_rate": 1.9291539112184587e-05, + "loss": 0.0173, + "step": 43500 + }, + { + "action_loss": 0.019240109249949455, + "epoch": 39.118705035971225, + "step": 43500 + }, + { + "epoch": 39.118705035971225, + "step": 43500, + "torque_loss": 0.13838006556034088 + }, + { + "epoch": 39.118705035971225, + "force_loss": 0.035848468542099, + "step": 43500 + }, + { + "epoch": 39.12769784172662, + "grad_norm": 0.14523009955883026, + "learning_rate": 1.9269795844383854e-05, + "loss": 0.0192, + "step": 43510 + }, + { + "action_loss": 0.0014279716415330768, + "epoch": 39.12769784172662, + "step": 43510 + }, + { + "epoch": 39.12769784172662, + "step": 43510, + "torque_loss": 0.13327476382255554 + }, + { + "epoch": 39.12769784172662, + "force_loss": 0.004329290706664324, + "step": 43510 + }, + { + "epoch": 39.13669064748201, + "grad_norm": 0.2870192527770996, + "learning_rate": 1.9248061911607777e-05, + "loss": 0.0153, + "step": 43520 + }, + { + "action_loss": 0.000583467714022845, + "epoch": 39.13669064748201, + "step": 43520 + }, + { + "epoch": 39.13669064748201, + "step": 43520, + "torque_loss": 0.08029218763113022 + }, + { + "epoch": 39.13669064748201, + "force_loss": 0.0007255204836837947, + "step": 43520 + }, + { + "epoch": 39.14568345323741, + "grad_norm": 0.29372504353523254, + "learning_rate": 1.9226337320458538e-05, + "loss": 0.0161, + "step": 43530 + }, + { + "action_loss": 0.0008068454917520285, + "epoch": 39.14568345323741, + "step": 43530 + }, + { + "epoch": 39.14568345323741, + "step": 43530, + "torque_loss": 0.07926338911056519 + }, + { + "epoch": 39.14568345323741, + "force_loss": 0.000790182501077652, + "step": 43530 + }, + { + "epoch": 39.15467625899281, + "grad_norm": 0.06133270636200905, + "learning_rate": 1.9204622077535488e-05, + "loss": 0.0128, + "step": 43540 + }, + { + "action_loss": 0.0011543224100023508, + "epoch": 39.15467625899281, + "step": 43540 + }, + { + "epoch": 39.15467625899281, + "step": 43540, + "torque_loss": 0.15765444934368134 + }, + { + "epoch": 39.15467625899281, + "force_loss": 0.0037975467275828123, + "step": 43540 + }, + { + "epoch": 39.1636690647482, + "grad_norm": 0.2015220820903778, + "learning_rate": 1.9182916189435147e-05, + "loss": 0.017, + "step": 43550 + }, + { + "action_loss": 0.00102571165189147, + "epoch": 39.1636690647482, + "step": 43550 + }, + { + "epoch": 39.1636690647482, + "step": 43550, + "torque_loss": 0.10125064104795456 + }, + { + "epoch": 39.1636690647482, + "force_loss": 0.0007454375736415386, + "step": 43550 + }, + { + "epoch": 39.172661870503596, + "grad_norm": 0.6480226516723633, + "learning_rate": 1.916121966275117e-05, + "loss": 0.0173, + "step": 43560 + }, + { + "action_loss": 0.0016835493734106421, + "epoch": 39.172661870503596, + "step": 43560 + }, + { + "epoch": 39.172661870503596, + "step": 43560, + "torque_loss": 0.12694333493709564 + }, + { + "epoch": 39.172661870503596, + "force_loss": 0.001822114922106266, + "step": 43560 + }, + { + "epoch": 39.181654676258994, + "grad_norm": 0.060405977070331573, + "learning_rate": 1.9139532504074443e-05, + "loss": 0.0144, + "step": 43570 + }, + { + "action_loss": 0.011442703194916248, + "epoch": 39.181654676258994, + "step": 43570 + }, + { + "epoch": 39.181654676258994, + "step": 43570, + "torque_loss": 0.10561565309762955 + }, + { + "epoch": 39.181654676258994, + "force_loss": 0.01838916353881359, + "step": 43570 + }, + { + "epoch": 39.19064748201439, + "grad_norm": 0.3246228098869324, + "learning_rate": 1.9117854719992885e-05, + "loss": 0.0156, + "step": 43580 + }, + { + "action_loss": 0.0021977215074002743, + "epoch": 39.19064748201439, + "step": 43580 + }, + { + "epoch": 39.19064748201439, + "step": 43580, + "torque_loss": 0.08217433840036392 + }, + { + "epoch": 39.19064748201439, + "force_loss": 0.0026522884145379066, + "step": 43580 + }, + { + "epoch": 39.19964028776978, + "grad_norm": 0.26682373881340027, + "learning_rate": 1.9096186317091687e-05, + "loss": 0.014, + "step": 43590 + }, + { + "action_loss": 0.0030627388041466475, + "epoch": 39.19964028776978, + "step": 43590 + }, + { + "epoch": 39.19964028776978, + "step": 43590, + "torque_loss": 0.11929595470428467 + }, + { + "epoch": 39.19964028776978, + "force_loss": 0.006410196889191866, + "step": 43590 + }, + { + "epoch": 39.20863309352518, + "grad_norm": 0.08988889306783676, + "learning_rate": 1.9074527301953116e-05, + "loss": 0.0164, + "step": 43600 + }, + { + "action_loss": 0.0009041757439263165, + "epoch": 39.20863309352518, + "step": 43600 + }, + { + "epoch": 39.20863309352518, + "step": 43600, + "torque_loss": 0.1162225529551506 + }, + { + "epoch": 39.20863309352518, + "force_loss": 0.0009028881904669106, + "step": 43600 + }, + { + "epoch": 39.21762589928058, + "grad_norm": 0.5964419841766357, + "learning_rate": 1.9052877681156607e-05, + "loss": 0.0165, + "step": 43610 + }, + { + "action_loss": 0.0005472433404065669, + "epoch": 39.21762589928058, + "step": 43610 + }, + { + "epoch": 39.21762589928058, + "step": 43610, + "torque_loss": 0.09671727567911148 + }, + { + "epoch": 39.21762589928058, + "force_loss": 0.0014344975352287292, + "step": 43610 + }, + { + "epoch": 39.226618705035975, + "grad_norm": 0.21963979303836823, + "learning_rate": 1.903123746127875e-05, + "loss": 0.021, + "step": 43620 + }, + { + "action_loss": 0.0010812049731612206, + "epoch": 39.226618705035975, + "step": 43620 + }, + { + "epoch": 39.226618705035975, + "step": 43620, + "torque_loss": 0.1577730029821396 + }, + { + "epoch": 39.226618705035975, + "force_loss": 0.0008934948709793389, + "step": 43620 + }, + { + "epoch": 39.235611510791365, + "grad_norm": 0.2075311690568924, + "learning_rate": 1.900960664889327e-05, + "loss": 0.0147, + "step": 43630 + }, + { + "action_loss": 0.0016371005913242698, + "epoch": 39.235611510791365, + "step": 43630 + }, + { + "epoch": 39.235611510791365, + "step": 43630, + "torque_loss": 0.09523005038499832 + }, + { + "epoch": 39.235611510791365, + "force_loss": 0.0019344780594110489, + "step": 43630 + }, + { + "epoch": 39.24460431654676, + "grad_norm": 0.06286769360303879, + "learning_rate": 1.8987985250571015e-05, + "loss": 0.0131, + "step": 43640 + }, + { + "action_loss": 0.0026696836575865746, + "epoch": 39.24460431654676, + "step": 43640 + }, + { + "epoch": 39.24460431654676, + "step": 43640, + "torque_loss": 0.09047457575798035 + }, + { + "epoch": 39.24460431654676, + "force_loss": 0.002093771705403924, + "step": 43640 + }, + { + "epoch": 39.25359712230216, + "grad_norm": 0.4066935181617737, + "learning_rate": 1.8966373272880054e-05, + "loss": 0.0155, + "step": 43650 + }, + { + "action_loss": 0.003066253149881959, + "epoch": 39.25359712230216, + "step": 43650 + }, + { + "epoch": 39.25359712230216, + "step": 43650, + "torque_loss": 0.13323789834976196 + }, + { + "epoch": 39.25359712230216, + "force_loss": 0.0063616386614739895, + "step": 43650 + }, + { + "epoch": 39.26258992805755, + "grad_norm": 0.4084271192550659, + "learning_rate": 1.8944770722385462e-05, + "loss": 0.0164, + "step": 43660 + }, + { + "action_loss": 0.001409812830388546, + "epoch": 39.26258992805755, + "step": 43660 + }, + { + "epoch": 39.26258992805755, + "step": 43660, + "torque_loss": 0.1265808790922165 + }, + { + "epoch": 39.26258992805755, + "force_loss": 0.0019094987073913217, + "step": 43660 + }, + { + "epoch": 39.27158273381295, + "grad_norm": 0.18222594261169434, + "learning_rate": 1.8923177605649576e-05, + "loss": 0.0145, + "step": 43670 + }, + { + "action_loss": 0.0012905653566122055, + "epoch": 39.27158273381295, + "step": 43670 + }, + { + "epoch": 39.27158273381295, + "step": 43670, + "torque_loss": 0.1176639273762703 + }, + { + "epoch": 39.27158273381295, + "force_loss": 0.0024132535327225924, + "step": 43670 + }, + { + "epoch": 39.280575539568346, + "grad_norm": 0.22960710525512695, + "learning_rate": 1.8901593929231802e-05, + "loss": 0.0157, + "step": 43680 + }, + { + "action_loss": 0.0017899327212944627, + "epoch": 39.280575539568346, + "step": 43680 + }, + { + "epoch": 39.280575539568346, + "step": 43680, + "torque_loss": 0.13487543165683746 + }, + { + "epoch": 39.280575539568346, + "force_loss": 0.004524525720626116, + "step": 43680 + }, + { + "epoch": 39.289568345323744, + "grad_norm": 0.11069312691688538, + "learning_rate": 1.8880019699688684e-05, + "loss": 0.016, + "step": 43690 + }, + { + "action_loss": 0.0025324011221528053, + "epoch": 39.289568345323744, + "step": 43690 + }, + { + "epoch": 39.289568345323744, + "step": 43690, + "torque_loss": 0.13795801997184753 + }, + { + "epoch": 39.289568345323744, + "force_loss": 0.0021935102995485067, + "step": 43690 + }, + { + "epoch": 39.298561151079134, + "grad_norm": 0.29894110560417175, + "learning_rate": 1.8858454923573904e-05, + "loss": 0.0179, + "step": 43700 + }, + { + "action_loss": 0.000977706047706306, + "epoch": 39.298561151079134, + "step": 43700 + }, + { + "epoch": 39.298561151079134, + "step": 43700, + "torque_loss": 0.10749268531799316 + }, + { + "epoch": 39.298561151079134, + "force_loss": 0.0025428796652704477, + "step": 43700 + }, + { + "epoch": 39.30755395683453, + "grad_norm": 0.1514466106891632, + "learning_rate": 1.8836899607438253e-05, + "loss": 0.0157, + "step": 43710 + }, + { + "action_loss": 0.003464254317805171, + "epoch": 39.30755395683453, + "step": 43710 + }, + { + "epoch": 39.30755395683453, + "step": 43710, + "torque_loss": 0.1474972516298294 + }, + { + "epoch": 39.30755395683453, + "force_loss": 0.007144351955503225, + "step": 43710 + }, + { + "epoch": 39.31654676258993, + "grad_norm": 0.13160359859466553, + "learning_rate": 1.8815353757829723e-05, + "loss": 0.0173, + "step": 43720 + }, + { + "action_loss": 0.002079211175441742, + "epoch": 39.31654676258993, + "step": 43720 + }, + { + "epoch": 39.31654676258993, + "step": 43720, + "torque_loss": 0.14407895505428314 + }, + { + "epoch": 39.31654676258993, + "force_loss": 0.006182938348501921, + "step": 43720 + }, + { + "epoch": 39.32553956834532, + "grad_norm": 0.2642349302768707, + "learning_rate": 1.879381738129331e-05, + "loss": 0.016, + "step": 43730 + }, + { + "action_loss": 0.0025451451074332, + "epoch": 39.32553956834532, + "step": 43730 + }, + { + "epoch": 39.32553956834532, + "step": 43730, + "torque_loss": 0.1272275298833847 + }, + { + "epoch": 39.32553956834532, + "force_loss": 0.01034451648592949, + "step": 43730 + }, + { + "epoch": 39.33453237410072, + "grad_norm": 0.2915138304233551, + "learning_rate": 1.8772290484371236e-05, + "loss": 0.0169, + "step": 43740 + }, + { + "action_loss": 0.001227371278218925, + "epoch": 39.33453237410072, + "step": 43740 + }, + { + "epoch": 39.33453237410072, + "step": 43740, + "torque_loss": 0.12451880425214767 + }, + { + "epoch": 39.33453237410072, + "force_loss": 0.0034549012780189514, + "step": 43740 + }, + { + "epoch": 39.343525179856115, + "grad_norm": 0.28453344106674194, + "learning_rate": 1.8750773073602795e-05, + "loss": 0.0155, + "step": 43750 + }, + { + "action_loss": 0.0029809344559907913, + "epoch": 39.343525179856115, + "step": 43750 + }, + { + "epoch": 39.343525179856115, + "step": 43750, + "torque_loss": 0.11854053288698196 + }, + { + "epoch": 39.343525179856115, + "force_loss": 0.0020232771057635546, + "step": 43750 + }, + { + "epoch": 39.35251798561151, + "grad_norm": 0.1431475281715393, + "learning_rate": 1.8729265155524405e-05, + "loss": 0.0169, + "step": 43760 + }, + { + "action_loss": 0.0012319684028625488, + "epoch": 39.35251798561151, + "step": 43760 + }, + { + "epoch": 39.35251798561151, + "step": 43760, + "torque_loss": 0.12439505010843277 + }, + { + "epoch": 39.35251798561151, + "force_loss": 0.0032792461570352316, + "step": 43760 + }, + { + "epoch": 39.361510791366904, + "grad_norm": 0.14642590284347534, + "learning_rate": 1.8707766736669607e-05, + "loss": 0.0154, + "step": 43770 + }, + { + "action_loss": 0.002598799765110016, + "epoch": 39.361510791366904, + "step": 43770 + }, + { + "epoch": 39.361510791366904, + "step": 43770, + "torque_loss": 0.12145161628723145 + }, + { + "epoch": 39.361510791366904, + "force_loss": 0.0018740269588306546, + "step": 43770 + }, + { + "epoch": 39.3705035971223, + "grad_norm": 0.08802445977926254, + "learning_rate": 1.8686277823569055e-05, + "loss": 0.0152, + "step": 43780 + }, + { + "action_loss": 0.0019907497335225344, + "epoch": 39.3705035971223, + "step": 43780 + }, + { + "epoch": 39.3705035971223, + "step": 43780, + "torque_loss": 0.14405936002731323 + }, + { + "epoch": 39.3705035971223, + "force_loss": 0.0013877536403015256, + "step": 43780 + }, + { + "epoch": 39.3794964028777, + "grad_norm": 0.12408871948719025, + "learning_rate": 1.8664798422750484e-05, + "loss": 0.0153, + "step": 43790 + }, + { + "action_loss": 0.02651992253959179, + "epoch": 39.3794964028777, + "step": 43790 + }, + { + "epoch": 39.3794964028777, + "step": 43790, + "torque_loss": 0.14238248765468597 + }, + { + "epoch": 39.3794964028777, + "force_loss": 0.02361910045146942, + "step": 43790 + }, + { + "epoch": 39.388489208633096, + "grad_norm": 0.2672511339187622, + "learning_rate": 1.8643328540738832e-05, + "loss": 0.0173, + "step": 43800 + }, + { + "action_loss": 0.0005486967856995761, + "epoch": 39.388489208633096, + "step": 43800 + }, + { + "epoch": 39.388489208633096, + "step": 43800, + "torque_loss": 0.10640814155340195 + }, + { + "epoch": 39.388489208633096, + "force_loss": 0.0005647577345371246, + "step": 43800 + }, + { + "epoch": 39.39748201438849, + "grad_norm": 0.3469555974006653, + "learning_rate": 1.862186818405601e-05, + "loss": 0.018, + "step": 43810 + }, + { + "action_loss": 0.0018105361377820373, + "epoch": 39.39748201438849, + "step": 43810 + }, + { + "epoch": 39.39748201438849, + "step": 43810, + "torque_loss": 0.11959423869848251 + }, + { + "epoch": 39.39748201438849, + "force_loss": 0.0017830407014116645, + "step": 43810 + }, + { + "epoch": 39.406474820143885, + "grad_norm": 0.09855153411626816, + "learning_rate": 1.8600417359221156e-05, + "loss": 0.0153, + "step": 43820 + }, + { + "action_loss": 0.0022541515063494444, + "epoch": 39.406474820143885, + "step": 43820 + }, + { + "epoch": 39.406474820143885, + "step": 43820, + "torque_loss": 0.0888887345790863 + }, + { + "epoch": 39.406474820143885, + "force_loss": 0.0025707988534122705, + "step": 43820 + }, + { + "epoch": 39.41546762589928, + "grad_norm": 0.11229920387268066, + "learning_rate": 1.8578976072750454e-05, + "loss": 0.0141, + "step": 43830 + }, + { + "action_loss": 0.0006761560216546059, + "epoch": 39.41546762589928, + "step": 43830 + }, + { + "epoch": 39.41546762589928, + "step": 43830, + "torque_loss": 0.10884235054254532 + }, + { + "epoch": 39.41546762589928, + "force_loss": 0.0007971436716616154, + "step": 43830 + }, + { + "epoch": 39.42446043165468, + "grad_norm": 0.13020965456962585, + "learning_rate": 1.8557544331157194e-05, + "loss": 0.0145, + "step": 43840 + }, + { + "action_loss": 0.0022146382834762335, + "epoch": 39.42446043165468, + "step": 43840 + }, + { + "epoch": 39.42446043165468, + "step": 43840, + "torque_loss": 0.10193448513746262 + }, + { + "epoch": 39.42446043165468, + "force_loss": 0.0014279544120654464, + "step": 43840 + }, + { + "epoch": 39.43345323741007, + "grad_norm": 0.33270177245140076, + "learning_rate": 1.8536122140951785e-05, + "loss": 0.0192, + "step": 43850 + }, + { + "action_loss": 0.009690295904874802, + "epoch": 39.43345323741007, + "step": 43850 + }, + { + "epoch": 39.43345323741007, + "step": 43850, + "torque_loss": 0.12405824661254883 + }, + { + "epoch": 39.43345323741007, + "force_loss": 0.012639988213777542, + "step": 43850 + }, + { + "epoch": 39.44244604316547, + "grad_norm": 0.243993878364563, + "learning_rate": 1.8514709508641688e-05, + "loss": 0.0186, + "step": 43860 + }, + { + "action_loss": 0.015644175931811333, + "epoch": 39.44244604316547, + "step": 43860 + }, + { + "epoch": 39.44244604316547, + "step": 43860, + "torque_loss": 0.16225554049015045 + }, + { + "epoch": 39.44244604316547, + "force_loss": 0.02187289111316204, + "step": 43860 + }, + { + "epoch": 39.451438848920866, + "grad_norm": 0.22017475962638855, + "learning_rate": 1.8493306440731555e-05, + "loss": 0.016, + "step": 43870 + }, + { + "action_loss": 0.00458850571885705, + "epoch": 39.451438848920866, + "step": 43870 + }, + { + "epoch": 39.451438848920866, + "step": 43870, + "torque_loss": 0.11322059482336044 + }, + { + "epoch": 39.451438848920866, + "force_loss": 0.0096434922888875, + "step": 43870 + }, + { + "epoch": 39.460431654676256, + "grad_norm": 0.4645187556743622, + "learning_rate": 1.8471912943723013e-05, + "loss": 0.0165, + "step": 43880 + }, + { + "action_loss": 0.0016340486472472548, + "epoch": 39.460431654676256, + "step": 43880 + }, + { + "epoch": 39.460431654676256, + "step": 43880, + "torque_loss": 0.1482481211423874 + }, + { + "epoch": 39.460431654676256, + "force_loss": 0.0021107306238263845, + "step": 43880 + }, + { + "epoch": 39.469424460431654, + "grad_norm": 0.17871703207492828, + "learning_rate": 1.8450529024114894e-05, + "loss": 0.0152, + "step": 43890 + }, + { + "action_loss": 0.009491377510130405, + "epoch": 39.469424460431654, + "step": 43890 + }, + { + "epoch": 39.469424460431654, + "step": 43890, + "torque_loss": 0.1356562227010727 + }, + { + "epoch": 39.469424460431654, + "force_loss": 0.01624637469649315, + "step": 43890 + }, + { + "epoch": 39.47841726618705, + "grad_norm": 0.12953442335128784, + "learning_rate": 1.842915468840301e-05, + "loss": 0.0153, + "step": 43900 + }, + { + "action_loss": 0.009950876235961914, + "epoch": 39.47841726618705, + "step": 43900 + }, + { + "epoch": 39.47841726618705, + "step": 43900, + "torque_loss": 0.14285694062709808 + }, + { + "epoch": 39.47841726618705, + "force_loss": 0.00954663660377264, + "step": 43900 + }, + { + "epoch": 39.48741007194245, + "grad_norm": 0.24169917404651642, + "learning_rate": 1.840778994308037e-05, + "loss": 0.0167, + "step": 43910 + }, + { + "action_loss": 0.0016071697464212775, + "epoch": 39.48741007194245, + "step": 43910 + }, + { + "epoch": 39.48741007194245, + "step": 43910, + "torque_loss": 0.1528947800397873 + }, + { + "epoch": 39.48741007194245, + "force_loss": 0.001843883073888719, + "step": 43910 + }, + { + "epoch": 39.49640287769784, + "grad_norm": 0.09055488556623459, + "learning_rate": 1.8386434794637004e-05, + "loss": 0.0132, + "step": 43920 + }, + { + "action_loss": 0.0011045786086469889, + "epoch": 39.49640287769784, + "step": 43920 + }, + { + "epoch": 39.49640287769784, + "step": 43920, + "torque_loss": 0.07543081790208817 + }, + { + "epoch": 39.49640287769784, + "force_loss": 0.002623551758006215, + "step": 43920 + }, + { + "epoch": 39.50539568345324, + "grad_norm": 0.17763882875442505, + "learning_rate": 1.8365089249560034e-05, + "loss": 0.0135, + "step": 43930 + }, + { + "action_loss": 0.0021788382437080145, + "epoch": 39.50539568345324, + "step": 43930 + }, + { + "epoch": 39.50539568345324, + "step": 43930, + "torque_loss": 0.15388917922973633 + }, + { + "epoch": 39.50539568345324, + "force_loss": 0.011935091577470303, + "step": 43930 + }, + { + "epoch": 39.514388489208635, + "grad_norm": 0.3831996023654938, + "learning_rate": 1.8343753314333683e-05, + "loss": 0.0153, + "step": 43940 + }, + { + "action_loss": 0.0022421476896852255, + "epoch": 39.514388489208635, + "step": 43940 + }, + { + "epoch": 39.514388489208635, + "step": 43940, + "torque_loss": 0.1466163545846939 + }, + { + "epoch": 39.514388489208635, + "force_loss": 0.004129001870751381, + "step": 43940 + }, + { + "epoch": 39.523381294964025, + "grad_norm": 0.42577558755874634, + "learning_rate": 1.8322426995439236e-05, + "loss": 0.0167, + "step": 43950 + }, + { + "action_loss": 0.005300629884004593, + "epoch": 39.523381294964025, + "step": 43950 + }, + { + "epoch": 39.523381294964025, + "step": 43950, + "torque_loss": 0.11630934476852417 + }, + { + "epoch": 39.523381294964025, + "force_loss": 0.010095425881445408, + "step": 43950 + }, + { + "epoch": 39.53237410071942, + "grad_norm": 0.14609003067016602, + "learning_rate": 1.8301110299355058e-05, + "loss": 0.0242, + "step": 43960 + }, + { + "action_loss": 0.001404804759658873, + "epoch": 39.53237410071942, + "step": 43960 + }, + { + "epoch": 39.53237410071942, + "step": 43960, + "torque_loss": 0.10890919715166092 + }, + { + "epoch": 39.53237410071942, + "force_loss": 0.0014651529490947723, + "step": 43960 + }, + { + "epoch": 39.54136690647482, + "grad_norm": 0.11471282690763474, + "learning_rate": 1.8279803232556625e-05, + "loss": 0.0164, + "step": 43970 + }, + { + "action_loss": 0.002044142922386527, + "epoch": 39.54136690647482, + "step": 43970 + }, + { + "epoch": 39.54136690647482, + "step": 43970, + "torque_loss": 0.18028700351715088 + }, + { + "epoch": 39.54136690647482, + "force_loss": 0.002922143554314971, + "step": 43970 + }, + { + "epoch": 39.55035971223022, + "grad_norm": 0.11105646193027496, + "learning_rate": 1.8258505801516444e-05, + "loss": 0.015, + "step": 43980 + }, + { + "action_loss": 0.0015438087284564972, + "epoch": 39.55035971223022, + "step": 43980 + }, + { + "epoch": 39.55035971223022, + "step": 43980, + "torque_loss": 0.13766025006771088 + }, + { + "epoch": 39.55035971223022, + "force_loss": 0.007871488109230995, + "step": 43980 + }, + { + "epoch": 39.55935251798561, + "grad_norm": 0.2366029918193817, + "learning_rate": 1.8237218012704117e-05, + "loss": 0.0162, + "step": 43990 + }, + { + "action_loss": 0.009585704654455185, + "epoch": 39.55935251798561, + "step": 43990 + }, + { + "epoch": 39.55935251798561, + "step": 43990, + "torque_loss": 0.09305298328399658 + }, + { + "epoch": 39.55935251798561, + "force_loss": 0.011574490927159786, + "step": 43990 + }, + { + "epoch": 39.568345323741006, + "grad_norm": 0.10388772934675217, + "learning_rate": 1.821593987258631e-05, + "loss": 0.0158, + "step": 44000 + }, + { + "action_loss": 0.0023453370667994022, + "epoch": 39.568345323741006, + "step": 44000 + }, + { + "epoch": 39.568345323741006, + "step": 44000, + "torque_loss": 0.09254888445138931 + }, + { + "epoch": 39.568345323741006, + "force_loss": 0.00380254746414721, + "step": 44000 + }, + { + "epoch": 39.577338129496404, + "grad_norm": 0.11751534789800644, + "learning_rate": 1.8194671387626744e-05, + "loss": 0.0158, + "step": 44010 + }, + { + "action_loss": 0.002771947532892227, + "epoch": 39.577338129496404, + "step": 44010 + }, + { + "epoch": 39.577338129496404, + "step": 44010, + "torque_loss": 0.10505024343729019 + }, + { + "epoch": 39.577338129496404, + "force_loss": 0.003331434680148959, + "step": 44010 + }, + { + "epoch": 39.5863309352518, + "grad_norm": 0.09511953592300415, + "learning_rate": 1.8173412564286276e-05, + "loss": 0.0167, + "step": 44020 + }, + { + "action_loss": 0.002071511233225465, + "epoch": 39.5863309352518, + "step": 44020 + }, + { + "epoch": 39.5863309352518, + "step": 44020, + "torque_loss": 0.12547074258327484 + }, + { + "epoch": 39.5863309352518, + "force_loss": 0.0028882029000669718, + "step": 44020 + }, + { + "epoch": 39.59532374100719, + "grad_norm": 0.25395458936691284, + "learning_rate": 1.8152163409022697e-05, + "loss": 0.0163, + "step": 44030 + }, + { + "action_loss": 0.0027981549501419067, + "epoch": 39.59532374100719, + "step": 44030 + }, + { + "epoch": 39.59532374100719, + "step": 44030, + "torque_loss": 0.11936943978071213 + }, + { + "epoch": 39.59532374100719, + "force_loss": 0.002433473477140069, + "step": 44030 + }, + { + "epoch": 39.60431654676259, + "grad_norm": 0.12484948337078094, + "learning_rate": 1.8130923928291023e-05, + "loss": 0.0152, + "step": 44040 + }, + { + "action_loss": 0.004602337721735239, + "epoch": 39.60431654676259, + "step": 44040 + }, + { + "epoch": 39.60431654676259, + "step": 44040, + "torque_loss": 0.1243242621421814 + }, + { + "epoch": 39.60431654676259, + "force_loss": 0.004839560482650995, + "step": 44040 + }, + { + "epoch": 39.61330935251799, + "grad_norm": 0.31259891390800476, + "learning_rate": 1.8109694128543163e-05, + "loss": 0.0165, + "step": 44050 + }, + { + "action_loss": 0.0018528815126046538, + "epoch": 39.61330935251799, + "step": 44050 + }, + { + "epoch": 39.61330935251799, + "step": 44050, + "torque_loss": 0.0797339603304863 + }, + { + "epoch": 39.61330935251799, + "force_loss": 0.0032678258139640093, + "step": 44050 + }, + { + "epoch": 39.62230215827338, + "grad_norm": 0.08590597659349442, + "learning_rate": 1.8088474016228237e-05, + "loss": 0.0146, + "step": 44060 + }, + { + "action_loss": 0.0007265713065862656, + "epoch": 39.62230215827338, + "step": 44060 + }, + { + "epoch": 39.62230215827338, + "step": 44060, + "torque_loss": 0.11183180660009384 + }, + { + "epoch": 39.62230215827338, + "force_loss": 0.0012429512571543455, + "step": 44060 + }, + { + "epoch": 39.631294964028775, + "grad_norm": 0.340399831533432, + "learning_rate": 1.8067263597792328e-05, + "loss": 0.0156, + "step": 44070 + }, + { + "action_loss": 0.0024141687899827957, + "epoch": 39.631294964028775, + "step": 44070 + }, + { + "epoch": 39.631294964028775, + "step": 44070, + "torque_loss": 0.14910492300987244 + }, + { + "epoch": 39.631294964028775, + "force_loss": 0.015826279297471046, + "step": 44070 + }, + { + "epoch": 39.64028776978417, + "grad_norm": 0.14503931999206543, + "learning_rate": 1.80460628796786e-05, + "loss": 0.0172, + "step": 44080 + }, + { + "action_loss": 0.0008308072574436665, + "epoch": 39.64028776978417, + "step": 44080 + }, + { + "epoch": 39.64028776978417, + "step": 44080, + "torque_loss": 0.12460996955633163 + }, + { + "epoch": 39.64028776978417, + "force_loss": 0.0015784470597282052, + "step": 44080 + }, + { + "epoch": 39.64928057553957, + "grad_norm": 0.2599188983440399, + "learning_rate": 1.8024871868327276e-05, + "loss": 0.015, + "step": 44090 + }, + { + "action_loss": 0.0031471364200115204, + "epoch": 39.64928057553957, + "step": 44090 + }, + { + "epoch": 39.64928057553957, + "step": 44090, + "torque_loss": 0.10941216349601746 + }, + { + "epoch": 39.64928057553957, + "force_loss": 0.005341757088899612, + "step": 44090 + }, + { + "epoch": 39.65827338129496, + "grad_norm": 0.20297805964946747, + "learning_rate": 1.8003690570175608e-05, + "loss": 0.0165, + "step": 44100 + }, + { + "action_loss": 0.0034830167423933744, + "epoch": 39.65827338129496, + "step": 44100 + }, + { + "epoch": 39.65827338129496, + "step": 44100, + "torque_loss": 0.15224498510360718 + }, + { + "epoch": 39.65827338129496, + "force_loss": 0.0027494810055941343, + "step": 44100 + }, + { + "epoch": 39.66726618705036, + "grad_norm": 0.10213612765073776, + "learning_rate": 1.7982518991657943e-05, + "loss": 0.0165, + "step": 44110 + }, + { + "action_loss": 0.0008230170351453125, + "epoch": 39.66726618705036, + "step": 44110 + }, + { + "epoch": 39.66726618705036, + "step": 44110, + "torque_loss": 0.1021357998251915 + }, + { + "epoch": 39.66726618705036, + "force_loss": 0.003565584309399128, + "step": 44110 + }, + { + "epoch": 39.67625899280576, + "grad_norm": 0.11593269556760788, + "learning_rate": 1.7961357139205643e-05, + "loss": 0.0123, + "step": 44120 + }, + { + "action_loss": 0.0008481573313474655, + "epoch": 39.67625899280576, + "step": 44120 + }, + { + "epoch": 39.67625899280576, + "step": 44120, + "torque_loss": 0.12810324132442474 + }, + { + "epoch": 39.67625899280576, + "force_loss": 0.0015052525559440255, + "step": 44120 + }, + { + "epoch": 39.685251798561154, + "grad_norm": 0.1353565901517868, + "learning_rate": 1.7940205019247108e-05, + "loss": 0.0135, + "step": 44130 + }, + { + "action_loss": 0.0014148285845294595, + "epoch": 39.685251798561154, + "step": 44130 + }, + { + "epoch": 39.685251798561154, + "step": 44130, + "torque_loss": 0.13040098547935486 + }, + { + "epoch": 39.685251798561154, + "force_loss": 0.0022107947152107954, + "step": 44130 + }, + { + "epoch": 39.694244604316545, + "grad_norm": 0.28801798820495605, + "learning_rate": 1.79190626382078e-05, + "loss": 0.0164, + "step": 44140 + }, + { + "action_loss": 0.000787557743024081, + "epoch": 39.694244604316545, + "step": 44140 + }, + { + "epoch": 39.694244604316545, + "step": 44140, + "torque_loss": 0.14245951175689697 + }, + { + "epoch": 39.694244604316545, + "force_loss": 0.0037407006602734327, + "step": 44140 + }, + { + "epoch": 39.70323741007194, + "grad_norm": 0.11380057781934738, + "learning_rate": 1.7897930002510215e-05, + "loss": 0.0174, + "step": 44150 + }, + { + "action_loss": 0.0007630392792634666, + "epoch": 39.70323741007194, + "step": 44150 + }, + { + "epoch": 39.70323741007194, + "step": 44150, + "torque_loss": 0.11410563439130783 + }, + { + "epoch": 39.70323741007194, + "force_loss": 0.0006533423438668251, + "step": 44150 + }, + { + "epoch": 39.71223021582734, + "grad_norm": 0.20559054613113403, + "learning_rate": 1.787680711857387e-05, + "loss": 0.0149, + "step": 44160 + }, + { + "action_loss": 0.00552561366930604, + "epoch": 39.71223021582734, + "step": 44160 + }, + { + "epoch": 39.71223021582734, + "step": 44160, + "torque_loss": 0.14884339272975922 + }, + { + "epoch": 39.71223021582734, + "force_loss": 0.008594215847551823, + "step": 44160 + }, + { + "epoch": 39.72122302158273, + "grad_norm": 0.12301896512508392, + "learning_rate": 1.7855693992815398e-05, + "loss": 0.0165, + "step": 44170 + }, + { + "action_loss": 0.001032329979352653, + "epoch": 39.72122302158273, + "step": 44170 + }, + { + "epoch": 39.72122302158273, + "step": 44170, + "torque_loss": 0.1236443892121315 + }, + { + "epoch": 39.72122302158273, + "force_loss": 0.0016958294436335564, + "step": 44170 + }, + { + "epoch": 39.73021582733813, + "grad_norm": 0.24747337400913239, + "learning_rate": 1.7834590631648328e-05, + "loss": 0.0154, + "step": 44180 + }, + { + "action_loss": 0.001045675715431571, + "epoch": 39.73021582733813, + "step": 44180 + }, + { + "epoch": 39.73021582733813, + "step": 44180, + "torque_loss": 0.11847928911447525 + }, + { + "epoch": 39.73021582733813, + "force_loss": 0.0025990523863583803, + "step": 44180 + }, + { + "epoch": 39.739208633093526, + "grad_norm": 0.19082608819007874, + "learning_rate": 1.7813497041483384e-05, + "loss": 0.017, + "step": 44190 + }, + { + "action_loss": 0.004814065992832184, + "epoch": 39.739208633093526, + "step": 44190 + }, + { + "epoch": 39.739208633093526, + "step": 44190, + "torque_loss": 0.13301880657672882 + }, + { + "epoch": 39.739208633093526, + "force_loss": 0.008390623144805431, + "step": 44190 + }, + { + "epoch": 39.74820143884892, + "grad_norm": 0.2500182092189789, + "learning_rate": 1.779241322872817e-05, + "loss": 0.0164, + "step": 44200 + }, + { + "action_loss": 0.005697909742593765, + "epoch": 39.74820143884892, + "step": 44200 + }, + { + "epoch": 39.74820143884892, + "step": 44200, + "torque_loss": 0.1487625241279602 + }, + { + "epoch": 39.74820143884892, + "force_loss": 0.013683046214282513, + "step": 44200 + }, + { + "epoch": 39.757194244604314, + "grad_norm": 0.18140637874603271, + "learning_rate": 1.777133919978744e-05, + "loss": 0.0155, + "step": 44210 + }, + { + "action_loss": 0.008365617133677006, + "epoch": 39.757194244604314, + "step": 44210 + }, + { + "epoch": 39.757194244604314, + "step": 44210, + "torque_loss": 0.19642280042171478 + }, + { + "epoch": 39.757194244604314, + "force_loss": 0.01971357874572277, + "step": 44210 + }, + { + "epoch": 39.76618705035971, + "grad_norm": 0.09545369446277618, + "learning_rate": 1.7750274961062912e-05, + "loss": 0.0201, + "step": 44220 + }, + { + "action_loss": 0.003569573163986206, + "epoch": 39.76618705035971, + "step": 44220 + }, + { + "epoch": 39.76618705035971, + "step": 44220, + "torque_loss": 0.11544469743967056 + }, + { + "epoch": 39.76618705035971, + "force_loss": 0.01004467811435461, + "step": 44220 + }, + { + "epoch": 39.77517985611511, + "grad_norm": 0.1329287886619568, + "learning_rate": 1.772922051895335e-05, + "loss": 0.0164, + "step": 44230 + }, + { + "action_loss": 0.002476147608831525, + "epoch": 39.77517985611511, + "step": 44230 + }, + { + "epoch": 39.77517985611511, + "step": 44230, + "torque_loss": 0.08876601606607437 + }, + { + "epoch": 39.77517985611511, + "force_loss": 0.008319896645843983, + "step": 44230 + }, + { + "epoch": 39.78417266187051, + "grad_norm": 0.11444862186908722, + "learning_rate": 1.770817587985453e-05, + "loss": 0.0153, + "step": 44240 + }, + { + "action_loss": 0.0015230766730383039, + "epoch": 39.78417266187051, + "step": 44240 + }, + { + "epoch": 39.78417266187051, + "step": 44240, + "torque_loss": 0.10689255595207214 + }, + { + "epoch": 39.78417266187051, + "force_loss": 0.0018003984587267041, + "step": 44240 + }, + { + "epoch": 39.7931654676259, + "grad_norm": 0.10953350365161896, + "learning_rate": 1.7687141050159246e-05, + "loss": 0.015, + "step": 44250 + }, + { + "action_loss": 0.009383227676153183, + "epoch": 39.7931654676259, + "step": 44250 + }, + { + "epoch": 39.7931654676259, + "step": 44250, + "torque_loss": 0.18183942139148712 + }, + { + "epoch": 39.7931654676259, + "force_loss": 0.00998720433562994, + "step": 44250 + }, + { + "epoch": 39.802158273381295, + "grad_norm": 0.1178608164191246, + "learning_rate": 1.7666116036257375e-05, + "loss": 0.0156, + "step": 44260 + }, + { + "action_loss": 0.0023383963853120804, + "epoch": 39.802158273381295, + "step": 44260 + }, + { + "epoch": 39.802158273381295, + "step": 44260, + "torque_loss": 0.14484000205993652 + }, + { + "epoch": 39.802158273381295, + "force_loss": 0.007460723165422678, + "step": 44260 + }, + { + "epoch": 39.81115107913669, + "grad_norm": 0.06822218000888824, + "learning_rate": 1.764510084453569e-05, + "loss": 0.0138, + "step": 44270 + }, + { + "action_loss": 0.006252707447856665, + "epoch": 39.81115107913669, + "step": 44270 + }, + { + "epoch": 39.81115107913669, + "step": 44270, + "torque_loss": 0.104990653693676 + }, + { + "epoch": 39.81115107913669, + "force_loss": 0.018709400668740273, + "step": 44270 + }, + { + "epoch": 39.82014388489208, + "grad_norm": 0.1215263083577156, + "learning_rate": 1.76240954813781e-05, + "loss": 0.0161, + "step": 44280 + }, + { + "action_loss": 0.0024274829775094986, + "epoch": 39.82014388489208, + "step": 44280 + }, + { + "epoch": 39.82014388489208, + "step": 44280, + "torque_loss": 0.15360279381275177 + }, + { + "epoch": 39.82014388489208, + "force_loss": 0.0024420542176812887, + "step": 44280 + }, + { + "epoch": 39.82913669064748, + "grad_norm": 0.15674108266830444, + "learning_rate": 1.7603099953165476e-05, + "loss": 0.0154, + "step": 44290 + }, + { + "action_loss": 0.0028388623613864183, + "epoch": 39.82913669064748, + "step": 44290 + }, + { + "epoch": 39.82913669064748, + "step": 44290, + "torque_loss": 0.1412268579006195 + }, + { + "epoch": 39.82913669064748, + "force_loss": 0.003971495199948549, + "step": 44290 + }, + { + "epoch": 39.83812949640288, + "grad_norm": 0.18095889687538147, + "learning_rate": 1.7582114266275683e-05, + "loss": 0.0192, + "step": 44300 + }, + { + "action_loss": 0.007399098481982946, + "epoch": 39.83812949640288, + "step": 44300 + }, + { + "epoch": 39.83812949640288, + "step": 44300, + "torque_loss": 0.1313752382993698 + }, + { + "epoch": 39.83812949640288, + "force_loss": 0.006780050694942474, + "step": 44300 + }, + { + "epoch": 39.847122302158276, + "grad_norm": 0.06766969710588455, + "learning_rate": 1.756113842708364e-05, + "loss": 0.015, + "step": 44310 + }, + { + "action_loss": 0.005207750480622053, + "epoch": 39.847122302158276, + "step": 44310 + }, + { + "epoch": 39.847122302158276, + "step": 44310, + "torque_loss": 0.09442100673913956 + }, + { + "epoch": 39.847122302158276, + "force_loss": 0.0039870780892670155, + "step": 44310 + }, + { + "epoch": 39.856115107913666, + "grad_norm": 0.11593057960271835, + "learning_rate": 1.7540172441961245e-05, + "loss": 0.017, + "step": 44320 + }, + { + "action_loss": 0.0027600687462836504, + "epoch": 39.856115107913666, + "step": 44320 + }, + { + "epoch": 39.856115107913666, + "step": 44320, + "torque_loss": 0.11569730192422867 + }, + { + "epoch": 39.856115107913666, + "force_loss": 0.0012520946329459548, + "step": 44320 + }, + { + "epoch": 39.865107913669064, + "grad_norm": 0.09822013974189758, + "learning_rate": 1.7519216317277387e-05, + "loss": 0.0132, + "step": 44330 + }, + { + "action_loss": 0.0033128943759948015, + "epoch": 39.865107913669064, + "step": 44330 + }, + { + "epoch": 39.865107913669064, + "step": 44330, + "torque_loss": 0.21037475764751434 + }, + { + "epoch": 39.865107913669064, + "force_loss": 0.002765208249911666, + "step": 44330 + }, + { + "epoch": 39.87410071942446, + "grad_norm": 0.10911911725997925, + "learning_rate": 1.7498270059398046e-05, + "loss": 0.0151, + "step": 44340 + }, + { + "action_loss": 0.001463107648305595, + "epoch": 39.87410071942446, + "step": 44340 + }, + { + "epoch": 39.87410071942446, + "step": 44340, + "torque_loss": 0.13337288796901703 + }, + { + "epoch": 39.87410071942446, + "force_loss": 0.0008917516097426414, + "step": 44340 + }, + { + "epoch": 39.88309352517986, + "grad_norm": 0.34264805912971497, + "learning_rate": 1.7477333674686062e-05, + "loss": 0.0161, + "step": 44350 + }, + { + "action_loss": 0.0015004229499027133, + "epoch": 39.88309352517986, + "step": 44350 + }, + { + "epoch": 39.88309352517986, + "step": 44350, + "torque_loss": 0.1908785104751587 + }, + { + "epoch": 39.88309352517986, + "force_loss": 0.0020156477112323046, + "step": 44350 + }, + { + "epoch": 39.89208633093525, + "grad_norm": 0.2536611557006836, + "learning_rate": 1.745640716950142e-05, + "loss": 0.0185, + "step": 44360 + }, + { + "action_loss": 0.011019092053174973, + "epoch": 39.89208633093525, + "step": 44360 + }, + { + "epoch": 39.89208633093525, + "step": 44360, + "torque_loss": 0.14018167555332184 + }, + { + "epoch": 39.89208633093525, + "force_loss": 0.014119830913841724, + "step": 44360 + }, + { + "epoch": 39.90107913669065, + "grad_norm": 0.4099871814250946, + "learning_rate": 1.7435490550201017e-05, + "loss": 0.0192, + "step": 44370 + }, + { + "action_loss": 0.0010389521485194564, + "epoch": 39.90107913669065, + "step": 44370 + }, + { + "epoch": 39.90107913669065, + "step": 44370, + "torque_loss": 0.09374342113733292 + }, + { + "epoch": 39.90107913669065, + "force_loss": 0.0029633017256855965, + "step": 44370 + }, + { + "epoch": 39.910071942446045, + "grad_norm": 0.11975488066673279, + "learning_rate": 1.7414583823138762e-05, + "loss": 0.014, + "step": 44380 + }, + { + "action_loss": 0.0014100942062214017, + "epoch": 39.910071942446045, + "step": 44380 + }, + { + "epoch": 39.910071942446045, + "step": 44380, + "torque_loss": 0.12192344665527344 + }, + { + "epoch": 39.910071942446045, + "force_loss": 0.0009730260935612023, + "step": 44380 + }, + { + "epoch": 39.919064748201436, + "grad_norm": 0.0840345025062561, + "learning_rate": 1.739368699466558e-05, + "loss": 0.0162, + "step": 44390 + }, + { + "action_loss": 0.0023275448475033045, + "epoch": 39.919064748201436, + "step": 44390 + }, + { + "epoch": 39.919064748201436, + "step": 44390, + "torque_loss": 0.12090662121772766 + }, + { + "epoch": 39.919064748201436, + "force_loss": 0.004105397965759039, + "step": 44390 + }, + { + "epoch": 39.92805755395683, + "grad_norm": 0.09263237565755844, + "learning_rate": 1.737280007112935e-05, + "loss": 0.0165, + "step": 44400 + }, + { + "action_loss": 0.0005101033020764589, + "epoch": 39.92805755395683, + "step": 44400 + }, + { + "epoch": 39.92805755395683, + "step": 44400, + "torque_loss": 0.0995657816529274 + }, + { + "epoch": 39.92805755395683, + "force_loss": 0.0005334276938810945, + "step": 44400 + }, + { + "epoch": 39.93705035971223, + "grad_norm": 0.11976148188114166, + "learning_rate": 1.735192305887502e-05, + "loss": 0.0139, + "step": 44410 + }, + { + "action_loss": 0.0017665340565145016, + "epoch": 39.93705035971223, + "step": 44410 + }, + { + "epoch": 39.93705035971223, + "step": 44410, + "torque_loss": 0.10761773586273193 + }, + { + "epoch": 39.93705035971223, + "force_loss": 0.0019337544217705727, + "step": 44410 + }, + { + "epoch": 39.94604316546763, + "grad_norm": 0.1317591369152069, + "learning_rate": 1.733105596424441e-05, + "loss": 0.0163, + "step": 44420 + }, + { + "action_loss": 0.003290182910859585, + "epoch": 39.94604316546763, + "step": 44420 + }, + { + "epoch": 39.94604316546763, + "step": 44420, + "torque_loss": 0.1056455448269844 + }, + { + "epoch": 39.94604316546763, + "force_loss": 0.007925410754978657, + "step": 44420 + }, + { + "epoch": 39.95503597122302, + "grad_norm": 0.09897452592849731, + "learning_rate": 1.7310198793576437e-05, + "loss": 0.0162, + "step": 44430 + }, + { + "action_loss": 0.0020759671460837126, + "epoch": 39.95503597122302, + "step": 44430 + }, + { + "epoch": 39.95503597122302, + "step": 44430, + "torque_loss": 0.1621129810810089 + }, + { + "epoch": 39.95503597122302, + "force_loss": 0.011458312161266804, + "step": 44430 + }, + { + "epoch": 39.96402877697842, + "grad_norm": 0.14866717159748077, + "learning_rate": 1.7289351553206952e-05, + "loss": 0.0173, + "step": 44440 + }, + { + "action_loss": 0.001105463714338839, + "epoch": 39.96402877697842, + "step": 44440 + }, + { + "epoch": 39.96402877697842, + "step": 44440, + "torque_loss": 0.15111349523067474 + }, + { + "epoch": 39.96402877697842, + "force_loss": 0.0043312846682965755, + "step": 44440 + }, + { + "epoch": 39.973021582733814, + "grad_norm": 0.3920046091079712, + "learning_rate": 1.7268514249468788e-05, + "loss": 0.0172, + "step": 44450 + }, + { + "action_loss": 0.0065963841043412685, + "epoch": 39.973021582733814, + "step": 44450 + }, + { + "epoch": 39.973021582733814, + "step": 44450, + "torque_loss": 0.11663947254419327 + }, + { + "epoch": 39.973021582733814, + "force_loss": 0.020411519333720207, + "step": 44450 + }, + { + "epoch": 39.98201438848921, + "grad_norm": 0.23445050418376923, + "learning_rate": 1.7247686888691765e-05, + "loss": 0.0155, + "step": 44460 + }, + { + "action_loss": 0.0011924013961106539, + "epoch": 39.98201438848921, + "step": 44460 + }, + { + "epoch": 39.98201438848921, + "step": 44460, + "torque_loss": 0.13264767825603485 + }, + { + "epoch": 39.98201438848921, + "force_loss": 0.0013270940398797393, + "step": 44460 + }, + { + "epoch": 39.9910071942446, + "grad_norm": 0.14190790057182312, + "learning_rate": 1.7226869477202694e-05, + "loss": 0.014, + "step": 44470 + }, + { + "action_loss": 0.004054693039506674, + "epoch": 39.9910071942446, + "step": 44470 + }, + { + "epoch": 39.9910071942446, + "step": 44470, + "torque_loss": 0.1341247707605362 + }, + { + "epoch": 39.9910071942446, + "force_loss": 0.003079628571867943, + "step": 44470 + }, + { + "epoch": 40.0, + "grad_norm": 0.2483171969652176, + "learning_rate": 1.7206062021325336e-05, + "loss": 0.0185, + "step": 44480 + }, + { + "action_loss": 0.0015627824468538165, + "epoch": 40.0, + "step": 44480 + }, + { + "epoch": 40.0, + "step": 44480, + "torque_loss": 0.11845695972442627 + }, + { + "epoch": 40.0, + "force_loss": 0.0026303771883249283, + "step": 44480 + }, + { + "epoch": 40.0089928057554, + "grad_norm": 0.1087113544344902, + "learning_rate": 1.7185264527380502e-05, + "loss": 0.0166, + "step": 44490 + }, + { + "action_loss": 0.0012507130159065127, + "epoch": 40.0089928057554, + "step": 44490 + }, + { + "epoch": 40.0089928057554, + "step": 44490, + "torque_loss": 0.08470701426267624 + }, + { + "epoch": 40.0089928057554, + "force_loss": 0.0010447535896673799, + "step": 44490 + }, + { + "epoch": 40.01798561151079, + "grad_norm": 0.27818405628204346, + "learning_rate": 1.716447700168584e-05, + "loss": 0.017, + "step": 44500 + }, + { + "action_loss": 0.0033613082487136126, + "epoch": 40.01798561151079, + "step": 44500 + }, + { + "epoch": 40.01798561151079, + "step": 44500, + "torque_loss": 0.09948086738586426 + }, + { + "epoch": 40.01798561151079, + "force_loss": 0.005918712820857763, + "step": 44500 + }, + { + "epoch": 40.026978417266186, + "grad_norm": 0.6830267906188965, + "learning_rate": 1.714369945055611e-05, + "loss": 0.0208, + "step": 44510 + }, + { + "action_loss": 0.0006649745628237724, + "epoch": 40.026978417266186, + "step": 44510 + }, + { + "epoch": 40.026978417266186, + "step": 44510, + "torque_loss": 0.10315630584955215 + }, + { + "epoch": 40.026978417266186, + "force_loss": 0.0006237350753508508, + "step": 44510 + }, + { + "epoch": 40.03597122302158, + "grad_norm": 0.07630914449691772, + "learning_rate": 1.7122931880302968e-05, + "loss": 0.0141, + "step": 44520 + }, + { + "action_loss": 0.001907266560010612, + "epoch": 40.03597122302158, + "step": 44520 + }, + { + "epoch": 40.03597122302158, + "step": 44520, + "torque_loss": 0.12051069736480713 + }, + { + "epoch": 40.03597122302158, + "force_loss": 0.0050184838473796844, + "step": 44520 + }, + { + "epoch": 40.04496402877698, + "grad_norm": 0.40941163897514343, + "learning_rate": 1.710217429723505e-05, + "loss": 0.0156, + "step": 44530 + }, + { + "action_loss": 0.0012308628065511584, + "epoch": 40.04496402877698, + "step": 44530 + }, + { + "epoch": 40.04496402877698, + "step": 44530, + "torque_loss": 0.14783541858196259 + }, + { + "epoch": 40.04496402877698, + "force_loss": 0.0012665995163843036, + "step": 44530 + }, + { + "epoch": 40.05395683453237, + "grad_norm": 0.11552932113409042, + "learning_rate": 1.7081426707657972e-05, + "loss": 0.0156, + "step": 44540 + }, + { + "action_loss": 0.003938527312129736, + "epoch": 40.05395683453237, + "step": 44540 + }, + { + "epoch": 40.05395683453237, + "step": 44540, + "torque_loss": 0.18767376244068146 + }, + { + "epoch": 40.05395683453237, + "force_loss": 0.011259282939136028, + "step": 44540 + }, + { + "epoch": 40.06294964028777, + "grad_norm": 0.16364285349845886, + "learning_rate": 1.7060689117874275e-05, + "loss": 0.017, + "step": 44550 + }, + { + "action_loss": 0.0019341300940141082, + "epoch": 40.06294964028777, + "step": 44550 + }, + { + "epoch": 40.06294964028777, + "step": 44550, + "torque_loss": 0.16263054311275482 + }, + { + "epoch": 40.06294964028777, + "force_loss": 0.0060359700582921505, + "step": 44550 + }, + { + "epoch": 40.07194244604317, + "grad_norm": 0.12794502079486847, + "learning_rate": 1.703996153418354e-05, + "loss": 0.0141, + "step": 44560 + }, + { + "action_loss": 0.0013586924178525805, + "epoch": 40.07194244604317, + "step": 44560 + }, + { + "epoch": 40.07194244604317, + "step": 44560, + "torque_loss": 0.10127987712621689 + }, + { + "epoch": 40.07194244604317, + "force_loss": 0.0018603671342134476, + "step": 44560 + }, + { + "epoch": 40.080935251798564, + "grad_norm": 0.2764982283115387, + "learning_rate": 1.7019243962882205e-05, + "loss": 0.0161, + "step": 44570 + }, + { + "action_loss": 0.00107524951454252, + "epoch": 40.080935251798564, + "step": 44570 + }, + { + "epoch": 40.080935251798564, + "step": 44570, + "torque_loss": 0.08953773230314255 + }, + { + "epoch": 40.080935251798564, + "force_loss": 0.003173167584463954, + "step": 44570 + }, + { + "epoch": 40.089928057553955, + "grad_norm": 0.08040502667427063, + "learning_rate": 1.6998536410263754e-05, + "loss": 0.0138, + "step": 44580 + }, + { + "action_loss": 0.0048240735195577145, + "epoch": 40.089928057553955, + "step": 44580 + }, + { + "epoch": 40.089928057553955, + "step": 44580, + "torque_loss": 0.14370672404766083 + }, + { + "epoch": 40.089928057553955, + "force_loss": 0.012612252496182919, + "step": 44580 + }, + { + "epoch": 40.09892086330935, + "grad_norm": 0.11724365502595901, + "learning_rate": 1.6977838882618596e-05, + "loss": 0.0143, + "step": 44590 + }, + { + "action_loss": 0.0014210754306986928, + "epoch": 40.09892086330935, + "step": 44590 + }, + { + "epoch": 40.09892086330935, + "step": 44590, + "torque_loss": 0.10057639330625534 + }, + { + "epoch": 40.09892086330935, + "force_loss": 0.0023515161592513323, + "step": 44590 + }, + { + "epoch": 40.10791366906475, + "grad_norm": 0.38110387325286865, + "learning_rate": 1.6957151386234088e-05, + "loss": 0.0143, + "step": 44600 + }, + { + "action_loss": 0.0024461140856146812, + "epoch": 40.10791366906475, + "step": 44600 + }, + { + "epoch": 40.10791366906475, + "step": 44600, + "torque_loss": 0.15897291898727417 + }, + { + "epoch": 40.10791366906475, + "force_loss": 0.007345343474298716, + "step": 44600 + }, + { + "epoch": 40.11690647482014, + "grad_norm": 0.0928143784403801, + "learning_rate": 1.6936473927394536e-05, + "loss": 0.0156, + "step": 44610 + }, + { + "action_loss": 0.003792443545535207, + "epoch": 40.11690647482014, + "step": 44610 + }, + { + "epoch": 40.11690647482014, + "step": 44610, + "torque_loss": 0.11347117274999619 + }, + { + "epoch": 40.11690647482014, + "force_loss": 0.0037435460835695267, + "step": 44610 + }, + { + "epoch": 40.12589928057554, + "grad_norm": 0.0897802785038948, + "learning_rate": 1.6915806512381222e-05, + "loss": 0.0157, + "step": 44620 + }, + { + "action_loss": 0.0036807898432016373, + "epoch": 40.12589928057554, + "step": 44620 + }, + { + "epoch": 40.12589928057554, + "step": 44620, + "torque_loss": 0.08641115576028824 + }, + { + "epoch": 40.12589928057554, + "force_loss": 0.0037523380015045404, + "step": 44620 + }, + { + "epoch": 40.134892086330936, + "grad_norm": 0.17474913597106934, + "learning_rate": 1.6895149147472344e-05, + "loss": 0.0166, + "step": 44630 + }, + { + "action_loss": 0.003175650490447879, + "epoch": 40.134892086330936, + "step": 44630 + }, + { + "epoch": 40.134892086330936, + "step": 44630, + "torque_loss": 0.09962426871061325 + }, + { + "epoch": 40.134892086330936, + "force_loss": 0.006837495137006044, + "step": 44630 + }, + { + "epoch": 40.143884892086334, + "grad_norm": 0.36691057682037354, + "learning_rate": 1.6874501838943073e-05, + "loss": 0.0174, + "step": 44640 + }, + { + "action_loss": 0.0024612240958958864, + "epoch": 40.143884892086334, + "step": 44640 + }, + { + "epoch": 40.143884892086334, + "step": 44640, + "torque_loss": 0.17060263454914093 + }, + { + "epoch": 40.143884892086334, + "force_loss": 0.011553235352039337, + "step": 44640 + }, + { + "epoch": 40.152877697841724, + "grad_norm": 0.4615657925605774, + "learning_rate": 1.6853864593065506e-05, + "loss": 0.016, + "step": 44650 + }, + { + "action_loss": 0.001044761505909264, + "epoch": 40.152877697841724, + "step": 44650 + }, + { + "epoch": 40.152877697841724, + "step": 44650, + "torque_loss": 0.11981693655252457 + }, + { + "epoch": 40.152877697841724, + "force_loss": 0.0020934399217367172, + "step": 44650 + }, + { + "epoch": 40.16187050359712, + "grad_norm": 0.2949974834918976, + "learning_rate": 1.683323741610871e-05, + "loss": 0.0157, + "step": 44660 + }, + { + "action_loss": 0.0012690444709733129, + "epoch": 40.16187050359712, + "step": 44660 + }, + { + "epoch": 40.16187050359712, + "step": 44660, + "torque_loss": 0.15798909962177277 + }, + { + "epoch": 40.16187050359712, + "force_loss": 0.0011633285321295261, + "step": 44660 + }, + { + "epoch": 40.17086330935252, + "grad_norm": 0.38125699758529663, + "learning_rate": 1.6812620314338674e-05, + "loss": 0.0166, + "step": 44670 + }, + { + "action_loss": 0.0026361290365457535, + "epoch": 40.17086330935252, + "step": 44670 + }, + { + "epoch": 40.17086330935252, + "step": 44670, + "torque_loss": 0.16972856223583221 + }, + { + "epoch": 40.17086330935252, + "force_loss": 0.002562169684097171, + "step": 44670 + }, + { + "epoch": 40.17985611510792, + "grad_norm": 0.1883958876132965, + "learning_rate": 1.6792013294018326e-05, + "loss": 0.0147, + "step": 44680 + }, + { + "action_loss": 0.004774322733283043, + "epoch": 40.17985611510792, + "step": 44680 + }, + { + "epoch": 40.17985611510792, + "step": 44680, + "torque_loss": 0.11046300083398819 + }, + { + "epoch": 40.17985611510792, + "force_loss": 0.002918919548392296, + "step": 44680 + }, + { + "epoch": 40.18884892086331, + "grad_norm": 0.13169023394584656, + "learning_rate": 1.6771416361407526e-05, + "loss": 0.0153, + "step": 44690 + }, + { + "action_loss": 0.000996866961941123, + "epoch": 40.18884892086331, + "step": 44690 + }, + { + "epoch": 40.18884892086331, + "step": 44690, + "torque_loss": 0.09149760007858276 + }, + { + "epoch": 40.18884892086331, + "force_loss": 0.0015735543565824628, + "step": 44690 + }, + { + "epoch": 40.197841726618705, + "grad_norm": 0.23048055171966553, + "learning_rate": 1.675082952276308e-05, + "loss": 0.0137, + "step": 44700 + }, + { + "action_loss": 0.0007311130757443607, + "epoch": 40.197841726618705, + "step": 44700 + }, + { + "epoch": 40.197841726618705, + "step": 44700, + "torque_loss": 0.09804457426071167 + }, + { + "epoch": 40.197841726618705, + "force_loss": 0.0007812807452864945, + "step": 44700 + }, + { + "epoch": 40.2068345323741, + "grad_norm": 0.22695928812026978, + "learning_rate": 1.6730252784338757e-05, + "loss": 0.0144, + "step": 44710 + }, + { + "action_loss": 0.0024429606273770332, + "epoch": 40.2068345323741, + "step": 44710 + }, + { + "epoch": 40.2068345323741, + "step": 44710, + "torque_loss": 0.13664865493774414 + }, + { + "epoch": 40.2068345323741, + "force_loss": 0.006511110812425613, + "step": 44710 + }, + { + "epoch": 40.21582733812949, + "grad_norm": 0.08444716781377792, + "learning_rate": 1.6709686152385166e-05, + "loss": 0.0148, + "step": 44720 + }, + { + "action_loss": 0.002623581560328603, + "epoch": 40.21582733812949, + "step": 44720 + }, + { + "epoch": 40.21582733812949, + "step": 44720, + "torque_loss": 0.12171286344528198 + }, + { + "epoch": 40.21582733812949, + "force_loss": 0.011893153190612793, + "step": 44720 + }, + { + "epoch": 40.22482014388489, + "grad_norm": 0.2250167280435562, + "learning_rate": 1.668912963314998e-05, + "loss": 0.0171, + "step": 44730 + }, + { + "action_loss": 0.008974972181022167, + "epoch": 40.22482014388489, + "step": 44730 + }, + { + "epoch": 40.22482014388489, + "step": 44730, + "torque_loss": 0.11451403051614761 + }, + { + "epoch": 40.22482014388489, + "force_loss": 0.020065506920218468, + "step": 44730 + }, + { + "epoch": 40.23381294964029, + "grad_norm": 0.1760580986738205, + "learning_rate": 1.6668583232877653e-05, + "loss": 0.0171, + "step": 44740 + }, + { + "action_loss": 0.0054635354317724705, + "epoch": 40.23381294964029, + "step": 44740 + }, + { + "epoch": 40.23381294964029, + "step": 44740, + "torque_loss": 0.09759384393692017 + }, + { + "epoch": 40.23381294964029, + "force_loss": 0.0041343956254422665, + "step": 44740 + }, + { + "epoch": 40.242805755395686, + "grad_norm": 0.12554466724395752, + "learning_rate": 1.6648046957809698e-05, + "loss": 0.0187, + "step": 44750 + }, + { + "action_loss": 0.008528563193976879, + "epoch": 40.242805755395686, + "step": 44750 + }, + { + "epoch": 40.242805755395686, + "step": 44750, + "torque_loss": 0.12795187532901764 + }, + { + "epoch": 40.242805755395686, + "force_loss": 0.013514988124370575, + "step": 44750 + }, + { + "epoch": 40.25179856115108, + "grad_norm": 0.12210524827241898, + "learning_rate": 1.6627520814184462e-05, + "loss": 0.0157, + "step": 44760 + }, + { + "action_loss": 0.006425800267606974, + "epoch": 40.25179856115108, + "step": 44760 + }, + { + "epoch": 40.25179856115108, + "step": 44760, + "torque_loss": 0.13641668856143951 + }, + { + "epoch": 40.25179856115108, + "force_loss": 0.01587783545255661, + "step": 44760 + }, + { + "epoch": 40.260791366906474, + "grad_norm": 0.13937558233737946, + "learning_rate": 1.660700480823726e-05, + "loss": 0.0162, + "step": 44770 + }, + { + "action_loss": 0.002961226971819997, + "epoch": 40.260791366906474, + "step": 44770 + }, + { + "epoch": 40.260791366906474, + "step": 44770, + "torque_loss": 0.1057615652680397 + }, + { + "epoch": 40.260791366906474, + "force_loss": 0.005273377988487482, + "step": 44770 + }, + { + "epoch": 40.26978417266187, + "grad_norm": 0.07900244742631912, + "learning_rate": 1.65864989462003e-05, + "loss": 0.0166, + "step": 44780 + }, + { + "action_loss": 0.001128497184254229, + "epoch": 40.26978417266187, + "step": 44780 + }, + { + "epoch": 40.26978417266187, + "step": 44780, + "torque_loss": 0.1287650316953659 + }, + { + "epoch": 40.26978417266187, + "force_loss": 0.0007052418659441173, + "step": 44780 + }, + { + "epoch": 40.27877697841727, + "grad_norm": 0.08304659277200699, + "learning_rate": 1.656600323430273e-05, + "loss": 0.019, + "step": 44790 + }, + { + "action_loss": 0.0005852977628819644, + "epoch": 40.27877697841727, + "step": 44790 + }, + { + "epoch": 40.27877697841727, + "step": 44790, + "torque_loss": 0.12477948516607285 + }, + { + "epoch": 40.27877697841727, + "force_loss": 0.0008666057256050408, + "step": 44790 + }, + { + "epoch": 40.28776978417266, + "grad_norm": 0.1904379278421402, + "learning_rate": 1.654551767877059e-05, + "loss": 0.0161, + "step": 44800 + }, + { + "action_loss": 0.000839108310174197, + "epoch": 40.28776978417266, + "step": 44800 + }, + { + "epoch": 40.28776978417266, + "step": 44800, + "torque_loss": 0.0952586904168129 + }, + { + "epoch": 40.28776978417266, + "force_loss": 0.0018974501872435212, + "step": 44800 + }, + { + "epoch": 40.29676258992806, + "grad_norm": 0.156271293759346, + "learning_rate": 1.6525042285826874e-05, + "loss": 0.0183, + "step": 44810 + }, + { + "action_loss": 0.002238394459709525, + "epoch": 40.29676258992806, + "step": 44810 + }, + { + "epoch": 40.29676258992806, + "step": 44810, + "torque_loss": 0.1386720985174179 + }, + { + "epoch": 40.29676258992806, + "force_loss": 0.0026462338864803314, + "step": 44810 + }, + { + "epoch": 40.305755395683455, + "grad_norm": 0.4390171766281128, + "learning_rate": 1.6504577061691468e-05, + "loss": 0.0183, + "step": 44820 + }, + { + "action_loss": 0.0010000247275456786, + "epoch": 40.305755395683455, + "step": 44820 + }, + { + "epoch": 40.305755395683455, + "step": 44820, + "torque_loss": 0.13093285262584686 + }, + { + "epoch": 40.305755395683455, + "force_loss": 0.0015082870377227664, + "step": 44820 + }, + { + "epoch": 40.314748201438846, + "grad_norm": 0.5617092847824097, + "learning_rate": 1.6484122012581143e-05, + "loss": 0.016, + "step": 44830 + }, + { + "action_loss": 0.0021910963114351034, + "epoch": 40.314748201438846, + "step": 44830 + }, + { + "epoch": 40.314748201438846, + "step": 44830, + "torque_loss": 0.1063302755355835 + }, + { + "epoch": 40.314748201438846, + "force_loss": 0.0035671256482601166, + "step": 44830 + }, + { + "epoch": 40.32374100719424, + "grad_norm": 0.22056697309017181, + "learning_rate": 1.6463677144709623e-05, + "loss": 0.0152, + "step": 44840 + }, + { + "action_loss": 0.00223253364674747, + "epoch": 40.32374100719424, + "step": 44840 + }, + { + "epoch": 40.32374100719424, + "step": 44840, + "torque_loss": 0.14081676304340363 + }, + { + "epoch": 40.32374100719424, + "force_loss": 0.004668839275836945, + "step": 44840 + }, + { + "epoch": 40.33273381294964, + "grad_norm": 0.07791223376989365, + "learning_rate": 1.6443242464287493e-05, + "loss": 0.0147, + "step": 44850 + }, + { + "action_loss": 0.0032717313151806593, + "epoch": 40.33273381294964, + "step": 44850 + }, + { + "epoch": 40.33273381294964, + "step": 44850, + "torque_loss": 0.11260297149419785 + }, + { + "epoch": 40.33273381294964, + "force_loss": 0.003491375595331192, + "step": 44850 + }, + { + "epoch": 40.34172661870504, + "grad_norm": 0.135382741689682, + "learning_rate": 1.642281797752232e-05, + "loss": 0.0159, + "step": 44860 + }, + { + "action_loss": 0.0015873191878199577, + "epoch": 40.34172661870504, + "step": 44860 + }, + { + "epoch": 40.34172661870504, + "step": 44860, + "torque_loss": 0.15371070802211761 + }, + { + "epoch": 40.34172661870504, + "force_loss": 0.0024278899654746056, + "step": 44860 + }, + { + "epoch": 40.35071942446043, + "grad_norm": 0.5832369923591614, + "learning_rate": 1.6402403690618456e-05, + "loss": 0.016, + "step": 44870 + }, + { + "action_loss": 0.001395586528815329, + "epoch": 40.35071942446043, + "step": 44870 + }, + { + "epoch": 40.35071942446043, + "step": 44870, + "torque_loss": 0.13407404720783234 + }, + { + "epoch": 40.35071942446043, + "force_loss": 0.002249868120998144, + "step": 44870 + }, + { + "epoch": 40.35971223021583, + "grad_norm": 0.09243930876255035, + "learning_rate": 1.6381999609777295e-05, + "loss": 0.0132, + "step": 44880 + }, + { + "action_loss": 0.0059046498499810696, + "epoch": 40.35971223021583, + "step": 44880 + }, + { + "epoch": 40.35971223021583, + "step": 44880, + "torque_loss": 0.11999446153640747 + }, + { + "epoch": 40.35971223021583, + "force_loss": 0.003569086315110326, + "step": 44880 + }, + { + "epoch": 40.368705035971225, + "grad_norm": 0.17036353051662445, + "learning_rate": 1.6361605741196983e-05, + "loss": 0.0251, + "step": 44890 + }, + { + "action_loss": 0.003418646054342389, + "epoch": 40.368705035971225, + "step": 44890 + }, + { + "epoch": 40.368705035971225, + "step": 44890, + "torque_loss": 0.14726051688194275 + }, + { + "epoch": 40.368705035971225, + "force_loss": 0.004301040433347225, + "step": 44890 + }, + { + "epoch": 40.37769784172662, + "grad_norm": 0.23966199159622192, + "learning_rate": 1.63412220910727e-05, + "loss": 0.0202, + "step": 44900 + }, + { + "action_loss": 0.009670862928032875, + "epoch": 40.37769784172662, + "step": 44900 + }, + { + "epoch": 40.37769784172662, + "step": 44900, + "torque_loss": 0.10886164754629135 + }, + { + "epoch": 40.37769784172662, + "force_loss": 0.0035433850716799498, + "step": 44900 + }, + { + "epoch": 40.38669064748201, + "grad_norm": 0.09231775999069214, + "learning_rate": 1.6320848665596433e-05, + "loss": 0.0178, + "step": 44910 + }, + { + "action_loss": 0.0011204906040802598, + "epoch": 40.38669064748201, + "step": 44910 + }, + { + "epoch": 40.38669064748201, + "step": 44910, + "torque_loss": 0.12653951346874237 + }, + { + "epoch": 40.38669064748201, + "force_loss": 0.002186434343457222, + "step": 44910 + }, + { + "epoch": 40.39568345323741, + "grad_norm": 0.21757644414901733, + "learning_rate": 1.6300485470957095e-05, + "loss": 0.0162, + "step": 44920 + }, + { + "action_loss": 0.002309808973222971, + "epoch": 40.39568345323741, + "step": 44920 + }, + { + "epoch": 40.39568345323741, + "step": 44920, + "torque_loss": 0.14443175494670868 + }, + { + "epoch": 40.39568345323741, + "force_loss": 0.004321340471506119, + "step": 44920 + }, + { + "epoch": 40.40467625899281, + "grad_norm": 0.1127144917845726, + "learning_rate": 1.6280132513340483e-05, + "loss": 0.0143, + "step": 44930 + }, + { + "action_loss": 0.0016665965085849166, + "epoch": 40.40467625899281, + "step": 44930 + }, + { + "epoch": 40.40467625899281, + "step": 44930, + "torque_loss": 0.12928690016269684 + }, + { + "epoch": 40.40467625899281, + "force_loss": 0.0019769768696278334, + "step": 44930 + }, + { + "epoch": 40.4136690647482, + "grad_norm": 0.11336503177881241, + "learning_rate": 1.62597897989293e-05, + "loss": 0.0149, + "step": 44940 + }, + { + "action_loss": 0.0010945176472887397, + "epoch": 40.4136690647482, + "step": 44940 + }, + { + "epoch": 40.4136690647482, + "step": 44940, + "torque_loss": 0.1361013948917389 + }, + { + "epoch": 40.4136690647482, + "force_loss": 0.0025673715863376856, + "step": 44940 + }, + { + "epoch": 40.422661870503596, + "grad_norm": 0.10752066224813461, + "learning_rate": 1.623945733390309e-05, + "loss": 0.0157, + "step": 44950 + }, + { + "action_loss": 0.002123811049386859, + "epoch": 40.422661870503596, + "step": 44950 + }, + { + "epoch": 40.422661870503596, + "step": 44950, + "torque_loss": 0.12423846125602722 + }, + { + "epoch": 40.422661870503596, + "force_loss": 0.0037513121496886015, + "step": 44950 + }, + { + "epoch": 40.431654676258994, + "grad_norm": 0.1287233978509903, + "learning_rate": 1.6219135124438374e-05, + "loss": 0.0141, + "step": 44960 + }, + { + "action_loss": 0.0007463112124241889, + "epoch": 40.431654676258994, + "step": 44960 + }, + { + "epoch": 40.431654676258994, + "step": 44960, + "torque_loss": 0.12744678556919098 + }, + { + "epoch": 40.431654676258994, + "force_loss": 0.0012125533539801836, + "step": 44960 + }, + { + "epoch": 40.44064748201439, + "grad_norm": 0.14318744838237762, + "learning_rate": 1.6198823176708465e-05, + "loss": 0.0151, + "step": 44970 + }, + { + "action_loss": 0.0009360754047520459, + "epoch": 40.44064748201439, + "step": 44970 + }, + { + "epoch": 40.44064748201439, + "step": 44970, + "torque_loss": 0.1614072471857071 + }, + { + "epoch": 40.44064748201439, + "force_loss": 0.0014803907833993435, + "step": 44970 + }, + { + "epoch": 40.44964028776978, + "grad_norm": 0.3382089138031006, + "learning_rate": 1.6178521496883613e-05, + "loss": 0.0196, + "step": 44980 + }, + { + "action_loss": 0.004354895558208227, + "epoch": 40.44964028776978, + "step": 44980 + }, + { + "epoch": 40.44964028776978, + "step": 44980, + "torque_loss": 0.12859804928302765 + }, + { + "epoch": 40.44964028776978, + "force_loss": 0.020465349778532982, + "step": 44980 + }, + { + "epoch": 40.45863309352518, + "grad_norm": 0.31221604347229004, + "learning_rate": 1.6158230091130926e-05, + "loss": 0.0143, + "step": 44990 + }, + { + "action_loss": 0.0008308025426231325, + "epoch": 40.45863309352518, + "step": 44990 + }, + { + "epoch": 40.45863309352518, + "step": 44990, + "torque_loss": 0.12496646493673325 + }, + { + "epoch": 40.45863309352518, + "force_loss": 0.001841373392380774, + "step": 44990 + }, + { + "epoch": 40.46762589928058, + "grad_norm": 0.14317816495895386, + "learning_rate": 1.613794896561438e-05, + "loss": 0.0152, + "step": 45000 + }, + { + "action_loss": 0.0012701962841674685, + "epoch": 40.46762589928058, + "step": 45000 + }, + { + "epoch": 40.46762589928058, + "step": 45000, + "torque_loss": 0.07607150822877884 + }, + { + "epoch": 40.46762589928058, + "force_loss": 0.0012807067250832915, + "step": 45000 + }, + { + "epoch": 40.476618705035975, + "grad_norm": 0.27331069111824036, + "learning_rate": 1.6117678126494894e-05, + "loss": 0.0148, + "step": 45010 + }, + { + "action_loss": 0.0020423864480108023, + "epoch": 40.476618705035975, + "step": 45010 + }, + { + "epoch": 40.476618705035975, + "step": 45010, + "torque_loss": 0.11822780221700668 + }, + { + "epoch": 40.476618705035975, + "force_loss": 0.005563536658883095, + "step": 45010 + }, + { + "epoch": 40.485611510791365, + "grad_norm": 0.20098598301410675, + "learning_rate": 1.6097417579930153e-05, + "loss": 0.0158, + "step": 45020 + }, + { + "action_loss": 0.0012331913458183408, + "epoch": 40.485611510791365, + "step": 45020 + }, + { + "epoch": 40.485611510791365, + "step": 45020, + "torque_loss": 0.11882391571998596 + }, + { + "epoch": 40.485611510791365, + "force_loss": 0.003346501151099801, + "step": 45020 + }, + { + "epoch": 40.49460431654676, + "grad_norm": 0.19315105676651, + "learning_rate": 1.6077167332074834e-05, + "loss": 0.0155, + "step": 45030 + }, + { + "action_loss": 0.0031557867769151926, + "epoch": 40.49460431654676, + "step": 45030 + }, + { + "epoch": 40.49460431654676, + "step": 45030, + "torque_loss": 0.09228048473596573 + }, + { + "epoch": 40.49460431654676, + "force_loss": 0.005141712725162506, + "step": 45030 + }, + { + "epoch": 40.50359712230216, + "grad_norm": 0.33281370997428894, + "learning_rate": 1.605692738908037e-05, + "loss": 0.0136, + "step": 45040 + }, + { + "action_loss": 0.0030528989154845476, + "epoch": 40.50359712230216, + "step": 45040 + }, + { + "epoch": 40.50359712230216, + "step": 45040, + "torque_loss": 0.17031632363796234 + }, + { + "epoch": 40.50359712230216, + "force_loss": 0.0069494531489908695, + "step": 45040 + }, + { + "epoch": 40.51258992805755, + "grad_norm": 0.27201807498931885, + "learning_rate": 1.6036697757095176e-05, + "loss": 0.0169, + "step": 45050 + }, + { + "action_loss": 0.0018646420212462544, + "epoch": 40.51258992805755, + "step": 45050 + }, + { + "epoch": 40.51258992805755, + "step": 45050, + "torque_loss": 0.09695889800786972 + }, + { + "epoch": 40.51258992805755, + "force_loss": 0.0022622295655310154, + "step": 45050 + }, + { + "epoch": 40.52158273381295, + "grad_norm": 0.15830875933170319, + "learning_rate": 1.6016478442264428e-05, + "loss": 0.0143, + "step": 45060 + }, + { + "action_loss": 0.0020414425525814295, + "epoch": 40.52158273381295, + "step": 45060 + }, + { + "epoch": 40.52158273381295, + "step": 45060, + "torque_loss": 0.10891855508089066 + }, + { + "epoch": 40.52158273381295, + "force_loss": 0.003519168123602867, + "step": 45060 + }, + { + "epoch": 40.530575539568346, + "grad_norm": 0.10168365389108658, + "learning_rate": 1.599626945073026e-05, + "loss": 0.0142, + "step": 45070 + }, + { + "action_loss": 0.0014845033874735236, + "epoch": 40.530575539568346, + "step": 45070 + }, + { + "epoch": 40.530575539568346, + "step": 45070, + "torque_loss": 0.11872855573892593 + }, + { + "epoch": 40.530575539568346, + "force_loss": 0.001696488237939775, + "step": 45070 + }, + { + "epoch": 40.539568345323744, + "grad_norm": 0.4593876302242279, + "learning_rate": 1.597607078863162e-05, + "loss": 0.0153, + "step": 45080 + }, + { + "action_loss": 0.001522569335065782, + "epoch": 40.539568345323744, + "step": 45080 + }, + { + "epoch": 40.539568345323744, + "step": 45080, + "torque_loss": 0.12280359119176865 + }, + { + "epoch": 40.539568345323744, + "force_loss": 0.005877872463315725, + "step": 45080 + }, + { + "epoch": 40.548561151079134, + "grad_norm": 0.08953931927680969, + "learning_rate": 1.595588246210432e-05, + "loss": 0.0155, + "step": 45090 + }, + { + "action_loss": 0.001429850235581398, + "epoch": 40.548561151079134, + "step": 45090 + }, + { + "epoch": 40.548561151079134, + "step": 45090, + "torque_loss": 0.11547132581472397 + }, + { + "epoch": 40.548561151079134, + "force_loss": 0.002205177675932646, + "step": 45090 + }, + { + "epoch": 40.55755395683453, + "grad_norm": 0.1410577893257141, + "learning_rate": 1.5935704477281048e-05, + "loss": 0.0147, + "step": 45100 + }, + { + "action_loss": 0.0018070373916998506, + "epoch": 40.55755395683453, + "step": 45100 + }, + { + "epoch": 40.55755395683453, + "step": 45100, + "torque_loss": 0.11407071352005005 + }, + { + "epoch": 40.55755395683453, + "force_loss": 0.0007607995066791773, + "step": 45100 + }, + { + "epoch": 40.56654676258993, + "grad_norm": 0.33400487899780273, + "learning_rate": 1.5915536840291323e-05, + "loss": 0.0145, + "step": 45110 + }, + { + "action_loss": 0.0018557592993602157, + "epoch": 40.56654676258993, + "step": 45110 + }, + { + "epoch": 40.56654676258993, + "step": 45110, + "torque_loss": 0.1627974957227707 + }, + { + "epoch": 40.56654676258993, + "force_loss": 0.0016722064465284348, + "step": 45110 + }, + { + "epoch": 40.57553956834532, + "grad_norm": 0.18177655339241028, + "learning_rate": 1.5895379557261576e-05, + "loss": 0.0153, + "step": 45120 + }, + { + "action_loss": 0.0011204140027984977, + "epoch": 40.57553956834532, + "step": 45120 + }, + { + "epoch": 40.57553956834532, + "step": 45120, + "torque_loss": 0.07101935893297195 + }, + { + "epoch": 40.57553956834532, + "force_loss": 0.0013590813614428043, + "step": 45120 + }, + { + "epoch": 40.58453237410072, + "grad_norm": 0.5464057922363281, + "learning_rate": 1.5875232634315033e-05, + "loss": 0.0146, + "step": 45130 + }, + { + "action_loss": 0.0014324383810162544, + "epoch": 40.58453237410072, + "step": 45130 + }, + { + "epoch": 40.58453237410072, + "step": 45130, + "torque_loss": 0.13846252858638763 + }, + { + "epoch": 40.58453237410072, + "force_loss": 0.0020315914880484343, + "step": 45130 + }, + { + "epoch": 40.593525179856115, + "grad_norm": 0.3913037180900574, + "learning_rate": 1.5855096077571812e-05, + "loss": 0.0168, + "step": 45140 + }, + { + "action_loss": 0.0036267004907131195, + "epoch": 40.593525179856115, + "step": 45140 + }, + { + "epoch": 40.593525179856115, + "step": 45140, + "torque_loss": 0.10154348611831665 + }, + { + "epoch": 40.593525179856115, + "force_loss": 0.003939494024962187, + "step": 45140 + }, + { + "epoch": 40.60251798561151, + "grad_norm": 0.19053564965724945, + "learning_rate": 1.5834969893148855e-05, + "loss": 0.0141, + "step": 45150 + }, + { + "action_loss": 0.0006660239887423813, + "epoch": 40.60251798561151, + "step": 45150 + }, + { + "epoch": 40.60251798561151, + "step": 45150, + "torque_loss": 0.13420797884464264 + }, + { + "epoch": 40.60251798561151, + "force_loss": 0.0005756028695032, + "step": 45150 + }, + { + "epoch": 40.611510791366904, + "grad_norm": 0.10361992567777634, + "learning_rate": 1.581485408715997e-05, + "loss": 0.0148, + "step": 45160 + }, + { + "action_loss": 0.0024870671331882477, + "epoch": 40.611510791366904, + "step": 45160 + }, + { + "epoch": 40.611510791366904, + "step": 45160, + "torque_loss": 0.1262078732252121 + }, + { + "epoch": 40.611510791366904, + "force_loss": 0.005188516806811094, + "step": 45160 + }, + { + "epoch": 40.6205035971223, + "grad_norm": 0.10443980991840363, + "learning_rate": 1.5794748665715785e-05, + "loss": 0.0149, + "step": 45170 + }, + { + "action_loss": 0.0007142227259464562, + "epoch": 40.6205035971223, + "step": 45170 + }, + { + "epoch": 40.6205035971223, + "step": 45170, + "torque_loss": 0.08687439560890198 + }, + { + "epoch": 40.6205035971223, + "force_loss": 0.0015860926359891891, + "step": 45170 + }, + { + "epoch": 40.6294964028777, + "grad_norm": 0.21977931261062622, + "learning_rate": 1.5774653634923857e-05, + "loss": 0.0137, + "step": 45180 + }, + { + "action_loss": 0.0007581103127449751, + "epoch": 40.6294964028777, + "step": 45180 + }, + { + "epoch": 40.6294964028777, + "step": 45180, + "torque_loss": 0.08276765793561935 + }, + { + "epoch": 40.6294964028777, + "force_loss": 0.0022683802526444197, + "step": 45180 + }, + { + "epoch": 40.638489208633096, + "grad_norm": 0.12322276085615158, + "learning_rate": 1.575456900088845e-05, + "loss": 0.0141, + "step": 45190 + }, + { + "action_loss": 0.016932303085923195, + "epoch": 40.638489208633096, + "step": 45190 + }, + { + "epoch": 40.638489208633096, + "step": 45190, + "torque_loss": 0.13495950400829315 + }, + { + "epoch": 40.638489208633096, + "force_loss": 0.012971772812306881, + "step": 45190 + }, + { + "epoch": 40.64748201438849, + "grad_norm": 0.07109040766954422, + "learning_rate": 1.5734494769710816e-05, + "loss": 0.0169, + "step": 45200 + }, + { + "action_loss": 0.001673296676017344, + "epoch": 40.64748201438849, + "step": 45200 + }, + { + "epoch": 40.64748201438849, + "step": 45200, + "torque_loss": 0.11823960393667221 + }, + { + "epoch": 40.64748201438849, + "force_loss": 0.0017986007733270526, + "step": 45200 + }, + { + "epoch": 40.656474820143885, + "grad_norm": 0.37495097517967224, + "learning_rate": 1.5714430947488912e-05, + "loss": 0.0145, + "step": 45210 + }, + { + "action_loss": 0.0019356566481292248, + "epoch": 40.656474820143885, + "step": 45210 + }, + { + "epoch": 40.656474820143885, + "step": 45210, + "torque_loss": 0.1061038076877594 + }, + { + "epoch": 40.656474820143885, + "force_loss": 0.006631011608988047, + "step": 45210 + }, + { + "epoch": 40.66546762589928, + "grad_norm": 0.3506937026977539, + "learning_rate": 1.5694377540317645e-05, + "loss": 0.0176, + "step": 45220 + }, + { + "action_loss": 0.0008853801409713924, + "epoch": 40.66546762589928, + "step": 45220 + }, + { + "epoch": 40.66546762589928, + "step": 45220, + "torque_loss": 0.10339949280023575 + }, + { + "epoch": 40.66546762589928, + "force_loss": 0.005537648219615221, + "step": 45220 + }, + { + "epoch": 40.67446043165468, + "grad_norm": 0.28692197799682617, + "learning_rate": 1.5674334554288694e-05, + "loss": 0.0159, + "step": 45230 + }, + { + "action_loss": 0.0023041001986712217, + "epoch": 40.67446043165468, + "step": 45230 + }, + { + "epoch": 40.67446043165468, + "step": 45230, + "torque_loss": 0.08491676300764084 + }, + { + "epoch": 40.67446043165468, + "force_loss": 0.0036495423410087824, + "step": 45230 + }, + { + "epoch": 40.68345323741007, + "grad_norm": 0.11309140920639038, + "learning_rate": 1.5654301995490582e-05, + "loss": 0.0151, + "step": 45240 + }, + { + "action_loss": 0.003722668858245015, + "epoch": 40.68345323741007, + "step": 45240 + }, + { + "epoch": 40.68345323741007, + "step": 45240, + "torque_loss": 0.16350679099559784 + }, + { + "epoch": 40.68345323741007, + "force_loss": 0.006182520184665918, + "step": 45240 + }, + { + "epoch": 40.69244604316547, + "grad_norm": 0.26787275075912476, + "learning_rate": 1.5634279870008685e-05, + "loss": 0.0144, + "step": 45250 + }, + { + "action_loss": 0.001883464865386486, + "epoch": 40.69244604316547, + "step": 45250 + }, + { + "epoch": 40.69244604316547, + "step": 45250, + "torque_loss": 0.1572028249502182 + }, + { + "epoch": 40.69244604316547, + "force_loss": 0.0016495849704369903, + "step": 45250 + }, + { + "epoch": 40.701438848920866, + "grad_norm": 0.0759255588054657, + "learning_rate": 1.5614268183925174e-05, + "loss": 0.014, + "step": 45260 + }, + { + "action_loss": 0.005943686235696077, + "epoch": 40.701438848920866, + "step": 45260 + }, + { + "epoch": 40.701438848920866, + "step": 45260, + "torque_loss": 0.082745261490345 + }, + { + "epoch": 40.701438848920866, + "force_loss": 0.006494285073131323, + "step": 45260 + }, + { + "epoch": 40.710431654676256, + "grad_norm": 0.12950587272644043, + "learning_rate": 1.5594266943319097e-05, + "loss": 0.0168, + "step": 45270 + }, + { + "action_loss": 0.002122250385582447, + "epoch": 40.710431654676256, + "step": 45270 + }, + { + "epoch": 40.710431654676256, + "step": 45270, + "torque_loss": 0.17228136956691742 + }, + { + "epoch": 40.710431654676256, + "force_loss": 0.0015712982276454568, + "step": 45270 + }, + { + "epoch": 40.719424460431654, + "grad_norm": 0.07140425592660904, + "learning_rate": 1.5574276154266294e-05, + "loss": 0.0147, + "step": 45280 + }, + { + "action_loss": 0.001982799032703042, + "epoch": 40.719424460431654, + "step": 45280 + }, + { + "epoch": 40.719424460431654, + "step": 45280, + "torque_loss": 0.11044812202453613 + }, + { + "epoch": 40.719424460431654, + "force_loss": 0.0032283940818160772, + "step": 45280 + }, + { + "epoch": 40.72841726618705, + "grad_norm": 0.07352296262979507, + "learning_rate": 1.5554295822839437e-05, + "loss": 0.0175, + "step": 45290 + }, + { + "action_loss": 0.0007347942446358502, + "epoch": 40.72841726618705, + "step": 45290 + }, + { + "epoch": 40.72841726618705, + "step": 45290, + "torque_loss": 0.08273150026798248 + }, + { + "epoch": 40.72841726618705, + "force_loss": 0.00250991084612906, + "step": 45290 + }, + { + "epoch": 40.73741007194245, + "grad_norm": 0.2720288336277008, + "learning_rate": 1.5534325955108025e-05, + "loss": 0.0149, + "step": 45300 + }, + { + "action_loss": 0.0036175574641674757, + "epoch": 40.73741007194245, + "step": 45300 + }, + { + "epoch": 40.73741007194245, + "step": 45300, + "torque_loss": 0.147342249751091 + }, + { + "epoch": 40.73741007194245, + "force_loss": 0.0033506855834275484, + "step": 45300 + }, + { + "epoch": 40.74640287769784, + "grad_norm": 0.10177946835756302, + "learning_rate": 1.5514366557138373e-05, + "loss": 0.0158, + "step": 45310 + }, + { + "action_loss": 0.0006860622670501471, + "epoch": 40.74640287769784, + "step": 45310 + }, + { + "epoch": 40.74640287769784, + "step": 45310, + "torque_loss": 0.13801836967468262 + }, + { + "epoch": 40.74640287769784, + "force_loss": 0.0010156398639082909, + "step": 45310 + }, + { + "epoch": 40.75539568345324, + "grad_norm": 0.11018849164247513, + "learning_rate": 1.5494417634993602e-05, + "loss": 0.0148, + "step": 45320 + }, + { + "action_loss": 0.0032422132790088654, + "epoch": 40.75539568345324, + "step": 45320 + }, + { + "epoch": 40.75539568345324, + "step": 45320, + "torque_loss": 0.1261252611875534 + }, + { + "epoch": 40.75539568345324, + "force_loss": 0.002237763023003936, + "step": 45320 + }, + { + "epoch": 40.764388489208635, + "grad_norm": 0.6076178550720215, + "learning_rate": 1.547447919473372e-05, + "loss": 0.0187, + "step": 45330 + }, + { + "action_loss": 0.0018011104548349977, + "epoch": 40.764388489208635, + "step": 45330 + }, + { + "epoch": 40.764388489208635, + "step": 45330, + "torque_loss": 0.10535530000925064 + }, + { + "epoch": 40.764388489208635, + "force_loss": 0.00834402535110712, + "step": 45330 + }, + { + "epoch": 40.773381294964025, + "grad_norm": 0.189901664853096, + "learning_rate": 1.5454551242415434e-05, + "loss": 0.0139, + "step": 45340 + }, + { + "action_loss": 0.0012888670898973942, + "epoch": 40.773381294964025, + "step": 45340 + }, + { + "epoch": 40.773381294964025, + "step": 45340, + "torque_loss": 0.19431431591510773 + }, + { + "epoch": 40.773381294964025, + "force_loss": 0.0008001910173334181, + "step": 45340 + }, + { + "epoch": 40.78237410071942, + "grad_norm": 0.1786688268184662, + "learning_rate": 1.543463378409239e-05, + "loss": 0.0141, + "step": 45350 + }, + { + "action_loss": 0.001953274942934513, + "epoch": 40.78237410071942, + "step": 45350 + }, + { + "epoch": 40.78237410071942, + "step": 45350, + "torque_loss": 0.10562432557344437 + }, + { + "epoch": 40.78237410071942, + "force_loss": 0.003258664859458804, + "step": 45350 + }, + { + "epoch": 40.79136690647482, + "grad_norm": 0.1174292042851448, + "learning_rate": 1.541472682581493e-05, + "loss": 0.0203, + "step": 45360 + }, + { + "action_loss": 0.0025638267397880554, + "epoch": 40.79136690647482, + "step": 45360 + }, + { + "epoch": 40.79136690647482, + "step": 45360, + "torque_loss": 0.1209598183631897 + }, + { + "epoch": 40.79136690647482, + "force_loss": 0.0020155783277004957, + "step": 45360 + }, + { + "epoch": 40.80035971223022, + "grad_norm": 0.15606212615966797, + "learning_rate": 1.5394830373630298e-05, + "loss": 0.0163, + "step": 45370 + }, + { + "action_loss": 0.004919996950775385, + "epoch": 40.80035971223022, + "step": 45370 + }, + { + "epoch": 40.80035971223022, + "step": 45370, + "torque_loss": 0.11587062478065491 + }, + { + "epoch": 40.80035971223022, + "force_loss": 0.004136399365961552, + "step": 45370 + }, + { + "epoch": 40.80935251798561, + "grad_norm": 0.27999019622802734, + "learning_rate": 1.5374944433582506e-05, + "loss": 0.0169, + "step": 45380 + }, + { + "action_loss": 0.0020243690814822912, + "epoch": 40.80935251798561, + "step": 45380 + }, + { + "epoch": 40.80935251798561, + "step": 45380, + "torque_loss": 0.1055983304977417 + }, + { + "epoch": 40.80935251798561, + "force_loss": 0.004501225892454386, + "step": 45380 + }, + { + "epoch": 40.818345323741006, + "grad_norm": 0.14808295667171478, + "learning_rate": 1.5355069011712375e-05, + "loss": 0.015, + "step": 45390 + }, + { + "action_loss": 0.006646020337939262, + "epoch": 40.818345323741006, + "step": 45390 + }, + { + "epoch": 40.818345323741006, + "step": 45390, + "torque_loss": 0.10003986209630966 + }, + { + "epoch": 40.818345323741006, + "force_loss": 0.017654258757829666, + "step": 45390 + }, + { + "epoch": 40.827338129496404, + "grad_norm": 0.3043540120124817, + "learning_rate": 1.5335204114057526e-05, + "loss": 0.0211, + "step": 45400 + }, + { + "action_loss": 0.001958661014214158, + "epoch": 40.827338129496404, + "step": 45400 + }, + { + "epoch": 40.827338129496404, + "step": 45400, + "torque_loss": 0.10480699688196182 + }, + { + "epoch": 40.827338129496404, + "force_loss": 0.0018539298325777054, + "step": 45400 + }, + { + "epoch": 40.8363309352518, + "grad_norm": 0.13785338401794434, + "learning_rate": 1.5315349746652387e-05, + "loss": 0.0154, + "step": 45410 + }, + { + "action_loss": 0.00223384122364223, + "epoch": 40.8363309352518, + "step": 45410 + }, + { + "epoch": 40.8363309352518, + "step": 45410, + "torque_loss": 0.13858433067798615 + }, + { + "epoch": 40.8363309352518, + "force_loss": 0.004815421532839537, + "step": 45410 + }, + { + "epoch": 40.84532374100719, + "grad_norm": 0.10613209009170532, + "learning_rate": 1.5295505915528212e-05, + "loss": 0.0159, + "step": 45420 + }, + { + "action_loss": 0.0032362292986363173, + "epoch": 40.84532374100719, + "step": 45420 + }, + { + "epoch": 40.84532374100719, + "step": 45420, + "torque_loss": 0.08778562396764755 + }, + { + "epoch": 40.84532374100719, + "force_loss": 0.008067467249929905, + "step": 45420 + }, + { + "epoch": 40.85431654676259, + "grad_norm": 0.10648372024297714, + "learning_rate": 1.5275672626713024e-05, + "loss": 0.0156, + "step": 45430 + }, + { + "action_loss": 0.0023236980196088552, + "epoch": 40.85431654676259, + "step": 45430 + }, + { + "epoch": 40.85431654676259, + "step": 45430, + "torque_loss": 0.13655267655849457 + }, + { + "epoch": 40.85431654676259, + "force_loss": 0.003679906716570258, + "step": 45430 + }, + { + "epoch": 40.86330935251799, + "grad_norm": 0.11320368200540543, + "learning_rate": 1.5255849886231643e-05, + "loss": 0.0166, + "step": 45440 + }, + { + "action_loss": 0.0014845350524410605, + "epoch": 40.86330935251799, + "step": 45440 + }, + { + "epoch": 40.86330935251799, + "step": 45440, + "torque_loss": 0.14364412426948547 + }, + { + "epoch": 40.86330935251799, + "force_loss": 0.004346174653619528, + "step": 45440 + }, + { + "epoch": 40.87230215827338, + "grad_norm": 0.15395492315292358, + "learning_rate": 1.523603770010571e-05, + "loss": 0.0183, + "step": 45450 + }, + { + "action_loss": 0.0007283451850526035, + "epoch": 40.87230215827338, + "step": 45450 + }, + { + "epoch": 40.87230215827338, + "step": 45450, + "torque_loss": 0.12713085114955902 + }, + { + "epoch": 40.87230215827338, + "force_loss": 0.0021612446289509535, + "step": 45450 + }, + { + "epoch": 40.881294964028775, + "grad_norm": 0.22847002744674683, + "learning_rate": 1.521623607435363e-05, + "loss": 0.0157, + "step": 45460 + }, + { + "action_loss": 0.008981908671557903, + "epoch": 40.881294964028775, + "step": 45460 + }, + { + "epoch": 40.881294964028775, + "step": 45460, + "torque_loss": 0.16054268181324005 + }, + { + "epoch": 40.881294964028775, + "force_loss": 0.013512018136680126, + "step": 45460 + }, + { + "epoch": 40.89028776978417, + "grad_norm": 0.21227699518203735, + "learning_rate": 1.5196445014990612e-05, + "loss": 0.0195, + "step": 45470 + }, + { + "action_loss": 0.008794314227998257, + "epoch": 40.89028776978417, + "step": 45470 + }, + { + "epoch": 40.89028776978417, + "step": 45470, + "torque_loss": 0.103215791285038 + }, + { + "epoch": 40.89028776978417, + "force_loss": 0.011172245256602764, + "step": 45470 + }, + { + "epoch": 40.89928057553957, + "grad_norm": 0.07943612337112427, + "learning_rate": 1.5176664528028672e-05, + "loss": 0.0148, + "step": 45480 + }, + { + "action_loss": 0.001446147565729916, + "epoch": 40.89928057553957, + "step": 45480 + }, + { + "epoch": 40.89928057553957, + "step": 45480, + "torque_loss": 0.14647291600704193 + }, + { + "epoch": 40.89928057553957, + "force_loss": 0.0014994280645623803, + "step": 45480 + }, + { + "epoch": 40.90827338129496, + "grad_norm": 0.08619990199804306, + "learning_rate": 1.5156894619476574e-05, + "loss": 0.0159, + "step": 45490 + }, + { + "action_loss": 0.003168029710650444, + "epoch": 40.90827338129496, + "step": 45490 + }, + { + "epoch": 40.90827338129496, + "step": 45490, + "torque_loss": 0.15680629014968872 + }, + { + "epoch": 40.90827338129496, + "force_loss": 0.01152495015412569, + "step": 45490 + }, + { + "epoch": 40.91726618705036, + "grad_norm": 0.18403546512126923, + "learning_rate": 1.5137135295339938e-05, + "loss": 0.0172, + "step": 45500 + }, + { + "action_loss": 0.0038366764783859253, + "epoch": 40.91726618705036, + "step": 45500 + }, + { + "epoch": 40.91726618705036, + "step": 45500, + "torque_loss": 0.12737993896007538 + }, + { + "epoch": 40.91726618705036, + "force_loss": 0.003249027533456683, + "step": 45500 + }, + { + "epoch": 40.92625899280576, + "grad_norm": 0.13323071599006653, + "learning_rate": 1.5117386561621073e-05, + "loss": 0.0159, + "step": 45510 + }, + { + "action_loss": 0.002135268412530422, + "epoch": 40.92625899280576, + "step": 45510 + }, + { + "epoch": 40.92625899280576, + "step": 45510, + "torque_loss": 0.0728834941983223 + }, + { + "epoch": 40.92625899280576, + "force_loss": 0.003202298656105995, + "step": 45510 + }, + { + "epoch": 40.935251798561154, + "grad_norm": 0.2703399658203125, + "learning_rate": 1.5097648424319167e-05, + "loss": 0.0159, + "step": 45520 + }, + { + "action_loss": 0.004016268998384476, + "epoch": 40.935251798561154, + "step": 45520 + }, + { + "epoch": 40.935251798561154, + "step": 45520, + "torque_loss": 0.0884188786149025 + }, + { + "epoch": 40.935251798561154, + "force_loss": 0.012089270167052746, + "step": 45520 + }, + { + "epoch": 40.944244604316545, + "grad_norm": 0.05581743270158768, + "learning_rate": 1.5077920889430119e-05, + "loss": 0.0152, + "step": 45530 + }, + { + "action_loss": 0.0008043153211474419, + "epoch": 40.944244604316545, + "step": 45530 + }, + { + "epoch": 40.944244604316545, + "step": 45530, + "torque_loss": 0.1398676186800003 + }, + { + "epoch": 40.944244604316545, + "force_loss": 0.0019333800300955772, + "step": 45530 + }, + { + "epoch": 40.95323741007194, + "grad_norm": 0.11849424242973328, + "learning_rate": 1.5058203962946644e-05, + "loss": 0.0176, + "step": 45540 + }, + { + "action_loss": 0.0020483946427702904, + "epoch": 40.95323741007194, + "step": 45540 + }, + { + "epoch": 40.95323741007194, + "step": 45540, + "torque_loss": 0.13761721551418304 + }, + { + "epoch": 40.95323741007194, + "force_loss": 0.0018013231456279755, + "step": 45540 + }, + { + "epoch": 40.96223021582734, + "grad_norm": 0.16001176834106445, + "learning_rate": 1.503849765085822e-05, + "loss": 0.0163, + "step": 45550 + }, + { + "action_loss": 0.0009472144884057343, + "epoch": 40.96223021582734, + "step": 45550 + }, + { + "epoch": 40.96223021582734, + "step": 45550, + "torque_loss": 0.13335828483104706 + }, + { + "epoch": 40.96223021582734, + "force_loss": 0.0007911059074103832, + "step": 45550 + }, + { + "epoch": 40.97122302158273, + "grad_norm": 0.1334599256515503, + "learning_rate": 1.501880195915109e-05, + "loss": 0.0166, + "step": 45560 + }, + { + "action_loss": 0.012951535172760487, + "epoch": 40.97122302158273, + "step": 45560 + }, + { + "epoch": 40.97122302158273, + "step": 45560, + "torque_loss": 0.12569712102413177 + }, + { + "epoch": 40.97122302158273, + "force_loss": 0.01916349306702614, + "step": 45560 + }, + { + "epoch": 40.98021582733813, + "grad_norm": 0.22745516896247864, + "learning_rate": 1.499911689380833e-05, + "loss": 0.0212, + "step": 45570 + }, + { + "action_loss": 0.003298509633168578, + "epoch": 40.98021582733813, + "step": 45570 + }, + { + "epoch": 40.98021582733813, + "step": 45570, + "torque_loss": 0.12431459873914719 + }, + { + "epoch": 40.98021582733813, + "force_loss": 0.0026190143544226885, + "step": 45570 + }, + { + "epoch": 40.989208633093526, + "grad_norm": 0.13201363384723663, + "learning_rate": 1.4979442460809683e-05, + "loss": 0.0147, + "step": 45580 + }, + { + "action_loss": 0.0020275565329939127, + "epoch": 40.989208633093526, + "step": 45580 + }, + { + "epoch": 40.989208633093526, + "step": 45580, + "torque_loss": 0.10227114707231522 + }, + { + "epoch": 40.989208633093526, + "force_loss": 0.0012670475989580154, + "step": 45580 + }, + { + "epoch": 40.99820143884892, + "grad_norm": 0.07656856626272202, + "learning_rate": 1.4959778666131763e-05, + "loss": 0.0169, + "step": 45590 + }, + { + "action_loss": 0.004446713253855705, + "epoch": 40.99820143884892, + "step": 45590 + }, + { + "epoch": 40.99820143884892, + "step": 45590, + "torque_loss": 0.17414820194244385 + }, + { + "epoch": 40.99820143884892, + "force_loss": 0.0033637837041169405, + "step": 45590 + }, + { + "epoch": 41.007194244604314, + "grad_norm": 0.05595703423023224, + "learning_rate": 1.4940125515747905e-05, + "loss": 0.0143, + "step": 45600 + }, + { + "action_loss": 0.0009173061698675156, + "epoch": 41.007194244604314, + "step": 45600 + }, + { + "epoch": 41.007194244604314, + "step": 45600, + "torque_loss": 0.13735492527484894 + }, + { + "epoch": 41.007194244604314, + "force_loss": 0.0036803781986236572, + "step": 45600 + }, + { + "epoch": 41.01618705035971, + "grad_norm": 0.07746853679418564, + "learning_rate": 1.4920483015628211e-05, + "loss": 0.0195, + "step": 45610 + }, + { + "action_loss": 0.0012599130859598517, + "epoch": 41.01618705035971, + "step": 45610 + }, + { + "epoch": 41.01618705035971, + "step": 45610, + "torque_loss": 0.11418485641479492 + }, + { + "epoch": 41.01618705035971, + "force_loss": 0.002129557076841593, + "step": 45610 + }, + { + "epoch": 41.02517985611511, + "grad_norm": 0.138280987739563, + "learning_rate": 1.490085117173956e-05, + "loss": 0.0172, + "step": 45620 + }, + { + "action_loss": 0.0014922342961654067, + "epoch": 41.02517985611511, + "step": 45620 + }, + { + "epoch": 41.02517985611511, + "step": 45620, + "torque_loss": 0.12018273025751114 + }, + { + "epoch": 41.02517985611511, + "force_loss": 0.0010604195995256305, + "step": 45620 + }, + { + "epoch": 41.03417266187051, + "grad_norm": 0.15245187282562256, + "learning_rate": 1.488122999004558e-05, + "loss": 0.0158, + "step": 45630 + }, + { + "action_loss": 0.0030966622289270163, + "epoch": 41.03417266187051, + "step": 45630 + }, + { + "epoch": 41.03417266187051, + "step": 45630, + "torque_loss": 0.15085382759571075 + }, + { + "epoch": 41.03417266187051, + "force_loss": 0.0018634916050359607, + "step": 45630 + }, + { + "epoch": 41.0431654676259, + "grad_norm": 0.15233322978019714, + "learning_rate": 1.486161947650666e-05, + "loss": 0.017, + "step": 45640 + }, + { + "action_loss": 0.0010055876336991787, + "epoch": 41.0431654676259, + "step": 45640 + }, + { + "epoch": 41.0431654676259, + "step": 45640, + "torque_loss": 0.11032122373580933 + }, + { + "epoch": 41.0431654676259, + "force_loss": 0.0014110886259004474, + "step": 45640 + }, + { + "epoch": 41.052158273381295, + "grad_norm": 0.21772274374961853, + "learning_rate": 1.4842019637079995e-05, + "loss": 0.0145, + "step": 45650 + }, + { + "action_loss": 0.0009151121485047042, + "epoch": 41.052158273381295, + "step": 45650 + }, + { + "epoch": 41.052158273381295, + "step": 45650, + "torque_loss": 0.11252357810735703 + }, + { + "epoch": 41.052158273381295, + "force_loss": 0.0015598125755786896, + "step": 45650 + }, + { + "epoch": 41.06115107913669, + "grad_norm": 0.3965555727481842, + "learning_rate": 1.482243047771944e-05, + "loss": 0.0156, + "step": 45660 + }, + { + "action_loss": 0.0006716644275002182, + "epoch": 41.06115107913669, + "step": 45660 + }, + { + "epoch": 41.06115107913669, + "step": 45660, + "torque_loss": 0.12471433728933334 + }, + { + "epoch": 41.06115107913669, + "force_loss": 0.0005602568271569908, + "step": 45660 + }, + { + "epoch": 41.07014388489208, + "grad_norm": 0.119111567735672, + "learning_rate": 1.4802852004375712e-05, + "loss": 0.0161, + "step": 45670 + }, + { + "action_loss": 0.0006047275965102017, + "epoch": 41.07014388489208, + "step": 45670 + }, + { + "epoch": 41.07014388489208, + "step": 45670, + "torque_loss": 0.08508279919624329 + }, + { + "epoch": 41.07014388489208, + "force_loss": 0.002354872180148959, + "step": 45670 + }, + { + "epoch": 41.07913669064748, + "grad_norm": 0.07040691375732422, + "learning_rate": 1.4783284222996218e-05, + "loss": 0.0164, + "step": 45680 + }, + { + "action_loss": 0.0008050825563259423, + "epoch": 41.07913669064748, + "step": 45680 + }, + { + "epoch": 41.07913669064748, + "step": 45680, + "torque_loss": 0.10329881310462952 + }, + { + "epoch": 41.07913669064748, + "force_loss": 0.001120261731557548, + "step": 45680 + }, + { + "epoch": 41.08812949640288, + "grad_norm": 0.052260711789131165, + "learning_rate": 1.4763727139525135e-05, + "loss": 0.0139, + "step": 45690 + }, + { + "action_loss": 0.0048795039765536785, + "epoch": 41.08812949640288, + "step": 45690 + }, + { + "epoch": 41.08812949640288, + "step": 45690, + "torque_loss": 0.12040800601243973 + }, + { + "epoch": 41.08812949640288, + "force_loss": 0.01828545518219471, + "step": 45690 + }, + { + "epoch": 41.097122302158276, + "grad_norm": 0.0828484520316124, + "learning_rate": 1.4744180759903392e-05, + "loss": 0.0154, + "step": 45700 + }, + { + "action_loss": 0.0006536262226291001, + "epoch": 41.097122302158276, + "step": 45700 + }, + { + "epoch": 41.097122302158276, + "step": 45700, + "torque_loss": 0.10166070610284805 + }, + { + "epoch": 41.097122302158276, + "force_loss": 0.0007730152574367821, + "step": 45700 + }, + { + "epoch": 41.106115107913666, + "grad_norm": 0.49192941188812256, + "learning_rate": 1.4724645090068635e-05, + "loss": 0.0172, + "step": 45710 + }, + { + "action_loss": 0.0007073748856782913, + "epoch": 41.106115107913666, + "step": 45710 + }, + { + "epoch": 41.106115107913666, + "step": 45710, + "torque_loss": 0.12907542288303375 + }, + { + "epoch": 41.106115107913666, + "force_loss": 0.0007066354155540466, + "step": 45710 + }, + { + "epoch": 41.115107913669064, + "grad_norm": 0.0751081332564354, + "learning_rate": 1.4705120135955341e-05, + "loss": 0.0148, + "step": 45720 + }, + { + "action_loss": 0.0018327077850699425, + "epoch": 41.115107913669064, + "step": 45720 + }, + { + "epoch": 41.115107913669064, + "step": 45720, + "torque_loss": 0.1009114608168602 + }, + { + "epoch": 41.115107913669064, + "force_loss": 0.007172440644353628, + "step": 45720 + }, + { + "epoch": 41.12410071942446, + "grad_norm": 0.05827561765909195, + "learning_rate": 1.4685605903494614e-05, + "loss": 0.014, + "step": 45730 + }, + { + "action_loss": 0.004469592124223709, + "epoch": 41.12410071942446, + "step": 45730 + }, + { + "epoch": 41.12410071942446, + "step": 45730, + "torque_loss": 0.11639633029699326 + }, + { + "epoch": 41.12410071942446, + "force_loss": 0.004198806826025248, + "step": 45730 + }, + { + "epoch": 41.13309352517986, + "grad_norm": 0.4126000702381134, + "learning_rate": 1.46661023986144e-05, + "loss": 0.0142, + "step": 45740 + }, + { + "action_loss": 0.003094307379797101, + "epoch": 41.13309352517986, + "step": 45740 + }, + { + "epoch": 41.13309352517986, + "step": 45740, + "torque_loss": 0.13494877517223358 + }, + { + "epoch": 41.13309352517986, + "force_loss": 0.0034474774729460478, + "step": 45740 + }, + { + "epoch": 41.14208633093525, + "grad_norm": 0.07729417085647583, + "learning_rate": 1.4646609627239344e-05, + "loss": 0.0151, + "step": 45750 + }, + { + "action_loss": 0.005464589688926935, + "epoch": 41.14208633093525, + "step": 45750 + }, + { + "epoch": 41.14208633093525, + "step": 45750, + "torque_loss": 0.11652740091085434 + }, + { + "epoch": 41.14208633093525, + "force_loss": 0.0057957409881055355, + "step": 45750 + }, + { + "epoch": 41.15107913669065, + "grad_norm": 0.0866890475153923, + "learning_rate": 1.4627127595290835e-05, + "loss": 0.0144, + "step": 45760 + }, + { + "action_loss": 0.0029875747859477997, + "epoch": 41.15107913669065, + "step": 45760 + }, + { + "epoch": 41.15107913669065, + "step": 45760, + "torque_loss": 0.12687520682811737 + }, + { + "epoch": 41.15107913669065, + "force_loss": 0.008059307001531124, + "step": 45760 + }, + { + "epoch": 41.160071942446045, + "grad_norm": 0.2610211968421936, + "learning_rate": 1.460765630868699e-05, + "loss": 0.0151, + "step": 45770 + }, + { + "action_loss": 0.0010249275946989655, + "epoch": 41.160071942446045, + "step": 45770 + }, + { + "epoch": 41.160071942446045, + "step": 45770, + "torque_loss": 0.15003173053264618 + }, + { + "epoch": 41.160071942446045, + "force_loss": 0.0017228905344381928, + "step": 45770 + }, + { + "epoch": 41.169064748201436, + "grad_norm": 0.13967429101467133, + "learning_rate": 1.4588195773342678e-05, + "loss": 0.0196, + "step": 45780 + }, + { + "action_loss": 0.0013811002718284726, + "epoch": 41.169064748201436, + "step": 45780 + }, + { + "epoch": 41.169064748201436, + "step": 45780, + "torque_loss": 0.1370999664068222 + }, + { + "epoch": 41.169064748201436, + "force_loss": 0.003218189114704728, + "step": 45780 + }, + { + "epoch": 41.17805755395683, + "grad_norm": 0.33587485551834106, + "learning_rate": 1.4568745995169485e-05, + "loss": 0.0145, + "step": 45790 + }, + { + "action_loss": 0.0018223609076812863, + "epoch": 41.17805755395683, + "step": 45790 + }, + { + "epoch": 41.17805755395683, + "step": 45790, + "torque_loss": 0.08675705641508102 + }, + { + "epoch": 41.17805755395683, + "force_loss": 0.005780382547527552, + "step": 45790 + }, + { + "epoch": 41.18705035971223, + "grad_norm": 0.14802373945713043, + "learning_rate": 1.4549306980075778e-05, + "loss": 0.0144, + "step": 45800 + }, + { + "action_loss": 0.0024017065297812223, + "epoch": 41.18705035971223, + "step": 45800 + }, + { + "epoch": 41.18705035971223, + "step": 45800, + "torque_loss": 0.14377476274967194 + }, + { + "epoch": 41.18705035971223, + "force_loss": 0.004742899909615517, + "step": 45800 + }, + { + "epoch": 41.19604316546763, + "grad_norm": 0.11063234508037567, + "learning_rate": 1.4529878733966557e-05, + "loss": 0.0156, + "step": 45810 + }, + { + "action_loss": 0.002027997048571706, + "epoch": 41.19604316546763, + "step": 45810 + }, + { + "epoch": 41.19604316546763, + "step": 45810, + "torque_loss": 0.08225687593221664 + }, + { + "epoch": 41.19604316546763, + "force_loss": 0.0023245718330144882, + "step": 45810 + }, + { + "epoch": 41.20503597122302, + "grad_norm": 0.11345665156841278, + "learning_rate": 1.4510461262743658e-05, + "loss": 0.0182, + "step": 45820 + }, + { + "action_loss": 0.002794221742078662, + "epoch": 41.20503597122302, + "step": 45820 + }, + { + "epoch": 41.20503597122302, + "step": 45820, + "torque_loss": 0.11140748113393784 + }, + { + "epoch": 41.20503597122302, + "force_loss": 0.0028088155668228865, + "step": 45820 + }, + { + "epoch": 41.21402877697842, + "grad_norm": 0.5280176997184753, + "learning_rate": 1.4491054572305585e-05, + "loss": 0.0177, + "step": 45830 + }, + { + "action_loss": 0.002045967150479555, + "epoch": 41.21402877697842, + "step": 45830 + }, + { + "epoch": 41.21402877697842, + "step": 45830, + "torque_loss": 0.17886781692504883 + }, + { + "epoch": 41.21402877697842, + "force_loss": 0.008194251917302608, + "step": 45830 + }, + { + "epoch": 41.223021582733814, + "grad_norm": 0.28480222821235657, + "learning_rate": 1.4471658668547566e-05, + "loss": 0.016, + "step": 45840 + }, + { + "action_loss": 0.003977763932198286, + "epoch": 41.223021582733814, + "step": 45840 + }, + { + "epoch": 41.223021582733814, + "step": 45840, + "torque_loss": 0.11935213953256607 + }, + { + "epoch": 41.223021582733814, + "force_loss": 0.009474127553403378, + "step": 45840 + }, + { + "epoch": 41.23201438848921, + "grad_norm": 0.4027993977069855, + "learning_rate": 1.4452273557361579e-05, + "loss": 0.016, + "step": 45850 + }, + { + "action_loss": 0.0012138989986851811, + "epoch": 41.23201438848921, + "step": 45850 + }, + { + "epoch": 41.23201438848921, + "step": 45850, + "torque_loss": 0.13976942002773285 + }, + { + "epoch": 41.23201438848921, + "force_loss": 0.0014497647061944008, + "step": 45850 + }, + { + "epoch": 41.2410071942446, + "grad_norm": 0.37084975838661194, + "learning_rate": 1.4432899244636282e-05, + "loss": 0.0147, + "step": 45860 + }, + { + "action_loss": 0.0019848018418997526, + "epoch": 41.2410071942446, + "step": 45860 + }, + { + "epoch": 41.2410071942446, + "step": 45860, + "torque_loss": 0.10346949100494385 + }, + { + "epoch": 41.2410071942446, + "force_loss": 0.006480767857283354, + "step": 45860 + }, + { + "epoch": 41.25, + "grad_norm": 0.07505764812231064, + "learning_rate": 1.4413535736257134e-05, + "loss": 0.0141, + "step": 45870 + }, + { + "action_loss": 0.0006394576630555093, + "epoch": 41.25, + "step": 45870 + }, + { + "epoch": 41.25, + "step": 45870, + "torque_loss": 0.1338426172733307 + }, + { + "epoch": 41.25, + "force_loss": 0.0005767183029092848, + "step": 45870 + }, + { + "epoch": 41.2589928057554, + "grad_norm": 0.14940722286701202, + "learning_rate": 1.439418303810619e-05, + "loss": 0.0132, + "step": 45880 + }, + { + "action_loss": 0.0008269302197732031, + "epoch": 41.2589928057554, + "step": 45880 + }, + { + "epoch": 41.2589928057554, + "step": 45880, + "torque_loss": 0.09742355346679688 + }, + { + "epoch": 41.2589928057554, + "force_loss": 0.0026347816456109285, + "step": 45880 + }, + { + "epoch": 41.26798561151079, + "grad_norm": 0.35172533988952637, + "learning_rate": 1.4374841156062352e-05, + "loss": 0.0156, + "step": 45890 + }, + { + "action_loss": 0.0020411249715834856, + "epoch": 41.26798561151079, + "step": 45890 + }, + { + "epoch": 41.26798561151079, + "step": 45890, + "torque_loss": 0.12388905882835388 + }, + { + "epoch": 41.26798561151079, + "force_loss": 0.004184046760201454, + "step": 45890 + }, + { + "epoch": 41.276978417266186, + "grad_norm": 0.19972166419029236, + "learning_rate": 1.4355510096001112e-05, + "loss": 0.0141, + "step": 45900 + }, + { + "action_loss": 0.0008028179290704429, + "epoch": 41.276978417266186, + "step": 45900 + }, + { + "epoch": 41.276978417266186, + "step": 45900, + "torque_loss": 0.11227947473526001 + }, + { + "epoch": 41.276978417266186, + "force_loss": 0.000998382456600666, + "step": 45900 + }, + { + "epoch": 41.28597122302158, + "grad_norm": 0.13529066741466522, + "learning_rate": 1.4336189863794786e-05, + "loss": 0.0151, + "step": 45910 + }, + { + "action_loss": 0.005704980343580246, + "epoch": 41.28597122302158, + "step": 45910 + }, + { + "epoch": 41.28597122302158, + "step": 45910, + "torque_loss": 0.15707606077194214 + }, + { + "epoch": 41.28597122302158, + "force_loss": 0.010159828700125217, + "step": 45910 + }, + { + "epoch": 41.29496402877698, + "grad_norm": 0.06918837130069733, + "learning_rate": 1.4316880465312327e-05, + "loss": 0.0152, + "step": 45920 + }, + { + "action_loss": 0.0011797536863014102, + "epoch": 41.29496402877698, + "step": 45920 + }, + { + "epoch": 41.29496402877698, + "step": 45920, + "torque_loss": 0.12955911457538605 + }, + { + "epoch": 41.29496402877698, + "force_loss": 0.0008417136850766838, + "step": 45920 + }, + { + "epoch": 41.30395683453237, + "grad_norm": 0.19777178764343262, + "learning_rate": 1.4297581906419426e-05, + "loss": 0.0138, + "step": 45930 + }, + { + "action_loss": 0.001145684625953436, + "epoch": 41.30395683453237, + "step": 45930 + }, + { + "epoch": 41.30395683453237, + "step": 45930, + "torque_loss": 0.20803320407867432 + }, + { + "epoch": 41.30395683453237, + "force_loss": 0.004739269148558378, + "step": 45930 + }, + { + "epoch": 41.31294964028777, + "grad_norm": 0.08228175342082977, + "learning_rate": 1.4278294192978475e-05, + "loss": 0.0145, + "step": 45940 + }, + { + "action_loss": 0.00227418658323586, + "epoch": 41.31294964028777, + "step": 45940 + }, + { + "epoch": 41.31294964028777, + "step": 45940, + "torque_loss": 0.12369390577077866 + }, + { + "epoch": 41.31294964028777, + "force_loss": 0.00150086369831115, + "step": 45940 + }, + { + "epoch": 41.32194244604317, + "grad_norm": 0.43653446435928345, + "learning_rate": 1.4259017330848574e-05, + "loss": 0.019, + "step": 45950 + }, + { + "action_loss": 0.0018179056933149695, + "epoch": 41.32194244604317, + "step": 45950 + }, + { + "epoch": 41.32194244604317, + "step": 45950, + "torque_loss": 0.16293329000473022 + }, + { + "epoch": 41.32194244604317, + "force_loss": 0.004935581702739, + "step": 45950 + }, + { + "epoch": 41.330935251798564, + "grad_norm": 0.09548771381378174, + "learning_rate": 1.4239751325885498e-05, + "loss": 0.0163, + "step": 45960 + }, + { + "action_loss": 0.0013453805586323142, + "epoch": 41.330935251798564, + "step": 45960 + }, + { + "epoch": 41.330935251798564, + "step": 45960, + "torque_loss": 0.10989055037498474 + }, + { + "epoch": 41.330935251798564, + "force_loss": 0.001306669320911169, + "step": 45960 + }, + { + "epoch": 41.339928057553955, + "grad_norm": 0.07037942856550217, + "learning_rate": 1.4220496183941795e-05, + "loss": 0.0145, + "step": 45970 + }, + { + "action_loss": 0.006145510822534561, + "epoch": 41.339928057553955, + "step": 45970 + }, + { + "epoch": 41.339928057553955, + "step": 45970, + "torque_loss": 0.11592301726341248 + }, + { + "epoch": 41.339928057553955, + "force_loss": 0.01623745821416378, + "step": 45970 + }, + { + "epoch": 41.34892086330935, + "grad_norm": 0.16258494555950165, + "learning_rate": 1.4201251910866648e-05, + "loss": 0.0152, + "step": 45980 + }, + { + "action_loss": 0.001714748446829617, + "epoch": 41.34892086330935, + "step": 45980 + }, + { + "epoch": 41.34892086330935, + "step": 45980, + "torque_loss": 0.10614802688360214 + }, + { + "epoch": 41.34892086330935, + "force_loss": 0.007494939956814051, + "step": 45980 + }, + { + "epoch": 41.35791366906475, + "grad_norm": 0.13360010087490082, + "learning_rate": 1.4182018512505957e-05, + "loss": 0.0141, + "step": 45990 + }, + { + "action_loss": 0.0036005883011966944, + "epoch": 41.35791366906475, + "step": 45990 + }, + { + "epoch": 41.35791366906475, + "step": 45990, + "torque_loss": 0.13439002633094788 + }, + { + "epoch": 41.35791366906475, + "force_loss": 0.01338754128664732, + "step": 45990 + }, + { + "epoch": 41.36690647482014, + "grad_norm": 0.44523322582244873, + "learning_rate": 1.4162795994702327e-05, + "loss": 0.0158, + "step": 46000 + }, + { + "action_loss": 0.0009819652186706662, + "epoch": 41.36690647482014, + "step": 46000 + }, + { + "epoch": 41.36690647482014, + "step": 46000, + "torque_loss": 0.12439358234405518 + }, + { + "epoch": 41.36690647482014, + "force_loss": 0.0018455515382811427, + "step": 46000 + }, + { + "epoch": 41.37589928057554, + "grad_norm": 0.36756545305252075, + "learning_rate": 1.4143584363295032e-05, + "loss": 0.0166, + "step": 46010 + }, + { + "action_loss": 0.0021100991871207952, + "epoch": 41.37589928057554, + "step": 46010 + }, + { + "epoch": 41.37589928057554, + "step": 46010, + "torque_loss": 0.11846810579299927 + }, + { + "epoch": 41.37589928057554, + "force_loss": 0.004856557119637728, + "step": 46010 + }, + { + "epoch": 41.384892086330936, + "grad_norm": 0.27665412425994873, + "learning_rate": 1.4124383624120101e-05, + "loss": 0.0161, + "step": 46020 + }, + { + "action_loss": 0.0018661081558093429, + "epoch": 41.384892086330936, + "step": 46020 + }, + { + "epoch": 41.384892086330936, + "step": 46020, + "torque_loss": 0.10356643050909042 + }, + { + "epoch": 41.384892086330936, + "force_loss": 0.002635879209265113, + "step": 46020 + }, + { + "epoch": 41.393884892086334, + "grad_norm": 0.12330401688814163, + "learning_rate": 1.4105193783010151e-05, + "loss": 0.016, + "step": 46030 + }, + { + "action_loss": 0.0014474744675680995, + "epoch": 41.393884892086334, + "step": 46030 + }, + { + "epoch": 41.393884892086334, + "step": 46030, + "torque_loss": 0.12947173416614532 + }, + { + "epoch": 41.393884892086334, + "force_loss": 0.001322103664278984, + "step": 46030 + }, + { + "epoch": 41.402877697841724, + "grad_norm": 0.34685513377189636, + "learning_rate": 1.4086014845794621e-05, + "loss": 0.0156, + "step": 46040 + }, + { + "action_loss": 0.009591239504516125, + "epoch": 41.402877697841724, + "step": 46040 + }, + { + "epoch": 41.402877697841724, + "step": 46040, + "torque_loss": 0.11023377627134323 + }, + { + "epoch": 41.402877697841724, + "force_loss": 0.012572459876537323, + "step": 46040 + }, + { + "epoch": 41.41187050359712, + "grad_norm": 0.11846359819173813, + "learning_rate": 1.4066846818299489e-05, + "loss": 0.0166, + "step": 46050 + }, + { + "action_loss": 0.001323712058365345, + "epoch": 41.41187050359712, + "step": 46050 + }, + { + "epoch": 41.41187050359712, + "step": 46050, + "torque_loss": 0.14367739856243134 + }, + { + "epoch": 41.41187050359712, + "force_loss": 0.0006256178603507578, + "step": 46050 + }, + { + "epoch": 41.42086330935252, + "grad_norm": 0.13408155739307404, + "learning_rate": 1.4047689706347555e-05, + "loss": 0.0152, + "step": 46060 + }, + { + "action_loss": 0.0007833559648133814, + "epoch": 41.42086330935252, + "step": 46060 + }, + { + "epoch": 41.42086330935252, + "step": 46060, + "torque_loss": 0.10750800371170044 + }, + { + "epoch": 41.42086330935252, + "force_loss": 0.0025953929871320724, + "step": 46060 + }, + { + "epoch": 41.42985611510792, + "grad_norm": 0.1011774018406868, + "learning_rate": 1.402854351575822e-05, + "loss": 0.0159, + "step": 46070 + }, + { + "action_loss": 0.005844214931130409, + "epoch": 41.42985611510792, + "step": 46070 + }, + { + "epoch": 41.42985611510792, + "step": 46070, + "torque_loss": 0.11894115805625916 + }, + { + "epoch": 41.42985611510792, + "force_loss": 0.00927754770964384, + "step": 46070 + }, + { + "epoch": 41.43884892086331, + "grad_norm": 0.0896984338760376, + "learning_rate": 1.4009408252347588e-05, + "loss": 0.0168, + "step": 46080 + }, + { + "action_loss": 0.00584026426076889, + "epoch": 41.43884892086331, + "step": 46080 + }, + { + "epoch": 41.43884892086331, + "step": 46080, + "torque_loss": 0.1551179140806198 + }, + { + "epoch": 41.43884892086331, + "force_loss": 0.006083325948566198, + "step": 46080 + }, + { + "epoch": 41.447841726618705, + "grad_norm": 0.18654625117778778, + "learning_rate": 1.399028392192846e-05, + "loss": 0.0182, + "step": 46090 + }, + { + "action_loss": 0.0019407831132411957, + "epoch": 41.447841726618705, + "step": 46090 + }, + { + "epoch": 41.447841726618705, + "step": 46090, + "torque_loss": 0.11483168601989746 + }, + { + "epoch": 41.447841726618705, + "force_loss": 0.003445149166509509, + "step": 46090 + }, + { + "epoch": 41.4568345323741, + "grad_norm": 0.09483020752668381, + "learning_rate": 1.397117053031029e-05, + "loss": 0.0222, + "step": 46100 + }, + { + "action_loss": 0.0031765957828611135, + "epoch": 41.4568345323741, + "step": 46100 + }, + { + "epoch": 41.4568345323741, + "step": 46100, + "torque_loss": 0.12828777730464935 + }, + { + "epoch": 41.4568345323741, + "force_loss": 0.005128649529069662, + "step": 46100 + }, + { + "epoch": 41.46582733812949, + "grad_norm": 0.25609779357910156, + "learning_rate": 1.3952068083299213e-05, + "loss": 0.0145, + "step": 46110 + }, + { + "action_loss": 0.0025511858984827995, + "epoch": 41.46582733812949, + "step": 46110 + }, + { + "epoch": 41.46582733812949, + "step": 46110, + "torque_loss": 0.1489318460226059 + }, + { + "epoch": 41.46582733812949, + "force_loss": 0.004632181953638792, + "step": 46110 + }, + { + "epoch": 41.47482014388489, + "grad_norm": 0.23925311863422394, + "learning_rate": 1.3932976586698082e-05, + "loss": 0.0182, + "step": 46120 + }, + { + "action_loss": 0.0010949471034109592, + "epoch": 41.47482014388489, + "step": 46120 + }, + { + "epoch": 41.47482014388489, + "step": 46120, + "torque_loss": 0.10768363624811172 + }, + { + "epoch": 41.47482014388489, + "force_loss": 0.002231505000963807, + "step": 46120 + }, + { + "epoch": 41.48381294964029, + "grad_norm": 0.30661842226982117, + "learning_rate": 1.3913896046306363e-05, + "loss": 0.0163, + "step": 46130 + }, + { + "action_loss": 0.0018126320792362094, + "epoch": 41.48381294964029, + "step": 46130 + }, + { + "epoch": 41.48381294964029, + "step": 46130, + "torque_loss": 0.13091595470905304 + }, + { + "epoch": 41.48381294964029, + "force_loss": 0.0016024975338950753, + "step": 46130 + }, + { + "epoch": 41.492805755395686, + "grad_norm": 0.23943215608596802, + "learning_rate": 1.389482646792023e-05, + "loss": 0.0219, + "step": 46140 + }, + { + "action_loss": 0.00118375348392874, + "epoch": 41.492805755395686, + "step": 46140 + }, + { + "epoch": 41.492805755395686, + "step": 46140, + "torque_loss": 0.10795984417200089 + }, + { + "epoch": 41.492805755395686, + "force_loss": 0.0016770964721217752, + "step": 46140 + }, + { + "epoch": 41.50179856115108, + "grad_norm": 0.23393382132053375, + "learning_rate": 1.387576785733251e-05, + "loss": 0.0155, + "step": 46150 + }, + { + "action_loss": 0.018763892352581024, + "epoch": 41.50179856115108, + "step": 46150 + }, + { + "epoch": 41.50179856115108, + "step": 46150, + "torque_loss": 0.13682280480861664 + }, + { + "epoch": 41.50179856115108, + "force_loss": 0.020032361149787903, + "step": 46150 + }, + { + "epoch": 41.510791366906474, + "grad_norm": 0.4050309658050537, + "learning_rate": 1.3856720220332703e-05, + "loss": 0.0185, + "step": 46160 + }, + { + "action_loss": 0.0017664622282609344, + "epoch": 41.510791366906474, + "step": 46160 + }, + { + "epoch": 41.510791366906474, + "step": 46160, + "torque_loss": 0.1190435066819191 + }, + { + "epoch": 41.510791366906474, + "force_loss": 0.0017898627556860447, + "step": 46160 + }, + { + "epoch": 41.51978417266187, + "grad_norm": 0.14559349417686462, + "learning_rate": 1.383768356270701e-05, + "loss": 0.0143, + "step": 46170 + }, + { + "action_loss": 0.0022800681181252003, + "epoch": 41.51978417266187, + "step": 46170 + }, + { + "epoch": 41.51978417266187, + "step": 46170, + "torque_loss": 0.15389318764209747 + }, + { + "epoch": 41.51978417266187, + "force_loss": 0.011671776883304119, + "step": 46170 + }, + { + "epoch": 41.52877697841727, + "grad_norm": 0.2197759747505188, + "learning_rate": 1.3818657890238207e-05, + "loss": 0.0174, + "step": 46180 + }, + { + "action_loss": 0.001531957183033228, + "epoch": 41.52877697841727, + "step": 46180 + }, + { + "epoch": 41.52877697841727, + "step": 46180, + "torque_loss": 0.10026818513870239 + }, + { + "epoch": 41.52877697841727, + "force_loss": 0.0018418096005916595, + "step": 46180 + }, + { + "epoch": 41.53776978417266, + "grad_norm": 0.16019059717655182, + "learning_rate": 1.3799643208705859e-05, + "loss": 0.0149, + "step": 46190 + }, + { + "action_loss": 0.011442805640399456, + "epoch": 41.53776978417266, + "step": 46190 + }, + { + "epoch": 41.53776978417266, + "step": 46190, + "torque_loss": 0.1324142962694168 + }, + { + "epoch": 41.53776978417266, + "force_loss": 0.013543106615543365, + "step": 46190 + }, + { + "epoch": 41.54676258992806, + "grad_norm": 0.26096782088279724, + "learning_rate": 1.3780639523886058e-05, + "loss": 0.0198, + "step": 46200 + }, + { + "action_loss": 0.0016514029121026397, + "epoch": 41.54676258992806, + "step": 46200 + }, + { + "epoch": 41.54676258992806, + "step": 46200, + "torque_loss": 0.12119301408529282 + }, + { + "epoch": 41.54676258992806, + "force_loss": 0.0034138669725507498, + "step": 46200 + }, + { + "epoch": 41.555755395683455, + "grad_norm": 0.0929505005478859, + "learning_rate": 1.3761646841551668e-05, + "loss": 0.0151, + "step": 46210 + }, + { + "action_loss": 0.0011883891420438886, + "epoch": 41.555755395683455, + "step": 46210 + }, + { + "epoch": 41.555755395683455, + "step": 46210, + "torque_loss": 0.10892126709222794 + }, + { + "epoch": 41.555755395683455, + "force_loss": 0.0010458383476361632, + "step": 46210 + }, + { + "epoch": 41.564748201438846, + "grad_norm": 0.057342126965522766, + "learning_rate": 1.3742665167472146e-05, + "loss": 0.0147, + "step": 46220 + }, + { + "action_loss": 0.006611010525375605, + "epoch": 41.564748201438846, + "step": 46220 + }, + { + "epoch": 41.564748201438846, + "step": 46220, + "torque_loss": 0.1379125565290451 + }, + { + "epoch": 41.564748201438846, + "force_loss": 0.011980275623500347, + "step": 46220 + }, + { + "epoch": 41.57374100719424, + "grad_norm": 0.3273448944091797, + "learning_rate": 1.372369450741363e-05, + "loss": 0.0175, + "step": 46230 + }, + { + "action_loss": 0.0010159645462408662, + "epoch": 41.57374100719424, + "step": 46230 + }, + { + "epoch": 41.57374100719424, + "step": 46230, + "torque_loss": 0.1348085254430771 + }, + { + "epoch": 41.57374100719424, + "force_loss": 0.0008509322069585323, + "step": 46230 + }, + { + "epoch": 41.58273381294964, + "grad_norm": 0.0944715365767479, + "learning_rate": 1.3704734867138901e-05, + "loss": 0.0167, + "step": 46240 + }, + { + "action_loss": 0.001384379924274981, + "epoch": 41.58273381294964, + "step": 46240 + }, + { + "epoch": 41.58273381294964, + "step": 46240, + "torque_loss": 0.10782928019762039 + }, + { + "epoch": 41.58273381294964, + "force_loss": 0.0019347543129697442, + "step": 46240 + }, + { + "epoch": 41.59172661870504, + "grad_norm": 0.1085263267159462, + "learning_rate": 1.36857862524074e-05, + "loss": 0.0153, + "step": 46250 + }, + { + "action_loss": 0.001557510462589562, + "epoch": 41.59172661870504, + "step": 46250 + }, + { + "epoch": 41.59172661870504, + "step": 46250, + "torque_loss": 0.17917267978191376 + }, + { + "epoch": 41.59172661870504, + "force_loss": 0.0009153726859949529, + "step": 46250 + }, + { + "epoch": 41.60071942446043, + "grad_norm": 0.10347095131874084, + "learning_rate": 1.3666848668975213e-05, + "loss": 0.0153, + "step": 46260 + }, + { + "action_loss": 0.0010037199826911092, + "epoch": 41.60071942446043, + "step": 46260 + }, + { + "epoch": 41.60071942446043, + "step": 46260, + "torque_loss": 0.12637753784656525 + }, + { + "epoch": 41.60071942446043, + "force_loss": 0.002662701765075326, + "step": 46260 + }, + { + "epoch": 41.60971223021583, + "grad_norm": 0.1711738407611847, + "learning_rate": 1.3647922122595063e-05, + "loss": 0.0147, + "step": 46270 + }, + { + "action_loss": 0.0010016815504059196, + "epoch": 41.60971223021583, + "step": 46270 + }, + { + "epoch": 41.60971223021583, + "step": 46270, + "torque_loss": 0.16279369592666626 + }, + { + "epoch": 41.60971223021583, + "force_loss": 0.0016701758140698075, + "step": 46270 + }, + { + "epoch": 41.618705035971225, + "grad_norm": 0.16988788545131683, + "learning_rate": 1.3629006619016366e-05, + "loss": 0.0184, + "step": 46280 + }, + { + "action_loss": 0.0006814058870077133, + "epoch": 41.618705035971225, + "step": 46280 + }, + { + "epoch": 41.618705035971225, + "step": 46280, + "torque_loss": 0.13192714750766754 + }, + { + "epoch": 41.618705035971225, + "force_loss": 0.004833744373172522, + "step": 46280 + }, + { + "epoch": 41.62769784172662, + "grad_norm": 0.06417033821344376, + "learning_rate": 1.3610102163985139e-05, + "loss": 0.0201, + "step": 46290 + }, + { + "action_loss": 0.002310168696567416, + "epoch": 41.62769784172662, + "step": 46290 + }, + { + "epoch": 41.62769784172662, + "step": 46290, + "torque_loss": 0.14548902213573456 + }, + { + "epoch": 41.62769784172662, + "force_loss": 0.004910944029688835, + "step": 46290 + }, + { + "epoch": 41.63669064748201, + "grad_norm": 0.09510938078165054, + "learning_rate": 1.3591208763244057e-05, + "loss": 0.0164, + "step": 46300 + }, + { + "action_loss": 0.0007438471657224, + "epoch": 41.63669064748201, + "step": 46300 + }, + { + "epoch": 41.63669064748201, + "step": 46300, + "torque_loss": 0.1430986076593399 + }, + { + "epoch": 41.63669064748201, + "force_loss": 0.0009638678166083992, + "step": 46300 + }, + { + "epoch": 41.64568345323741, + "grad_norm": 0.0993509292602539, + "learning_rate": 1.3572326422532428e-05, + "loss": 0.0199, + "step": 46310 + }, + { + "action_loss": 0.00236869347281754, + "epoch": 41.64568345323741, + "step": 46310 + }, + { + "epoch": 41.64568345323741, + "step": 46310, + "torque_loss": 0.13813556730747223 + }, + { + "epoch": 41.64568345323741, + "force_loss": 0.0011360583594068885, + "step": 46310 + }, + { + "epoch": 41.65467625899281, + "grad_norm": 0.30295559763908386, + "learning_rate": 1.355345514758622e-05, + "loss": 0.0155, + "step": 46320 + }, + { + "action_loss": 0.010639210231602192, + "epoch": 41.65467625899281, + "step": 46320 + }, + { + "epoch": 41.65467625899281, + "step": 46320, + "torque_loss": 0.1164890006184578 + }, + { + "epoch": 41.65467625899281, + "force_loss": 0.01661677099764347, + "step": 46320 + }, + { + "epoch": 41.6636690647482, + "grad_norm": 0.2708222568035126, + "learning_rate": 1.3534594944138007e-05, + "loss": 0.0165, + "step": 46330 + }, + { + "action_loss": 0.0009878772543743253, + "epoch": 41.6636690647482, + "step": 46330 + }, + { + "epoch": 41.6636690647482, + "step": 46330, + "torque_loss": 0.12461438030004501 + }, + { + "epoch": 41.6636690647482, + "force_loss": 0.0009639711934141815, + "step": 46330 + }, + { + "epoch": 41.672661870503596, + "grad_norm": 0.4043882489204407, + "learning_rate": 1.3515745817917069e-05, + "loss": 0.0146, + "step": 46340 + }, + { + "action_loss": 0.002103827428072691, + "epoch": 41.672661870503596, + "step": 46340 + }, + { + "epoch": 41.672661870503596, + "step": 46340, + "torque_loss": 0.1126183271408081 + }, + { + "epoch": 41.672661870503596, + "force_loss": 0.0028829958755522966, + "step": 46340 + }, + { + "epoch": 41.681654676258994, + "grad_norm": 0.5196036100387573, + "learning_rate": 1.3496907774649208e-05, + "loss": 0.0151, + "step": 46350 + }, + { + "action_loss": 0.000996283837594092, + "epoch": 41.681654676258994, + "step": 46350 + }, + { + "epoch": 41.681654676258994, + "step": 46350, + "torque_loss": 0.12013643980026245 + }, + { + "epoch": 41.681654676258994, + "force_loss": 0.0012420298298820853, + "step": 46350 + }, + { + "epoch": 41.69064748201439, + "grad_norm": 0.2126244753599167, + "learning_rate": 1.3478080820056987e-05, + "loss": 0.0207, + "step": 46360 + }, + { + "action_loss": 0.0007505409885197878, + "epoch": 41.69064748201439, + "step": 46360 + }, + { + "epoch": 41.69064748201439, + "step": 46360, + "torque_loss": 0.16304346919059753 + }, + { + "epoch": 41.69064748201439, + "force_loss": 0.0014648130163550377, + "step": 46360 + }, + { + "epoch": 41.69964028776978, + "grad_norm": 0.11799972504377365, + "learning_rate": 1.3459264959859474e-05, + "loss": 0.0163, + "step": 46370 + }, + { + "action_loss": 0.0010451226262375712, + "epoch": 41.69964028776978, + "step": 46370 + }, + { + "epoch": 41.69964028776978, + "step": 46370, + "torque_loss": 0.19468139111995697 + }, + { + "epoch": 41.69964028776978, + "force_loss": 0.0022757528349757195, + "step": 46370 + }, + { + "epoch": 41.70863309352518, + "grad_norm": 0.32826197147369385, + "learning_rate": 1.3440460199772487e-05, + "loss": 0.0131, + "step": 46380 + }, + { + "action_loss": 0.0007544537074863911, + "epoch": 41.70863309352518, + "step": 46380 + }, + { + "epoch": 41.70863309352518, + "step": 46380, + "torque_loss": 0.21511048078536987 + }, + { + "epoch": 41.70863309352518, + "force_loss": 0.0018087811768054962, + "step": 46380 + }, + { + "epoch": 41.71762589928058, + "grad_norm": 0.3819451630115509, + "learning_rate": 1.3421666545508382e-05, + "loss": 0.0167, + "step": 46390 + }, + { + "action_loss": 0.0023083712439984083, + "epoch": 41.71762589928058, + "step": 46390 + }, + { + "epoch": 41.71762589928058, + "step": 46390, + "torque_loss": 0.15789858996868134 + }, + { + "epoch": 41.71762589928058, + "force_loss": 0.007165252696722746, + "step": 46390 + }, + { + "epoch": 41.726618705035975, + "grad_norm": 0.15164703130722046, + "learning_rate": 1.3402884002776194e-05, + "loss": 0.0149, + "step": 46400 + }, + { + "action_loss": 0.0008625375921837986, + "epoch": 41.726618705035975, + "step": 46400 + }, + { + "epoch": 41.726618705035975, + "step": 46400, + "torque_loss": 0.18257851898670197 + }, + { + "epoch": 41.726618705035975, + "force_loss": 0.0007447314565069973, + "step": 46400 + }, + { + "epoch": 41.735611510791365, + "grad_norm": 0.14613275229930878, + "learning_rate": 1.3384112577281555e-05, + "loss": 0.0195, + "step": 46410 + }, + { + "action_loss": 0.005302126053720713, + "epoch": 41.735611510791365, + "step": 46410 + }, + { + "epoch": 41.735611510791365, + "step": 46410, + "torque_loss": 0.10265422612428665 + }, + { + "epoch": 41.735611510791365, + "force_loss": 0.009346667677164078, + "step": 46410 + }, + { + "epoch": 41.74460431654676, + "grad_norm": 0.17907406389713287, + "learning_rate": 1.3365352274726711e-05, + "loss": 0.0172, + "step": 46420 + }, + { + "action_loss": 0.0017045149579644203, + "epoch": 41.74460431654676, + "step": 46420 + }, + { + "epoch": 41.74460431654676, + "step": 46420, + "torque_loss": 0.13871800899505615 + }, + { + "epoch": 41.74460431654676, + "force_loss": 0.0034153710585087538, + "step": 46420 + }, + { + "epoch": 41.75359712230216, + "grad_norm": 0.1436508744955063, + "learning_rate": 1.3346603100810578e-05, + "loss": 0.0166, + "step": 46430 + }, + { + "action_loss": 0.0007342484313994646, + "epoch": 41.75359712230216, + "step": 46430 + }, + { + "epoch": 41.75359712230216, + "step": 46430, + "torque_loss": 0.08791147917509079 + }, + { + "epoch": 41.75359712230216, + "force_loss": 0.002309478120878339, + "step": 46430 + }, + { + "epoch": 41.76258992805755, + "grad_norm": 0.1547205150127411, + "learning_rate": 1.3327865061228645e-05, + "loss": 0.017, + "step": 46440 + }, + { + "action_loss": 0.003924580756574869, + "epoch": 41.76258992805755, + "step": 46440 + }, + { + "epoch": 41.76258992805755, + "step": 46440, + "torque_loss": 0.12193212658166885 + }, + { + "epoch": 41.76258992805755, + "force_loss": 0.0033480441197752953, + "step": 46440 + }, + { + "epoch": 41.77158273381295, + "grad_norm": 0.5368626713752747, + "learning_rate": 1.330913816167304e-05, + "loss": 0.0212, + "step": 46450 + }, + { + "action_loss": 0.0010073069715872407, + "epoch": 41.77158273381295, + "step": 46450 + }, + { + "epoch": 41.77158273381295, + "step": 46450, + "torque_loss": 0.09068405628204346 + }, + { + "epoch": 41.77158273381295, + "force_loss": 0.004405824933201075, + "step": 46450 + }, + { + "epoch": 41.780575539568346, + "grad_norm": 0.2417355477809906, + "learning_rate": 1.3290422407832492e-05, + "loss": 0.0178, + "step": 46460 + }, + { + "action_loss": 0.012405410408973694, + "epoch": 41.780575539568346, + "step": 46460 + }, + { + "epoch": 41.780575539568346, + "step": 46460, + "torque_loss": 0.2033160924911499 + }, + { + "epoch": 41.780575539568346, + "force_loss": 0.024507006630301476, + "step": 46460 + }, + { + "epoch": 41.789568345323744, + "grad_norm": 0.08595062047243118, + "learning_rate": 1.3271717805392354e-05, + "loss": 0.0177, + "step": 46470 + }, + { + "action_loss": 0.002071862341836095, + "epoch": 41.789568345323744, + "step": 46470 + }, + { + "epoch": 41.789568345323744, + "step": 46470, + "torque_loss": 0.1411842703819275 + }, + { + "epoch": 41.789568345323744, + "force_loss": 0.0019701633136719465, + "step": 46470 + }, + { + "epoch": 41.798561151079134, + "grad_norm": 0.10007981956005096, + "learning_rate": 1.3253024360034582e-05, + "loss": 0.0164, + "step": 46480 + }, + { + "action_loss": 0.0008854269399307668, + "epoch": 41.798561151079134, + "step": 46480 + }, + { + "epoch": 41.798561151079134, + "step": 46480, + "torque_loss": 0.1057625338435173 + }, + { + "epoch": 41.798561151079134, + "force_loss": 0.004036023747175932, + "step": 46480 + }, + { + "epoch": 41.80755395683453, + "grad_norm": 0.05173208937048912, + "learning_rate": 1.323434207743779e-05, + "loss": 0.0186, + "step": 46490 + }, + { + "action_loss": 0.002810916630551219, + "epoch": 41.80755395683453, + "step": 46490 + }, + { + "epoch": 41.80755395683453, + "step": 46490, + "torque_loss": 0.11307287216186523 + }, + { + "epoch": 41.80755395683453, + "force_loss": 0.001997811021283269, + "step": 46490 + }, + { + "epoch": 41.81654676258993, + "grad_norm": 0.114821657538414, + "learning_rate": 1.3215670963277105e-05, + "loss": 0.0188, + "step": 46500 + }, + { + "action_loss": 0.0010481419740244746, + "epoch": 41.81654676258993, + "step": 46500 + }, + { + "epoch": 41.81654676258993, + "step": 46500, + "torque_loss": 0.1736762970685959 + }, + { + "epoch": 41.81654676258993, + "force_loss": 0.0021222021896392107, + "step": 46500 + }, + { + "epoch": 41.82553956834532, + "grad_norm": 0.1164456307888031, + "learning_rate": 1.3197011023224376e-05, + "loss": 0.0146, + "step": 46510 + }, + { + "action_loss": 0.0010417585726827383, + "epoch": 41.82553956834532, + "step": 46510 + }, + { + "epoch": 41.82553956834532, + "step": 46510, + "torque_loss": 0.11614730209112167 + }, + { + "epoch": 41.82553956834532, + "force_loss": 0.002403555205091834, + "step": 46510 + }, + { + "epoch": 41.83453237410072, + "grad_norm": 0.29869675636291504, + "learning_rate": 1.3178362262947941e-05, + "loss": 0.0209, + "step": 46520 + }, + { + "action_loss": 0.004037730861455202, + "epoch": 41.83453237410072, + "step": 46520 + }, + { + "epoch": 41.83453237410072, + "step": 46520, + "torque_loss": 0.12442287057638168 + }, + { + "epoch": 41.83453237410072, + "force_loss": 0.008197152987122536, + "step": 46520 + }, + { + "epoch": 41.843525179856115, + "grad_norm": 0.11285030841827393, + "learning_rate": 1.3159724688112845e-05, + "loss": 0.0149, + "step": 46530 + }, + { + "action_loss": 0.0009287123684771359, + "epoch": 41.843525179856115, + "step": 46530 + }, + { + "epoch": 41.843525179856115, + "step": 46530, + "torque_loss": 0.14639179408550262 + }, + { + "epoch": 41.843525179856115, + "force_loss": 0.00194704404566437, + "step": 46530 + }, + { + "epoch": 41.85251798561151, + "grad_norm": 0.12745630741119385, + "learning_rate": 1.3141098304380683e-05, + "loss": 0.0159, + "step": 46540 + }, + { + "action_loss": 0.0009762852569110692, + "epoch": 41.85251798561151, + "step": 46540 + }, + { + "epoch": 41.85251798561151, + "step": 46540, + "torque_loss": 0.16845448315143585 + }, + { + "epoch": 41.85251798561151, + "force_loss": 0.0028479357715696096, + "step": 46540 + }, + { + "epoch": 41.861510791366904, + "grad_norm": 0.07736373692750931, + "learning_rate": 1.3122483117409651e-05, + "loss": 0.0161, + "step": 46550 + }, + { + "action_loss": 0.0008849836303852499, + "epoch": 41.861510791366904, + "step": 46550 + }, + { + "epoch": 41.861510791366904, + "step": 46550, + "torque_loss": 0.13746891915798187 + }, + { + "epoch": 41.861510791366904, + "force_loss": 0.0025917694438248873, + "step": 46550 + }, + { + "epoch": 41.8705035971223, + "grad_norm": 0.40781673789024353, + "learning_rate": 1.3103879132854552e-05, + "loss": 0.0173, + "step": 46560 + }, + { + "action_loss": 0.023091599345207214, + "epoch": 41.8705035971223, + "step": 46560 + }, + { + "epoch": 41.8705035971223, + "step": 46560, + "torque_loss": 0.12224598973989487 + }, + { + "epoch": 41.8705035971223, + "force_loss": 0.02782335877418518, + "step": 46560 + }, + { + "epoch": 41.8794964028777, + "grad_norm": 0.10636132955551147, + "learning_rate": 1.3085286356366771e-05, + "loss": 0.0174, + "step": 46570 + }, + { + "action_loss": 0.0009309242013841867, + "epoch": 41.8794964028777, + "step": 46570 + }, + { + "epoch": 41.8794964028777, + "step": 46570, + "torque_loss": 0.0996650829911232 + }, + { + "epoch": 41.8794964028777, + "force_loss": 0.0025415238924324512, + "step": 46570 + }, + { + "epoch": 41.888489208633096, + "grad_norm": 0.08773689717054367, + "learning_rate": 1.3066704793594337e-05, + "loss": 0.0164, + "step": 46580 + }, + { + "action_loss": 0.002911119954660535, + "epoch": 41.888489208633096, + "step": 46580 + }, + { + "epoch": 41.888489208633096, + "step": 46580, + "torque_loss": 0.1087740957736969 + }, + { + "epoch": 41.888489208633096, + "force_loss": 0.0035202335566282272, + "step": 46580 + }, + { + "epoch": 41.89748201438849, + "grad_norm": 0.32328110933303833, + "learning_rate": 1.3048134450181816e-05, + "loss": 0.0155, + "step": 46590 + }, + { + "action_loss": 0.00818340852856636, + "epoch": 41.89748201438849, + "step": 46590 + }, + { + "epoch": 41.89748201438849, + "step": 46590, + "torque_loss": 0.15708810091018677 + }, + { + "epoch": 41.89748201438849, + "force_loss": 0.012910461984574795, + "step": 46590 + }, + { + "epoch": 41.906474820143885, + "grad_norm": 0.11418038606643677, + "learning_rate": 1.3029575331770394e-05, + "loss": 0.0153, + "step": 46600 + }, + { + "action_loss": 0.0007504620589315891, + "epoch": 41.906474820143885, + "step": 46600 + }, + { + "epoch": 41.906474820143885, + "step": 46600, + "torque_loss": 0.12935860455036163 + }, + { + "epoch": 41.906474820143885, + "force_loss": 0.0008481654222123325, + "step": 46600 + }, + { + "epoch": 41.91546762589928, + "grad_norm": 0.3308919072151184, + "learning_rate": 1.3011027443997837e-05, + "loss": 0.0146, + "step": 46610 + }, + { + "action_loss": 0.0008278512395918369, + "epoch": 41.91546762589928, + "step": 46610 + }, + { + "epoch": 41.91546762589928, + "step": 46610, + "torque_loss": 0.13288386166095734 + }, + { + "epoch": 41.91546762589928, + "force_loss": 0.0007833181880414486, + "step": 46610 + }, + { + "epoch": 41.92446043165468, + "grad_norm": 0.21655842661857605, + "learning_rate": 1.2992490792498507e-05, + "loss": 0.0144, + "step": 46620 + }, + { + "action_loss": 0.003982121590524912, + "epoch": 41.92446043165468, + "step": 46620 + }, + { + "epoch": 41.92446043165468, + "step": 46620, + "torque_loss": 0.1748986840248108 + }, + { + "epoch": 41.92446043165468, + "force_loss": 0.012981380335986614, + "step": 46620 + }, + { + "epoch": 41.93345323741007, + "grad_norm": 0.18265517055988312, + "learning_rate": 1.297396538290333e-05, + "loss": 0.0171, + "step": 46630 + }, + { + "action_loss": 0.0009083410259336233, + "epoch": 41.93345323741007, + "step": 46630 + }, + { + "epoch": 41.93345323741007, + "step": 46630, + "torque_loss": 0.10882826894521713 + }, + { + "epoch": 41.93345323741007, + "force_loss": 0.0006618359475396574, + "step": 46630 + }, + { + "epoch": 41.94244604316547, + "grad_norm": 0.2547270357608795, + "learning_rate": 1.2955451220839888e-05, + "loss": 0.013, + "step": 46640 + }, + { + "action_loss": 0.0031260100658982992, + "epoch": 41.94244604316547, + "step": 46640 + }, + { + "epoch": 41.94244604316547, + "step": 46640, + "torque_loss": 0.14381995797157288 + }, + { + "epoch": 41.94244604316547, + "force_loss": 0.0017336081946268678, + "step": 46640 + }, + { + "epoch": 41.951438848920866, + "grad_norm": 0.6436915397644043, + "learning_rate": 1.2936948311932223e-05, + "loss": 0.0174, + "step": 46650 + }, + { + "action_loss": 0.0027956615667790174, + "epoch": 41.951438848920866, + "step": 46650 + }, + { + "epoch": 41.951438848920866, + "step": 46650, + "torque_loss": 0.11021675914525986 + }, + { + "epoch": 41.951438848920866, + "force_loss": 0.0063346861861646175, + "step": 46650 + }, + { + "epoch": 41.960431654676256, + "grad_norm": 0.09409542381763458, + "learning_rate": 1.2918456661801104e-05, + "loss": 0.0152, + "step": 46660 + }, + { + "action_loss": 0.000733214255888015, + "epoch": 41.960431654676256, + "step": 46660 + }, + { + "epoch": 41.960431654676256, + "step": 46660, + "torque_loss": 0.1312297135591507 + }, + { + "epoch": 41.960431654676256, + "force_loss": 0.0016367450589314103, + "step": 46660 + }, + { + "epoch": 41.969424460431654, + "grad_norm": 0.06914551556110382, + "learning_rate": 1.2899976276063736e-05, + "loss": 0.0149, + "step": 46670 + }, + { + "action_loss": 0.004222599323838949, + "epoch": 41.969424460431654, + "step": 46670 + }, + { + "epoch": 41.969424460431654, + "step": 46670, + "torque_loss": 0.10489914566278458 + }, + { + "epoch": 41.969424460431654, + "force_loss": 0.0031899672467261553, + "step": 46670 + }, + { + "epoch": 41.97841726618705, + "grad_norm": 0.18672610819339752, + "learning_rate": 1.2881507160334022e-05, + "loss": 0.0162, + "step": 46680 + }, + { + "action_loss": 0.0008945378358475864, + "epoch": 41.97841726618705, + "step": 46680 + }, + { + "epoch": 41.97841726618705, + "step": 46680, + "torque_loss": 0.11672995239496231 + }, + { + "epoch": 41.97841726618705, + "force_loss": 0.000731871637981385, + "step": 46680 + }, + { + "epoch": 41.98741007194245, + "grad_norm": 0.0604969821870327, + "learning_rate": 1.286304932022238e-05, + "loss": 0.0141, + "step": 46690 + }, + { + "action_loss": 0.001619009650312364, + "epoch": 41.98741007194245, + "step": 46690 + }, + { + "epoch": 41.98741007194245, + "step": 46690, + "torque_loss": 0.15916641056537628 + }, + { + "epoch": 41.98741007194245, + "force_loss": 0.0056321038864552975, + "step": 46690 + }, + { + "epoch": 41.99640287769784, + "grad_norm": 0.32196900248527527, + "learning_rate": 1.2844602761335806e-05, + "loss": 0.0195, + "step": 46700 + }, + { + "action_loss": 0.001419402309693396, + "epoch": 41.99640287769784, + "step": 46700 + }, + { + "epoch": 41.99640287769784, + "step": 46700, + "torque_loss": 0.15044492483139038 + }, + { + "epoch": 41.99640287769784, + "force_loss": 0.001633634907193482, + "step": 46700 + }, + { + "epoch": 42.00539568345324, + "grad_norm": 0.11786222457885742, + "learning_rate": 1.2826167489277885e-05, + "loss": 0.0142, + "step": 46710 + }, + { + "action_loss": 0.0036024730652570724, + "epoch": 42.00539568345324, + "step": 46710 + }, + { + "epoch": 42.00539568345324, + "step": 46710, + "torque_loss": 0.13186015188694 + }, + { + "epoch": 42.00539568345324, + "force_loss": 0.014024369418621063, + "step": 46710 + }, + { + "epoch": 42.014388489208635, + "grad_norm": 0.18105298280715942, + "learning_rate": 1.2807743509648745e-05, + "loss": 0.0139, + "step": 46720 + }, + { + "action_loss": 0.0012587569653987885, + "epoch": 42.014388489208635, + "step": 46720 + }, + { + "epoch": 42.014388489208635, + "step": 46720, + "torque_loss": 0.07623998075723648 + }, + { + "epoch": 42.014388489208635, + "force_loss": 0.000937749573495239, + "step": 46720 + }, + { + "epoch": 42.023381294964025, + "grad_norm": 0.20153272151947021, + "learning_rate": 1.2789330828045149e-05, + "loss": 0.0148, + "step": 46730 + }, + { + "action_loss": 0.0010120555525645614, + "epoch": 42.023381294964025, + "step": 46730 + }, + { + "epoch": 42.023381294964025, + "step": 46730, + "torque_loss": 0.12439671158790588 + }, + { + "epoch": 42.023381294964025, + "force_loss": 0.0018724799156188965, + "step": 46730 + }, + { + "epoch": 42.03237410071942, + "grad_norm": 0.14741671085357666, + "learning_rate": 1.2770929450060332e-05, + "loss": 0.0161, + "step": 46740 + }, + { + "action_loss": 0.0007091127336025238, + "epoch": 42.03237410071942, + "step": 46740 + }, + { + "epoch": 42.03237410071942, + "step": 46740, + "torque_loss": 0.1275375634431839 + }, + { + "epoch": 42.03237410071942, + "force_loss": 0.0010350263910368085, + "step": 46740 + }, + { + "epoch": 42.04136690647482, + "grad_norm": 0.30644872784614563, + "learning_rate": 1.2752539381284184e-05, + "loss": 0.0158, + "step": 46750 + }, + { + "action_loss": 0.0012461119331419468, + "epoch": 42.04136690647482, + "step": 46750 + }, + { + "epoch": 42.04136690647482, + "step": 46750, + "torque_loss": 0.10962500423192978 + }, + { + "epoch": 42.04136690647482, + "force_loss": 0.0029679357539862394, + "step": 46750 + }, + { + "epoch": 42.05035971223022, + "grad_norm": 0.05391109362244606, + "learning_rate": 1.273416062730311e-05, + "loss": 0.0151, + "step": 46760 + }, + { + "action_loss": 0.00622273376211524, + "epoch": 42.05035971223022, + "step": 46760 + }, + { + "epoch": 42.05035971223022, + "step": 46760, + "torque_loss": 0.142961323261261 + }, + { + "epoch": 42.05035971223022, + "force_loss": 0.008253638632595539, + "step": 46760 + }, + { + "epoch": 42.05935251798561, + "grad_norm": 0.06334792822599411, + "learning_rate": 1.2715793193700088e-05, + "loss": 0.0163, + "step": 46770 + }, + { + "action_loss": 0.0005098909023217857, + "epoch": 42.05935251798561, + "step": 46770 + }, + { + "epoch": 42.05935251798561, + "step": 46770, + "torque_loss": 0.11684909462928772 + }, + { + "epoch": 42.05935251798561, + "force_loss": 0.0008358575869351625, + "step": 46770 + }, + { + "epoch": 42.068345323741006, + "grad_norm": 0.12084853649139404, + "learning_rate": 1.2697437086054664e-05, + "loss": 0.0172, + "step": 46780 + }, + { + "action_loss": 0.010842480696737766, + "epoch": 42.068345323741006, + "step": 46780 + }, + { + "epoch": 42.068345323741006, + "step": 46780, + "torque_loss": 0.1762898713350296 + }, + { + "epoch": 42.068345323741006, + "force_loss": 0.012682903558015823, + "step": 46780 + }, + { + "epoch": 42.077338129496404, + "grad_norm": 0.17878368496894836, + "learning_rate": 1.2679092309942937e-05, + "loss": 0.0163, + "step": 46790 + }, + { + "action_loss": 0.0048118163831532, + "epoch": 42.077338129496404, + "step": 46790 + }, + { + "epoch": 42.077338129496404, + "step": 46790, + "torque_loss": 0.13234467804431915 + }, + { + "epoch": 42.077338129496404, + "force_loss": 0.009777412749826908, + "step": 46790 + }, + { + "epoch": 42.0863309352518, + "grad_norm": 0.2512463927268982, + "learning_rate": 1.266075887093755e-05, + "loss": 0.0147, + "step": 46800 + }, + { + "action_loss": 0.0015251734293997288, + "epoch": 42.0863309352518, + "step": 46800 + }, + { + "epoch": 42.0863309352518, + "step": 46800, + "torque_loss": 0.1434912234544754 + }, + { + "epoch": 42.0863309352518, + "force_loss": 0.004402507562190294, + "step": 46800 + }, + { + "epoch": 42.09532374100719, + "grad_norm": 0.30996543169021606, + "learning_rate": 1.2642436774607757e-05, + "loss": 0.0176, + "step": 46810 + }, + { + "action_loss": 0.0016457010060548782, + "epoch": 42.09532374100719, + "step": 46810 + }, + { + "epoch": 42.09532374100719, + "step": 46810, + "torque_loss": 0.1044391393661499 + }, + { + "epoch": 42.09532374100719, + "force_loss": 0.0029262385796755552, + "step": 46810 + }, + { + "epoch": 42.10431654676259, + "grad_norm": 0.11659257858991623, + "learning_rate": 1.2624126026519278e-05, + "loss": 0.0133, + "step": 46820 + }, + { + "action_loss": 0.009499938227236271, + "epoch": 42.10431654676259, + "step": 46820 + }, + { + "epoch": 42.10431654676259, + "step": 46820, + "torque_loss": 0.19349241256713867 + }, + { + "epoch": 42.10431654676259, + "force_loss": 0.030090009793639183, + "step": 46820 + }, + { + "epoch": 42.11330935251799, + "grad_norm": 0.16274572908878326, + "learning_rate": 1.2605826632234474e-05, + "loss": 0.0174, + "step": 46830 + }, + { + "action_loss": 0.002080908278003335, + "epoch": 42.11330935251799, + "step": 46830 + }, + { + "epoch": 42.11330935251799, + "step": 46830, + "torque_loss": 0.130034938454628 + }, + { + "epoch": 42.11330935251799, + "force_loss": 0.003555377246811986, + "step": 46830 + }, + { + "epoch": 42.12230215827338, + "grad_norm": 0.14463350176811218, + "learning_rate": 1.2587538597312198e-05, + "loss": 0.0147, + "step": 46840 + }, + { + "action_loss": 0.02536866068840027, + "epoch": 42.12230215827338, + "step": 46840 + }, + { + "epoch": 42.12230215827338, + "step": 46840, + "torque_loss": 0.11947962641716003 + }, + { + "epoch": 42.12230215827338, + "force_loss": 0.0279528945684433, + "step": 46840 + }, + { + "epoch": 42.131294964028775, + "grad_norm": 0.10108552128076553, + "learning_rate": 1.2569261927307884e-05, + "loss": 0.0177, + "step": 46850 + }, + { + "action_loss": 0.0006828121840953827, + "epoch": 42.131294964028775, + "step": 46850 + }, + { + "epoch": 42.131294964028775, + "step": 46850, + "torque_loss": 0.17187131941318512 + }, + { + "epoch": 42.131294964028775, + "force_loss": 0.0005717813619412482, + "step": 46850 + }, + { + "epoch": 42.14028776978417, + "grad_norm": 0.07092025130987167, + "learning_rate": 1.2550996627773493e-05, + "loss": 0.015, + "step": 46860 + }, + { + "action_loss": 0.0008692220435477793, + "epoch": 42.14028776978417, + "step": 46860 + }, + { + "epoch": 42.14028776978417, + "step": 46860, + "torque_loss": 0.14385050535202026 + }, + { + "epoch": 42.14028776978417, + "force_loss": 0.0017538117244839668, + "step": 46860 + }, + { + "epoch": 42.14928057553957, + "grad_norm": 0.35319799184799194, + "learning_rate": 1.2532742704257527e-05, + "loss": 0.0168, + "step": 46870 + }, + { + "action_loss": 0.003098190063610673, + "epoch": 42.14928057553957, + "step": 46870 + }, + { + "epoch": 42.14928057553957, + "step": 46870, + "torque_loss": 0.12692494690418243 + }, + { + "epoch": 42.14928057553957, + "force_loss": 0.006886093411594629, + "step": 46870 + }, + { + "epoch": 42.15827338129496, + "grad_norm": 0.11866920441389084, + "learning_rate": 1.2514500162305087e-05, + "loss": 0.0145, + "step": 46880 + }, + { + "action_loss": 0.0020604028832167387, + "epoch": 42.15827338129496, + "step": 46880 + }, + { + "epoch": 42.15827338129496, + "step": 46880, + "torque_loss": 0.10279615968465805 + }, + { + "epoch": 42.15827338129496, + "force_loss": 0.00376189430244267, + "step": 46880 + }, + { + "epoch": 42.16726618705036, + "grad_norm": 0.07028356939554214, + "learning_rate": 1.2496269007457728e-05, + "loss": 0.0155, + "step": 46890 + }, + { + "action_loss": 0.002010742900893092, + "epoch": 42.16726618705036, + "step": 46890 + }, + { + "epoch": 42.16726618705036, + "step": 46890, + "torque_loss": 0.1491326540708542 + }, + { + "epoch": 42.16726618705036, + "force_loss": 0.002597041428089142, + "step": 46890 + }, + { + "epoch": 42.17625899280576, + "grad_norm": 0.11444475501775742, + "learning_rate": 1.2478049245253625e-05, + "loss": 0.0146, + "step": 46900 + }, + { + "action_loss": 0.0010552414460107684, + "epoch": 42.17625899280576, + "step": 46900 + }, + { + "epoch": 42.17625899280576, + "step": 46900, + "torque_loss": 0.09426501393318176 + }, + { + "epoch": 42.17625899280576, + "force_loss": 0.002343730768188834, + "step": 46900 + }, + { + "epoch": 42.185251798561154, + "grad_norm": 0.33438387513160706, + "learning_rate": 1.2459840881227459e-05, + "loss": 0.017, + "step": 46910 + }, + { + "action_loss": 0.0014436669880524278, + "epoch": 42.185251798561154, + "step": 46910 + }, + { + "epoch": 42.185251798561154, + "step": 46910, + "torque_loss": 0.10698408633470535 + }, + { + "epoch": 42.185251798561154, + "force_loss": 0.002083592815324664, + "step": 46910 + }, + { + "epoch": 42.194244604316545, + "grad_norm": 0.0719999298453331, + "learning_rate": 1.2441643920910435e-05, + "loss": 0.0162, + "step": 46920 + }, + { + "action_loss": 0.0013842815533280373, + "epoch": 42.194244604316545, + "step": 46920 + }, + { + "epoch": 42.194244604316545, + "step": 46920, + "torque_loss": 0.12207833677530289 + }, + { + "epoch": 42.194244604316545, + "force_loss": 0.0017060752725228667, + "step": 46920 + }, + { + "epoch": 42.20323741007194, + "grad_norm": 0.10483632236719131, + "learning_rate": 1.2423458369830322e-05, + "loss": 0.0138, + "step": 46930 + }, + { + "action_loss": 0.004464512690901756, + "epoch": 42.20323741007194, + "step": 46930 + }, + { + "epoch": 42.20323741007194, + "step": 46930, + "torque_loss": 0.1396876573562622 + }, + { + "epoch": 42.20323741007194, + "force_loss": 0.007298026233911514, + "step": 46930 + }, + { + "epoch": 42.21223021582734, + "grad_norm": 0.10816202312707901, + "learning_rate": 1.2405284233511406e-05, + "loss": 0.0172, + "step": 46940 + }, + { + "action_loss": 0.010293533094227314, + "epoch": 42.21223021582734, + "step": 46940 + }, + { + "epoch": 42.21223021582734, + "step": 46940, + "torque_loss": 0.1770031452178955 + }, + { + "epoch": 42.21223021582734, + "force_loss": 0.01605510711669922, + "step": 46940 + }, + { + "epoch": 42.22122302158273, + "grad_norm": 0.272856742143631, + "learning_rate": 1.2387121517474487e-05, + "loss": 0.0166, + "step": 46950 + }, + { + "action_loss": 0.001577790710143745, + "epoch": 42.22122302158273, + "step": 46950 + }, + { + "epoch": 42.22122302158273, + "step": 46950, + "torque_loss": 0.12595312297344208 + }, + { + "epoch": 42.22122302158273, + "force_loss": 0.004614649806171656, + "step": 46950 + }, + { + "epoch": 42.23021582733813, + "grad_norm": 0.17100203037261963, + "learning_rate": 1.2368970227236975e-05, + "loss": 0.0176, + "step": 46960 + }, + { + "action_loss": 0.0012894395040348172, + "epoch": 42.23021582733813, + "step": 46960 + }, + { + "epoch": 42.23021582733813, + "step": 46960, + "torque_loss": 0.11722856760025024 + }, + { + "epoch": 42.23021582733813, + "force_loss": 0.0024467355106025934, + "step": 46960 + }, + { + "epoch": 42.239208633093526, + "grad_norm": 0.14707109332084656, + "learning_rate": 1.2350830368312688e-05, + "loss": 0.0158, + "step": 46970 + }, + { + "action_loss": 0.0006690708105452359, + "epoch": 42.239208633093526, + "step": 46970 + }, + { + "epoch": 42.239208633093526, + "step": 46970, + "torque_loss": 0.07282324135303497 + }, + { + "epoch": 42.239208633093526, + "force_loss": 0.000768433790653944, + "step": 46970 + }, + { + "epoch": 42.24820143884892, + "grad_norm": 0.28848797082901, + "learning_rate": 1.2332701946212083e-05, + "loss": 0.0153, + "step": 46980 + }, + { + "action_loss": 0.0026538728270679712, + "epoch": 42.24820143884892, + "step": 46980 + }, + { + "epoch": 42.24820143884892, + "step": 46980, + "torque_loss": 0.08095107227563858 + }, + { + "epoch": 42.24820143884892, + "force_loss": 0.006578672677278519, + "step": 46980 + }, + { + "epoch": 42.257194244604314, + "grad_norm": 0.36609283089637756, + "learning_rate": 1.2314584966442077e-05, + "loss": 0.0149, + "step": 46990 + }, + { + "action_loss": 0.004711670335382223, + "epoch": 42.257194244604314, + "step": 46990 + }, + { + "epoch": 42.257194244604314, + "step": 46990, + "torque_loss": 0.09482771158218384 + }, + { + "epoch": 42.257194244604314, + "force_loss": 0.013062643818557262, + "step": 46990 + }, + { + "epoch": 42.26618705035971, + "grad_norm": 0.07432954013347626, + "learning_rate": 1.2296479434506136e-05, + "loss": 0.0163, + "step": 47000 + }, + { + "action_loss": 0.0012593042338266969, + "epoch": 42.26618705035971, + "step": 47000 + }, + { + "epoch": 42.26618705035971, + "step": 47000, + "torque_loss": 0.08349712938070297 + }, + { + "epoch": 42.26618705035971, + "force_loss": 0.004426718223839998, + "step": 47000 + }, + { + "epoch": 42.27517985611511, + "grad_norm": 0.1788962036371231, + "learning_rate": 1.2278385355904232e-05, + "loss": 0.0149, + "step": 47010 + }, + { + "action_loss": 0.0008733691647648811, + "epoch": 42.27517985611511, + "step": 47010 + }, + { + "epoch": 42.27517985611511, + "step": 47010, + "torque_loss": 0.12246324867010117 + }, + { + "epoch": 42.27517985611511, + "force_loss": 0.0005289769032970071, + "step": 47010 + }, + { + "epoch": 42.28417266187051, + "grad_norm": 0.43475180864334106, + "learning_rate": 1.2260302736132867e-05, + "loss": 0.0151, + "step": 47020 + }, + { + "action_loss": 0.0011639531003311276, + "epoch": 42.28417266187051, + "step": 47020 + }, + { + "epoch": 42.28417266187051, + "step": 47020, + "torque_loss": 0.10735426098108292 + }, + { + "epoch": 42.28417266187051, + "force_loss": 0.0036427301820367575, + "step": 47020 + }, + { + "epoch": 42.2931654676259, + "grad_norm": 0.09938303381204605, + "learning_rate": 1.2242231580685098e-05, + "loss": 0.0175, + "step": 47030 + }, + { + "action_loss": 0.003048837184906006, + "epoch": 42.2931654676259, + "step": 47030 + }, + { + "epoch": 42.2931654676259, + "step": 47030, + "torque_loss": 0.14806193113327026 + }, + { + "epoch": 42.2931654676259, + "force_loss": 0.0018813660135492682, + "step": 47030 + }, + { + "epoch": 42.302158273381295, + "grad_norm": 0.09749345481395721, + "learning_rate": 1.2224171895050413e-05, + "loss": 0.0153, + "step": 47040 + }, + { + "action_loss": 0.006674807518720627, + "epoch": 42.302158273381295, + "step": 47040 + }, + { + "epoch": 42.302158273381295, + "step": 47040, + "torque_loss": 0.09648054838180542 + }, + { + "epoch": 42.302158273381295, + "force_loss": 0.010812963359057903, + "step": 47040 + }, + { + "epoch": 42.31115107913669, + "grad_norm": 0.3661826252937317, + "learning_rate": 1.2206123684714903e-05, + "loss": 0.0154, + "step": 47050 + }, + { + "action_loss": 0.0029531989712268114, + "epoch": 42.31115107913669, + "step": 47050 + }, + { + "epoch": 42.31115107913669, + "step": 47050, + "torque_loss": 0.17020951211452484 + }, + { + "epoch": 42.31115107913669, + "force_loss": 0.018691852688789368, + "step": 47050 + }, + { + "epoch": 42.32014388489208, + "grad_norm": 0.2752850353717804, + "learning_rate": 1.2188086955161132e-05, + "loss": 0.0145, + "step": 47060 + }, + { + "action_loss": 0.0015264186076819897, + "epoch": 42.32014388489208, + "step": 47060 + }, + { + "epoch": 42.32014388489208, + "step": 47060, + "torque_loss": 0.10866586118936539 + }, + { + "epoch": 42.32014388489208, + "force_loss": 0.0016908334800973535, + "step": 47060 + }, + { + "epoch": 42.32913669064748, + "grad_norm": 0.10309295356273651, + "learning_rate": 1.2170061711868175e-05, + "loss": 0.0157, + "step": 47070 + }, + { + "action_loss": 0.0029808704275637865, + "epoch": 42.32913669064748, + "step": 47070 + }, + { + "epoch": 42.32913669064748, + "step": 47070, + "torque_loss": 0.10860654711723328 + }, + { + "epoch": 42.32913669064748, + "force_loss": 0.010925118811428547, + "step": 47070 + }, + { + "epoch": 42.33812949640288, + "grad_norm": 0.20670431852340698, + "learning_rate": 1.215204796031163e-05, + "loss": 0.0145, + "step": 47080 + }, + { + "action_loss": 0.0036128342617303133, + "epoch": 42.33812949640288, + "step": 47080 + }, + { + "epoch": 42.33812949640288, + "step": 47080, + "torque_loss": 0.1813892275094986 + }, + { + "epoch": 42.33812949640288, + "force_loss": 0.018829522654414177, + "step": 47080 + }, + { + "epoch": 42.347122302158276, + "grad_norm": 0.05608697608113289, + "learning_rate": 1.2134045705963599e-05, + "loss": 0.0142, + "step": 47090 + }, + { + "action_loss": 0.0007133810431696475, + "epoch": 42.347122302158276, + "step": 47090 + }, + { + "epoch": 42.347122302158276, + "step": 47090, + "torque_loss": 0.0937839224934578 + }, + { + "epoch": 42.347122302158276, + "force_loss": 0.0009587321546860039, + "step": 47090 + }, + { + "epoch": 42.356115107913666, + "grad_norm": 0.10110324621200562, + "learning_rate": 1.2116054954292689e-05, + "loss": 0.0128, + "step": 47100 + }, + { + "action_loss": 0.001186329871416092, + "epoch": 42.356115107913666, + "step": 47100 + }, + { + "epoch": 42.356115107913666, + "step": 47100, + "torque_loss": 0.12169986963272095 + }, + { + "epoch": 42.356115107913666, + "force_loss": 0.0014533005887642503, + "step": 47100 + }, + { + "epoch": 42.365107913669064, + "grad_norm": 0.2030871957540512, + "learning_rate": 1.2098075710764011e-05, + "loss": 0.0157, + "step": 47110 + }, + { + "action_loss": 0.004844674840569496, + "epoch": 42.365107913669064, + "step": 47110 + }, + { + "epoch": 42.365107913669064, + "step": 47110, + "torque_loss": 0.1532885581254959 + }, + { + "epoch": 42.365107913669064, + "force_loss": 0.008660007268190384, + "step": 47110 + }, + { + "epoch": 42.37410071942446, + "grad_norm": 0.09478098899126053, + "learning_rate": 1.2080107980839183e-05, + "loss": 0.0157, + "step": 47120 + }, + { + "action_loss": 0.0007692959625273943, + "epoch": 42.37410071942446, + "step": 47120 + }, + { + "epoch": 42.37410071942446, + "step": 47120, + "torque_loss": 0.09850936383008957 + }, + { + "epoch": 42.37410071942446, + "force_loss": 0.0012820689007639885, + "step": 47120 + }, + { + "epoch": 42.38309352517986, + "grad_norm": 0.14071500301361084, + "learning_rate": 1.2062151769976343e-05, + "loss": 0.0174, + "step": 47130 + }, + { + "action_loss": 0.0007247214671224356, + "epoch": 42.38309352517986, + "step": 47130 + }, + { + "epoch": 42.38309352517986, + "step": 47130, + "torque_loss": 0.12936072051525116 + }, + { + "epoch": 42.38309352517986, + "force_loss": 0.0009547374211251736, + "step": 47130 + }, + { + "epoch": 42.39208633093525, + "grad_norm": 0.07789991050958633, + "learning_rate": 1.204420708363011e-05, + "loss": 0.0153, + "step": 47140 + }, + { + "action_loss": 0.0015309160808101296, + "epoch": 42.39208633093525, + "step": 47140 + }, + { + "epoch": 42.39208633093525, + "step": 47140, + "torque_loss": 0.08339641243219376 + }, + { + "epoch": 42.39208633093525, + "force_loss": 0.0027038194239139557, + "step": 47140 + }, + { + "epoch": 42.40107913669065, + "grad_norm": 0.07954797893762589, + "learning_rate": 1.2026273927251597e-05, + "loss": 0.0155, + "step": 47150 + }, + { + "action_loss": 0.0012581819901242852, + "epoch": 42.40107913669065, + "step": 47150 + }, + { + "epoch": 42.40107913669065, + "step": 47150, + "torque_loss": 0.1861734837293625 + }, + { + "epoch": 42.40107913669065, + "force_loss": 0.001045328681357205, + "step": 47150 + }, + { + "epoch": 42.410071942446045, + "grad_norm": 0.32643038034439087, + "learning_rate": 1.2008352306288424e-05, + "loss": 0.0158, + "step": 47160 + }, + { + "action_loss": 0.0018418746767565608, + "epoch": 42.410071942446045, + "step": 47160 + }, + { + "epoch": 42.410071942446045, + "step": 47160, + "torque_loss": 0.08608373999595642 + }, + { + "epoch": 42.410071942446045, + "force_loss": 0.0071082995273172855, + "step": 47160 + }, + { + "epoch": 42.419064748201436, + "grad_norm": 0.26478394865989685, + "learning_rate": 1.1990442226184695e-05, + "loss": 0.0132, + "step": 47170 + }, + { + "action_loss": 0.0007814674754627049, + "epoch": 42.419064748201436, + "step": 47170 + }, + { + "epoch": 42.419064748201436, + "step": 47170, + "torque_loss": 0.14269594848155975 + }, + { + "epoch": 42.419064748201436, + "force_loss": 0.0013496694155037403, + "step": 47170 + }, + { + "epoch": 42.42805755395683, + "grad_norm": 0.36180445551872253, + "learning_rate": 1.1972543692381066e-05, + "loss": 0.0165, + "step": 47180 + }, + { + "action_loss": 0.005655744578689337, + "epoch": 42.42805755395683, + "step": 47180 + }, + { + "epoch": 42.42805755395683, + "step": 47180, + "torque_loss": 0.14706192910671234 + }, + { + "epoch": 42.42805755395683, + "force_loss": 0.0059926752001047134, + "step": 47180 + }, + { + "epoch": 42.43705035971223, + "grad_norm": 0.28917908668518066, + "learning_rate": 1.1954656710314576e-05, + "loss": 0.0168, + "step": 47190 + }, + { + "action_loss": 0.000995397916994989, + "epoch": 42.43705035971223, + "step": 47190 + }, + { + "epoch": 42.43705035971223, + "step": 47190, + "torque_loss": 0.06930605322122574 + }, + { + "epoch": 42.43705035971223, + "force_loss": 0.007246797438710928, + "step": 47190 + }, + { + "epoch": 42.44604316546763, + "grad_norm": 0.1822378784418106, + "learning_rate": 1.1936781285418875e-05, + "loss": 0.0144, + "step": 47200 + }, + { + "action_loss": 0.0010932345176115632, + "epoch": 42.44604316546763, + "step": 47200 + }, + { + "epoch": 42.44604316546763, + "step": 47200, + "torque_loss": 0.14512300491333008 + }, + { + "epoch": 42.44604316546763, + "force_loss": 0.0010726271430030465, + "step": 47200 + }, + { + "epoch": 42.45503597122302, + "grad_norm": 0.07754116505384445, + "learning_rate": 1.1918917423123993e-05, + "loss": 0.0143, + "step": 47210 + }, + { + "action_loss": 0.00236272974871099, + "epoch": 42.45503597122302, + "step": 47210 + }, + { + "epoch": 42.45503597122302, + "step": 47210, + "torque_loss": 0.12109533697366714 + }, + { + "epoch": 42.45503597122302, + "force_loss": 0.010092931799590588, + "step": 47210 + }, + { + "epoch": 42.46402877697842, + "grad_norm": 0.36079341173171997, + "learning_rate": 1.1901065128856537e-05, + "loss": 0.0175, + "step": 47220 + }, + { + "action_loss": 0.0034934638533741236, + "epoch": 42.46402877697842, + "step": 47220 + }, + { + "epoch": 42.46402877697842, + "step": 47220, + "torque_loss": 0.08428329974412918 + }, + { + "epoch": 42.46402877697842, + "force_loss": 0.004261286463588476, + "step": 47220 + }, + { + "epoch": 42.473021582733814, + "grad_norm": 0.07354458421468735, + "learning_rate": 1.1883224408039551e-05, + "loss": 0.0149, + "step": 47230 + }, + { + "action_loss": 0.0019940249621868134, + "epoch": 42.473021582733814, + "step": 47230 + }, + { + "epoch": 42.473021582733814, + "step": 47230, + "torque_loss": 0.12210458517074585 + }, + { + "epoch": 42.473021582733814, + "force_loss": 0.002284869784489274, + "step": 47230 + }, + { + "epoch": 42.48201438848921, + "grad_norm": 0.3708144426345825, + "learning_rate": 1.1865395266092578e-05, + "loss": 0.0183, + "step": 47240 + }, + { + "action_loss": 0.0005899166571907699, + "epoch": 42.48201438848921, + "step": 47240 + }, + { + "epoch": 42.48201438848921, + "step": 47240, + "torque_loss": 0.07279258221387863 + }, + { + "epoch": 42.48201438848921, + "force_loss": 0.0008377400226891041, + "step": 47240 + }, + { + "epoch": 42.4910071942446, + "grad_norm": 0.12171059846878052, + "learning_rate": 1.1847577708431633e-05, + "loss": 0.0164, + "step": 47250 + }, + { + "action_loss": 0.001660170848481357, + "epoch": 42.4910071942446, + "step": 47250 + }, + { + "epoch": 42.4910071942446, + "step": 47250, + "torque_loss": 0.12421798706054688 + }, + { + "epoch": 42.4910071942446, + "force_loss": 0.004743021447211504, + "step": 47250 + }, + { + "epoch": 42.5, + "grad_norm": 0.1717991977930069, + "learning_rate": 1.1829771740469225e-05, + "loss": 0.0139, + "step": 47260 + }, + { + "action_loss": 0.0016320167342200875, + "epoch": 42.5, + "step": 47260 + }, + { + "epoch": 42.5, + "step": 47260, + "torque_loss": 0.1975756287574768 + }, + { + "epoch": 42.5, + "force_loss": 0.0021180794574320316, + "step": 47260 + }, + { + "epoch": 42.5089928057554, + "grad_norm": 0.08667226135730743, + "learning_rate": 1.1811977367614324e-05, + "loss": 0.0151, + "step": 47270 + }, + { + "action_loss": 0.0016210117610171437, + "epoch": 42.5089928057554, + "step": 47270 + }, + { + "epoch": 42.5089928057554, + "step": 47270, + "torque_loss": 0.11727314442396164 + }, + { + "epoch": 42.5089928057554, + "force_loss": 0.003396612824872136, + "step": 47270 + }, + { + "epoch": 42.51798561151079, + "grad_norm": 0.07232655584812164, + "learning_rate": 1.1794194595272412e-05, + "loss": 0.0235, + "step": 47280 + }, + { + "action_loss": 0.00337769091129303, + "epoch": 42.51798561151079, + "step": 47280 + }, + { + "epoch": 42.51798561151079, + "step": 47280, + "torque_loss": 0.13087229430675507 + }, + { + "epoch": 42.51798561151079, + "force_loss": 0.009339473210275173, + "step": 47280 + }, + { + "epoch": 42.526978417266186, + "grad_norm": 0.30817991495132446, + "learning_rate": 1.1776423428845423e-05, + "loss": 0.0137, + "step": 47290 + }, + { + "action_loss": 0.013060093857347965, + "epoch": 42.526978417266186, + "step": 47290 + }, + { + "epoch": 42.526978417266186, + "step": 47290, + "torque_loss": 0.13149075210094452 + }, + { + "epoch": 42.526978417266186, + "force_loss": 0.033541012555360794, + "step": 47290 + }, + { + "epoch": 42.53597122302158, + "grad_norm": 0.07581840455532074, + "learning_rate": 1.1758663873731756e-05, + "loss": 0.018, + "step": 47300 + }, + { + "action_loss": 0.0008062750566750765, + "epoch": 42.53597122302158, + "step": 47300 + }, + { + "epoch": 42.53597122302158, + "step": 47300, + "torque_loss": 0.09492490440607071 + }, + { + "epoch": 42.53597122302158, + "force_loss": 0.001890915329568088, + "step": 47300 + }, + { + "epoch": 42.54496402877698, + "grad_norm": 0.16609393060207367, + "learning_rate": 1.1740915935326302e-05, + "loss": 0.0209, + "step": 47310 + }, + { + "action_loss": 0.0017578349215909839, + "epoch": 42.54496402877698, + "step": 47310 + }, + { + "epoch": 42.54496402877698, + "step": 47310, + "torque_loss": 0.10964155942201614 + }, + { + "epoch": 42.54496402877698, + "force_loss": 0.0014121043495833874, + "step": 47310 + }, + { + "epoch": 42.55395683453237, + "grad_norm": 0.25634074211120605, + "learning_rate": 1.1723179619020396e-05, + "loss": 0.0139, + "step": 47320 + }, + { + "action_loss": 0.0015162843046709895, + "epoch": 42.55395683453237, + "step": 47320 + }, + { + "epoch": 42.55395683453237, + "step": 47320, + "torque_loss": 0.1422034651041031 + }, + { + "epoch": 42.55395683453237, + "force_loss": 0.00577051704749465, + "step": 47320 + }, + { + "epoch": 42.56294964028777, + "grad_norm": 0.2171502560377121, + "learning_rate": 1.1705454930201914e-05, + "loss": 0.0157, + "step": 47330 + }, + { + "action_loss": 0.011758401058614254, + "epoch": 42.56294964028777, + "step": 47330 + }, + { + "epoch": 42.56294964028777, + "step": 47330, + "torque_loss": 0.1420688033103943 + }, + { + "epoch": 42.56294964028777, + "force_loss": 0.019480887800455093, + "step": 47330 + }, + { + "epoch": 42.57194244604317, + "grad_norm": 0.09375839680433273, + "learning_rate": 1.1687741874255087e-05, + "loss": 0.0175, + "step": 47340 + }, + { + "action_loss": 0.0005921342526562512, + "epoch": 42.57194244604317, + "step": 47340 + }, + { + "epoch": 42.57194244604317, + "step": 47340, + "torque_loss": 0.11464810371398926 + }, + { + "epoch": 42.57194244604317, + "force_loss": 0.0009963855845853686, + "step": 47340 + }, + { + "epoch": 42.580935251798564, + "grad_norm": 0.10439424216747284, + "learning_rate": 1.1670040456560728e-05, + "loss": 0.0137, + "step": 47350 + }, + { + "action_loss": 0.0026397760957479477, + "epoch": 42.580935251798564, + "step": 47350 + }, + { + "epoch": 42.580935251798564, + "step": 47350, + "torque_loss": 0.1713700294494629 + }, + { + "epoch": 42.580935251798564, + "force_loss": 0.008073628880083561, + "step": 47350 + }, + { + "epoch": 42.589928057553955, + "grad_norm": 0.4963211119174957, + "learning_rate": 1.1652350682496005e-05, + "loss": 0.0183, + "step": 47360 + }, + { + "action_loss": 0.0018448602641001344, + "epoch": 42.589928057553955, + "step": 47360 + }, + { + "epoch": 42.589928057553955, + "step": 47360, + "torque_loss": 0.10342096537351608 + }, + { + "epoch": 42.589928057553955, + "force_loss": 0.0017703211633488536, + "step": 47360 + }, + { + "epoch": 42.59892086330935, + "grad_norm": 0.07963874936103821, + "learning_rate": 1.163467255743465e-05, + "loss": 0.014, + "step": 47370 + }, + { + "action_loss": 0.002963275881484151, + "epoch": 42.59892086330935, + "step": 47370 + }, + { + "epoch": 42.59892086330935, + "step": 47370, + "torque_loss": 0.09060266613960266 + }, + { + "epoch": 42.59892086330935, + "force_loss": 0.0015868988120928407, + "step": 47370 + }, + { + "epoch": 42.60791366906475, + "grad_norm": 0.07791832834482193, + "learning_rate": 1.1617006086746796e-05, + "loss": 0.0151, + "step": 47380 + }, + { + "action_loss": 0.013082985766232014, + "epoch": 42.60791366906475, + "step": 47380 + }, + { + "epoch": 42.60791366906475, + "step": 47380, + "torque_loss": 0.1579044908285141 + }, + { + "epoch": 42.60791366906475, + "force_loss": 0.012325276620686054, + "step": 47380 + }, + { + "epoch": 42.61690647482014, + "grad_norm": 0.1890263557434082, + "learning_rate": 1.1599351275799047e-05, + "loss": 0.0171, + "step": 47390 + }, + { + "action_loss": 0.0037095455918461084, + "epoch": 42.61690647482014, + "step": 47390 + }, + { + "epoch": 42.61690647482014, + "step": 47390, + "torque_loss": 0.09953423589468002 + }, + { + "epoch": 42.61690647482014, + "force_loss": 0.014167613349854946, + "step": 47390 + }, + { + "epoch": 42.62589928057554, + "grad_norm": 0.05807991698384285, + "learning_rate": 1.1581708129954466e-05, + "loss": 0.013, + "step": 47400 + }, + { + "action_loss": 0.0013249530456960201, + "epoch": 42.62589928057554, + "step": 47400 + }, + { + "epoch": 42.62589928057554, + "step": 47400, + "torque_loss": 0.08267392963171005 + }, + { + "epoch": 42.62589928057554, + "force_loss": 0.003339221002534032, + "step": 47400 + }, + { + "epoch": 42.634892086330936, + "grad_norm": 0.08891882002353668, + "learning_rate": 1.1564076654572587e-05, + "loss": 0.0137, + "step": 47410 + }, + { + "action_loss": 0.0017964104190468788, + "epoch": 42.634892086330936, + "step": 47410 + }, + { + "epoch": 42.634892086330936, + "step": 47410, + "torque_loss": 0.1630369871854782 + }, + { + "epoch": 42.634892086330936, + "force_loss": 0.0049766856245696545, + "step": 47410 + }, + { + "epoch": 42.643884892086334, + "grad_norm": 0.09315340965986252, + "learning_rate": 1.1546456855009358e-05, + "loss": 0.0153, + "step": 47420 + }, + { + "action_loss": 0.0008421282400377095, + "epoch": 42.643884892086334, + "step": 47420 + }, + { + "epoch": 42.643884892086334, + "step": 47420, + "torque_loss": 0.1481899619102478 + }, + { + "epoch": 42.643884892086334, + "force_loss": 0.001376375905238092, + "step": 47420 + }, + { + "epoch": 42.652877697841724, + "grad_norm": 0.1282106190919876, + "learning_rate": 1.1528848736617248e-05, + "loss": 0.0165, + "step": 47430 + }, + { + "action_loss": 0.0007081867661327124, + "epoch": 42.652877697841724, + "step": 47430 + }, + { + "epoch": 42.652877697841724, + "step": 47430, + "torque_loss": 0.08617168664932251 + }, + { + "epoch": 42.652877697841724, + "force_loss": 0.0006583284121006727, + "step": 47430 + }, + { + "epoch": 42.66187050359712, + "grad_norm": 0.14328855276107788, + "learning_rate": 1.1511252304745112e-05, + "loss": 0.017, + "step": 47440 + }, + { + "action_loss": 0.036643579602241516, + "epoch": 42.66187050359712, + "step": 47440 + }, + { + "epoch": 42.66187050359712, + "step": 47440, + "torque_loss": 0.15435637533664703 + }, + { + "epoch": 42.66187050359712, + "force_loss": 0.05480148270726204, + "step": 47440 + }, + { + "epoch": 42.67086330935252, + "grad_norm": 0.1529199630022049, + "learning_rate": 1.1493667564738297e-05, + "loss": 0.0265, + "step": 47450 + }, + { + "action_loss": 0.006078500300645828, + "epoch": 42.67086330935252, + "step": 47450 + }, + { + "epoch": 42.67086330935252, + "step": 47450, + "torque_loss": 0.11079076677560806 + }, + { + "epoch": 42.67086330935252, + "force_loss": 0.01721310243010521, + "step": 47450 + }, + { + "epoch": 42.67985611510792, + "grad_norm": 0.06626751273870468, + "learning_rate": 1.1476094521938574e-05, + "loss": 0.0138, + "step": 47460 + }, + { + "action_loss": 0.000707072380464524, + "epoch": 42.67985611510792, + "step": 47460 + }, + { + "epoch": 42.67985611510792, + "step": 47460, + "torque_loss": 0.08428502082824707 + }, + { + "epoch": 42.67985611510792, + "force_loss": 0.0025898588355630636, + "step": 47460 + }, + { + "epoch": 42.68884892086331, + "grad_norm": 0.12890978157520294, + "learning_rate": 1.1458533181684167e-05, + "loss": 0.0148, + "step": 47470 + }, + { + "action_loss": 0.0006818519905209541, + "epoch": 42.68884892086331, + "step": 47470 + }, + { + "epoch": 42.68884892086331, + "step": 47470, + "torque_loss": 0.14568443596363068 + }, + { + "epoch": 42.68884892086331, + "force_loss": 0.0012362306006252766, + "step": 47470 + }, + { + "epoch": 42.697841726618705, + "grad_norm": 0.0929703414440155, + "learning_rate": 1.1440983549309753e-05, + "loss": 0.0152, + "step": 47480 + }, + { + "action_loss": 0.0008459368837065995, + "epoch": 42.697841726618705, + "step": 47480 + }, + { + "epoch": 42.697841726618705, + "step": 47480, + "torque_loss": 0.12080162763595581 + }, + { + "epoch": 42.697841726618705, + "force_loss": 0.001601852010935545, + "step": 47480 + }, + { + "epoch": 42.7068345323741, + "grad_norm": 0.15074065327644348, + "learning_rate": 1.1423445630146434e-05, + "loss": 0.0142, + "step": 47490 + }, + { + "action_loss": 0.002071307273581624, + "epoch": 42.7068345323741, + "step": 47490 + }, + { + "epoch": 42.7068345323741, + "step": 47490, + "torque_loss": 0.11533572524785995 + }, + { + "epoch": 42.7068345323741, + "force_loss": 0.0038288019131869078, + "step": 47490 + }, + { + "epoch": 42.71582733812949, + "grad_norm": 0.15697233378887177, + "learning_rate": 1.1405919429521799e-05, + "loss": 0.0195, + "step": 47500 + }, + { + "action_loss": 0.0005225121858529747, + "epoch": 42.71582733812949, + "step": 47500 + }, + { + "epoch": 42.71582733812949, + "step": 47500, + "torque_loss": 0.08119718730449677 + }, + { + "epoch": 42.71582733812949, + "force_loss": 0.00042487154132686555, + "step": 47500 + }, + { + "epoch": 42.72482014388489, + "grad_norm": 0.09051039069890976, + "learning_rate": 1.1388404952759802e-05, + "loss": 0.0158, + "step": 47510 + }, + { + "action_loss": 0.0012061764718964696, + "epoch": 42.72482014388489, + "step": 47510 + }, + { + "epoch": 42.72482014388489, + "step": 47510, + "torque_loss": 0.11353711038827896 + }, + { + "epoch": 42.72482014388489, + "force_loss": 0.001552483648993075, + "step": 47510 + }, + { + "epoch": 42.73381294964029, + "grad_norm": 0.06365683674812317, + "learning_rate": 1.1370902205180923e-05, + "loss": 0.015, + "step": 47520 + }, + { + "action_loss": 0.0011349001433700323, + "epoch": 42.73381294964029, + "step": 47520 + }, + { + "epoch": 42.73381294964029, + "step": 47520, + "torque_loss": 0.12694470584392548 + }, + { + "epoch": 42.73381294964029, + "force_loss": 0.004099330399185419, + "step": 47520 + }, + { + "epoch": 42.742805755395686, + "grad_norm": 0.08204715698957443, + "learning_rate": 1.1353411192101987e-05, + "loss": 0.0172, + "step": 47530 + }, + { + "action_loss": 0.0005749166011810303, + "epoch": 42.742805755395686, + "step": 47530 + }, + { + "epoch": 42.742805755395686, + "step": 47530, + "torque_loss": 0.11609262228012085 + }, + { + "epoch": 42.742805755395686, + "force_loss": 0.0006579051841981709, + "step": 47530 + }, + { + "epoch": 42.75179856115108, + "grad_norm": 0.08540261536836624, + "learning_rate": 1.133593191883634e-05, + "loss": 0.0135, + "step": 47540 + }, + { + "action_loss": 0.0014629355864599347, + "epoch": 42.75179856115108, + "step": 47540 + }, + { + "epoch": 42.75179856115108, + "step": 47540, + "torque_loss": 0.12457611411809921 + }, + { + "epoch": 42.75179856115108, + "force_loss": 0.0027056329417973757, + "step": 47540 + }, + { + "epoch": 42.760791366906474, + "grad_norm": 0.11708243936300278, + "learning_rate": 1.1318464390693711e-05, + "loss": 0.015, + "step": 47550 + }, + { + "action_loss": 0.0018090964294970036, + "epoch": 42.760791366906474, + "step": 47550 + }, + { + "epoch": 42.760791366906474, + "step": 47550, + "torque_loss": 0.14880378544330597 + }, + { + "epoch": 42.760791366906474, + "force_loss": 0.0025596863124519587, + "step": 47550 + }, + { + "epoch": 42.76978417266187, + "grad_norm": 0.10936582833528519, + "learning_rate": 1.1301008612980257e-05, + "loss": 0.0155, + "step": 47560 + }, + { + "action_loss": 0.001809805049560964, + "epoch": 42.76978417266187, + "step": 47560 + }, + { + "epoch": 42.76978417266187, + "step": 47560, + "torque_loss": 0.11501473933458328 + }, + { + "epoch": 42.76978417266187, + "force_loss": 0.0019889865070581436, + "step": 47560 + }, + { + "epoch": 42.77877697841727, + "grad_norm": 0.24609656631946564, + "learning_rate": 1.128356459099863e-05, + "loss": 0.0177, + "step": 47570 + }, + { + "action_loss": 0.0010163206607103348, + "epoch": 42.77877697841727, + "step": 47570 + }, + { + "epoch": 42.77877697841727, + "step": 47570, + "torque_loss": 0.09729542583227158 + }, + { + "epoch": 42.77877697841727, + "force_loss": 0.0007158178486861289, + "step": 47570 + }, + { + "epoch": 42.78776978417266, + "grad_norm": 0.5613279342651367, + "learning_rate": 1.1266132330047802e-05, + "loss": 0.0162, + "step": 47580 + }, + { + "action_loss": 0.0008137661498039961, + "epoch": 42.78776978417266, + "step": 47580 + }, + { + "epoch": 42.78776978417266, + "step": 47580, + "torque_loss": 0.0968179702758789 + }, + { + "epoch": 42.78776978417266, + "force_loss": 0.0014220471493899822, + "step": 47580 + }, + { + "epoch": 42.79676258992806, + "grad_norm": 0.05134407803416252, + "learning_rate": 1.1248711835423281e-05, + "loss": 0.0165, + "step": 47590 + }, + { + "action_loss": 0.0011653086403384805, + "epoch": 42.79676258992806, + "step": 47590 + }, + { + "epoch": 42.79676258992806, + "step": 47590, + "torque_loss": 0.12019113451242447 + }, + { + "epoch": 42.79676258992806, + "force_loss": 0.003048662096261978, + "step": 47590 + }, + { + "epoch": 42.805755395683455, + "grad_norm": 0.05334530770778656, + "learning_rate": 1.123130311241693e-05, + "loss": 0.0138, + "step": 47600 + }, + { + "action_loss": 0.007098613306879997, + "epoch": 42.805755395683455, + "step": 47600 + }, + { + "epoch": 42.805755395683455, + "step": 47600, + "torque_loss": 0.08616843074560165 + }, + { + "epoch": 42.805755395683455, + "force_loss": 0.01013248786330223, + "step": 47600 + }, + { + "epoch": 42.814748201438846, + "grad_norm": 0.06116926297545433, + "learning_rate": 1.1213906166317068e-05, + "loss": 0.0149, + "step": 47610 + }, + { + "action_loss": 0.0006389851332642138, + "epoch": 42.814748201438846, + "step": 47610 + }, + { + "epoch": 42.814748201438846, + "step": 47610, + "torque_loss": 0.14744198322296143 + }, + { + "epoch": 42.814748201438846, + "force_loss": 0.0006112952250987291, + "step": 47610 + }, + { + "epoch": 42.82374100719424, + "grad_norm": 0.360599547624588, + "learning_rate": 1.1196521002408427e-05, + "loss": 0.0181, + "step": 47620 + }, + { + "action_loss": 0.0008904607966542244, + "epoch": 42.82374100719424, + "step": 47620 + }, + { + "epoch": 42.82374100719424, + "step": 47620, + "torque_loss": 0.12338205426931381 + }, + { + "epoch": 42.82374100719424, + "force_loss": 0.0040774052031338215, + "step": 47620 + }, + { + "epoch": 42.83273381294964, + "grad_norm": 0.08569305390119553, + "learning_rate": 1.1179147625972159e-05, + "loss": 0.0158, + "step": 47630 + }, + { + "action_loss": 0.0024999361485242844, + "epoch": 42.83273381294964, + "step": 47630 + }, + { + "epoch": 42.83273381294964, + "step": 47630, + "torque_loss": 0.10827334970235825 + }, + { + "epoch": 42.83273381294964, + "force_loss": 0.002221408998593688, + "step": 47630 + }, + { + "epoch": 42.84172661870504, + "grad_norm": 0.11933228373527527, + "learning_rate": 1.1161786042285822e-05, + "loss": 0.0153, + "step": 47640 + }, + { + "action_loss": 0.005867915228009224, + "epoch": 42.84172661870504, + "step": 47640 + }, + { + "epoch": 42.84172661870504, + "step": 47640, + "torque_loss": 0.11276962608098984 + }, + { + "epoch": 42.84172661870504, + "force_loss": 0.0065126265399158, + "step": 47640 + }, + { + "epoch": 42.85071942446043, + "grad_norm": 0.19709131121635437, + "learning_rate": 1.1144436256623447e-05, + "loss": 0.0162, + "step": 47650 + }, + { + "action_loss": 0.006689954549074173, + "epoch": 42.85071942446043, + "step": 47650 + }, + { + "epoch": 42.85071942446043, + "step": 47650, + "torque_loss": 0.15377478301525116 + }, + { + "epoch": 42.85071942446043, + "force_loss": 0.0033050470519810915, + "step": 47650 + }, + { + "epoch": 42.85971223021583, + "grad_norm": 0.10624817758798599, + "learning_rate": 1.1127098274255392e-05, + "loss": 0.018, + "step": 47660 + }, + { + "action_loss": 0.0006072015967220068, + "epoch": 42.85971223021583, + "step": 47660 + }, + { + "epoch": 42.85971223021583, + "step": 47660, + "torque_loss": 0.13391609489917755 + }, + { + "epoch": 42.85971223021583, + "force_loss": 0.0014309532707557082, + "step": 47660 + }, + { + "epoch": 42.868705035971225, + "grad_norm": 0.07417885959148407, + "learning_rate": 1.1109772100448512e-05, + "loss": 0.0152, + "step": 47670 + }, + { + "action_loss": 0.0029009487479925156, + "epoch": 42.868705035971225, + "step": 47670 + }, + { + "epoch": 42.868705035971225, + "step": 47670, + "torque_loss": 0.10411717742681503 + }, + { + "epoch": 42.868705035971225, + "force_loss": 0.004915538243949413, + "step": 47670 + }, + { + "epoch": 42.87769784172662, + "grad_norm": 0.2228514403104782, + "learning_rate": 1.1092457740466033e-05, + "loss": 0.0149, + "step": 47680 + }, + { + "action_loss": 0.002773801563307643, + "epoch": 42.87769784172662, + "step": 47680 + }, + { + "epoch": 42.87769784172662, + "step": 47680, + "torque_loss": 0.13222777843475342 + }, + { + "epoch": 42.87769784172662, + "force_loss": 0.0041300226002931595, + "step": 47680 + }, + { + "epoch": 42.88669064748201, + "grad_norm": 0.21478824317455292, + "learning_rate": 1.10751551995676e-05, + "loss": 0.0202, + "step": 47690 + }, + { + "action_loss": 0.003015379188582301, + "epoch": 42.88669064748201, + "step": 47690 + }, + { + "epoch": 42.88669064748201, + "step": 47690, + "torque_loss": 0.16523443162441254 + }, + { + "epoch": 42.88669064748201, + "force_loss": 0.005697846412658691, + "step": 47690 + }, + { + "epoch": 42.89568345323741, + "grad_norm": 0.1537024825811386, + "learning_rate": 1.1057864483009262e-05, + "loss": 0.0173, + "step": 47700 + }, + { + "action_loss": 0.004201934672892094, + "epoch": 42.89568345323741, + "step": 47700 + }, + { + "epoch": 42.89568345323741, + "step": 47700, + "torque_loss": 0.1378016471862793 + }, + { + "epoch": 42.89568345323741, + "force_loss": 0.004908794071525335, + "step": 47700 + }, + { + "epoch": 42.90467625899281, + "grad_norm": 0.19818437099456787, + "learning_rate": 1.1040585596043473e-05, + "loss": 0.0155, + "step": 47710 + }, + { + "action_loss": 0.001223340048454702, + "epoch": 42.90467625899281, + "step": 47710 + }, + { + "epoch": 42.90467625899281, + "step": 47710, + "torque_loss": 0.12380459159612656 + }, + { + "epoch": 42.90467625899281, + "force_loss": 0.0011330292327329516, + "step": 47710 + }, + { + "epoch": 42.9136690647482, + "grad_norm": 0.06923489272594452, + "learning_rate": 1.1023318543919148e-05, + "loss": 0.0158, + "step": 47720 + }, + { + "action_loss": 0.0033123933244496584, + "epoch": 42.9136690647482, + "step": 47720 + }, + { + "epoch": 42.9136690647482, + "step": 47720, + "torque_loss": 0.11249303817749023 + }, + { + "epoch": 42.9136690647482, + "force_loss": 0.00871638860553503, + "step": 47720 + }, + { + "epoch": 42.922661870503596, + "grad_norm": 0.15130597352981567, + "learning_rate": 1.10060633318815e-05, + "loss": 0.0153, + "step": 47730 + }, + { + "action_loss": 0.0005271590780466795, + "epoch": 42.922661870503596, + "step": 47730 + }, + { + "epoch": 42.922661870503596, + "step": 47730, + "torque_loss": 0.13004140555858612 + }, + { + "epoch": 42.922661870503596, + "force_loss": 0.000815410225186497, + "step": 47730 + }, + { + "epoch": 42.931654676258994, + "grad_norm": 0.08738364279270172, + "learning_rate": 1.0988819965172248e-05, + "loss": 0.0187, + "step": 47740 + }, + { + "action_loss": 0.00766528956592083, + "epoch": 42.931654676258994, + "step": 47740 + }, + { + "epoch": 42.931654676258994, + "step": 47740, + "torque_loss": 0.12799929082393646 + }, + { + "epoch": 42.931654676258994, + "force_loss": 0.012195862829685211, + "step": 47740 + }, + { + "epoch": 42.94064748201439, + "grad_norm": 0.17410610616207123, + "learning_rate": 1.0971588449029462e-05, + "loss": 0.0182, + "step": 47750 + }, + { + "action_loss": 0.004894527141004801, + "epoch": 42.94064748201439, + "step": 47750 + }, + { + "epoch": 42.94064748201439, + "step": 47750, + "torque_loss": 0.14845876395702362 + }, + { + "epoch": 42.94064748201439, + "force_loss": 0.0026279091835021973, + "step": 47750 + }, + { + "epoch": 42.94964028776978, + "grad_norm": 0.24348744750022888, + "learning_rate": 1.095436878868762e-05, + "loss": 0.0185, + "step": 47760 + }, + { + "action_loss": 0.0014869635924696922, + "epoch": 42.94964028776978, + "step": 47760 + }, + { + "epoch": 42.94964028776978, + "step": 47760, + "torque_loss": 0.1049477681517601 + }, + { + "epoch": 42.94964028776978, + "force_loss": 0.00132732221391052, + "step": 47760 + }, + { + "epoch": 42.95863309352518, + "grad_norm": 0.21606042981147766, + "learning_rate": 1.0937160989377598e-05, + "loss": 0.0178, + "step": 47770 + }, + { + "action_loss": 0.0005397333297878504, + "epoch": 42.95863309352518, + "step": 47770 + }, + { + "epoch": 42.95863309352518, + "step": 47770, + "torque_loss": 0.16108444333076477 + }, + { + "epoch": 42.95863309352518, + "force_loss": 0.0009688996360637248, + "step": 47770 + }, + { + "epoch": 42.96762589928058, + "grad_norm": 0.058848198503255844, + "learning_rate": 1.0919965056326676e-05, + "loss": 0.0159, + "step": 47780 + }, + { + "action_loss": 0.0009252442978322506, + "epoch": 42.96762589928058, + "step": 47780 + }, + { + "epoch": 42.96762589928058, + "step": 47780, + "torque_loss": 0.1137903556227684 + }, + { + "epoch": 42.96762589928058, + "force_loss": 0.0012039474677294493, + "step": 47780 + }, + { + "epoch": 42.976618705035975, + "grad_norm": 0.12039360404014587, + "learning_rate": 1.0902780994758504e-05, + "loss": 0.0149, + "step": 47790 + }, + { + "action_loss": 0.0012971563264727592, + "epoch": 42.976618705035975, + "step": 47790 + }, + { + "epoch": 42.976618705035975, + "step": 47790, + "torque_loss": 0.12286225706338882 + }, + { + "epoch": 42.976618705035975, + "force_loss": 0.001622358220629394, + "step": 47790 + }, + { + "epoch": 42.985611510791365, + "grad_norm": 0.24436727166175842, + "learning_rate": 1.0885608809893193e-05, + "loss": 0.0138, + "step": 47800 + }, + { + "action_loss": 0.0010201864643022418, + "epoch": 42.985611510791365, + "step": 47800 + }, + { + "epoch": 42.985611510791365, + "step": 47800, + "torque_loss": 0.10960216075181961 + }, + { + "epoch": 42.985611510791365, + "force_loss": 0.001070774276740849, + "step": 47800 + }, + { + "epoch": 42.99460431654676, + "grad_norm": 0.32260990142822266, + "learning_rate": 1.0868448506947142e-05, + "loss": 0.016, + "step": 47810 + }, + { + "action_loss": 0.0006577176391147077, + "epoch": 42.99460431654676, + "step": 47810 + }, + { + "epoch": 42.99460431654676, + "step": 47810, + "torque_loss": 0.13745005428791046 + }, + { + "epoch": 42.99460431654676, + "force_loss": 0.0011286836815997958, + "step": 47810 + }, + { + "epoch": 43.00359712230216, + "grad_norm": 0.06442060321569443, + "learning_rate": 1.0851300091133243e-05, + "loss": 0.0128, + "step": 47820 + }, + { + "action_loss": 0.002379534998908639, + "epoch": 43.00359712230216, + "step": 47820 + }, + { + "epoch": 43.00359712230216, + "step": 47820, + "torque_loss": 0.11185771226882935 + }, + { + "epoch": 43.00359712230216, + "force_loss": 0.0060440548695623875, + "step": 47820 + }, + { + "epoch": 43.01258992805755, + "grad_norm": 0.1262531578540802, + "learning_rate": 1.083416356766071e-05, + "loss": 0.0162, + "step": 47830 + }, + { + "action_loss": 0.012524661608040333, + "epoch": 43.01258992805755, + "step": 47830 + }, + { + "epoch": 43.01258992805755, + "step": 47830, + "torque_loss": 0.1212630495429039 + }, + { + "epoch": 43.01258992805755, + "force_loss": 0.007837825454771519, + "step": 47830 + }, + { + "epoch": 43.02158273381295, + "grad_norm": 0.1975613683462143, + "learning_rate": 1.0817038941735175e-05, + "loss": 0.0216, + "step": 47840 + }, + { + "action_loss": 0.0007798531441949308, + "epoch": 43.02158273381295, + "step": 47840 + }, + { + "epoch": 43.02158273381295, + "step": 47840, + "torque_loss": 0.10824859142303467 + }, + { + "epoch": 43.02158273381295, + "force_loss": 0.0011800049105659127, + "step": 47840 + }, + { + "epoch": 43.030575539568346, + "grad_norm": 0.381454199552536, + "learning_rate": 1.0799926218558642e-05, + "loss": 0.018, + "step": 47850 + }, + { + "action_loss": 0.002221165457740426, + "epoch": 43.030575539568346, + "step": 47850 + }, + { + "epoch": 43.030575539568346, + "step": 47850, + "torque_loss": 0.08142309635877609 + }, + { + "epoch": 43.030575539568346, + "force_loss": 0.00194599200040102, + "step": 47850 + }, + { + "epoch": 43.039568345323744, + "grad_norm": 0.37045004963874817, + "learning_rate": 1.0782825403329488e-05, + "loss": 0.0159, + "step": 47860 + }, + { + "action_loss": 0.011807483620941639, + "epoch": 43.039568345323744, + "step": 47860 + }, + { + "epoch": 43.039568345323744, + "step": 47860, + "torque_loss": 0.14993666112422943 + }, + { + "epoch": 43.039568345323744, + "force_loss": 0.03662684187293053, + "step": 47860 + }, + { + "epoch": 43.048561151079134, + "grad_norm": 0.28101295232772827, + "learning_rate": 1.076573650124254e-05, + "loss": 0.0153, + "step": 47870 + }, + { + "action_loss": 0.003324523800984025, + "epoch": 43.048561151079134, + "step": 47870 + }, + { + "epoch": 43.048561151079134, + "step": 47870, + "torque_loss": 0.13396786153316498 + }, + { + "epoch": 43.048561151079134, + "force_loss": 0.002152825240045786, + "step": 47870 + }, + { + "epoch": 43.05755395683453, + "grad_norm": 0.17905646562576294, + "learning_rate": 1.0748659517488891e-05, + "loss": 0.0164, + "step": 47880 + }, + { + "action_loss": 0.0008058263338170946, + "epoch": 43.05755395683453, + "step": 47880 + }, + { + "epoch": 43.05755395683453, + "step": 47880, + "torque_loss": 0.07485131919384003 + }, + { + "epoch": 43.05755395683453, + "force_loss": 0.0027047526091337204, + "step": 47880 + }, + { + "epoch": 43.06654676258993, + "grad_norm": 0.08739248663187027, + "learning_rate": 1.0731594457256138e-05, + "loss": 0.0155, + "step": 47890 + }, + { + "action_loss": 0.001667486852966249, + "epoch": 43.06654676258993, + "step": 47890 + }, + { + "epoch": 43.06654676258993, + "step": 47890, + "torque_loss": 0.13758164644241333 + }, + { + "epoch": 43.06654676258993, + "force_loss": 0.0022158322390168905, + "step": 47890 + }, + { + "epoch": 43.07553956834533, + "grad_norm": 0.05744181200861931, + "learning_rate": 1.0714541325728139e-05, + "loss": 0.0157, + "step": 47900 + }, + { + "action_loss": 0.0023959812242537737, + "epoch": 43.07553956834533, + "step": 47900 + }, + { + "epoch": 43.07553956834533, + "step": 47900, + "torque_loss": 0.13124150037765503 + }, + { + "epoch": 43.07553956834533, + "force_loss": 0.0031061277259141207, + "step": 47900 + }, + { + "epoch": 43.08453237410072, + "grad_norm": 0.14698629081249237, + "learning_rate": 1.0697500128085231e-05, + "loss": 0.0146, + "step": 47910 + }, + { + "action_loss": 0.00103837123606354, + "epoch": 43.08453237410072, + "step": 47910 + }, + { + "epoch": 43.08453237410072, + "step": 47910, + "torque_loss": 0.0659821629524231 + }, + { + "epoch": 43.08453237410072, + "force_loss": 0.0024087040219455957, + "step": 47910 + }, + { + "epoch": 43.093525179856115, + "grad_norm": 0.2218926101922989, + "learning_rate": 1.0680470869504055e-05, + "loss": 0.0164, + "step": 47920 + }, + { + "action_loss": 0.0012790969340130687, + "epoch": 43.093525179856115, + "step": 47920 + }, + { + "epoch": 43.093525179856115, + "step": 47920, + "torque_loss": 0.17348580062389374 + }, + { + "epoch": 43.093525179856115, + "force_loss": 0.0015070317313075066, + "step": 47920 + }, + { + "epoch": 43.10251798561151, + "grad_norm": 0.09568403661251068, + "learning_rate": 1.066345355515766e-05, + "loss": 0.0151, + "step": 47930 + }, + { + "action_loss": 0.0010845664655789733, + "epoch": 43.10251798561151, + "step": 47930 + }, + { + "epoch": 43.10251798561151, + "step": 47930, + "torque_loss": 0.11754051595926285 + }, + { + "epoch": 43.10251798561151, + "force_loss": 0.0034554668236523867, + "step": 47930 + }, + { + "epoch": 43.111510791366904, + "grad_norm": 0.12043570727109909, + "learning_rate": 1.0646448190215453e-05, + "loss": 0.0144, + "step": 47940 + }, + { + "action_loss": 0.0005038560484535992, + "epoch": 43.111510791366904, + "step": 47940 + }, + { + "epoch": 43.111510791366904, + "step": 47940, + "torque_loss": 0.09740033000707626 + }, + { + "epoch": 43.111510791366904, + "force_loss": 0.0004790698003489524, + "step": 47940 + }, + { + "epoch": 43.1205035971223, + "grad_norm": 0.0963497906923294, + "learning_rate": 1.0629454779843217e-05, + "loss": 0.0147, + "step": 47950 + }, + { + "action_loss": 0.005476843565702438, + "epoch": 43.1205035971223, + "step": 47950 + }, + { + "epoch": 43.1205035971223, + "step": 47950, + "torque_loss": 0.11254861950874329 + }, + { + "epoch": 43.1205035971223, + "force_loss": 0.011349198408424854, + "step": 47950 + }, + { + "epoch": 43.1294964028777, + "grad_norm": 0.05257950723171234, + "learning_rate": 1.0612473329203082e-05, + "loss": 0.0149, + "step": 47960 + }, + { + "action_loss": 0.0011553984368219972, + "epoch": 43.1294964028777, + "step": 47960 + }, + { + "epoch": 43.1294964028777, + "step": 47960, + "torque_loss": 0.14551182091236115 + }, + { + "epoch": 43.1294964028777, + "force_loss": 0.002435431582853198, + "step": 47960 + }, + { + "epoch": 43.138489208633096, + "grad_norm": 0.15176130831241608, + "learning_rate": 1.0595503843453596e-05, + "loss": 0.0173, + "step": 47970 + }, + { + "action_loss": 0.0014907823642715812, + "epoch": 43.138489208633096, + "step": 47970 + }, + { + "epoch": 43.138489208633096, + "step": 47970, + "torque_loss": 0.12223824113607407 + }, + { + "epoch": 43.138489208633096, + "force_loss": 0.002127317478880286, + "step": 47970 + }, + { + "epoch": 43.14748201438849, + "grad_norm": 0.4841651916503906, + "learning_rate": 1.0578546327749634e-05, + "loss": 0.0163, + "step": 47980 + }, + { + "action_loss": 0.000627431261818856, + "epoch": 43.14748201438849, + "step": 47980 + }, + { + "epoch": 43.14748201438849, + "step": 47980, + "torque_loss": 0.103617824614048 + }, + { + "epoch": 43.14748201438849, + "force_loss": 0.0006132099078968167, + "step": 47980 + }, + { + "epoch": 43.156474820143885, + "grad_norm": 0.12395449727773666, + "learning_rate": 1.0561600787242425e-05, + "loss": 0.0153, + "step": 47990 + }, + { + "action_loss": 0.0010367375798523426, + "epoch": 43.156474820143885, + "step": 47990 + }, + { + "epoch": 43.156474820143885, + "step": 47990, + "torque_loss": 0.13103224337100983 + }, + { + "epoch": 43.156474820143885, + "force_loss": 0.0014029039302840829, + "step": 47990 + }, + { + "epoch": 43.16546762589928, + "grad_norm": 0.06919924914836884, + "learning_rate": 1.0544667227079591e-05, + "loss": 0.0162, + "step": 48000 + }, + { + "action_loss": 0.006488892715424299, + "epoch": 43.16546762589928, + "step": 48000 + }, + { + "epoch": 43.16546762589928, + "step": 48000, + "torque_loss": 0.13330407440662384 + }, + { + "epoch": 43.16546762589928, + "force_loss": 0.009447026066482067, + "step": 48000 + }, + { + "epoch": 43.17446043165467, + "grad_norm": 0.0908312201499939, + "learning_rate": 1.0527745652405085e-05, + "loss": 0.0168, + "step": 48010 + }, + { + "action_loss": 0.0035829085391014814, + "epoch": 43.17446043165467, + "step": 48010 + }, + { + "epoch": 43.17446043165467, + "step": 48010, + "torque_loss": 0.13573874533176422 + }, + { + "epoch": 43.17446043165467, + "force_loss": 0.011387116275727749, + "step": 48010 + }, + { + "epoch": 43.18345323741007, + "grad_norm": 0.08729702234268188, + "learning_rate": 1.051083606835927e-05, + "loss": 0.0148, + "step": 48020 + }, + { + "action_loss": 0.0021713587921112776, + "epoch": 43.18345323741007, + "step": 48020 + }, + { + "epoch": 43.18345323741007, + "step": 48020, + "torque_loss": 0.11696640402078629 + }, + { + "epoch": 43.18345323741007, + "force_loss": 0.006436000112444162, + "step": 48020 + }, + { + "epoch": 43.19244604316547, + "grad_norm": 0.16368085145950317, + "learning_rate": 1.049393848007878e-05, + "loss": 0.0138, + "step": 48030 + }, + { + "action_loss": 0.0011179042048752308, + "epoch": 43.19244604316547, + "step": 48030 + }, + { + "epoch": 43.19244604316547, + "step": 48030, + "torque_loss": 0.08802339434623718 + }, + { + "epoch": 43.19244604316547, + "force_loss": 0.00294036534614861, + "step": 48030 + }, + { + "epoch": 43.201438848920866, + "grad_norm": 0.17621979117393494, + "learning_rate": 1.0477052892696709e-05, + "loss": 0.02, + "step": 48040 + }, + { + "action_loss": 0.000769799284171313, + "epoch": 43.201438848920866, + "step": 48040 + }, + { + "epoch": 43.201438848920866, + "step": 48040, + "torque_loss": 0.11906150728464127 + }, + { + "epoch": 43.201438848920866, + "force_loss": 0.01241263747215271, + "step": 48040 + }, + { + "epoch": 43.210431654676256, + "grad_norm": 0.0709763914346695, + "learning_rate": 1.0460179311342394e-05, + "loss": 0.0205, + "step": 48050 + }, + { + "action_loss": 0.001575362286530435, + "epoch": 43.210431654676256, + "step": 48050 + }, + { + "epoch": 43.210431654676256, + "step": 48050, + "torque_loss": 0.16489823162555695 + }, + { + "epoch": 43.210431654676256, + "force_loss": 0.005189526826143265, + "step": 48050 + }, + { + "epoch": 43.219424460431654, + "grad_norm": 0.06481238454580307, + "learning_rate": 1.0443317741141634e-05, + "loss": 0.0128, + "step": 48060 + }, + { + "action_loss": 0.003785705892369151, + "epoch": 43.219424460431654, + "step": 48060 + }, + { + "epoch": 43.219424460431654, + "step": 48060, + "torque_loss": 0.09890396147966385 + }, + { + "epoch": 43.219424460431654, + "force_loss": 0.002923009218648076, + "step": 48060 + }, + { + "epoch": 43.22841726618705, + "grad_norm": 0.12806440889835358, + "learning_rate": 1.0426468187216514e-05, + "loss": 0.017, + "step": 48070 + }, + { + "action_loss": 0.0007881092024035752, + "epoch": 43.22841726618705, + "step": 48070 + }, + { + "epoch": 43.22841726618705, + "step": 48070, + "torque_loss": 0.0918109342455864 + }, + { + "epoch": 43.22841726618705, + "force_loss": 0.0022175053600221872, + "step": 48070 + }, + { + "epoch": 43.23741007194245, + "grad_norm": 0.07798373699188232, + "learning_rate": 1.0409630654685477e-05, + "loss": 0.0132, + "step": 48080 + }, + { + "action_loss": 0.0009924840414896607, + "epoch": 43.23741007194245, + "step": 48080 + }, + { + "epoch": 43.23741007194245, + "step": 48080, + "torque_loss": 0.1431632786989212 + }, + { + "epoch": 43.23741007194245, + "force_loss": 0.00153402762953192, + "step": 48080 + }, + { + "epoch": 43.24640287769784, + "grad_norm": 0.04101170226931572, + "learning_rate": 1.039280514866332e-05, + "loss": 0.0149, + "step": 48090 + }, + { + "action_loss": 0.004973534028977156, + "epoch": 43.24640287769784, + "step": 48090 + }, + { + "epoch": 43.24640287769784, + "step": 48090, + "torque_loss": 0.09770975261926651 + }, + { + "epoch": 43.24640287769784, + "force_loss": 0.016257310286164284, + "step": 48090 + }, + { + "epoch": 43.25539568345324, + "grad_norm": 0.07817025482654572, + "learning_rate": 1.0375991674261198e-05, + "loss": 0.0149, + "step": 48100 + }, + { + "action_loss": 0.002991507528349757, + "epoch": 43.25539568345324, + "step": 48100 + }, + { + "epoch": 43.25539568345324, + "step": 48100, + "torque_loss": 0.11104949563741684 + }, + { + "epoch": 43.25539568345324, + "force_loss": 0.0017960690893232822, + "step": 48100 + }, + { + "epoch": 43.264388489208635, + "grad_norm": 0.05605383217334747, + "learning_rate": 1.0359190236586575e-05, + "loss": 0.016, + "step": 48110 + }, + { + "action_loss": 0.00559874577447772, + "epoch": 43.264388489208635, + "step": 48110 + }, + { + "epoch": 43.264388489208635, + "step": 48110, + "torque_loss": 0.07325108349323273 + }, + { + "epoch": 43.264388489208635, + "force_loss": 0.00607203459367156, + "step": 48110 + }, + { + "epoch": 43.273381294964025, + "grad_norm": 0.06400890648365021, + "learning_rate": 1.0342400840743322e-05, + "loss": 0.0132, + "step": 48120 + }, + { + "action_loss": 0.002850184217095375, + "epoch": 43.273381294964025, + "step": 48120 + }, + { + "epoch": 43.273381294964025, + "step": 48120, + "torque_loss": 0.15429259836673737 + }, + { + "epoch": 43.273381294964025, + "force_loss": 0.002299923449754715, + "step": 48120 + }, + { + "epoch": 43.28237410071942, + "grad_norm": 0.11021051555871964, + "learning_rate": 1.0325623491831593e-05, + "loss": 0.0139, + "step": 48130 + }, + { + "action_loss": 0.0017068613087758422, + "epoch": 43.28237410071942, + "step": 48130 + }, + { + "epoch": 43.28237410071942, + "step": 48130, + "torque_loss": 0.13638852536678314 + }, + { + "epoch": 43.28237410071942, + "force_loss": 0.004259928595274687, + "step": 48130 + }, + { + "epoch": 43.29136690647482, + "grad_norm": 0.05624956265091896, + "learning_rate": 1.0308858194947906e-05, + "loss": 0.0166, + "step": 48140 + }, + { + "action_loss": 0.002726856619119644, + "epoch": 43.29136690647482, + "step": 48140 + }, + { + "epoch": 43.29136690647482, + "step": 48140, + "torque_loss": 0.12415780872106552 + }, + { + "epoch": 43.29136690647482, + "force_loss": 0.0031326161697506905, + "step": 48140 + }, + { + "epoch": 43.30035971223022, + "grad_norm": 0.07504697889089584, + "learning_rate": 1.0292104955185111e-05, + "loss": 0.0151, + "step": 48150 + }, + { + "action_loss": 0.0004920857609249651, + "epoch": 43.30035971223022, + "step": 48150 + }, + { + "epoch": 43.30035971223022, + "step": 48150, + "torque_loss": 0.12791381776332855 + }, + { + "epoch": 43.30035971223022, + "force_loss": 0.0024852887727320194, + "step": 48150 + }, + { + "epoch": 43.30935251798561, + "grad_norm": 0.16796861588954926, + "learning_rate": 1.0275363777632396e-05, + "loss": 0.0139, + "step": 48160 + }, + { + "action_loss": 0.0007870792760513723, + "epoch": 43.30935251798561, + "step": 48160 + }, + { + "epoch": 43.30935251798561, + "step": 48160, + "torque_loss": 0.11213616281747818 + }, + { + "epoch": 43.30935251798561, + "force_loss": 0.0005961991264484823, + "step": 48160 + }, + { + "epoch": 43.318345323741006, + "grad_norm": 0.09855154156684875, + "learning_rate": 1.0258634667375321e-05, + "loss": 0.02, + "step": 48170 + }, + { + "action_loss": 0.002681231126189232, + "epoch": 43.318345323741006, + "step": 48170 + }, + { + "epoch": 43.318345323741006, + "step": 48170, + "torque_loss": 0.09299081563949585 + }, + { + "epoch": 43.318345323741006, + "force_loss": 0.003628367558121681, + "step": 48170 + }, + { + "epoch": 43.327338129496404, + "grad_norm": 0.18675276637077332, + "learning_rate": 1.02419176294957e-05, + "loss": 0.0165, + "step": 48180 + }, + { + "action_loss": 0.0019436705624684691, + "epoch": 43.327338129496404, + "step": 48180 + }, + { + "epoch": 43.327338129496404, + "step": 48180, + "torque_loss": 0.09857650846242905 + }, + { + "epoch": 43.327338129496404, + "force_loss": 0.012498490512371063, + "step": 48180 + }, + { + "epoch": 43.3363309352518, + "grad_norm": 0.06614652276039124, + "learning_rate": 1.0225212669071782e-05, + "loss": 0.0139, + "step": 48190 + }, + { + "action_loss": 0.0006972583942115307, + "epoch": 43.3363309352518, + "step": 48190 + }, + { + "epoch": 43.3363309352518, + "step": 48190, + "torque_loss": 0.08957979083061218 + }, + { + "epoch": 43.3363309352518, + "force_loss": 0.0018935035914182663, + "step": 48190 + }, + { + "epoch": 43.34532374100719, + "grad_norm": 0.1687958687543869, + "learning_rate": 1.0208519791178029e-05, + "loss": 0.0141, + "step": 48200 + }, + { + "action_loss": 0.0013340270379558206, + "epoch": 43.34532374100719, + "step": 48200 + }, + { + "epoch": 43.34532374100719, + "step": 48200, + "torque_loss": 0.12439454346895218 + }, + { + "epoch": 43.34532374100719, + "force_loss": 0.0025843174662441015, + "step": 48200 + }, + { + "epoch": 43.35431654676259, + "grad_norm": 0.2555897831916809, + "learning_rate": 1.019183900088535e-05, + "loss": 0.0159, + "step": 48210 + }, + { + "action_loss": 0.0019390127854421735, + "epoch": 43.35431654676259, + "step": 48210 + }, + { + "epoch": 43.35431654676259, + "step": 48210, + "torque_loss": 0.16420213878154755 + }, + { + "epoch": 43.35431654676259, + "force_loss": 0.003517964854836464, + "step": 48210 + }, + { + "epoch": 43.36330935251799, + "grad_norm": 0.08727017045021057, + "learning_rate": 1.0175170303260906e-05, + "loss": 0.0155, + "step": 48220 + }, + { + "action_loss": 0.0011818286729976535, + "epoch": 43.36330935251799, + "step": 48220 + }, + { + "epoch": 43.36330935251799, + "step": 48220, + "torque_loss": 0.12860801815986633 + }, + { + "epoch": 43.36330935251799, + "force_loss": 0.003914635628461838, + "step": 48220 + }, + { + "epoch": 43.37230215827338, + "grad_norm": 0.17293666303157806, + "learning_rate": 1.0158513703368206e-05, + "loss": 0.0183, + "step": 48230 + }, + { + "action_loss": 0.0008485823054797947, + "epoch": 43.37230215827338, + "step": 48230 + }, + { + "epoch": 43.37230215827338, + "step": 48230, + "torque_loss": 0.08316108584403992 + }, + { + "epoch": 43.37230215827338, + "force_loss": 0.0029302656184881926, + "step": 48230 + }, + { + "epoch": 43.381294964028775, + "grad_norm": 0.40532225370407104, + "learning_rate": 1.0141869206267095e-05, + "loss": 0.0152, + "step": 48240 + }, + { + "action_loss": 0.0006334370118565857, + "epoch": 43.381294964028775, + "step": 48240 + }, + { + "epoch": 43.381294964028775, + "step": 48240, + "torque_loss": 0.13413994014263153 + }, + { + "epoch": 43.381294964028775, + "force_loss": 0.0036111585795879364, + "step": 48240 + }, + { + "epoch": 43.39028776978417, + "grad_norm": 0.051989614963531494, + "learning_rate": 1.0125236817013723e-05, + "loss": 0.0147, + "step": 48250 + }, + { + "action_loss": 0.0008893844787962735, + "epoch": 43.39028776978417, + "step": 48250 + }, + { + "epoch": 43.39028776978417, + "step": 48250, + "torque_loss": 0.13448502123355865 + }, + { + "epoch": 43.39028776978417, + "force_loss": 0.0013115155743435025, + "step": 48250 + }, + { + "epoch": 43.39928057553957, + "grad_norm": 0.2579008936882019, + "learning_rate": 1.010861654066056e-05, + "loss": 0.0143, + "step": 48260 + }, + { + "action_loss": 0.0024942161981016397, + "epoch": 43.39928057553957, + "step": 48260 + }, + { + "epoch": 43.39928057553957, + "step": 48260, + "torque_loss": 0.1423993855714798 + }, + { + "epoch": 43.39928057553957, + "force_loss": 0.005319037940353155, + "step": 48260 + }, + { + "epoch": 43.40827338129496, + "grad_norm": 0.11613548547029495, + "learning_rate": 1.0092008382256434e-05, + "loss": 0.0147, + "step": 48270 + }, + { + "action_loss": 0.0005665691569447517, + "epoch": 43.40827338129496, + "step": 48270 + }, + { + "epoch": 43.40827338129496, + "step": 48270, + "torque_loss": 0.09662297368049622 + }, + { + "epoch": 43.40827338129496, + "force_loss": 0.0004258766712155193, + "step": 48270 + }, + { + "epoch": 43.41726618705036, + "grad_norm": 0.11186227947473526, + "learning_rate": 1.0075412346846458e-05, + "loss": 0.0139, + "step": 48280 + }, + { + "action_loss": 0.004276219289749861, + "epoch": 43.41726618705036, + "step": 48280 + }, + { + "epoch": 43.41726618705036, + "step": 48280, + "torque_loss": 0.09548377990722656 + }, + { + "epoch": 43.41726618705036, + "force_loss": 0.0027953078970313072, + "step": 48280 + }, + { + "epoch": 43.42625899280576, + "grad_norm": 0.08906149864196777, + "learning_rate": 1.0058828439472056e-05, + "loss": 0.0142, + "step": 48290 + }, + { + "action_loss": 0.002606047550216317, + "epoch": 43.42625899280576, + "step": 48290 + }, + { + "epoch": 43.42625899280576, + "step": 48290, + "torque_loss": 0.1044873595237732 + }, + { + "epoch": 43.42625899280576, + "force_loss": 0.0037624333053827286, + "step": 48290 + }, + { + "epoch": 43.435251798561154, + "grad_norm": 0.36315038800239563, + "learning_rate": 1.0042256665170996e-05, + "loss": 0.0173, + "step": 48300 + }, + { + "action_loss": 0.0008198173600248992, + "epoch": 43.435251798561154, + "step": 48300 + }, + { + "epoch": 43.435251798561154, + "step": 48300, + "torque_loss": 0.1483038067817688 + }, + { + "epoch": 43.435251798561154, + "force_loss": 0.001358009991236031, + "step": 48300 + }, + { + "epoch": 43.444244604316545, + "grad_norm": 0.4151192903518677, + "learning_rate": 1.0025697028977332e-05, + "loss": 0.0159, + "step": 48310 + }, + { + "action_loss": 0.00045225091162137687, + "epoch": 43.444244604316545, + "step": 48310 + }, + { + "epoch": 43.444244604316545, + "step": 48310, + "torque_loss": 0.07546117156744003 + }, + { + "epoch": 43.444244604316545, + "force_loss": 0.0013339482247829437, + "step": 48310 + }, + { + "epoch": 43.45323741007194, + "grad_norm": 0.20928062498569489, + "learning_rate": 1.0009149535921454e-05, + "loss": 0.013, + "step": 48320 + }, + { + "action_loss": 0.0013231117045506835, + "epoch": 43.45323741007194, + "step": 48320 + }, + { + "epoch": 43.45323741007194, + "step": 48320, + "torque_loss": 0.09207256883382797 + }, + { + "epoch": 43.45323741007194, + "force_loss": 0.003994135186076164, + "step": 48320 + }, + { + "epoch": 43.46223021582734, + "grad_norm": 0.11802376061677933, + "learning_rate": 9.992614191030031e-06, + "loss": 0.0141, + "step": 48330 + }, + { + "action_loss": 0.0004930900759063661, + "epoch": 43.46223021582734, + "step": 48330 + }, + { + "epoch": 43.46223021582734, + "step": 48330, + "torque_loss": 0.12215912342071533 + }, + { + "epoch": 43.46223021582734, + "force_loss": 0.0006290259771049023, + "step": 48330 + }, + { + "epoch": 43.47122302158273, + "grad_norm": 0.1930931806564331, + "learning_rate": 9.976090999326115e-06, + "loss": 0.0158, + "step": 48340 + }, + { + "action_loss": 0.001698052161373198, + "epoch": 43.47122302158273, + "step": 48340 + }, + { + "epoch": 43.47122302158273, + "step": 48340, + "torque_loss": 0.11253729462623596 + }, + { + "epoch": 43.47122302158273, + "force_loss": 0.006606638431549072, + "step": 48340 + }, + { + "epoch": 43.48021582733813, + "grad_norm": 0.19494445621967316, + "learning_rate": 9.959579965828952e-06, + "loss": 0.0154, + "step": 48350 + }, + { + "action_loss": 0.00363553105853498, + "epoch": 43.48021582733813, + "step": 48350 + }, + { + "epoch": 43.48021582733813, + "step": 48350, + "torque_loss": 0.11971917003393173 + }, + { + "epoch": 43.48021582733813, + "force_loss": 0.004944715183228254, + "step": 48350 + }, + { + "epoch": 43.489208633093526, + "grad_norm": 0.10635515302419662, + "learning_rate": 9.943081095554218e-06, + "loss": 0.0143, + "step": 48360 + }, + { + "action_loss": 0.0006834636442363262, + "epoch": 43.489208633093526, + "step": 48360 + }, + { + "epoch": 43.489208633093526, + "step": 48360, + "torque_loss": 0.09464162588119507 + }, + { + "epoch": 43.489208633093526, + "force_loss": 0.003731540171429515, + "step": 48360 + }, + { + "epoch": 43.49820143884892, + "grad_norm": 0.17328238487243652, + "learning_rate": 9.926594393513783e-06, + "loss": 0.0152, + "step": 48370 + }, + { + "action_loss": 0.002312740543857217, + "epoch": 43.49820143884892, + "step": 48370 + }, + { + "epoch": 43.49820143884892, + "step": 48370, + "torque_loss": 0.129380464553833 + }, + { + "epoch": 43.49820143884892, + "force_loss": 0.0029780417680740356, + "step": 48370 + }, + { + "epoch": 43.507194244604314, + "grad_norm": 0.09742707014083862, + "learning_rate": 9.910119864715906e-06, + "loss": 0.0165, + "step": 48380 + }, + { + "action_loss": 0.002389805158600211, + "epoch": 43.507194244604314, + "step": 48380 + }, + { + "epoch": 43.507194244604314, + "step": 48380, + "torque_loss": 0.1433708518743515 + }, + { + "epoch": 43.507194244604314, + "force_loss": 0.007369231432676315, + "step": 48380 + }, + { + "epoch": 43.51618705035971, + "grad_norm": 0.07832226157188416, + "learning_rate": 9.8936575141651e-06, + "loss": 0.0143, + "step": 48390 + }, + { + "action_loss": 0.0012067954521626234, + "epoch": 43.51618705035971, + "step": 48390 + }, + { + "epoch": 43.51618705035971, + "step": 48390, + "torque_loss": 0.11249709129333496 + }, + { + "epoch": 43.51618705035971, + "force_loss": 0.004242629278451204, + "step": 48390 + }, + { + "epoch": 43.52517985611511, + "grad_norm": 0.39420026540756226, + "learning_rate": 9.877207346862194e-06, + "loss": 0.0153, + "step": 48400 + }, + { + "action_loss": 0.001663988921791315, + "epoch": 43.52517985611511, + "step": 48400 + }, + { + "epoch": 43.52517985611511, + "step": 48400, + "torque_loss": 0.15344999730587006 + }, + { + "epoch": 43.52517985611511, + "force_loss": 0.0034708238672465086, + "step": 48400 + }, + { + "epoch": 43.53417266187051, + "grad_norm": 0.1646430641412735, + "learning_rate": 9.860769367804312e-06, + "loss": 0.0147, + "step": 48410 + }, + { + "action_loss": 0.0021479518618434668, + "epoch": 43.53417266187051, + "step": 48410 + }, + { + "epoch": 43.53417266187051, + "step": 48410, + "torque_loss": 0.1471019834280014 + }, + { + "epoch": 43.53417266187051, + "force_loss": 0.003607754362747073, + "step": 48410 + }, + { + "epoch": 43.5431654676259, + "grad_norm": 0.11888998001813889, + "learning_rate": 9.844343581984877e-06, + "loss": 0.016, + "step": 48420 + }, + { + "action_loss": 0.0011510709300637245, + "epoch": 43.5431654676259, + "step": 48420 + }, + { + "epoch": 43.5431654676259, + "step": 48420, + "torque_loss": 0.08036055415868759 + }, + { + "epoch": 43.5431654676259, + "force_loss": 0.0010939823696389794, + "step": 48420 + }, + { + "epoch": 43.552158273381295, + "grad_norm": 0.37382176518440247, + "learning_rate": 9.82792999439362e-06, + "loss": 0.015, + "step": 48430 + }, + { + "action_loss": 0.0010367751820012927, + "epoch": 43.552158273381295, + "step": 48430 + }, + { + "epoch": 43.552158273381295, + "step": 48430, + "torque_loss": 0.15117304027080536 + }, + { + "epoch": 43.552158273381295, + "force_loss": 0.0016779651632532477, + "step": 48430 + }, + { + "epoch": 43.56115107913669, + "grad_norm": 0.23981720209121704, + "learning_rate": 9.811528610016546e-06, + "loss": 0.0146, + "step": 48440 + }, + { + "action_loss": 0.0008652581018395722, + "epoch": 43.56115107913669, + "step": 48440 + }, + { + "epoch": 43.56115107913669, + "step": 48440, + "torque_loss": 0.09773878008127213 + }, + { + "epoch": 43.56115107913669, + "force_loss": 0.0007889159023761749, + "step": 48440 + }, + { + "epoch": 43.57014388489208, + "grad_norm": 0.1496325582265854, + "learning_rate": 9.79513943383597e-06, + "loss": 0.0133, + "step": 48450 + }, + { + "action_loss": 0.002451093401759863, + "epoch": 43.57014388489208, + "step": 48450 + }, + { + "epoch": 43.57014388489208, + "step": 48450, + "torque_loss": 0.08924409002065659 + }, + { + "epoch": 43.57014388489208, + "force_loss": 0.010365149937570095, + "step": 48450 + }, + { + "epoch": 43.57913669064748, + "grad_norm": 0.1631256490945816, + "learning_rate": 9.778762470830489e-06, + "loss": 0.0145, + "step": 48460 + }, + { + "action_loss": 0.002930335933342576, + "epoch": 43.57913669064748, + "step": 48460 + }, + { + "epoch": 43.57913669064748, + "step": 48460, + "torque_loss": 0.13075071573257446 + }, + { + "epoch": 43.57913669064748, + "force_loss": 0.008413630537688732, + "step": 48460 + }, + { + "epoch": 43.58812949640288, + "grad_norm": 0.07262802869081497, + "learning_rate": 9.762397725974982e-06, + "loss": 0.0148, + "step": 48470 + }, + { + "action_loss": 0.0033307112753391266, + "epoch": 43.58812949640288, + "step": 48470 + }, + { + "epoch": 43.58812949640288, + "step": 48470, + "torque_loss": 0.12713240087032318 + }, + { + "epoch": 43.58812949640288, + "force_loss": 0.0052453563548624516, + "step": 48470 + }, + { + "epoch": 43.597122302158276, + "grad_norm": 0.1431194543838501, + "learning_rate": 9.746045204240622e-06, + "loss": 0.0167, + "step": 48480 + }, + { + "action_loss": 0.0030724897515028715, + "epoch": 43.597122302158276, + "step": 48480 + }, + { + "epoch": 43.597122302158276, + "step": 48480, + "torque_loss": 0.11863982677459717 + }, + { + "epoch": 43.597122302158276, + "force_loss": 0.016462991014122963, + "step": 48480 + }, + { + "epoch": 43.606115107913666, + "grad_norm": 0.10779836028814316, + "learning_rate": 9.729704910594917e-06, + "loss": 0.0149, + "step": 48490 + }, + { + "action_loss": 0.0005644892808049917, + "epoch": 43.606115107913666, + "step": 48490 + }, + { + "epoch": 43.606115107913666, + "step": 48490, + "torque_loss": 0.13799667358398438 + }, + { + "epoch": 43.606115107913666, + "force_loss": 0.00060946837766096, + "step": 48490 + }, + { + "epoch": 43.615107913669064, + "grad_norm": 0.15771760046482086, + "learning_rate": 9.713376850001554e-06, + "loss": 0.0146, + "step": 48500 + }, + { + "action_loss": 0.0004909606068395078, + "epoch": 43.615107913669064, + "step": 48500 + }, + { + "epoch": 43.615107913669064, + "step": 48500, + "torque_loss": 0.11307934671640396 + }, + { + "epoch": 43.615107913669064, + "force_loss": 0.0008854271727614105, + "step": 48500 + }, + { + "epoch": 43.62410071942446, + "grad_norm": 0.10096035897731781, + "learning_rate": 9.697061027420622e-06, + "loss": 0.0145, + "step": 48510 + }, + { + "action_loss": 0.0008008410222828388, + "epoch": 43.62410071942446, + "step": 48510 + }, + { + "epoch": 43.62410071942446, + "step": 48510, + "torque_loss": 0.1088382825255394 + }, + { + "epoch": 43.62410071942446, + "force_loss": 0.0022940707858651876, + "step": 48510 + }, + { + "epoch": 43.63309352517986, + "grad_norm": 0.11563926190137863, + "learning_rate": 9.680757447808385e-06, + "loss": 0.0177, + "step": 48520 + }, + { + "action_loss": 0.0006132869748398662, + "epoch": 43.63309352517986, + "step": 48520 + }, + { + "epoch": 43.63309352517986, + "step": 48520, + "torque_loss": 0.1222463920712471 + }, + { + "epoch": 43.63309352517986, + "force_loss": 0.001937453169375658, + "step": 48520 + }, + { + "epoch": 43.64208633093525, + "grad_norm": 0.2531619966030121, + "learning_rate": 9.664466116117488e-06, + "loss": 0.0131, + "step": 48530 + }, + { + "action_loss": 0.000509663310367614, + "epoch": 43.64208633093525, + "step": 48530 + }, + { + "epoch": 43.64208633093525, + "step": 48530, + "torque_loss": 0.08936712145805359 + }, + { + "epoch": 43.64208633093525, + "force_loss": 0.0009677493944764137, + "step": 48530 + }, + { + "epoch": 43.65107913669065, + "grad_norm": 0.058159422129392624, + "learning_rate": 9.64818703729678e-06, + "loss": 0.014, + "step": 48540 + }, + { + "action_loss": 0.0007652916829101741, + "epoch": 43.65107913669065, + "step": 48540 + }, + { + "epoch": 43.65107913669065, + "step": 48540, + "torque_loss": 0.1308279037475586 + }, + { + "epoch": 43.65107913669065, + "force_loss": 0.0008879646775312722, + "step": 48540 + }, + { + "epoch": 43.660071942446045, + "grad_norm": 0.18933160603046417, + "learning_rate": 9.631920216291423e-06, + "loss": 0.0157, + "step": 48550 + }, + { + "action_loss": 0.009560401551425457, + "epoch": 43.660071942446045, + "step": 48550 + }, + { + "epoch": 43.660071942446045, + "step": 48550, + "torque_loss": 0.18331509828567505 + }, + { + "epoch": 43.660071942446045, + "force_loss": 0.012608930468559265, + "step": 48550 + }, + { + "epoch": 43.669064748201436, + "grad_norm": 0.14010848104953766, + "learning_rate": 9.615665658042849e-06, + "loss": 0.0165, + "step": 48560 + }, + { + "action_loss": 0.0013685687445104122, + "epoch": 43.669064748201436, + "step": 48560 + }, + { + "epoch": 43.669064748201436, + "step": 48560, + "torque_loss": 0.1093185618519783 + }, + { + "epoch": 43.669064748201436, + "force_loss": 0.0030134720727801323, + "step": 48560 + }, + { + "epoch": 43.67805755395683, + "grad_norm": 0.10423647612333298, + "learning_rate": 9.599423367488747e-06, + "loss": 0.015, + "step": 48570 + }, + { + "action_loss": 0.010759256780147552, + "epoch": 43.67805755395683, + "step": 48570 + }, + { + "epoch": 43.67805755395683, + "step": 48570, + "torque_loss": 0.09712091833353043 + }, + { + "epoch": 43.67805755395683, + "force_loss": 0.0132240429520607, + "step": 48570 + }, + { + "epoch": 43.68705035971223, + "grad_norm": 0.29065975546836853, + "learning_rate": 9.583193349563124e-06, + "loss": 0.0145, + "step": 48580 + }, + { + "action_loss": 0.0010066931135952473, + "epoch": 43.68705035971223, + "step": 48580 + }, + { + "epoch": 43.68705035971223, + "step": 48580, + "torque_loss": 0.1086641177535057 + }, + { + "epoch": 43.68705035971223, + "force_loss": 0.0013468527467921376, + "step": 48580 + }, + { + "epoch": 43.69604316546763, + "grad_norm": 0.17229269444942474, + "learning_rate": 9.566975609196216e-06, + "loss": 0.0148, + "step": 48590 + }, + { + "action_loss": 0.0010770100634545088, + "epoch": 43.69604316546763, + "step": 48590 + }, + { + "epoch": 43.69604316546763, + "step": 48590, + "torque_loss": 0.12984324991703033 + }, + { + "epoch": 43.69604316546763, + "force_loss": 0.002969875233247876, + "step": 48590 + }, + { + "epoch": 43.70503597122302, + "grad_norm": 0.11129924654960632, + "learning_rate": 9.550770151314548e-06, + "loss": 0.0145, + "step": 48600 + }, + { + "action_loss": 0.0027359535451978445, + "epoch": 43.70503597122302, + "step": 48600 + }, + { + "epoch": 43.70503597122302, + "step": 48600, + "torque_loss": 0.12264364957809448 + }, + { + "epoch": 43.70503597122302, + "force_loss": 0.009422987699508667, + "step": 48600 + }, + { + "epoch": 43.71402877697842, + "grad_norm": 0.09316616505384445, + "learning_rate": 9.53457698084091e-06, + "loss": 0.0139, + "step": 48610 + }, + { + "action_loss": 0.005461699794977903, + "epoch": 43.71402877697842, + "step": 48610 + }, + { + "epoch": 43.71402877697842, + "step": 48610, + "torque_loss": 0.10269784927368164 + }, + { + "epoch": 43.71402877697842, + "force_loss": 0.022500261664390564, + "step": 48610 + }, + { + "epoch": 43.723021582733814, + "grad_norm": 0.07619958370923996, + "learning_rate": 9.518396102694355e-06, + "loss": 0.0144, + "step": 48620 + }, + { + "action_loss": 0.01489965245127678, + "epoch": 43.723021582733814, + "step": 48620 + }, + { + "epoch": 43.723021582733814, + "step": 48620, + "torque_loss": 0.15996527671813965 + }, + { + "epoch": 43.723021582733814, + "force_loss": 0.016346463933587074, + "step": 48620 + }, + { + "epoch": 43.73201438848921, + "grad_norm": 0.08920487761497498, + "learning_rate": 9.502227521790198e-06, + "loss": 0.0164, + "step": 48630 + }, + { + "action_loss": 0.005055455956608057, + "epoch": 43.73201438848921, + "step": 48630 + }, + { + "epoch": 43.73201438848921, + "step": 48630, + "torque_loss": 0.140116885304451 + }, + { + "epoch": 43.73201438848921, + "force_loss": 0.003923205658793449, + "step": 48630 + }, + { + "epoch": 43.7410071942446, + "grad_norm": 0.2509458661079407, + "learning_rate": 9.486071243040063e-06, + "loss": 0.0184, + "step": 48640 + }, + { + "action_loss": 0.0010319643188267946, + "epoch": 43.7410071942446, + "step": 48640 + }, + { + "epoch": 43.7410071942446, + "step": 48640, + "torque_loss": 0.14142358303070068 + }, + { + "epoch": 43.7410071942446, + "force_loss": 0.0011054282076656818, + "step": 48640 + }, + { + "epoch": 43.75, + "grad_norm": 0.21261194348335266, + "learning_rate": 9.469927271351747e-06, + "loss": 0.0159, + "step": 48650 + }, + { + "action_loss": 0.0008157800766639411, + "epoch": 43.75, + "step": 48650 + }, + { + "epoch": 43.75, + "step": 48650, + "torque_loss": 0.16082684695720673 + }, + { + "epoch": 43.75, + "force_loss": 0.0017475209897384048, + "step": 48650 + }, + { + "epoch": 43.7589928057554, + "grad_norm": 0.23251934349536896, + "learning_rate": 9.453795611629419e-06, + "loss": 0.0142, + "step": 48660 + }, + { + "action_loss": 0.001602761447429657, + "epoch": 43.7589928057554, + "step": 48660 + }, + { + "epoch": 43.7589928057554, + "step": 48660, + "torque_loss": 0.12902593612670898 + }, + { + "epoch": 43.7589928057554, + "force_loss": 0.008653654716908932, + "step": 48660 + }, + { + "epoch": 43.76798561151079, + "grad_norm": 0.21277140080928802, + "learning_rate": 9.437676268773399e-06, + "loss": 0.0153, + "step": 48670 + }, + { + "action_loss": 0.0008176176925189793, + "epoch": 43.76798561151079, + "step": 48670 + }, + { + "epoch": 43.76798561151079, + "step": 48670, + "torque_loss": 0.1603170782327652 + }, + { + "epoch": 43.76798561151079, + "force_loss": 0.0009851822396740317, + "step": 48670 + }, + { + "epoch": 43.776978417266186, + "grad_norm": 0.07089299708604813, + "learning_rate": 9.421569247680357e-06, + "loss": 0.0147, + "step": 48680 + }, + { + "action_loss": 0.0010423255153000355, + "epoch": 43.776978417266186, + "step": 48680 + }, + { + "epoch": 43.776978417266186, + "step": 48680, + "torque_loss": 0.1136939525604248 + }, + { + "epoch": 43.776978417266186, + "force_loss": 0.0017612312221899629, + "step": 48680 + }, + { + "epoch": 43.78597122302158, + "grad_norm": 0.1810387820005417, + "learning_rate": 9.40547455324316e-06, + "loss": 0.0167, + "step": 48690 + }, + { + "action_loss": 0.005367903504520655, + "epoch": 43.78597122302158, + "step": 48690 + }, + { + "epoch": 43.78597122302158, + "step": 48690, + "torque_loss": 0.15231801569461823 + }, + { + "epoch": 43.78597122302158, + "force_loss": 0.00645092548802495, + "step": 48690 + }, + { + "epoch": 43.79496402877698, + "grad_norm": 0.2494833618402481, + "learning_rate": 9.389392190350965e-06, + "loss": 0.0158, + "step": 48700 + }, + { + "action_loss": 0.003078509122133255, + "epoch": 43.79496402877698, + "step": 48700 + }, + { + "epoch": 43.79496402877698, + "step": 48700, + "torque_loss": 0.15402019023895264 + }, + { + "epoch": 43.79496402877698, + "force_loss": 0.0024214035365730524, + "step": 48700 + }, + { + "epoch": 43.80395683453237, + "grad_norm": 0.17299841344356537, + "learning_rate": 9.373322163889153e-06, + "loss": 0.016, + "step": 48710 + }, + { + "action_loss": 0.0007417557644657791, + "epoch": 43.80395683453237, + "step": 48710 + }, + { + "epoch": 43.80395683453237, + "step": 48710, + "torque_loss": 0.1902492791414261 + }, + { + "epoch": 43.80395683453237, + "force_loss": 0.0018473620293661952, + "step": 48710 + }, + { + "epoch": 43.81294964028777, + "grad_norm": 0.40739917755126953, + "learning_rate": 9.357264478739375e-06, + "loss": 0.0161, + "step": 48720 + }, + { + "action_loss": 0.0008976270328275859, + "epoch": 43.81294964028777, + "step": 48720 + }, + { + "epoch": 43.81294964028777, + "step": 48720, + "torque_loss": 0.13780051469802856 + }, + { + "epoch": 43.81294964028777, + "force_loss": 0.0007627615705132484, + "step": 48720 + }, + { + "epoch": 43.82194244604317, + "grad_norm": 0.05911646783351898, + "learning_rate": 9.341219139779567e-06, + "loss": 0.0154, + "step": 48730 + }, + { + "action_loss": 0.0008540501003153622, + "epoch": 43.82194244604317, + "step": 48730 + }, + { + "epoch": 43.82194244604317, + "step": 48730, + "torque_loss": 0.14919531345367432 + }, + { + "epoch": 43.82194244604317, + "force_loss": 0.0017305637011304498, + "step": 48730 + }, + { + "epoch": 43.830935251798564, + "grad_norm": 0.4672837555408478, + "learning_rate": 9.325186151883824e-06, + "loss": 0.0153, + "step": 48740 + }, + { + "action_loss": 0.0017807031981647015, + "epoch": 43.830935251798564, + "step": 48740 + }, + { + "epoch": 43.830935251798564, + "step": 48740, + "torque_loss": 0.11897901445627213 + }, + { + "epoch": 43.830935251798564, + "force_loss": 0.0018606815719977021, + "step": 48740 + }, + { + "epoch": 43.839928057553955, + "grad_norm": 0.05451711267232895, + "learning_rate": 9.30916551992258e-06, + "loss": 0.0129, + "step": 48750 + }, + { + "action_loss": 0.003994535189121962, + "epoch": 43.839928057553955, + "step": 48750 + }, + { + "epoch": 43.839928057553955, + "step": 48750, + "torque_loss": 0.1472126841545105 + }, + { + "epoch": 43.839928057553955, + "force_loss": 0.0032157546374946833, + "step": 48750 + }, + { + "epoch": 43.84892086330935, + "grad_norm": 0.14822275936603546, + "learning_rate": 9.293157248762479e-06, + "loss": 0.0158, + "step": 48760 + }, + { + "action_loss": 0.002055221237242222, + "epoch": 43.84892086330935, + "step": 48760 + }, + { + "epoch": 43.84892086330935, + "step": 48760, + "torque_loss": 0.17756320536136627 + }, + { + "epoch": 43.84892086330935, + "force_loss": 0.0023576233070343733, + "step": 48760 + }, + { + "epoch": 43.85791366906475, + "grad_norm": 0.06743174046278, + "learning_rate": 9.2771613432664e-06, + "loss": 0.0166, + "step": 48770 + }, + { + "action_loss": 0.04644298925995827, + "epoch": 43.85791366906475, + "step": 48770 + }, + { + "epoch": 43.85791366906475, + "step": 48770, + "torque_loss": 0.16290383040905 + }, + { + "epoch": 43.85791366906475, + "force_loss": 0.05463673546910286, + "step": 48770 + }, + { + "epoch": 43.86690647482014, + "grad_norm": 0.0918324664235115, + "learning_rate": 9.261177808293481e-06, + "loss": 0.0227, + "step": 48780 + }, + { + "action_loss": 0.0009478906285949051, + "epoch": 43.86690647482014, + "step": 48780 + }, + { + "epoch": 43.86690647482014, + "step": 48780, + "torque_loss": 0.11124614626169205 + }, + { + "epoch": 43.86690647482014, + "force_loss": 0.0012306583812460303, + "step": 48780 + }, + { + "epoch": 43.87589928057554, + "grad_norm": 0.13011440634727478, + "learning_rate": 9.245206648699096e-06, + "loss": 0.0147, + "step": 48790 + }, + { + "action_loss": 0.0006437050178647041, + "epoch": 43.87589928057554, + "step": 48790 + }, + { + "epoch": 43.87589928057554, + "step": 48790, + "torque_loss": 0.16849011182785034 + }, + { + "epoch": 43.87589928057554, + "force_loss": 0.0013001452898606658, + "step": 48790 + }, + { + "epoch": 43.884892086330936, + "grad_norm": 0.43789413571357727, + "learning_rate": 9.22924786933485e-06, + "loss": 0.0158, + "step": 48800 + }, + { + "action_loss": 0.001970255747437477, + "epoch": 43.884892086330936, + "step": 48800 + }, + { + "epoch": 43.884892086330936, + "step": 48800, + "torque_loss": 0.16100837290287018 + }, + { + "epoch": 43.884892086330936, + "force_loss": 0.004228240344673395, + "step": 48800 + }, + { + "epoch": 43.893884892086334, + "grad_norm": 0.22035063803195953, + "learning_rate": 9.213301475048642e-06, + "loss": 0.015, + "step": 48810 + }, + { + "action_loss": 0.0014453298645094037, + "epoch": 43.893884892086334, + "step": 48810 + }, + { + "epoch": 43.893884892086334, + "step": 48810, + "torque_loss": 0.11721111088991165 + }, + { + "epoch": 43.893884892086334, + "force_loss": 0.0015289318980649114, + "step": 48810 + }, + { + "epoch": 43.902877697841724, + "grad_norm": 0.16457585990428925, + "learning_rate": 9.197367470684504e-06, + "loss": 0.0156, + "step": 48820 + }, + { + "action_loss": 0.0010235834633931518, + "epoch": 43.902877697841724, + "step": 48820 + }, + { + "epoch": 43.902877697841724, + "step": 48820, + "torque_loss": 0.17007851600646973 + }, + { + "epoch": 43.902877697841724, + "force_loss": 0.0010748450877144933, + "step": 48820 + }, + { + "epoch": 43.91187050359712, + "grad_norm": 0.13499639928340912, + "learning_rate": 9.181445861082816e-06, + "loss": 0.0169, + "step": 48830 + }, + { + "action_loss": 0.003606931073591113, + "epoch": 43.91187050359712, + "step": 48830 + }, + { + "epoch": 43.91187050359712, + "step": 48830, + "torque_loss": 0.11121616512537003 + }, + { + "epoch": 43.91187050359712, + "force_loss": 0.01151010300964117, + "step": 48830 + }, + { + "epoch": 43.92086330935252, + "grad_norm": 0.4295700788497925, + "learning_rate": 9.16553665108012e-06, + "loss": 0.02, + "step": 48840 + }, + { + "action_loss": 0.000658109609503299, + "epoch": 43.92086330935252, + "step": 48840 + }, + { + "epoch": 43.92086330935252, + "step": 48840, + "torque_loss": 0.11263301968574524 + }, + { + "epoch": 43.92086330935252, + "force_loss": 0.0013278181431815028, + "step": 48840 + }, + { + "epoch": 43.92985611510792, + "grad_norm": 0.1152895987033844, + "learning_rate": 9.149639845509223e-06, + "loss": 0.0154, + "step": 48850 + }, + { + "action_loss": 0.0008247655932791531, + "epoch": 43.92985611510792, + "step": 48850 + }, + { + "epoch": 43.92985611510792, + "step": 48850, + "torque_loss": 0.13623180985450745 + }, + { + "epoch": 43.92985611510792, + "force_loss": 0.0009287621360272169, + "step": 48850 + }, + { + "epoch": 43.93884892086331, + "grad_norm": 0.23939968645572662, + "learning_rate": 9.133755449199144e-06, + "loss": 0.0139, + "step": 48860 + }, + { + "action_loss": 0.0019896244630217552, + "epoch": 43.93884892086331, + "step": 48860 + }, + { + "epoch": 43.93884892086331, + "step": 48860, + "torque_loss": 0.14498431980609894 + }, + { + "epoch": 43.93884892086331, + "force_loss": 0.0045209359377622604, + "step": 48860 + }, + { + "epoch": 43.947841726618705, + "grad_norm": 0.0391739197075367, + "learning_rate": 9.117883466975135e-06, + "loss": 0.0141, + "step": 48870 + }, + { + "action_loss": 0.0010534119792282581, + "epoch": 43.947841726618705, + "step": 48870 + }, + { + "epoch": 43.947841726618705, + "step": 48870, + "torque_loss": 0.12999878823757172 + }, + { + "epoch": 43.947841726618705, + "force_loss": 0.002682490274310112, + "step": 48870 + }, + { + "epoch": 43.9568345323741, + "grad_norm": 0.0908980444073677, + "learning_rate": 9.10202390365873e-06, + "loss": 0.0157, + "step": 48880 + }, + { + "action_loss": 0.0020297958981245756, + "epoch": 43.9568345323741, + "step": 48880 + }, + { + "epoch": 43.9568345323741, + "step": 48880, + "torque_loss": 0.06899247318506241 + }, + { + "epoch": 43.9568345323741, + "force_loss": 0.0021482689771801233, + "step": 48880 + }, + { + "epoch": 43.96582733812949, + "grad_norm": 0.22997556626796722, + "learning_rate": 9.086176764067583e-06, + "loss": 0.0166, + "step": 48890 + }, + { + "action_loss": 0.0038438818883150816, + "epoch": 43.96582733812949, + "step": 48890 + }, + { + "epoch": 43.96582733812949, + "step": 48890, + "torque_loss": 0.13119101524353027 + }, + { + "epoch": 43.96582733812949, + "force_loss": 0.003268355503678322, + "step": 48890 + }, + { + "epoch": 43.97482014388489, + "grad_norm": 0.4135984182357788, + "learning_rate": 9.070342053015684e-06, + "loss": 0.0164, + "step": 48900 + }, + { + "action_loss": 0.009184107184410095, + "epoch": 43.97482014388489, + "step": 48900 + }, + { + "epoch": 43.97482014388489, + "step": 48900, + "torque_loss": 0.15513502061367035 + }, + { + "epoch": 43.97482014388489, + "force_loss": 0.011585988104343414, + "step": 48900 + }, + { + "epoch": 43.98381294964029, + "grad_norm": 0.24386382102966309, + "learning_rate": 9.054519775313187e-06, + "loss": 0.0162, + "step": 48910 + }, + { + "action_loss": 0.012753057293593884, + "epoch": 43.98381294964029, + "step": 48910 + }, + { + "epoch": 43.98381294964029, + "step": 48910, + "torque_loss": 0.16046780347824097 + }, + { + "epoch": 43.98381294964029, + "force_loss": 0.026838863268494606, + "step": 48910 + }, + { + "epoch": 43.992805755395686, + "grad_norm": 0.0535489022731781, + "learning_rate": 9.038709935766476e-06, + "loss": 0.0146, + "step": 48920 + }, + { + "action_loss": 0.0015181965427473187, + "epoch": 43.992805755395686, + "step": 48920 + }, + { + "epoch": 43.992805755395686, + "step": 48920, + "torque_loss": 0.1305958330631256 + }, + { + "epoch": 43.992805755395686, + "force_loss": 0.0020104984287172556, + "step": 48920 + }, + { + "epoch": 44.00179856115108, + "grad_norm": 0.4193684756755829, + "learning_rate": 9.02291253917817e-06, + "loss": 0.0175, + "step": 48930 + }, + { + "action_loss": 0.0006691753515042365, + "epoch": 44.00179856115108, + "step": 48930 + }, + { + "epoch": 44.00179856115108, + "step": 48930, + "torque_loss": 0.13290785253047943 + }, + { + "epoch": 44.00179856115108, + "force_loss": 0.0035969307646155357, + "step": 48930 + }, + { + "epoch": 44.010791366906474, + "grad_norm": 0.16773025691509247, + "learning_rate": 9.007127590347091e-06, + "loss": 0.0151, + "step": 48940 + }, + { + "action_loss": 0.0009007633198052645, + "epoch": 44.010791366906474, + "step": 48940 + }, + { + "epoch": 44.010791366906474, + "step": 48940, + "torque_loss": 0.1115138903260231 + }, + { + "epoch": 44.010791366906474, + "force_loss": 0.0032146081794053316, + "step": 48940 + }, + { + "epoch": 44.01978417266187, + "grad_norm": 0.3608867824077606, + "learning_rate": 8.991355094068288e-06, + "loss": 0.0168, + "step": 48950 + }, + { + "action_loss": 0.0011368446284905076, + "epoch": 44.01978417266187, + "step": 48950 + }, + { + "epoch": 44.01978417266187, + "step": 48950, + "torque_loss": 0.09599777311086655 + }, + { + "epoch": 44.01978417266187, + "force_loss": 0.0060363151133060455, + "step": 48950 + }, + { + "epoch": 44.02877697841727, + "grad_norm": 0.0976967141032219, + "learning_rate": 8.975595055133062e-06, + "loss": 0.0143, + "step": 48960 + }, + { + "action_loss": 0.0013883948558941483, + "epoch": 44.02877697841727, + "step": 48960 + }, + { + "epoch": 44.02877697841727, + "step": 48960, + "torque_loss": 0.12109386920928955 + }, + { + "epoch": 44.02877697841727, + "force_loss": 0.0020069105084985495, + "step": 48960 + }, + { + "epoch": 44.03776978417266, + "grad_norm": 0.08910827338695526, + "learning_rate": 8.959847478328848e-06, + "loss": 0.0161, + "step": 48970 + }, + { + "action_loss": 0.0008580006542615592, + "epoch": 44.03776978417266, + "step": 48970 + }, + { + "epoch": 44.03776978417266, + "step": 48970, + "torque_loss": 0.11592822521924973 + }, + { + "epoch": 44.03776978417266, + "force_loss": 0.00447820546105504, + "step": 48970 + }, + { + "epoch": 44.04676258992806, + "grad_norm": 0.29000189900398254, + "learning_rate": 8.944112368439378e-06, + "loss": 0.0149, + "step": 48980 + }, + { + "action_loss": 0.0020134069491177797, + "epoch": 44.04676258992806, + "step": 48980 + }, + { + "epoch": 44.04676258992806, + "step": 48980, + "torque_loss": 0.1304437518119812 + }, + { + "epoch": 44.04676258992806, + "force_loss": 0.010307746939361095, + "step": 48980 + }, + { + "epoch": 44.055755395683455, + "grad_norm": 0.04845907539129257, + "learning_rate": 8.928389730244552e-06, + "loss": 0.0142, + "step": 48990 + }, + { + "action_loss": 0.0007547863642685115, + "epoch": 44.055755395683455, + "step": 48990 + }, + { + "epoch": 44.055755395683455, + "step": 48990, + "torque_loss": 0.11735448241233826 + }, + { + "epoch": 44.055755395683455, + "force_loss": 0.0007481517386622727, + "step": 48990 + }, + { + "epoch": 44.064748201438846, + "grad_norm": 0.14411114156246185, + "learning_rate": 8.912679568520494e-06, + "loss": 0.0145, + "step": 49000 + }, + { + "action_loss": 0.0008495559450238943, + "epoch": 44.064748201438846, + "step": 49000 + }, + { + "epoch": 44.064748201438846, + "step": 49000, + "torque_loss": 0.1195276752114296 + }, + { + "epoch": 44.064748201438846, + "force_loss": 0.0035455410834401846, + "step": 49000 + }, + { + "epoch": 44.07374100719424, + "grad_norm": 0.16706620156764984, + "learning_rate": 8.896981888039534e-06, + "loss": 0.0136, + "step": 49010 + }, + { + "action_loss": 0.0011316556483507156, + "epoch": 44.07374100719424, + "step": 49010 + }, + { + "epoch": 44.07374100719424, + "step": 49010, + "torque_loss": 0.1310105174779892 + }, + { + "epoch": 44.07374100719424, + "force_loss": 0.0017588408663868904, + "step": 49010 + }, + { + "epoch": 44.08273381294964, + "grad_norm": 0.09481914341449738, + "learning_rate": 8.881296693570201e-06, + "loss": 0.0147, + "step": 49020 + }, + { + "action_loss": 0.008379969745874405, + "epoch": 44.08273381294964, + "step": 49020 + }, + { + "epoch": 44.08273381294964, + "step": 49020, + "torque_loss": 0.17608459293842316 + }, + { + "epoch": 44.08273381294964, + "force_loss": 0.01149552408605814, + "step": 49020 + }, + { + "epoch": 44.09172661870504, + "grad_norm": 0.15379805862903595, + "learning_rate": 8.865623989877281e-06, + "loss": 0.0165, + "step": 49030 + }, + { + "action_loss": 0.0005789054557681084, + "epoch": 44.09172661870504, + "step": 49030 + }, + { + "epoch": 44.09172661870504, + "step": 49030, + "torque_loss": 0.07873835414648056 + }, + { + "epoch": 44.09172661870504, + "force_loss": 0.003443564986810088, + "step": 49030 + }, + { + "epoch": 44.10071942446043, + "grad_norm": 0.11708827316761017, + "learning_rate": 8.849963781721681e-06, + "loss": 0.0149, + "step": 49040 + }, + { + "action_loss": 0.0032002823427319527, + "epoch": 44.10071942446043, + "step": 49040 + }, + { + "epoch": 44.10071942446043, + "step": 49040, + "torque_loss": 0.1290290653705597 + }, + { + "epoch": 44.10071942446043, + "force_loss": 0.005330126266926527, + "step": 49040 + }, + { + "epoch": 44.10971223021583, + "grad_norm": 0.0901990532875061, + "learning_rate": 8.834316073860588e-06, + "loss": 0.016, + "step": 49050 + }, + { + "action_loss": 0.014266385696828365, + "epoch": 44.10971223021583, + "step": 49050 + }, + { + "epoch": 44.10971223021583, + "step": 49050, + "torque_loss": 0.13990400731563568 + }, + { + "epoch": 44.10971223021583, + "force_loss": 0.01892738975584507, + "step": 49050 + }, + { + "epoch": 44.118705035971225, + "grad_norm": 0.15352337062358856, + "learning_rate": 8.818680871047357e-06, + "loss": 0.019, + "step": 49060 + }, + { + "action_loss": 0.0013577440986409783, + "epoch": 44.118705035971225, + "step": 49060 + }, + { + "epoch": 44.118705035971225, + "step": 49060, + "torque_loss": 0.09515384584665298 + }, + { + "epoch": 44.118705035971225, + "force_loss": 0.0034006417263299227, + "step": 49060 + }, + { + "epoch": 44.12769784172662, + "grad_norm": 0.10146159678697586, + "learning_rate": 8.803058178031549e-06, + "loss": 0.0148, + "step": 49070 + }, + { + "action_loss": 0.0005237226141616702, + "epoch": 44.12769784172662, + "step": 49070 + }, + { + "epoch": 44.12769784172662, + "step": 49070, + "torque_loss": 0.10528019815683365 + }, + { + "epoch": 44.12769784172662, + "force_loss": 0.0022101302165538073, + "step": 49070 + }, + { + "epoch": 44.13669064748201, + "grad_norm": 0.06642207503318787, + "learning_rate": 8.787447999558922e-06, + "loss": 0.0167, + "step": 49080 + }, + { + "action_loss": 0.005163056310266256, + "epoch": 44.13669064748201, + "step": 49080 + }, + { + "epoch": 44.13669064748201, + "step": 49080, + "torque_loss": 0.15639592707157135 + }, + { + "epoch": 44.13669064748201, + "force_loss": 0.009870877489447594, + "step": 49080 + }, + { + "epoch": 44.14568345323741, + "grad_norm": 0.12552420794963837, + "learning_rate": 8.77185034037144e-06, + "loss": 0.0159, + "step": 49090 + }, + { + "action_loss": 0.0008313118014484644, + "epoch": 44.14568345323741, + "step": 49090 + }, + { + "epoch": 44.14568345323741, + "step": 49090, + "torque_loss": 0.08804670721292496 + }, + { + "epoch": 44.14568345323741, + "force_loss": 0.0007858526078052819, + "step": 49090 + }, + { + "epoch": 44.15467625899281, + "grad_norm": 0.13472674787044525, + "learning_rate": 8.756265205207259e-06, + "loss": 0.0135, + "step": 49100 + }, + { + "action_loss": 0.0012137899175286293, + "epoch": 44.15467625899281, + "step": 49100 + }, + { + "epoch": 44.15467625899281, + "step": 49100, + "torque_loss": 0.13230256736278534 + }, + { + "epoch": 44.15467625899281, + "force_loss": 0.002622600644826889, + "step": 49100 + }, + { + "epoch": 44.1636690647482, + "grad_norm": 0.06753526628017426, + "learning_rate": 8.740692598800732e-06, + "loss": 0.0128, + "step": 49110 + }, + { + "action_loss": 0.00447540869936347, + "epoch": 44.1636690647482, + "step": 49110 + }, + { + "epoch": 44.1636690647482, + "step": 49110, + "torque_loss": 0.11595549434423447 + }, + { + "epoch": 44.1636690647482, + "force_loss": 0.0022706470917910337, + "step": 49110 + }, + { + "epoch": 44.172661870503596, + "grad_norm": 0.11529926210641861, + "learning_rate": 8.72513252588239e-06, + "loss": 0.0142, + "step": 49120 + }, + { + "action_loss": 0.0013867107918486, + "epoch": 44.172661870503596, + "step": 49120 + }, + { + "epoch": 44.172661870503596, + "step": 49120, + "torque_loss": 0.10046526044607162 + }, + { + "epoch": 44.172661870503596, + "force_loss": 0.0031199390068650246, + "step": 49120 + }, + { + "epoch": 44.181654676258994, + "grad_norm": 0.06633419543504715, + "learning_rate": 8.709584991178998e-06, + "loss": 0.0133, + "step": 49130 + }, + { + "action_loss": 0.0021849339827895164, + "epoch": 44.181654676258994, + "step": 49130 + }, + { + "epoch": 44.181654676258994, + "step": 49130, + "torque_loss": 0.09284258633852005 + }, + { + "epoch": 44.181654676258994, + "force_loss": 0.008053903467953205, + "step": 49130 + }, + { + "epoch": 44.19064748201439, + "grad_norm": 0.07379432767629623, + "learning_rate": 8.694049999413479e-06, + "loss": 0.0127, + "step": 49140 + }, + { + "action_loss": 0.0017946548759937286, + "epoch": 44.19064748201439, + "step": 49140 + }, + { + "epoch": 44.19064748201439, + "step": 49140, + "torque_loss": 0.04979671910405159 + }, + { + "epoch": 44.19064748201439, + "force_loss": 0.002636880613863468, + "step": 49140 + }, + { + "epoch": 44.19964028776978, + "grad_norm": 0.13538052141666412, + "learning_rate": 8.678527555304945e-06, + "loss": 0.0137, + "step": 49150 + }, + { + "action_loss": 0.0010748477652668953, + "epoch": 44.19964028776978, + "step": 49150 + }, + { + "epoch": 44.19964028776978, + "step": 49150, + "torque_loss": 0.16835923492908478 + }, + { + "epoch": 44.19964028776978, + "force_loss": 0.006916664075106382, + "step": 49150 + }, + { + "epoch": 44.20863309352518, + "grad_norm": 0.10933997482061386, + "learning_rate": 8.663017663568712e-06, + "loss": 0.0139, + "step": 49160 + }, + { + "action_loss": 0.005764565896242857, + "epoch": 44.20863309352518, + "step": 49160 + }, + { + "epoch": 44.20863309352518, + "step": 49160, + "torque_loss": 0.16226160526275635 + }, + { + "epoch": 44.20863309352518, + "force_loss": 0.0039595505222678185, + "step": 49160 + }, + { + "epoch": 44.21762589928058, + "grad_norm": 0.04308479279279709, + "learning_rate": 8.647520328916259e-06, + "loss": 0.0163, + "step": 49170 + }, + { + "action_loss": 0.001063380972482264, + "epoch": 44.21762589928058, + "step": 49170 + }, + { + "epoch": 44.21762589928058, + "step": 49170, + "torque_loss": 0.102976955473423 + }, + { + "epoch": 44.21762589928058, + "force_loss": 0.0026875126641243696, + "step": 49170 + }, + { + "epoch": 44.226618705035975, + "grad_norm": 0.2102208137512207, + "learning_rate": 8.632035556055307e-06, + "loss": 0.0169, + "step": 49180 + }, + { + "action_loss": 0.002664957894012332, + "epoch": 44.226618705035975, + "step": 49180 + }, + { + "epoch": 44.226618705035975, + "step": 49180, + "torque_loss": 0.11847998946905136 + }, + { + "epoch": 44.226618705035975, + "force_loss": 0.00101189233828336, + "step": 49180 + }, + { + "epoch": 44.235611510791365, + "grad_norm": 0.16477319598197937, + "learning_rate": 8.616563349689672e-06, + "loss": 0.0168, + "step": 49190 + }, + { + "action_loss": 0.0013223630376160145, + "epoch": 44.235611510791365, + "step": 49190 + }, + { + "epoch": 44.235611510791365, + "step": 49190, + "torque_loss": 0.10624772310256958 + }, + { + "epoch": 44.235611510791365, + "force_loss": 0.002198045840486884, + "step": 49190 + }, + { + "epoch": 44.24460431654676, + "grad_norm": 0.3269161283969879, + "learning_rate": 8.601103714519448e-06, + "loss": 0.0152, + "step": 49200 + }, + { + "action_loss": 0.0033661779016256332, + "epoch": 44.24460431654676, + "step": 49200 + }, + { + "epoch": 44.24460431654676, + "step": 49200, + "torque_loss": 0.1871439814567566 + }, + { + "epoch": 44.24460431654676, + "force_loss": 0.005143625661730766, + "step": 49200 + }, + { + "epoch": 44.25359712230216, + "grad_norm": 0.5780916213989258, + "learning_rate": 8.58565665524082e-06, + "loss": 0.0158, + "step": 49210 + }, + { + "action_loss": 0.001128057367168367, + "epoch": 44.25359712230216, + "step": 49210 + }, + { + "epoch": 44.25359712230216, + "step": 49210, + "torque_loss": 0.10066789388656616 + }, + { + "epoch": 44.25359712230216, + "force_loss": 0.005184227600693703, + "step": 49210 + }, + { + "epoch": 44.26258992805755, + "grad_norm": 0.06680929660797119, + "learning_rate": 8.570222176546222e-06, + "loss": 0.0171, + "step": 49220 + }, + { + "action_loss": 0.0006675591575913131, + "epoch": 44.26258992805755, + "step": 49220 + }, + { + "epoch": 44.26258992805755, + "step": 49220, + "torque_loss": 0.08851144462823868 + }, + { + "epoch": 44.26258992805755, + "force_loss": 0.0009518570150248706, + "step": 49220 + }, + { + "epoch": 44.27158273381295, + "grad_norm": 0.6446833610534668, + "learning_rate": 8.554800283124242e-06, + "loss": 0.0159, + "step": 49230 + }, + { + "action_loss": 0.006528045982122421, + "epoch": 44.27158273381295, + "step": 49230 + }, + { + "epoch": 44.27158273381295, + "step": 49230, + "torque_loss": 0.11312786489725113 + }, + { + "epoch": 44.27158273381295, + "force_loss": 0.008985977619886398, + "step": 49230 + }, + { + "epoch": 44.280575539568346, + "grad_norm": 0.06628572940826416, + "learning_rate": 8.539390979659639e-06, + "loss": 0.0165, + "step": 49240 + }, + { + "action_loss": 0.0010382866021245718, + "epoch": 44.280575539568346, + "step": 49240 + }, + { + "epoch": 44.280575539568346, + "step": 49240, + "torque_loss": 0.11408758908510208 + }, + { + "epoch": 44.280575539568346, + "force_loss": 0.0024445292074233294, + "step": 49240 + }, + { + "epoch": 44.289568345323744, + "grad_norm": 0.14445219933986664, + "learning_rate": 8.523994270833352e-06, + "loss": 0.0152, + "step": 49250 + }, + { + "action_loss": 0.001168295624665916, + "epoch": 44.289568345323744, + "step": 49250 + }, + { + "epoch": 44.289568345323744, + "step": 49250, + "torque_loss": 0.10078869014978409 + }, + { + "epoch": 44.289568345323744, + "force_loss": 0.001015942427329719, + "step": 49250 + }, + { + "epoch": 44.298561151079134, + "grad_norm": 0.07885625213384628, + "learning_rate": 8.5086101613225e-06, + "loss": 0.0173, + "step": 49260 + }, + { + "action_loss": 0.0004780030867550522, + "epoch": 44.298561151079134, + "step": 49260 + }, + { + "epoch": 44.298561151079134, + "step": 49260, + "torque_loss": 0.09015974402427673 + }, + { + "epoch": 44.298561151079134, + "force_loss": 0.0013808520743623376, + "step": 49260 + }, + { + "epoch": 44.30755395683453, + "grad_norm": 0.18947839736938477, + "learning_rate": 8.493238655800346e-06, + "loss": 0.0131, + "step": 49270 + }, + { + "action_loss": 0.004167734179645777, + "epoch": 44.30755395683453, + "step": 49270 + }, + { + "epoch": 44.30755395683453, + "step": 49270, + "torque_loss": 0.12240535020828247 + }, + { + "epoch": 44.30755395683453, + "force_loss": 0.010304260067641735, + "step": 49270 + }, + { + "epoch": 44.31654676258993, + "grad_norm": 0.0866895318031311, + "learning_rate": 8.47787975893638e-06, + "loss": 0.015, + "step": 49280 + }, + { + "action_loss": 0.006357399746775627, + "epoch": 44.31654676258993, + "step": 49280 + }, + { + "epoch": 44.31654676258993, + "step": 49280, + "torque_loss": 0.15336821973323822 + }, + { + "epoch": 44.31654676258993, + "force_loss": 0.011244997382164001, + "step": 49280 + }, + { + "epoch": 44.32553956834532, + "grad_norm": 0.1137361228466034, + "learning_rate": 8.462533475396211e-06, + "loss": 0.0154, + "step": 49290 + }, + { + "action_loss": 0.001686560339294374, + "epoch": 44.32553956834532, + "step": 49290 + }, + { + "epoch": 44.32553956834532, + "step": 49290, + "torque_loss": 0.10393055528402328 + }, + { + "epoch": 44.32553956834532, + "force_loss": 0.006603599991649389, + "step": 49290 + }, + { + "epoch": 44.33453237410072, + "grad_norm": 0.4816022217273712, + "learning_rate": 8.447199809841643e-06, + "loss": 0.0134, + "step": 49300 + }, + { + "action_loss": 0.0007192753255367279, + "epoch": 44.33453237410072, + "step": 49300 + }, + { + "epoch": 44.33453237410072, + "step": 49300, + "torque_loss": 0.12039162963628769 + }, + { + "epoch": 44.33453237410072, + "force_loss": 0.0037354787345975637, + "step": 49300 + }, + { + "epoch": 44.343525179856115, + "grad_norm": 0.06910062581300735, + "learning_rate": 8.431878766930635e-06, + "loss": 0.0129, + "step": 49310 + }, + { + "action_loss": 0.0005455666687339544, + "epoch": 44.343525179856115, + "step": 49310 + }, + { + "epoch": 44.343525179856115, + "step": 49310, + "torque_loss": 0.09510147571563721 + }, + { + "epoch": 44.343525179856115, + "force_loss": 0.0009796253871172667, + "step": 49310 + }, + { + "epoch": 44.35251798561151, + "grad_norm": 0.15499146282672882, + "learning_rate": 8.416570351317304e-06, + "loss": 0.015, + "step": 49320 + }, + { + "action_loss": 0.003919545561075211, + "epoch": 44.35251798561151, + "step": 49320 + }, + { + "epoch": 44.35251798561151, + "step": 49320, + "torque_loss": 0.13794682919979095 + }, + { + "epoch": 44.35251798561151, + "force_loss": 0.010099179111421108, + "step": 49320 + }, + { + "epoch": 44.361510791366904, + "grad_norm": 0.11637290567159653, + "learning_rate": 8.401274567651973e-06, + "loss": 0.0148, + "step": 49330 + }, + { + "action_loss": 0.0019072741270065308, + "epoch": 44.361510791366904, + "step": 49330 + }, + { + "epoch": 44.361510791366904, + "step": 49330, + "torque_loss": 0.14849670231342316 + }, + { + "epoch": 44.361510791366904, + "force_loss": 0.0016698756953701377, + "step": 49330 + }, + { + "epoch": 44.3705035971223, + "grad_norm": 0.35222741961479187, + "learning_rate": 8.385991420581058e-06, + "loss": 0.0171, + "step": 49340 + }, + { + "action_loss": 0.0018626055680215359, + "epoch": 44.3705035971223, + "step": 49340 + }, + { + "epoch": 44.3705035971223, + "step": 49340, + "torque_loss": 0.1551978439092636 + }, + { + "epoch": 44.3705035971223, + "force_loss": 0.0018640585476532578, + "step": 49340 + }, + { + "epoch": 44.3794964028777, + "grad_norm": 0.04401099681854248, + "learning_rate": 8.370720914747215e-06, + "loss": 0.0157, + "step": 49350 + }, + { + "action_loss": 0.0011097675887867808, + "epoch": 44.3794964028777, + "step": 49350 + }, + { + "epoch": 44.3794964028777, + "step": 49350, + "torque_loss": 0.060345251113176346 + }, + { + "epoch": 44.3794964028777, + "force_loss": 0.0016489513218402863, + "step": 49350 + }, + { + "epoch": 44.388489208633096, + "grad_norm": 0.34394213557243347, + "learning_rate": 8.355463054789181e-06, + "loss": 0.0155, + "step": 49360 + }, + { + "action_loss": 0.003258590353652835, + "epoch": 44.388489208633096, + "step": 49360 + }, + { + "epoch": 44.388489208633096, + "step": 49360, + "torque_loss": 0.12820221483707428 + }, + { + "epoch": 44.388489208633096, + "force_loss": 0.0032490657176822424, + "step": 49360 + }, + { + "epoch": 44.39748201438849, + "grad_norm": 0.5119810104370117, + "learning_rate": 8.340217845341919e-06, + "loss": 0.0183, + "step": 49370 + }, + { + "action_loss": 0.001605277881026268, + "epoch": 44.39748201438849, + "step": 49370 + }, + { + "epoch": 44.39748201438849, + "step": 49370, + "torque_loss": 0.13937672972679138 + }, + { + "epoch": 44.39748201438849, + "force_loss": 0.002603571629151702, + "step": 49370 + }, + { + "epoch": 44.406474820143885, + "grad_norm": 0.6002481579780579, + "learning_rate": 8.324985291036514e-06, + "loss": 0.0176, + "step": 49380 + }, + { + "action_loss": 0.0011088335886597633, + "epoch": 44.406474820143885, + "step": 49380 + }, + { + "epoch": 44.406474820143885, + "step": 49380, + "torque_loss": 0.12105637043714523 + }, + { + "epoch": 44.406474820143885, + "force_loss": 0.003954824525862932, + "step": 49380 + }, + { + "epoch": 44.41546762589928, + "grad_norm": 0.2561337649822235, + "learning_rate": 8.309765396500213e-06, + "loss": 0.0145, + "step": 49390 + }, + { + "action_loss": 0.002047070302069187, + "epoch": 44.41546762589928, + "step": 49390 + }, + { + "epoch": 44.41546762589928, + "step": 49390, + "torque_loss": 0.1376371830701828 + }, + { + "epoch": 44.41546762589928, + "force_loss": 0.00330272875726223, + "step": 49390 + }, + { + "epoch": 44.42446043165468, + "grad_norm": 0.18997599184513092, + "learning_rate": 8.294558166356419e-06, + "loss": 0.0144, + "step": 49400 + }, + { + "action_loss": 0.004301346838474274, + "epoch": 44.42446043165468, + "step": 49400 + }, + { + "epoch": 44.42446043165468, + "step": 49400, + "torque_loss": 0.11567624658346176 + }, + { + "epoch": 44.42446043165468, + "force_loss": 0.003047328442335129, + "step": 49400 + }, + { + "epoch": 44.43345323741007, + "grad_norm": 0.09938976168632507, + "learning_rate": 8.279363605224683e-06, + "loss": 0.0133, + "step": 49410 + }, + { + "action_loss": 0.003768334863707423, + "epoch": 44.43345323741007, + "step": 49410 + }, + { + "epoch": 44.43345323741007, + "step": 49410, + "torque_loss": 0.13831429183483124 + }, + { + "epoch": 44.43345323741007, + "force_loss": 0.0023436248302459717, + "step": 49410 + }, + { + "epoch": 44.44244604316547, + "grad_norm": 0.07708617299795151, + "learning_rate": 8.264181717720704e-06, + "loss": 0.014, + "step": 49420 + }, + { + "action_loss": 0.0058227344416081905, + "epoch": 44.44244604316547, + "step": 49420 + }, + { + "epoch": 44.44244604316547, + "step": 49420, + "torque_loss": 0.14846259355545044 + }, + { + "epoch": 44.44244604316547, + "force_loss": 0.013669963926076889, + "step": 49420 + }, + { + "epoch": 44.451438848920866, + "grad_norm": 0.11816935241222382, + "learning_rate": 8.249012508456361e-06, + "loss": 0.0159, + "step": 49430 + }, + { + "action_loss": 0.0023793091531842947, + "epoch": 44.451438848920866, + "step": 49430 + }, + { + "epoch": 44.451438848920866, + "step": 49430, + "torque_loss": 0.1441594362258911 + }, + { + "epoch": 44.451438848920866, + "force_loss": 0.003016282804310322, + "step": 49430 + }, + { + "epoch": 44.460431654676256, + "grad_norm": 0.08664724230766296, + "learning_rate": 8.233855982039646e-06, + "loss": 0.014, + "step": 49440 + }, + { + "action_loss": 0.001746080699376762, + "epoch": 44.460431654676256, + "step": 49440 + }, + { + "epoch": 44.460431654676256, + "step": 49440, + "torque_loss": 0.1471930742263794 + }, + { + "epoch": 44.460431654676256, + "force_loss": 0.0032019547652453184, + "step": 49440 + }, + { + "epoch": 44.469424460431654, + "grad_norm": 0.09860538691282272, + "learning_rate": 8.218712143074708e-06, + "loss": 0.0171, + "step": 49450 + }, + { + "action_loss": 0.0014260044554248452, + "epoch": 44.469424460431654, + "step": 49450 + }, + { + "epoch": 44.469424460431654, + "step": 49450, + "torque_loss": 0.12285075336694717 + }, + { + "epoch": 44.469424460431654, + "force_loss": 0.0033392657060176134, + "step": 49450 + }, + { + "epoch": 44.47841726618705, + "grad_norm": 0.06341202557086945, + "learning_rate": 8.203580996161858e-06, + "loss": 0.0135, + "step": 49460 + }, + { + "action_loss": 0.0006078608566895127, + "epoch": 44.47841726618705, + "step": 49460 + }, + { + "epoch": 44.47841726618705, + "step": 49460, + "torque_loss": 0.13634148240089417 + }, + { + "epoch": 44.47841726618705, + "force_loss": 0.001436496269889176, + "step": 49460 + }, + { + "epoch": 44.48741007194245, + "grad_norm": 0.2088707834482193, + "learning_rate": 8.188462545897512e-06, + "loss": 0.014, + "step": 49470 + }, + { + "action_loss": 0.002264623064547777, + "epoch": 44.48741007194245, + "step": 49470 + }, + { + "epoch": 44.48741007194245, + "step": 49470, + "torque_loss": 0.09197241067886353 + }, + { + "epoch": 44.48741007194245, + "force_loss": 0.0027277201879769564, + "step": 49470 + }, + { + "epoch": 44.49640287769784, + "grad_norm": 0.09228070825338364, + "learning_rate": 8.173356796874304e-06, + "loss": 0.0126, + "step": 49480 + }, + { + "action_loss": 0.002317664911970496, + "epoch": 44.49640287769784, + "step": 49480 + }, + { + "epoch": 44.49640287769784, + "step": 49480, + "torque_loss": 0.0934019684791565 + }, + { + "epoch": 44.49640287769784, + "force_loss": 0.009517152793705463, + "step": 49480 + }, + { + "epoch": 44.50539568345324, + "grad_norm": 0.06284049898386002, + "learning_rate": 8.158263753680906e-06, + "loss": 0.0172, + "step": 49490 + }, + { + "action_loss": 0.002216239692643285, + "epoch": 44.50539568345324, + "step": 49490 + }, + { + "epoch": 44.50539568345324, + "step": 49490, + "torque_loss": 0.14668191969394684 + }, + { + "epoch": 44.50539568345324, + "force_loss": 0.0034530481789261103, + "step": 49490 + }, + { + "epoch": 44.514388489208635, + "grad_norm": 0.09228930622339249, + "learning_rate": 8.143183420902239e-06, + "loss": 0.0144, + "step": 49500 + }, + { + "action_loss": 0.0014816907932981849, + "epoch": 44.514388489208635, + "step": 49500 + }, + { + "epoch": 44.514388489208635, + "step": 49500, + "torque_loss": 0.10810700803995132 + }, + { + "epoch": 44.514388489208635, + "force_loss": 0.009996780194342136, + "step": 49500 + }, + { + "epoch": 44.523381294964025, + "grad_norm": 0.2832128405570984, + "learning_rate": 8.128115803119258e-06, + "loss": 0.0135, + "step": 49510 + }, + { + "action_loss": 0.0010376755381003022, + "epoch": 44.523381294964025, + "step": 49510 + }, + { + "epoch": 44.523381294964025, + "step": 49510, + "torque_loss": 0.12448078393936157 + }, + { + "epoch": 44.523381294964025, + "force_loss": 0.0013605793938040733, + "step": 49510 + }, + { + "epoch": 44.53237410071942, + "grad_norm": 0.12625567615032196, + "learning_rate": 8.11306090490916e-06, + "loss": 0.0134, + "step": 49520 + }, + { + "action_loss": 0.0011574003146961331, + "epoch": 44.53237410071942, + "step": 49520 + }, + { + "epoch": 44.53237410071942, + "step": 49520, + "torque_loss": 0.13235025107860565 + }, + { + "epoch": 44.53237410071942, + "force_loss": 0.0017351250862702727, + "step": 49520 + }, + { + "epoch": 44.54136690647482, + "grad_norm": 0.05436331033706665, + "learning_rate": 8.098018730845169e-06, + "loss": 0.0139, + "step": 49530 + }, + { + "action_loss": 0.0009508302318863571, + "epoch": 44.54136690647482, + "step": 49530 + }, + { + "epoch": 44.54136690647482, + "step": 49530, + "torque_loss": 0.14779029786586761 + }, + { + "epoch": 44.54136690647482, + "force_loss": 0.0007588908192701638, + "step": 49530 + }, + { + "epoch": 44.55035971223022, + "grad_norm": 0.11662055552005768, + "learning_rate": 8.082989285496745e-06, + "loss": 0.014, + "step": 49540 + }, + { + "action_loss": 0.0014115944504737854, + "epoch": 44.55035971223022, + "step": 49540 + }, + { + "epoch": 44.55035971223022, + "step": 49540, + "torque_loss": 0.07627858966588974 + }, + { + "epoch": 44.55035971223022, + "force_loss": 0.003981321584433317, + "step": 49540 + }, + { + "epoch": 44.55935251798561, + "grad_norm": 0.047712501138448715, + "learning_rate": 8.067972573429416e-06, + "loss": 0.0149, + "step": 49550 + }, + { + "action_loss": 0.0008498501847498119, + "epoch": 44.55935251798561, + "step": 49550 + }, + { + "epoch": 44.55935251798561, + "step": 49550, + "torque_loss": 0.08005679398775101 + }, + { + "epoch": 44.55935251798561, + "force_loss": 0.0014413176104426384, + "step": 49550 + }, + { + "epoch": 44.568345323741006, + "grad_norm": 0.48789462447166443, + "learning_rate": 8.052968599204874e-06, + "loss": 0.0165, + "step": 49560 + }, + { + "action_loss": 0.004135263618081808, + "epoch": 44.568345323741006, + "step": 49560 + }, + { + "epoch": 44.568345323741006, + "step": 49560, + "torque_loss": 0.10088077932596207 + }, + { + "epoch": 44.568345323741006, + "force_loss": 0.006796931382268667, + "step": 49560 + }, + { + "epoch": 44.577338129496404, + "grad_norm": 0.10422906279563904, + "learning_rate": 8.037977367380922e-06, + "loss": 0.0141, + "step": 49570 + }, + { + "action_loss": 0.0007956252084113657, + "epoch": 44.577338129496404, + "step": 49570 + }, + { + "epoch": 44.577338129496404, + "step": 49570, + "torque_loss": 0.16722308099269867 + }, + { + "epoch": 44.577338129496404, + "force_loss": 0.001192752388305962, + "step": 49570 + }, + { + "epoch": 44.5863309352518, + "grad_norm": 0.29155808687210083, + "learning_rate": 8.022998882511495e-06, + "loss": 0.0169, + "step": 49580 + }, + { + "action_loss": 0.0014681977918371558, + "epoch": 44.5863309352518, + "step": 49580 + }, + { + "epoch": 44.5863309352518, + "step": 49580, + "torque_loss": 0.14347128570079803 + }, + { + "epoch": 44.5863309352518, + "force_loss": 0.0034073113929480314, + "step": 49580 + }, + { + "epoch": 44.59532374100719, + "grad_norm": 0.2645726501941681, + "learning_rate": 8.008033149146677e-06, + "loss": 0.0147, + "step": 49590 + }, + { + "action_loss": 0.005384637042880058, + "epoch": 44.59532374100719, + "step": 49590 + }, + { + "epoch": 44.59532374100719, + "step": 49590, + "torque_loss": 0.10157445818185806 + }, + { + "epoch": 44.59532374100719, + "force_loss": 0.010809145867824554, + "step": 49590 + }, + { + "epoch": 44.60431654676259, + "grad_norm": 0.21352891623973846, + "learning_rate": 7.993080171832656e-06, + "loss": 0.0144, + "step": 49600 + }, + { + "action_loss": 0.0006976434378884733, + "epoch": 44.60431654676259, + "step": 49600 + }, + { + "epoch": 44.60431654676259, + "step": 49600, + "torque_loss": 0.12141027301549911 + }, + { + "epoch": 44.60431654676259, + "force_loss": 0.0011753566795960069, + "step": 49600 + }, + { + "epoch": 44.61330935251799, + "grad_norm": 0.06730925291776657, + "learning_rate": 7.978139955111752e-06, + "loss": 0.0161, + "step": 49610 + }, + { + "action_loss": 0.003273202106356621, + "epoch": 44.61330935251799, + "step": 49610 + }, + { + "epoch": 44.61330935251799, + "step": 49610, + "torque_loss": 0.11216571182012558 + }, + { + "epoch": 44.61330935251799, + "force_loss": 0.010554053820669651, + "step": 49610 + }, + { + "epoch": 44.62230215827338, + "grad_norm": 0.04671144112944603, + "learning_rate": 7.9632125035224e-06, + "loss": 0.0157, + "step": 49620 + }, + { + "action_loss": 0.002263119211420417, + "epoch": 44.62230215827338, + "step": 49620 + }, + { + "epoch": 44.62230215827338, + "step": 49620, + "torque_loss": 0.1412576138973236 + }, + { + "epoch": 44.62230215827338, + "force_loss": 0.0049514793790876865, + "step": 49620 + }, + { + "epoch": 44.631294964028775, + "grad_norm": 0.31017956137657166, + "learning_rate": 7.948297821599177e-06, + "loss": 0.0138, + "step": 49630 + }, + { + "action_loss": 0.0010347046190872788, + "epoch": 44.631294964028775, + "step": 49630 + }, + { + "epoch": 44.631294964028775, + "step": 49630, + "torque_loss": 0.10887827724218369 + }, + { + "epoch": 44.631294964028775, + "force_loss": 0.0031630315352231264, + "step": 49630 + }, + { + "epoch": 44.64028776978417, + "grad_norm": 0.15130667388439178, + "learning_rate": 7.933395913872755e-06, + "loss": 0.0142, + "step": 49640 + }, + { + "action_loss": 0.008818390779197216, + "epoch": 44.64028776978417, + "step": 49640 + }, + { + "epoch": 44.64028776978417, + "step": 49640, + "torque_loss": 0.15408599376678467 + }, + { + "epoch": 44.64028776978417, + "force_loss": 0.011250992305576801, + "step": 49640 + }, + { + "epoch": 44.64928057553957, + "grad_norm": 0.2945329546928406, + "learning_rate": 7.918506784869972e-06, + "loss": 0.0152, + "step": 49650 + }, + { + "action_loss": 0.0011330684646964073, + "epoch": 44.64928057553957, + "step": 49650 + }, + { + "epoch": 44.64928057553957, + "step": 49650, + "torque_loss": 0.12201186269521713 + }, + { + "epoch": 44.64928057553957, + "force_loss": 0.0032301582396030426, + "step": 49650 + }, + { + "epoch": 44.65827338129496, + "grad_norm": 0.05091472342610359, + "learning_rate": 7.903630439113707e-06, + "loss": 0.0158, + "step": 49660 + }, + { + "action_loss": 0.0014601141447201371, + "epoch": 44.65827338129496, + "step": 49660 + }, + { + "epoch": 44.65827338129496, + "step": 49660, + "torque_loss": 0.1302904486656189 + }, + { + "epoch": 44.65827338129496, + "force_loss": 0.00391970993950963, + "step": 49660 + }, + { + "epoch": 44.66726618705036, + "grad_norm": 0.14738763868808746, + "learning_rate": 7.888766881123044e-06, + "loss": 0.0159, + "step": 49670 + }, + { + "action_loss": 0.0007106612320058048, + "epoch": 44.66726618705036, + "step": 49670 + }, + { + "epoch": 44.66726618705036, + "step": 49670, + "torque_loss": 0.10596217960119247 + }, + { + "epoch": 44.66726618705036, + "force_loss": 0.0027635181322693825, + "step": 49670 + }, + { + "epoch": 44.67625899280576, + "grad_norm": 0.18586307764053345, + "learning_rate": 7.873916115413099e-06, + "loss": 0.0141, + "step": 49680 + }, + { + "action_loss": 0.0012354138307273388, + "epoch": 44.67625899280576, + "step": 49680 + }, + { + "epoch": 44.67625899280576, + "step": 49680, + "torque_loss": 0.12358730286359787 + }, + { + "epoch": 44.67625899280576, + "force_loss": 0.0026613890659064054, + "step": 49680 + }, + { + "epoch": 44.685251798561154, + "grad_norm": 0.06968514621257782, + "learning_rate": 7.85907814649518e-06, + "loss": 0.0187, + "step": 49690 + }, + { + "action_loss": 0.0010834741406142712, + "epoch": 44.685251798561154, + "step": 49690 + }, + { + "epoch": 44.685251798561154, + "step": 49690, + "torque_loss": 0.12469297647476196 + }, + { + "epoch": 44.685251798561154, + "force_loss": 0.00414871284738183, + "step": 49690 + }, + { + "epoch": 44.694244604316545, + "grad_norm": 0.04685192182660103, + "learning_rate": 7.844252978876649e-06, + "loss": 0.016, + "step": 49700 + }, + { + "action_loss": 0.0021343789994716644, + "epoch": 44.694244604316545, + "step": 49700 + }, + { + "epoch": 44.694244604316545, + "step": 49700, + "torque_loss": 0.12869907915592194 + }, + { + "epoch": 44.694244604316545, + "force_loss": 0.010410395450890064, + "step": 49700 + }, + { + "epoch": 44.70323741007194, + "grad_norm": 0.5662211179733276, + "learning_rate": 7.829440617061001e-06, + "loss": 0.0169, + "step": 49710 + }, + { + "action_loss": 0.003900849027559161, + "epoch": 44.70323741007194, + "step": 49710 + }, + { + "epoch": 44.70323741007194, + "step": 49710, + "torque_loss": 0.11429709196090698 + }, + { + "epoch": 44.70323741007194, + "force_loss": 0.004489378537982702, + "step": 49710 + }, + { + "epoch": 44.71223021582734, + "grad_norm": 0.0439014658331871, + "learning_rate": 7.814641065547851e-06, + "loss": 0.0168, + "step": 49720 + }, + { + "action_loss": 0.0008110936614684761, + "epoch": 44.71223021582734, + "step": 49720 + }, + { + "epoch": 44.71223021582734, + "step": 49720, + "torque_loss": 0.15931938588619232 + }, + { + "epoch": 44.71223021582734, + "force_loss": 0.0017350729322060943, + "step": 49720 + }, + { + "epoch": 44.72122302158273, + "grad_norm": 0.05959755927324295, + "learning_rate": 7.79985432883289e-06, + "loss": 0.013, + "step": 49730 + }, + { + "action_loss": 0.0011860455852001905, + "epoch": 44.72122302158273, + "step": 49730 + }, + { + "epoch": 44.72122302158273, + "step": 49730, + "torque_loss": 0.12883572280406952 + }, + { + "epoch": 44.72122302158273, + "force_loss": 0.0029928397852927446, + "step": 49730 + }, + { + "epoch": 44.73021582733813, + "grad_norm": 0.08484892547130585, + "learning_rate": 7.78508041140797e-06, + "loss": 0.0143, + "step": 49740 + }, + { + "action_loss": 0.0005358399939723313, + "epoch": 44.73021582733813, + "step": 49740 + }, + { + "epoch": 44.73021582733813, + "step": 49740, + "torque_loss": 0.09793832153081894 + }, + { + "epoch": 44.73021582733813, + "force_loss": 0.0011657123686745763, + "step": 49740 + }, + { + "epoch": 44.739208633093526, + "grad_norm": 0.3519071042537689, + "learning_rate": 7.770319317760993e-06, + "loss": 0.0155, + "step": 49750 + }, + { + "action_loss": 0.000978046446107328, + "epoch": 44.739208633093526, + "step": 49750 + }, + { + "epoch": 44.739208633093526, + "step": 49750, + "torque_loss": 0.11245503276586533 + }, + { + "epoch": 44.739208633093526, + "force_loss": 0.003304282436147332, + "step": 49750 + }, + { + "epoch": 44.74820143884892, + "grad_norm": 0.06182730942964554, + "learning_rate": 7.755571052376004e-06, + "loss": 0.0146, + "step": 49760 + }, + { + "action_loss": 0.0021554799750447273, + "epoch": 44.74820143884892, + "step": 49760 + }, + { + "epoch": 44.74820143884892, + "step": 49760, + "torque_loss": 0.15364956855773926 + }, + { + "epoch": 44.74820143884892, + "force_loss": 0.004587630741298199, + "step": 49760 + }, + { + "epoch": 44.757194244604314, + "grad_norm": 0.07076491415500641, + "learning_rate": 7.740835619733128e-06, + "loss": 0.0135, + "step": 49770 + }, + { + "action_loss": 0.003983645234256983, + "epoch": 44.757194244604314, + "step": 49770 + }, + { + "epoch": 44.757194244604314, + "step": 49770, + "torque_loss": 0.1310625970363617 + }, + { + "epoch": 44.757194244604314, + "force_loss": 0.006321063730865717, + "step": 49770 + }, + { + "epoch": 44.76618705035971, + "grad_norm": 0.10592120885848999, + "learning_rate": 7.726113024308601e-06, + "loss": 0.0159, + "step": 49780 + }, + { + "action_loss": 0.0009449923527427018, + "epoch": 44.76618705035971, + "step": 49780 + }, + { + "epoch": 44.76618705035971, + "step": 49780, + "torque_loss": 0.16312932968139648 + }, + { + "epoch": 44.76618705035971, + "force_loss": 0.0033867955207824707, + "step": 49780 + }, + { + "epoch": 44.77517985611511, + "grad_norm": 0.15752017498016357, + "learning_rate": 7.711403270574746e-06, + "loss": 0.0167, + "step": 49790 + }, + { + "action_loss": 0.002825170988216996, + "epoch": 44.77517985611511, + "step": 49790 + }, + { + "epoch": 44.77517985611511, + "step": 49790, + "torque_loss": 0.1041218712925911 + }, + { + "epoch": 44.77517985611511, + "force_loss": 0.007644938305020332, + "step": 49790 + }, + { + "epoch": 44.78417266187051, + "grad_norm": 0.0501573346555233, + "learning_rate": 7.696706363000039e-06, + "loss": 0.0141, + "step": 49800 + }, + { + "action_loss": 0.0017316960729658604, + "epoch": 44.78417266187051, + "step": 49800 + }, + { + "epoch": 44.78417266187051, + "step": 49800, + "torque_loss": 0.12151408195495605 + }, + { + "epoch": 44.78417266187051, + "force_loss": 0.002438431140035391, + "step": 49800 + }, + { + "epoch": 44.7931654676259, + "grad_norm": 0.16803665459156036, + "learning_rate": 7.682022306048959e-06, + "loss": 0.0137, + "step": 49810 + }, + { + "action_loss": 0.00035620061680674553, + "epoch": 44.7931654676259, + "step": 49810 + }, + { + "epoch": 44.7931654676259, + "step": 49810, + "torque_loss": 0.14025768637657166 + }, + { + "epoch": 44.7931654676259, + "force_loss": 0.0005227602086961269, + "step": 49810 + }, + { + "epoch": 44.802158273381295, + "grad_norm": 0.20629088580608368, + "learning_rate": 7.667351104182186e-06, + "loss": 0.0149, + "step": 49820 + }, + { + "action_loss": 0.0008661316242069006, + "epoch": 44.802158273381295, + "step": 49820 + }, + { + "epoch": 44.802158273381295, + "step": 49820, + "torque_loss": 0.13550396263599396 + }, + { + "epoch": 44.802158273381295, + "force_loss": 0.002790125785395503, + "step": 49820 + }, + { + "epoch": 44.81115107913669, + "grad_norm": 0.051408298313617706, + "learning_rate": 7.652692761856395e-06, + "loss": 0.0134, + "step": 49830 + }, + { + "action_loss": 0.0007608411251567304, + "epoch": 44.81115107913669, + "step": 49830 + }, + { + "epoch": 44.81115107913669, + "step": 49830, + "torque_loss": 0.11166875809431076 + }, + { + "epoch": 44.81115107913669, + "force_loss": 0.0031106378883123398, + "step": 49830 + }, + { + "epoch": 44.82014388489208, + "grad_norm": 0.09923563152551651, + "learning_rate": 7.63804728352444e-06, + "loss": 0.0142, + "step": 49840 + }, + { + "action_loss": 0.0020305428188294172, + "epoch": 44.82014388489208, + "step": 49840 + }, + { + "epoch": 44.82014388489208, + "step": 49840, + "torque_loss": 0.1581265926361084 + }, + { + "epoch": 44.82014388489208, + "force_loss": 0.007345882710069418, + "step": 49840 + }, + { + "epoch": 44.82913669064748, + "grad_norm": 0.3739672303199768, + "learning_rate": 7.623414673635215e-06, + "loss": 0.0146, + "step": 49850 + }, + { + "action_loss": 0.0009307024884037673, + "epoch": 44.82913669064748, + "step": 49850 + }, + { + "epoch": 44.82913669064748, + "step": 49850, + "torque_loss": 0.1286327838897705 + }, + { + "epoch": 44.82913669064748, + "force_loss": 0.0067672282457351685, + "step": 49850 + }, + { + "epoch": 44.83812949640288, + "grad_norm": 0.28007903695106506, + "learning_rate": 7.608794936633723e-06, + "loss": 0.0205, + "step": 49860 + }, + { + "action_loss": 0.0015352214686572552, + "epoch": 44.83812949640288, + "step": 49860 + }, + { + "epoch": 44.83812949640288, + "step": 49860, + "torque_loss": 0.11712837219238281 + }, + { + "epoch": 44.83812949640288, + "force_loss": 0.0037732094060629606, + "step": 49860 + }, + { + "epoch": 44.847122302158276, + "grad_norm": 0.06705943495035172, + "learning_rate": 7.594188076961056e-06, + "loss": 0.0168, + "step": 49870 + }, + { + "action_loss": 0.0010187792358919978, + "epoch": 44.847122302158276, + "step": 49870 + }, + { + "epoch": 44.847122302158276, + "step": 49870, + "torque_loss": 0.110748790204525 + }, + { + "epoch": 44.847122302158276, + "force_loss": 0.0007314844406209886, + "step": 49870 + }, + { + "epoch": 44.856115107913666, + "grad_norm": 0.24257680773735046, + "learning_rate": 7.579594099054382e-06, + "loss": 0.0161, + "step": 49880 + }, + { + "action_loss": 0.001507192268036306, + "epoch": 44.856115107913666, + "step": 49880 + }, + { + "epoch": 44.856115107913666, + "step": 49880, + "torque_loss": 0.11246427893638611 + }, + { + "epoch": 44.856115107913666, + "force_loss": 0.006308602634817362, + "step": 49880 + }, + { + "epoch": 44.865107913669064, + "grad_norm": 0.07981456071138382, + "learning_rate": 7.565013007346983e-06, + "loss": 0.014, + "step": 49890 + }, + { + "action_loss": 0.005797725170850754, + "epoch": 44.865107913669064, + "step": 49890 + }, + { + "epoch": 44.865107913669064, + "step": 49890, + "torque_loss": 0.2362171858549118 + }, + { + "epoch": 44.865107913669064, + "force_loss": 0.0074445023201406, + "step": 49890 + }, + { + "epoch": 44.87410071942446, + "grad_norm": 0.11106700450181961, + "learning_rate": 7.5504448062682035e-06, + "loss": 0.0176, + "step": 49900 + }, + { + "action_loss": 0.0008172406814992428, + "epoch": 44.87410071942446, + "step": 49900 + }, + { + "epoch": 44.87410071942446, + "step": 49900, + "torque_loss": 0.13382510840892792 + }, + { + "epoch": 44.87410071942446, + "force_loss": 0.0021993634290993214, + "step": 49900 + }, + { + "epoch": 44.88309352517986, + "grad_norm": 0.30454111099243164, + "learning_rate": 7.53588950024347e-06, + "loss": 0.0156, + "step": 49910 + }, + { + "action_loss": 0.001323816948570311, + "epoch": 44.88309352517986, + "step": 49910 + }, + { + "epoch": 44.88309352517986, + "step": 49910, + "torque_loss": 0.10558053851127625 + }, + { + "epoch": 44.88309352517986, + "force_loss": 0.0026909213047474623, + "step": 49910 + }, + { + "epoch": 44.89208633093525, + "grad_norm": 0.11809254437685013, + "learning_rate": 7.5213470936943145e-06, + "loss": 0.0147, + "step": 49920 + }, + { + "action_loss": 0.004369642119854689, + "epoch": 44.89208633093525, + "step": 49920 + }, + { + "epoch": 44.89208633093525, + "step": 49920, + "torque_loss": 0.11722224950790405 + }, + { + "epoch": 44.89208633093525, + "force_loss": 0.010220506228506565, + "step": 49920 + }, + { + "epoch": 44.90107913669065, + "grad_norm": 0.07059824466705322, + "learning_rate": 7.506817591038323e-06, + "loss": 0.0172, + "step": 49930 + }, + { + "action_loss": 0.0011751716956496239, + "epoch": 44.90107913669065, + "step": 49930 + }, + { + "epoch": 44.90107913669065, + "step": 49930, + "torque_loss": 0.07866986840963364 + }, + { + "epoch": 44.90107913669065, + "force_loss": 0.0024234424345195293, + "step": 49930 + }, + { + "epoch": 44.910071942446045, + "grad_norm": 0.12487969547510147, + "learning_rate": 7.492300996689183e-06, + "loss": 0.0245, + "step": 49940 + }, + { + "action_loss": 0.0012136775767430663, + "epoch": 44.910071942446045, + "step": 49940 + }, + { + "epoch": 44.910071942446045, + "step": 49940, + "torque_loss": 0.08027943968772888 + }, + { + "epoch": 44.910071942446045, + "force_loss": 0.0012687615817412734, + "step": 49940 + }, + { + "epoch": 44.919064748201436, + "grad_norm": 0.36301109194755554, + "learning_rate": 7.477797315056645e-06, + "loss": 0.0155, + "step": 49950 + }, + { + "action_loss": 0.004123583901673555, + "epoch": 44.919064748201436, + "step": 49950 + }, + { + "epoch": 44.919064748201436, + "step": 49950, + "torque_loss": 0.15322358906269073 + }, + { + "epoch": 44.919064748201436, + "force_loss": 0.010679654777050018, + "step": 49950 + }, + { + "epoch": 44.92805755395683, + "grad_norm": 0.30525267124176025, + "learning_rate": 7.463306550546539e-06, + "loss": 0.0144, + "step": 49960 + }, + { + "action_loss": 0.003829879453405738, + "epoch": 44.92805755395683, + "step": 49960 + }, + { + "epoch": 44.92805755395683, + "step": 49960, + "torque_loss": 0.13717468082904816 + }, + { + "epoch": 44.92805755395683, + "force_loss": 0.0047232164070010185, + "step": 49960 + }, + { + "epoch": 44.93705035971223, + "grad_norm": 0.10683489590883255, + "learning_rate": 7.448828707560812e-06, + "loss": 0.0136, + "step": 49970 + }, + { + "action_loss": 0.006063038017600775, + "epoch": 44.93705035971223, + "step": 49970 + }, + { + "epoch": 44.93705035971223, + "step": 49970, + "torque_loss": 0.1413004845380783 + }, + { + "epoch": 44.93705035971223, + "force_loss": 0.009604431688785553, + "step": 49970 + }, + { + "epoch": 44.94604316546763, + "grad_norm": 0.07475993037223816, + "learning_rate": 7.4343637904974e-06, + "loss": 0.0151, + "step": 49980 + }, + { + "action_loss": 0.003994505386799574, + "epoch": 44.94604316546763, + "step": 49980 + }, + { + "epoch": 44.94604316546763, + "step": 49980, + "torque_loss": 0.10486545413732529 + }, + { + "epoch": 44.94604316546763, + "force_loss": 0.008612277917563915, + "step": 49980 + }, + { + "epoch": 44.95503597122302, + "grad_norm": 0.08893385529518127, + "learning_rate": 7.419911803750401e-06, + "loss": 0.015, + "step": 49990 + }, + { + "action_loss": 0.0025657371152192354, + "epoch": 44.95503597122302, + "step": 49990 + }, + { + "epoch": 44.95503597122302, + "step": 49990, + "torque_loss": 0.13204152882099152 + }, + { + "epoch": 44.95503597122302, + "force_loss": 0.006318904925137758, + "step": 49990 + }, + { + "epoch": 44.96402877697842, + "grad_norm": 0.21625585854053497, + "learning_rate": 7.405472751709935e-06, + "loss": 0.0191, + "step": 50000 + } + ], + "logging_steps": 10, + "max_steps": 60000, + "num_input_tokens_seen": 0, + "num_train_epochs": 54, + "save_steps": 10000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 16, + "trial_name": null, + "trial_params": null +}