debisoft commited on
Commit
9fc9885
·
verified ·
1 Parent(s): 6209995

Initial commit

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ replay.mp4 filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -1 +1 @@
1
- {"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVNwAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLnNhYy5wb2xpY2llc5SMEE11bHRpSW5wdXRQb2xpY3mUk5Qu", "__module__": "stable_baselines3.sac.policies", "__doc__": "\n Policy class (with both actor and critic) for SAC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ", "__init__": "<function MultiInputPolicy.__init__ at 0x7880d9bf2160>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x7880d9bf9140>"}, "verbose": 1, "policy_kwargs": {"use_sde": false}, "num_timesteps": 1000000, "_total_timesteps": 1000000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1760729029192534752, "learning_rate": 0.0003, "tensorboard_log": null, "_last_obs": {":type:": "<class 'collections.OrderedDict'>", ":serialized:": "gAWVjAIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYwAAAAAAAAACOGtr8f+gs/2FzOPfYKMr61XpO+cV/OPYWBtz5ySpE/PVXOPQt8xL5P344/bDfOPZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLBEsDhpSMAUOUdJRSlIwMZGVzaXJlZF9nb2FslGgHKJYwAAAAAAAAAB7AjD8T5sa/1IS/P63ERz++imC+5I3gP8GJAb9XM6W+Xx2Kv0VTJD84ySQ/Ot+Lv5RoDksESwOGlGgSdJRSlIwLb2JzZXJ2YXRpb26UaAcoljABAAAAAAAAiA4GviefYL+0sK88aH0HP1QbO795My8/YkxAviOGtr8f+gs/2FzOPfOjfrl5EXC8pwq6uvdoOrz4nzy8BHtCPTb9Hzs0eq+8lWHxuvMeZD8dDhY/WrmTPwh2Kj6F/rY+ysEXwHDla7/2CjK+tV6TvnFfzj0QhJ+5h1t0vOUqVLt6Iki8QS46vAR7Qj39/B87GnqvvKPnQbsIPVM/Q8tyP8mwij9wPKk+xhkcP1Y5k7/omM6+hYG3PnJKkT89Vc49/JQTuOk9cbyVIim72GZHvE5qNrz6ekI9HQEgOyR6r7z7pSu7K/zAPraeqD7nysw+E3KNva+XM0DTpzDAXCl4Pwt8xL5P344/bDfOPQTbKLrn6oC8bVwgu50zHbxL1C68Zg5JPQpT1zrvWTC8uEcJu5RoDksESxOGlGgSdJRSlHUu", "achieved_goal": "[[-1.4259685 0.5467853 0.10076302]\n [-0.17386994 -0.28783193 0.10076798]\n [ 0.35841003 1.1350844 0.10074852]\n [-0.38375887 1.1161898 0.10069165]]", "desired_goal": "[[ 1.099613 -1.5538963 1.4962411 ]\n [ 0.7803448 -0.21927926 1.7543302 ]\n [-0.5060082 -0.32265732 -1.0790213 ]\n [ 0.6418956 0.64369535 -1.0927498 ]]", "observation": "[[-1.3091481e-01 -8.7742847e-01 2.1446563e-02 5.2925730e-01\n -7.3088574e-01 6.8437916e-01 -1.8779138e-01 -1.4259685e+00\n 5.4678530e-01 1.0076302e-01 -2.4284403e-04 -1.4652603e-02\n -1.4193848e-03 -1.1377565e-02 -1.1512749e-02 4.7480598e-02\n 2.4412400e-03 -2.1420576e-02 -1.8415923e-03]\n [ 8.9109725e-01 5.8615285e-01 1.1540940e+00 1.6646588e-01\n 3.5741058e-01 -2.3712029e+00 -9.2146969e-01 -1.7386994e-01\n -2.8783193e-01 1.0076798e-01 -3.0425237e-04 -1.4914400e-02\n -3.2374200e-03 -1.2215251e-02 -1.1363567e-02 4.7480598e-02\n 2.4412267e-03 -2.1420527e-02 -2.9587529e-03]\n [ 8.2515001e-01 9.4841403e-01 1.0835201e+00 3.3053923e-01\n 6.0976827e-01 -1.1501873e+00 -4.0351033e-01 3.5841003e-01\n 1.1350844e+00 1.0074852e-01 -3.5186284e-05 -1.4724233e-02\n -2.5807966e-03 -1.2170516e-02 -1.1133743e-02 4.7480561e-02\n 2.4414726e-03 -2.1420546e-02 -2.6191461e-03]\n [ 3.7692389e-01 3.2933587e-01 3.9998552e-01 -6.9065236e-02\n 2.8061330e+00 -2.7602432e+00 9.6938109e-01 -3.8375887e-01\n 1.1161898e+00 1.0069165e-01 -6.4413273e-04 -1.5737010e-02\n -2.4469153e-03 -9.5948251e-03 -1.0670732e-02 4.9085997e-02\n 1.6427946e-03 -1.0763629e-02 -2.0947289e-03]]"}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVeAAAAAAAAACME251bXB5Ll9jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWBAAAAAAAAAABAQEBlIwFbnVtcHmUjAVkdHlwZZSTlIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksEhZSMAUOUdJRSlC4="}, "_last_original_obs": {":type:": "<class 'collections.OrderedDict'>", ":serialized:": "gAWVjAIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYwAAAAAAAAAD5kF74c4Hg9+8CjPHELj7zv4Pe8YcGjPFWBGz3M1P890L+jPEILIb16nPs9PLujPJSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLBEsDhpSMAUOUdJRSlIwMZGVzaXJlZF9nb2FslGgHKJYwAAAAAAAAADTtxD3bAQq+fwBFPghKjD0wd5y8UplWPjbQL7143eW8CFSrPBB1Zz25IGQ9CtejPJRoDksESwOGlGgSdJRSlIwLb2JzZXJ2YXRpb26UaAcoljABAAAAAAAA3BXAvINsl74bogQ+y7K+PSVEmr7SGGs+SGDfPD5kF74c4Hg9+8CjPMD41zfK25c3nmHhOYjRHTgL+gS4QPG7LLy5tjJ7S7WyP2ZrOv/mHD5Jrr09CxCUPp2XpTr9l9Q9fpVqv8Um/jRxC4+87+D3vGHBozxAfmI3u/1ut3NHmblpFMW3+8Got0g1ay2AKFsycrJnMqD4E7qEOhE+Y0ZBPhv4jj55Ui89In1KPg+36r4Hs508VYEbPczU/z3Qv6M8INaMOD86Gzf+IwS49rKpt94rGLYTN2ex25w0NL600i6acPu4M8WDPZN1yDwCRjs+aBNvveL1gT+j94e/vOuPPUILIb16nPs9PLujPO+nY7jWCPO4IOC1NxrZLzlTUg04IxsQOZNPBLqRp/E7HogUOpRoDksESxOGlGgSdJRSlHUu", "achieved_goal": "[[-0.14784333 0.0607606 0.01998948]\n [-0.01746151 -0.03025862 0.01998967]\n [ 0.03796514 0.1249176 0.01998892]\n [-0.03931738 0.12285705 0.01998674]]", "desired_goal": "[[ 0.09615555 -0.1347727 0.1923847 ]\n [ 0.06850058 -0.0190998 0.20956925]\n [-0.04292317 -0.02805971 0.02091409]\n [ 0.05650812 0.05569527 0.02 ]]", "observation": "[[-2.3447923e-02 -2.9574975e-01 1.2952463e-01 9.3114458e-02\n -3.0130115e-01 2.2958687e-01 2.7267590e-02 -1.4784333e-01\n 6.0760602e-02 1.9989481e-02 2.5745830e-05 1.8102950e-05\n 4.2988075e-04 3.7626858e-05 -3.1704123e-05 5.3416438e-12\n 2.1272051e-08 -2.1105498e-08 8.9797744e-04]\n [ 1.5322493e-01 9.2617579e-02 2.8918490e-01 1.2633685e-03\n 1.0380552e-01 -9.1634357e-01 4.7339395e-07 -1.7461510e-02\n -3.0258624e-02 1.9989671e-02 1.3500045e-05 -1.4244982e-05\n -2.9235668e-04 -2.3493734e-05 -2.0117490e-05 1.3370034e-11\n 1.2756686e-08 1.3486543e-08 -5.6446530e-04]\n [ 1.4182478e-01 1.8874507e-01 2.7923664e-01 4.2803261e-02\n 1.9774297e-01 -4.5842788e-01 1.9250406e-02 3.7965138e-02\n 1.2491760e-01 1.9988924e-02 6.7156041e-05 9.2522814e-06\n -3.1504773e-05 -2.0229705e-05 -2.2675299e-06 -3.3646230e-09\n 1.6820870e-07 9.5818006e-11 -1.1989586e-04]\n [ 6.4340971e-02 2.4470126e-02 1.8288425e-01 -5.8368117e-02\n 1.0153162e+00 -1.0622448e+00 7.0273846e-02 -3.9317377e-02\n 1.2285705e-01 1.9986741e-02 -5.4277418e-05 -1.1588789e-04\n 2.1681248e-05 1.6770177e-04 3.3693690e-05 1.3743019e-04\n -5.0472579e-04 7.3747118e-03 5.6660350e-04]]"}, "_episode_num": 20718, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": 0.0, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWV4AsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwEkAAAAAAACMAWyUSzKMAXSUR0DHEPK8DjiodX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEOsgZCOWdX2UKGgGRwAAAAAAAAAAaAdLAWgIR0DHEPF12aDxdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEcDOHFgldX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEowuscQzdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEpBybQTmdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEo56nivQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEyilP8AJdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHE7Qbn5i3dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHE6MRxtHhdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHE6Bkf9xZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFD1GAkLQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFMQXyiEhdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFLKYE4ecdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFK/49HMEdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFUmJpFkQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFdEgEEDAdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFcAU34sVdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFb9x2jfvdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFlyOHWSVdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFuXfwZwXdX2UKGgGRwAAAAAAAAAAaAdLAWgIR0DHFusZR8+idX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFtV9+gDidX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFtOeWfK7dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHF3OmvW6LdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGCg9xIatdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGCK/M4cWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGC6HO8kEdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGQDYkE9udX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGZ2DrZ8KdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGYaR0U48dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGYV2TxG2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGiS2H+IedX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGrwvvjOtdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGqglyBCldX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGqdvS+g2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHG0Zr30wrdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHG9j2L5ymdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHG8KUqx1QdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHG8AieNDMdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHGElHBk7dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHPYJu2qldX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHN7oUzsQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHN4bn5i3dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHX5Z0SyudX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHhGn889wdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHfkCJXQudX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHgX1pTMrdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHty8g6ltdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHH6fwgDA8dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHH4+xIJ7cdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHH49WQwK0dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHICgz544ZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHILkW69TQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIKJmh/RWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIKNs54nndX2UKGgGRwAAAAAAAAAAaAdLAWgIR0DHIKkY2sJZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIUE0UGmldX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIdIKYzBRdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIbjyrgfmdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIbyjSG8FdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIk/zxwyZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIt6CtihGdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIsYBDG96dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIsmYplSTdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHI10B6rvLdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHI+zJp35fdX2UKGgGRwAAAAAAAAAAaAdLAWgIR0DHI/JUPxx2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHI9VXV9WqdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHI9fzUZvUdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJG0WhysCdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJTgaYNRWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJSfTiKixdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJTsZccENdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJf28f3evdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJpqwljVhdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJn/s7dSEdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJoPg5zYFdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJyGJUHY6dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJ7kAFPi2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJ5uZiNKidX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJ58Sh8IBdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKDbNQj2SdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKMkfPompdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKKrjFQ2udX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKKz2OAAidX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKUZ75VOsdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKd1NWU8ndX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKb/ssxwidX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKcHtBv74dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKldmjCYUdX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DHKpEPczqKdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DHKsjm0VrRdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKuupIczZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKs4rOJLvdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKuJB7eEadX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHLFWOwPiDdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHLHho4+8odX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHLF4XIlt1dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHLGDrLQokdWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVhgAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKImJiImJiYmJiYmJiYmJiYmJiYmJiYiJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiImJiYmJiYmJiYmIiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmIiImJiYmJiYllLg=="}, "_n_updates": 249975, "buffer_size": 1000000, "batch_size": 256, "learning_starts": 100, "tau": 0.005, "gamma": 0.99, "gradient_steps": 1, "optimize_memory_usage": false, "replay_buffer_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVOQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwQRGljdFJlcGxheUJ1ZmZlcpSTlC4=", "__module__": "stable_baselines3.common.buffers", "__annotations__": "{'observation_space': <class 'gymnasium.spaces.dict.Dict'>, 'obs_shape': dict[str, tuple[int, ...]], 'observations': dict[str, numpy.ndarray], 'next_observations': dict[str, numpy.ndarray]}", "__doc__": "\n Dict Replay buffer used in off-policy algorithms like SAC/TD3.\n Extends the ReplayBuffer to use dictionary observations\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ", "__init__": "<function DictReplayBuffer.__init__ at 0x7880d9b13740>", "add": "<function DictReplayBuffer.add at 0x7880d9b13880>", "sample": "<function DictReplayBuffer.sample at 0x7880d9b13920>", "_get_samples": "<function DictReplayBuffer._get_samples at 0x7880d9b139c0>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x7880d9f9c740>"}, "replay_buffer_kwargs": {}, "n_steps": 1, "train_freq": {":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>", ":serialized:": "gAWVYQAAAAAAAACMJXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi50eXBlX2FsaWFzZXOUjAlUcmFpbkZyZXGUk5RLAWgAjBJUcmFpbkZyZXF1ZW5jeVVuaXSUk5SMBHN0ZXCUhZRSlIaUgZQu"}, "use_sde_at_warmup": false, "target_entropy": -4.0, "ent_coef": "auto", "target_update_interval": 1, "observation_space": {":type:": "<class 'gymnasium.spaces.dict.Dict'>", ":serialized:": "gAWVFAQAAAAAAACMFWd5bW5hc2l1bS5zcGFjZXMuZGljdJSMBERpY3SUk5QpgZR9lCiMBnNwYWNlc5R9lCiMDWFjaGlldmVkX2dvYWyUjBRneW1uYXNpdW0uc3BhY2VzLmJveJSMA0JveJSTlCmBlH2UKIwFZHR5cGWUjAVudW1weZSMBWR0eXBllJOUjAJmNJSJiIeUUpQoSwOMATyUTk5OSv////9K/////0sAdJRijAZfc2hhcGWUSwOFlIwDbG93lIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMAAAAAAAAAAAAIMEAACDBAAAgwZRoE0sDhZSMAUOUdJRSlIwNYm91bmRlZF9iZWxvd5RoGyiWAwAAAAAAAAABAQGUaBCMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGJLA4WUaB50lFKUjARoaWdolGgbKJYMAAAAAAAAAAAAIEEAACBBAAAgQZRoE0sDhZRoHnSUUpSMDWJvdW5kZWRfYWJvdmWUaBsolgMAAAAAAAAAAQEBlGglSwOFlGgedJRSlIwIbG93X3JlcHKUjAUtMTAuMJSMCWhpZ2hfcmVwcpSMBDEwLjCUjApfbnBfcmFuZG9tlE51YowMZGVzaXJlZF9nb2FslGgKKYGUfZQoaA1oE2gWSwOFlGgYaBsolgwAAAAAAAAAAAAgwQAAIMEAACDBlGgTSwOFlGgedJRSlGghaBsolgMAAAAAAAAAAQEBlGglSwOFlGgedJRSlGgraBsolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgTSwOFlGgedJRSlGgwaBsolgMAAAAAAAAAAQEBlGglSwOFlGgedJRSlGg1jAUtMTAuMJRoN4wEMTAuMJRoOU51YowLb2JzZXJ2YXRpb26UaAopgZR9lChoDWgTaBZLE4WUaBhoGyiWTAAAAAAAAAAAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBlGgTSxOFlGgedJRSlGghaBsolhMAAAAAAAAAAQEBAQEBAQEBAQEBAQEBAQEBAZRoJUsThZRoHnSUUpRoK2gbKJZMAAAAAAAAAAAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEGUaBNLE4WUaB50lFKUaDBoGyiWEwAAAAAAAAABAQEBAQEBAQEBAQEBAQEBAQEBlGglSxOFlGgedJRSlGg1jAUtMTAuMJRoN4wEMTAuMJRoOU51YnVoFk5oDU5oOU51Yi4=", "spaces": "{'achieved_goal': Box(-10.0, 10.0, (3,), float32), 'desired_goal': Box(-10.0, 10.0, (3,), float32), 'observation': Box(-10.0, 10.0, (19,), float32)}", "_shape": null, "dtype": null, "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVSgMAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLBIWUjANsb3eUjBNudW1weS5fY29yZS5udW1lcmljlIwLX2Zyb21idWZmZXKUk5QolhAAAAAAAAAAAACAvwAAgL8AAIC/AACAv5RoC0sEhZSMAUOUdJRSlIwNYm91bmRlZF9iZWxvd5RoEyiWBAAAAAAAAAABAQEBlGgIjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlGgWdJRSlIwEaGlnaJRoEyiWEAAAAAAAAAAAAIA/AACAPwAAgD8AAIA/lGgLSwSFlGgWdJRSlIwNYm91bmRlZF9hYm92ZZRoEyiWBAAAAAAAAAABAQEBlGgdSwSFlGgWdJRSlIwIbG93X3JlcHKUjAQtMS4wlIwJaGlnaF9yZXBylIwDMS4wlIwKX25wX3JhbmRvbZSMFG51bXB5LnJhbmRvbS5fcGlja2xllIwQX19nZW5lcmF0b3JfY3RvcpSTlGgyjBRfX2JpdF9nZW5lcmF0b3JfY3RvcpSTlIwTbnVtcHkucmFuZG9tLl9wY2c2NJSMBVBDRzY0lJOUhZRSlH2UKIwNYml0X2dlbmVyYXRvcpSMBVBDRzY0lIwFc3RhdGWUfZQoaD+KEEriVSlwqOzXxi1zIFl2Y0iMA2luY5SKEfHEtne+FXjAYDPZiktEv/kAdYwKaGFzX3VpbnQzMpRLAIwIdWludGVnZXKUSwB1jBpudW1weS5yYW5kb20uYml0X2dlbmVyYXRvcpSMG19fcHl4X3VucGlja2xlX1NlZWRTZXF1ZW5jZZSTlGhEjAxTZWVkU2VxdWVuY2WUk5RKIqLqA06HlFKUKIoQhaAkgiRLBjsR6u3PH8Oqf0sAaBMolhAAAAAAAAAA0T03a/v5L4vGQdSIDnQ+IJRoCIwCdTSUiYiHlFKUKEsDaAxOTk5K/////0r/////SwB0lGJLBIWUaBZ0lFKUSwQpdJRihpRihZRSlHViLg==", "dtype": "float32", "_shape": [4], "low": "[-1. -1. -1. -1.]", "bounded_below": "[ True True True True]", "high": "[1. 1. 1. 1.]", "bounded_above": "[ True True True True]", "low_repr": "-1.0", "high_repr": "1.0", "_np_random": "Generator(PCG64)"}, "n_envs": 4, "lr_schedule": {":type:": "<class 'stable_baselines3.common.utils.FloatSchedule'>", ":serialized:": "gAWVeQAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMDUZsb2F0U2NoZWR1bGWUk5QpgZR9lIwOdmFsdWVfc2NoZWR1bGWUaACMEENvbnN0YW50U2NoZWR1bGWUk5QpgZR9lIwDdmFslEc/M6kqMFUyYXNic2Iu", "value_schedule": "ConstantSchedule(val=0.0003)"}, "batch_norm_stats": [], "batch_norm_stats_target": [], "system_info": {"OS": "Linux-6.6.105+-x86_64-with-glibc2.35 # 1 SMP Thu Oct 2 10:42:05 UTC 2025", "Python": "3.12.12", "Stable-Baselines3": "2.7.0", "PyTorch": "2.8.0+cu126", "GPU Enabled": "False", "Numpy": "2.0.2", "Cloudpickle": "3.1.1", "Gymnasium": "1.2.1", "OpenAI Gym": "0.25.2"}}
 
1
+ {"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVNwAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLnNhYy5wb2xpY2llc5SMEE11bHRpSW5wdXRQb2xpY3mUk5Qu", "__module__": "stable_baselines3.sac.policies", "__doc__": "\n Policy class (with both actor and critic) for SAC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ", "__init__": "<function MultiInputPolicy.__init__ at 0x7ffa4746f240>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x7ffa4747e0c0>"}, "verbose": 1, "policy_kwargs": {"use_sde": false}, "num_timesteps": 1000, "_total_timesteps": 1000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1760748840173752029, "learning_rate": 0.0003, "tensorboard_log": null, "_last_obs": {":type:": "<class 'collections.OrderedDict'>", ":serialized:": "gAWVjAIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYwAAAAAAAAAC2WqD9Xo8W/4ghbvdqjKT91ttO/4ghbvdKYnj85tJc/4ghbvadG1z/b7ty+4ghbvZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLBEsDhpSMAUOUdJRSlIwMZGVzaXJlZF9nb2FslGgHKJYwAAAAAAAAAAdnO79TlZQ/z+mFvkqh1j9vnYM/e1lSPxe2gz7kz6g/HB6OP7dU4r+QRaG/4jhLvZRoDksESwOGlGgSdJRSlIwLb2JzZXJ2YXRpb26UaAcoljABAAAAAAAAE/LSPt01Tj4NyxJAz0q5PQUiOTxrrwI+OiGWvy2WqD9Xo8W/4ghbvR18ETwiFli+QY6kPsQSxD3pY7u9FXnVPAR3Eb11ayK99MoCPhPy0j7dNU4+DcsSQM9KuT0FIjk8a68CPjohlr/aoyk/dbbTv+IIW70dfBE8IhZYvkGOpD7EEsQ96WO7vRV51TwEdxG9dWsivfTKAj4T8tI+3TVOPg3LEkDPSrk9BSI5PGuvAj46IZa/0piePzm0lz/iCFu9HXwRPCIWWL5BjqQ+xBLEPelju70VedU8BHcRvXVrIr30ygI+E/LSPt01Tj4NyxJAz0q5PQUiOTxrrwI+OiGWv6dG1z/b7ty+4ghbvR18ETwiFli+QY6kPsQSxD3pY7u9FXnVPAR3Eb11ayK99MoCPpRoDksESxOGlGgSdJRSlHUu", "achieved_goal": "[[ 1.317083 -1.5440472 -0.05347527]\n [ 0.6626564 -1.6540056 -0.05347527]\n [ 1.2390387 1.1851875 -0.05347527]\n [ 1.6818436 -0.43150982 -0.05347527]]", "desired_goal": "[[-0.7320408 1.160807 -0.26154944]\n [ 1.6767972 1.028242 0.82167786]\n [ 0.2572486 1.3188443 1.1102939 ]\n [-1.7682103 -1.2599354 -0.04961479]]", "observation": "[[ 0.41200313 0.20137735 2.2936432 0.09047472 0.01129961 0.12762229\n -1.172889 1.317083 -1.5440472 -0.05347527 0.00887969 -0.21102193\n 0.3213978 0.09573892 -0.09149916 0.02605871 -0.03551389 -0.03965326\n 0.12772733]\n [ 0.41200313 0.20137735 2.2936432 0.09047472 0.01129961 0.12762229\n -1.172889 0.6626564 -1.6540056 -0.05347527 0.00887969 -0.21102193\n 0.3213978 0.09573892 -0.09149916 0.02605871 -0.03551389 -0.03965326\n 0.12772733]\n [ 0.41200313 0.20137735 2.2936432 0.09047472 0.01129961 0.12762229\n -1.172889 1.2390387 1.1851875 -0.05347527 0.00887969 -0.21102193\n 0.3213978 0.09573892 -0.09149916 0.02605871 -0.03551389 -0.03965326\n 0.12772733]\n [ 0.41200313 0.20137735 2.2936432 0.09047472 0.01129961 0.12762229\n -1.172889 1.6818436 -0.43150982 -0.05347527 0.00887969 -0.21102193\n 0.3213978 0.09573892 -0.09149916 0.02605871 -0.03551389 -0.03965326\n 0.12772733]]"}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVeAAAAAAAAACME251bXB5Ll9jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWBAAAAAAAAAABAQEBlIwFbnVtcHmUjAVkdHlwZZSTlIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksEhZSMAUOUdJRSlC4="}, "_last_original_obs": {":type:": "<class 'collections.OrderedDict'>", ":serialized:": "gAWVjAIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYwAAAAAAAAACpPxT09cOG9CtejPJdxET3b7/S9CtejPJJztj1aQwE+CtejPCNgBT55QGG8CtejPJSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLBEsDhpSMAUOUdJRSlIwMZGVzaXJlZF9nb2FslGgHKJYwAAAAAAAAAIhQhL3eW8Y9BjSqPS/5/j2VMLA9+N8jPjTuVTypyeA9Ad04PjF0Fb5gd869AgfJPZRoDksESwOGlGgSdJRSlIwLb2JzZXJ2YXRpb26UaAcoljABAAAAAAAA6nIdPRlsGqxDI0o+AAAAAAAAAIAAAAAAAAAAACpPxT09cOG9CtejPAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOpyHT0ZbBqsQyNKPgAAAAAAAACAAAAAAAAAAACXcRE92+/0vQrXozwAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADqch09GWwarEMjSj4AAAAAAAAAgAAAAAAAAAAAknO2PVpDAT4K16M8AAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA6nIdPRlsGqxDI0o+AAAAAAAAAIAAAAAAAAAAACNgBT55QGG8CtejPAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJRoDksESxOGlGgSdJRSlHUu", "achieved_goal": "[[ 0.0963424 -0.11007736 0.02 ]\n [ 0.03550872 -0.11959811 0.02 ]\n [ 0.08908762 0.12623349 0.02 ]\n [ 0.13024954 -0.01374828 0.02 ]]", "desired_goal": "[[-0.06460673 0.09685491 0.08310704]\n [ 0.12449872 0.08603016 0.16003406]\n [ 0.01305728 0.10975964 0.18053056]\n [-0.14595105 -0.10081363 0.0981579 ]]", "observation": "[[ 3.8439669e-02 -2.1944723e-12 1.9740014e-01 0.0000000e+00\n -0.0000000e+00 0.0000000e+00 0.0000000e+00 9.6342400e-02\n -1.1007736e-01 2.0000000e-02 0.0000000e+00 -0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00]\n [ 3.8439669e-02 -2.1944723e-12 1.9740014e-01 0.0000000e+00\n -0.0000000e+00 0.0000000e+00 0.0000000e+00 3.5508718e-02\n -1.1959811e-01 2.0000000e-02 0.0000000e+00 -0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00]\n [ 3.8439669e-02 -2.1944723e-12 1.9740014e-01 0.0000000e+00\n -0.0000000e+00 0.0000000e+00 0.0000000e+00 8.9087620e-02\n 1.2623349e-01 2.0000000e-02 0.0000000e+00 -0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00]\n [ 3.8439669e-02 -2.1944723e-12 1.9740014e-01 0.0000000e+00\n -0.0000000e+00 0.0000000e+00 0.0000000e+00 1.3024954e-01\n -1.3748282e-02 2.0000000e-02 0.0000000e+00 -0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00]]"}, "_episode_num": 20, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": 0.0, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVgAIAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwEkAAAAAAACMAWyUSzKMAXSUR0AQeaF23azvdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AOzJlrdnCgdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AMfGff4yoGdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AKSB5HEuQIdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AciyD7IkqudX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AbeCiAUcn3dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AaTgxagVXWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AZNAQg9vCNdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AiP3A2ycCpdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AhtbypaRp2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AhIK0lZ5iWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Agk7rcCYCydX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AmPrUsnRb9dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AltPC2tuDSdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AlH/xUedTYdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Akk3YL9deIdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AqUgGKQ7tBdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0ApyIhQm/nGdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0ApM+oLofSydX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Aop4hUzbeudWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVNgAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKImJiYmJiYmJiYmJiYmJiYmJiYmJZS4="}, "_n_updates": 225, "buffer_size": 1000000, "batch_size": 256, "learning_starts": 100, "tau": 0.005, "gamma": 0.99, "gradient_steps": 1, "optimize_memory_usage": false, "replay_buffer_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVOQAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwQRGljdFJlcGxheUJ1ZmZlcpSTlC4=", "__module__": "stable_baselines3.common.buffers", "__annotations__": "{'observation_space': <class 'gymnasium.spaces.dict.Dict'>, 'obs_shape': dict[str, tuple[int, ...]], 'observations': dict[str, numpy.ndarray], 'next_observations': dict[str, numpy.ndarray]}", "__doc__": "\n Dict Replay buffer used in off-policy algorithms like SAC/TD3.\n Extends the ReplayBuffer to use dictionary observations\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ", "__init__": "<function DictReplayBuffer.__init__ at 0x7ffa475cc900>", "add": "<function DictReplayBuffer.add at 0x7ffa475cca40>", "sample": "<function DictReplayBuffer.sample at 0x7ffa475ccae0>", "_get_samples": "<function DictReplayBuffer._get_samples at 0x7ffa475ccb80>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x7ffa47579e80>"}, "replay_buffer_kwargs": {}, "n_steps": 1, "train_freq": {":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>", ":serialized:": "gAWVYQAAAAAAAACMJXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi50eXBlX2FsaWFzZXOUjAlUcmFpbkZyZXGUk5RLAWgAjBJUcmFpbkZyZXF1ZW5jeVVuaXSUk5SMBHN0ZXCUhZRSlIaUgZQu"}, "use_sde_at_warmup": false, "target_entropy": -4.0, "ent_coef": "auto", "target_update_interval": 1, "observation_space": {":type:": "<class 'gymnasium.spaces.dict.Dict'>", ":serialized:": "gAWVFAQAAAAAAACMFWd5bW5hc2l1bS5zcGFjZXMuZGljdJSMBERpY3SUk5QpgZR9lCiMBnNwYWNlc5R9lCiMDWFjaGlldmVkX2dvYWyUjBRneW1uYXNpdW0uc3BhY2VzLmJveJSMA0JveJSTlCmBlH2UKIwFZHR5cGWUjAVudW1weZSMBWR0eXBllJOUjAJmNJSJiIeUUpQoSwOMATyUTk5OSv////9K/////0sAdJRijAZfc2hhcGWUSwOFlIwDbG93lIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYMAAAAAAAAAAAAIMEAACDBAAAgwZRoE0sDhZSMAUOUdJRSlIwNYm91bmRlZF9iZWxvd5RoGyiWAwAAAAAAAAABAQGUaBCMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGJLA4WUaB50lFKUjARoaWdolGgbKJYMAAAAAAAAAAAAIEEAACBBAAAgQZRoE0sDhZRoHnSUUpSMDWJvdW5kZWRfYWJvdmWUaBsolgMAAAAAAAAAAQEBlGglSwOFlGgedJRSlIwIbG93X3JlcHKUjAUtMTAuMJSMCWhpZ2hfcmVwcpSMBDEwLjCUjApfbnBfcmFuZG9tlE51YowMZGVzaXJlZF9nb2FslGgKKYGUfZQoaA1oE2gWSwOFlGgYaBsolgwAAAAAAAAAAAAgwQAAIMEAACDBlGgTSwOFlGgedJRSlGghaBsolgMAAAAAAAAAAQEBlGglSwOFlGgedJRSlGgraBsolgwAAAAAAAAAAAAgQQAAIEEAACBBlGgTSwOFlGgedJRSlGgwaBsolgMAAAAAAAAAAQEBlGglSwOFlGgedJRSlGg1jAUtMTAuMJRoN4wEMTAuMJRoOU51YowLb2JzZXJ2YXRpb26UaAopgZR9lChoDWgTaBZLE4WUaBhoGyiWTAAAAAAAAAAAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBAAAgwQAAIMEAACDBlGgTSxOFlGgedJRSlGghaBsolhMAAAAAAAAAAQEBAQEBAQEBAQEBAQEBAQEBAZRoJUsThZRoHnSUUpRoK2gbKJZMAAAAAAAAAAAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEEAACBBAAAgQQAAIEGUaBNLE4WUaB50lFKUaDBoGyiWEwAAAAAAAAABAQEBAQEBAQEBAQEBAQEBAQEBlGglSxOFlGgedJRSlGg1jAUtMTAuMJRoN4wEMTAuMJRoOU51YnVoFk5oDU5oOU51Yi4=", "spaces": "{'achieved_goal': Box(-10.0, 10.0, (3,), float32), 'desired_goal': Box(-10.0, 10.0, (3,), float32), 'observation': Box(-10.0, 10.0, (19,), float32)}", "_shape": null, "dtype": null, "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVSgMAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLBIWUjANsb3eUjBNudW1weS5fY29yZS5udW1lcmljlIwLX2Zyb21idWZmZXKUk5QolhAAAAAAAAAAAACAvwAAgL8AAIC/AACAv5RoC0sEhZSMAUOUdJRSlIwNYm91bmRlZF9iZWxvd5RoEyiWBAAAAAAAAAABAQEBlGgIjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlGgWdJRSlIwEaGlnaJRoEyiWEAAAAAAAAAAAAIA/AACAPwAAgD8AAIA/lGgLSwSFlGgWdJRSlIwNYm91bmRlZF9hYm92ZZRoEyiWBAAAAAAAAAABAQEBlGgdSwSFlGgWdJRSlIwIbG93X3JlcHKUjAQtMS4wlIwJaGlnaF9yZXBylIwDMS4wlIwKX25wX3JhbmRvbZSMFG51bXB5LnJhbmRvbS5fcGlja2xllIwQX19nZW5lcmF0b3JfY3RvcpSTlGgyjBRfX2JpdF9nZW5lcmF0b3JfY3RvcpSTlIwTbnVtcHkucmFuZG9tLl9wY2c2NJSMBVBDRzY0lJOUhZRSlH2UKIwNYml0X2dlbmVyYXRvcpSMBVBDRzY0lIwFc3RhdGWUfZQoaD+KEOu4EztTgI4GSV8TIgT+uDSMA2luY5SKEMsFRWPEy4EeUByoeFDlRF91jApoYXNfdWludDMylEsAjAh1aW50ZWdlcpRLAHWMGm51bXB5LnJhbmRvbS5iaXRfZ2VuZXJhdG9ylIwbX19weXhfdW5waWNrbGVfU2VlZFNlcXVlbmNllJOUaESMDFNlZWRTZXF1ZW5jZZSTlEoiouoDToeUUpQoihHx1TnGMbnkBZpEVOfll2G3AEsAaBMolhAAAAAAAAAAsfq4KIuFWt9iUeSGGy5hupRoCIwCdTSUiYiHlFKUKEsDaAxOTk5K/////0r/////SwB0lGJLBIWUaBZ0lFKUSwQpdJRihpRihZRSlHViLg==", "dtype": "float32", "_shape": [4], "low": "[-1. -1. -1. -1.]", "bounded_below": "[ True True True True]", "high": "[1. 1. 1. 1.]", "bounded_above": "[ True True True True]", "low_repr": "-1.0", "high_repr": "1.0", "_np_random": "Generator(PCG64)"}, "n_envs": 4, "lr_schedule": {":type:": "<class 'stable_baselines3.common.utils.FloatSchedule'>", ":serialized:": "gAWVeQAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMDUZsb2F0U2NoZWR1bGWUk5QpgZR9lIwOdmFsdWVfc2NoZWR1bGWUaACMEENvbnN0YW50U2NoZWR1bGWUk5QpgZR9lIwDdmFslEc/M6kqMFUyYXNic2Iu", "value_schedule": "ConstantSchedule(val=0.0003)"}, "batch_norm_stats": [], "batch_norm_stats_target": [], "system_info": {"OS": "Linux-6.6.105+-x86_64-with-glibc2.35 # 1 SMP Thu Oct 2 10:42:05 UTC 2025", "Python": "3.12.12", "Stable-Baselines3": "2.7.0", "PyTorch": "2.8.0+cu126", "GPU Enabled": "False", "Numpy": "2.0.2", "Cloudpickle": "3.1.1", "Gymnasium": "1.2.1", "OpenAI Gym": "0.25.2"}}
replay.mp4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280a6d84b86c8327241f23058cea9c00e8c087d7f999755add6dd740ce0cb87d
3
+ size 554593
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": -50.0, "std_reward": 0.0, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2025-10-17T22:41:39.865381"}
 
1
+ {"mean_reward": -50.0, "std_reward": 0.0, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2025-10-18T00:54:16.866152"}
sac-PandaPickAndPlace-v3.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69d058b78a45eaa4b808c83d00cd95f18f472a05c4bb8bfe9742bafe26bde416
3
- size 3306169
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a002b9a150ea0d8c90729ca3537c3caa361e1d0f0284792c035816d57ebd2b9
3
+ size 3302609
sac-PandaPickAndPlace-v3/actor.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df18b23c6e502ba74b36f4b9d94a7768783d8cd30bce51587b780c0637662a04
3
  size 603099
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6147e1c3f5b249f91b9f6361b48adcbf6cabe00f5da0b995fe3ef462fb14d5c
3
  size 603099
sac-PandaPickAndPlace-v3/critic.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83945f26857ecfc12e7d04c6e7c0c1d8dee60ae7a4cb746a051885e0b4ad9035
3
  size 1189751
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:062093717e105ca4e3ba55b18fbef7511070e609125d942e5e3dbf1d6020e0a2
3
  size 1189751
sac-PandaPickAndPlace-v3/data CHANGED
@@ -4,28 +4,28 @@
4
  ":serialized:": "gAWVNwAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLnNhYy5wb2xpY2llc5SMEE11bHRpSW5wdXRQb2xpY3mUk5Qu",
5
  "__module__": "stable_baselines3.sac.policies",
6
  "__doc__": "\n Policy class (with both actor and critic) for SAC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ",
7
- "__init__": "<function MultiInputPolicy.__init__ at 0x7880d9bf2160>",
8
  "__abstractmethods__": "frozenset()",
9
- "_abc_impl": "<_abc._abc_data object at 0x7880d9bf9140>"
10
  },
11
  "verbose": 1,
12
  "policy_kwargs": {
13
  "use_sde": false
14
  },
15
- "num_timesteps": 1000000,
16
- "_total_timesteps": 1000000,
17
  "_num_timesteps_at_start": 0,
18
  "seed": null,
19
  "action_noise": null,
20
- "start_time": 1760729029192534752,
21
  "learning_rate": 0.0003,
22
  "tensorboard_log": null,
23
  "_last_obs": {
24
  ":type:": "<class 'collections.OrderedDict'>",
25
- ":serialized:": "gAWVjAIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYwAAAAAAAAACOGtr8f+gs/2FzOPfYKMr61XpO+cV/OPYWBtz5ySpE/PVXOPQt8xL5P344/bDfOPZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLBEsDhpSMAUOUdJRSlIwMZGVzaXJlZF9nb2FslGgHKJYwAAAAAAAAAB7AjD8T5sa/1IS/P63ERz++imC+5I3gP8GJAb9XM6W+Xx2Kv0VTJD84ySQ/Ot+Lv5RoDksESwOGlGgSdJRSlIwLb2JzZXJ2YXRpb26UaAcoljABAAAAAAAAiA4GviefYL+0sK88aH0HP1QbO795My8/YkxAviOGtr8f+gs/2FzOPfOjfrl5EXC8pwq6uvdoOrz4nzy8BHtCPTb9Hzs0eq+8lWHxuvMeZD8dDhY/WrmTPwh2Kj6F/rY+ysEXwHDla7/2CjK+tV6TvnFfzj0QhJ+5h1t0vOUqVLt6Iki8QS46vAR7Qj39/B87GnqvvKPnQbsIPVM/Q8tyP8mwij9wPKk+xhkcP1Y5k7/omM6+hYG3PnJKkT89Vc49/JQTuOk9cbyVIim72GZHvE5qNrz6ekI9HQEgOyR6r7z7pSu7K/zAPraeqD7nysw+E3KNva+XM0DTpzDAXCl4Pwt8xL5P344/bDfOPQTbKLrn6oC8bVwgu50zHbxL1C68Zg5JPQpT1zrvWTC8uEcJu5RoDksESxOGlGgSdJRSlHUu",
26
- "achieved_goal": "[[-1.4259685 0.5467853 0.10076302]\n [-0.17386994 -0.28783193 0.10076798]\n [ 0.35841003 1.1350844 0.10074852]\n [-0.38375887 1.1161898 0.10069165]]",
27
- "desired_goal": "[[ 1.099613 -1.5538963 1.4962411 ]\n [ 0.7803448 -0.21927926 1.7543302 ]\n [-0.5060082 -0.32265732 -1.0790213 ]\n [ 0.6418956 0.64369535 -1.0927498 ]]",
28
- "observation": "[[-1.3091481e-01 -8.7742847e-01 2.1446563e-02 5.2925730e-01\n -7.3088574e-01 6.8437916e-01 -1.8779138e-01 -1.4259685e+00\n 5.4678530e-01 1.0076302e-01 -2.4284403e-04 -1.4652603e-02\n -1.4193848e-03 -1.1377565e-02 -1.1512749e-02 4.7480598e-02\n 2.4412400e-03 -2.1420576e-02 -1.8415923e-03]\n [ 8.9109725e-01 5.8615285e-01 1.1540940e+00 1.6646588e-01\n 3.5741058e-01 -2.3712029e+00 -9.2146969e-01 -1.7386994e-01\n -2.8783193e-01 1.0076798e-01 -3.0425237e-04 -1.4914400e-02\n -3.2374200e-03 -1.2215251e-02 -1.1363567e-02 4.7480598e-02\n 2.4412267e-03 -2.1420527e-02 -2.9587529e-03]\n [ 8.2515001e-01 9.4841403e-01 1.0835201e+00 3.3053923e-01\n 6.0976827e-01 -1.1501873e+00 -4.0351033e-01 3.5841003e-01\n 1.1350844e+00 1.0074852e-01 -3.5186284e-05 -1.4724233e-02\n -2.5807966e-03 -1.2170516e-02 -1.1133743e-02 4.7480561e-02\n 2.4414726e-03 -2.1420546e-02 -2.6191461e-03]\n [ 3.7692389e-01 3.2933587e-01 3.9998552e-01 -6.9065236e-02\n 2.8061330e+00 -2.7602432e+00 9.6938109e-01 -3.8375887e-01\n 1.1161898e+00 1.0069165e-01 -6.4413273e-04 -1.5737010e-02\n -2.4469153e-03 -9.5948251e-03 -1.0670732e-02 4.9085997e-02\n 1.6427946e-03 -1.0763629e-02 -2.0947289e-03]]"
29
  },
30
  "_last_episode_starts": {
31
  ":type:": "<class 'numpy.ndarray'>",
@@ -33,25 +33,25 @@
33
  },
34
  "_last_original_obs": {
35
  ":type:": "<class 'collections.OrderedDict'>",
36
- ":serialized:": "gAWVjAIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYwAAAAAAAAAD5kF74c4Hg9+8CjPHELj7zv4Pe8YcGjPFWBGz3M1P890L+jPEILIb16nPs9PLujPJSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLBEsDhpSMAUOUdJRSlIwMZGVzaXJlZF9nb2FslGgHKJYwAAAAAAAAADTtxD3bAQq+fwBFPghKjD0wd5y8UplWPjbQL7143eW8CFSrPBB1Zz25IGQ9CtejPJRoDksESwOGlGgSdJRSlIwLb2JzZXJ2YXRpb26UaAcoljABAAAAAAAA3BXAvINsl74bogQ+y7K+PSVEmr7SGGs+SGDfPD5kF74c4Hg9+8CjPMD41zfK25c3nmHhOYjRHTgL+gS4QPG7LLy5tjJ7S7WyP2ZrOv/mHD5Jrr09CxCUPp2XpTr9l9Q9fpVqv8Um/jRxC4+87+D3vGHBozxAfmI3u/1ut3NHmblpFMW3+8Got0g1ay2AKFsycrJnMqD4E7qEOhE+Y0ZBPhv4jj55Ui89In1KPg+36r4Hs508VYEbPczU/z3Qv6M8INaMOD86Gzf+IwS49rKpt94rGLYTN2ex25w0NL600i6acPu4M8WDPZN1yDwCRjs+aBNvveL1gT+j94e/vOuPPUILIb16nPs9PLujPO+nY7jWCPO4IOC1NxrZLzlTUg04IxsQOZNPBLqRp/E7HogUOpRoDksESxOGlGgSdJRSlHUu",
37
- "achieved_goal": "[[-0.14784333 0.0607606 0.01998948]\n [-0.01746151 -0.03025862 0.01998967]\n [ 0.03796514 0.1249176 0.01998892]\n [-0.03931738 0.12285705 0.01998674]]",
38
- "desired_goal": "[[ 0.09615555 -0.1347727 0.1923847 ]\n [ 0.06850058 -0.0190998 0.20956925]\n [-0.04292317 -0.02805971 0.02091409]\n [ 0.05650812 0.05569527 0.02 ]]",
39
- "observation": "[[-2.3447923e-02 -2.9574975e-01 1.2952463e-01 9.3114458e-02\n -3.0130115e-01 2.2958687e-01 2.7267590e-02 -1.4784333e-01\n 6.0760602e-02 1.9989481e-02 2.5745830e-05 1.8102950e-05\n 4.2988075e-04 3.7626858e-05 -3.1704123e-05 5.3416438e-12\n 2.1272051e-08 -2.1105498e-08 8.9797744e-04]\n [ 1.5322493e-01 9.2617579e-02 2.8918490e-01 1.2633685e-03\n 1.0380552e-01 -9.1634357e-01 4.7339395e-07 -1.7461510e-02\n -3.0258624e-02 1.9989671e-02 1.3500045e-05 -1.4244982e-05\n -2.9235668e-04 -2.3493734e-05 -2.0117490e-05 1.3370034e-11\n 1.2756686e-08 1.3486543e-08 -5.6446530e-04]\n [ 1.4182478e-01 1.8874507e-01 2.7923664e-01 4.2803261e-02\n 1.9774297e-01 -4.5842788e-01 1.9250406e-02 3.7965138e-02\n 1.2491760e-01 1.9988924e-02 6.7156041e-05 9.2522814e-06\n -3.1504773e-05 -2.0229705e-05 -2.2675299e-06 -3.3646230e-09\n 1.6820870e-07 9.5818006e-11 -1.1989586e-04]\n [ 6.4340971e-02 2.4470126e-02 1.8288425e-01 -5.8368117e-02\n 1.0153162e+00 -1.0622448e+00 7.0273846e-02 -3.9317377e-02\n 1.2285705e-01 1.9986741e-02 -5.4277418e-05 -1.1588789e-04\n 2.1681248e-05 1.6770177e-04 3.3693690e-05 1.3743019e-04\n -5.0472579e-04 7.3747118e-03 5.6660350e-04]]"
40
  },
41
- "_episode_num": 20718,
42
  "use_sde": false,
43
  "sde_sample_freq": -1,
44
  "_current_progress_remaining": 0.0,
45
  "_stats_window_size": 100,
46
  "ep_info_buffer": {
47
  ":type:": "<class 'collections.deque'>",
48
- ":serialized:": "gAWV4AsAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwEkAAAAAAACMAWyUSzKMAXSUR0DHEPK8DjiodX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEOsgZCOWdX2UKGgGRwAAAAAAAAAAaAdLAWgIR0DHEPF12aDxdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEcDOHFgldX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEowuscQzdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEpBybQTmdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEo56nivQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHEyilP8AJdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHE7Qbn5i3dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHE6MRxtHhdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHE6Bkf9xZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFD1GAkLQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFMQXyiEhdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFLKYE4ecdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFK/49HMEdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFUmJpFkQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFdEgEEDAdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFcAU34sVdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFb9x2jfvdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFlyOHWSVdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFuXfwZwXdX2UKGgGRwAAAAAAAAAAaAdLAWgIR0DHFusZR8+idX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFtV9+gDidX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHFtOeWfK7dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHF3OmvW6LdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGCg9xIatdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGCK/M4cWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGC6HO8kEdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGQDYkE9udX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGZ2DrZ8KdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGYaR0U48dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGYV2TxG2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGiS2H+IedX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGrwvvjOtdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGqglyBCldX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHGqdvS+g2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHG0Zr30wrdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHG9j2L5ymdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHG8KUqx1QdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHG8AieNDMdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHGElHBk7dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHPYJu2qldX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHN7oUzsQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHN4bn5i3dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHX5Z0SyudX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHhGn889wdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHfkCJXQudX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHgX1pTMrdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHHty8g6ltdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHH6fwgDA8dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHH4+xIJ7cdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHH49WQwK0dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHICgz544ZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHILkW69TQdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIKJmh/RWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIKNs54nndX2UKGgGRwAAAAAAAAAAaAdLAWgIR0DHIKkY2sJZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIUE0UGmldX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIdIKYzBRdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIbjyrgfmdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIbyjSG8FdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIk/zxwyZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIt6CtihGdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIsYBDG96dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHIsmYplSTdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHI10B6rvLdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHI+zJp35fdX2UKGgGRwAAAAAAAAAAaAdLAWgIR0DHI/JUPxx2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHI9VXV9WqdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHI9fzUZvUdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJG0WhysCdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJTgaYNRWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJSfTiKixdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJTsZccENdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJf28f3evdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJpqwljVhdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJn/s7dSEdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJoPg5zYFdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJyGJUHY6dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJ7kAFPi2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJ5uZiNKidX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHJ58Sh8IBdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKDbNQj2SdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKMkfPompdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKKrjFQ2udX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKKz2OAAidX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKUZ75VOsdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKd1NWU8ndX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKb/ssxwidX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKcHtBv74dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKldmjCYUdX2UKGgGR8AkAAAAAAAAaAdLC2gIR0DHKpEPczqKdX2UKGgGR8AiAAAAAAAAaAdLCmgIR0DHKsjm0VrRdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKuupIczZdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKs4rOJLvdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHKuJB7eEadX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHLFWOwPiDdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHLHho4+8odX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHLF4XIlt1dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0DHLGDrLQokdWUu"
49
  },
50
  "ep_success_buffer": {
51
  ":type:": "<class 'collections.deque'>",
52
- ":serialized:": "gAWVhgAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKImJiImJiYmJiYmJiYmJiYmJiYmJiYiJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmJiImJiYmJiYmJiYmIiYmJiYmJiYmJiYmJiYmJiYmJiYmJiYmIiImJiYmJiYllLg=="
53
  },
54
- "_n_updates": 249975,
55
  "buffer_size": 1000000,
56
  "batch_size": 256,
57
  "learning_starts": 100,
@@ -65,12 +65,12 @@
65
  "__module__": "stable_baselines3.common.buffers",
66
  "__annotations__": "{'observation_space': <class 'gymnasium.spaces.dict.Dict'>, 'obs_shape': dict[str, tuple[int, ...]], 'observations': dict[str, numpy.ndarray], 'next_observations': dict[str, numpy.ndarray]}",
67
  "__doc__": "\n Dict Replay buffer used in off-policy algorithms like SAC/TD3.\n Extends the ReplayBuffer to use dictionary observations\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
68
- "__init__": "<function DictReplayBuffer.__init__ at 0x7880d9b13740>",
69
- "add": "<function DictReplayBuffer.add at 0x7880d9b13880>",
70
- "sample": "<function DictReplayBuffer.sample at 0x7880d9b13920>",
71
- "_get_samples": "<function DictReplayBuffer._get_samples at 0x7880d9b139c0>",
72
  "__abstractmethods__": "frozenset()",
73
- "_abc_impl": "<_abc._abc_data object at 0x7880d9f9c740>"
74
  },
75
  "replay_buffer_kwargs": {},
76
  "n_steps": 1,
@@ -92,7 +92,7 @@
92
  },
93
  "action_space": {
94
  ":type:": "<class 'gymnasium.spaces.box.Box'>",
95
- ":serialized:": "gAWVSgMAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLBIWUjANsb3eUjBNudW1weS5fY29yZS5udW1lcmljlIwLX2Zyb21idWZmZXKUk5QolhAAAAAAAAAAAACAvwAAgL8AAIC/AACAv5RoC0sEhZSMAUOUdJRSlIwNYm91bmRlZF9iZWxvd5RoEyiWBAAAAAAAAAABAQEBlGgIjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlGgWdJRSlIwEaGlnaJRoEyiWEAAAAAAAAAAAAIA/AACAPwAAgD8AAIA/lGgLSwSFlGgWdJRSlIwNYm91bmRlZF9hYm92ZZRoEyiWBAAAAAAAAAABAQEBlGgdSwSFlGgWdJRSlIwIbG93X3JlcHKUjAQtMS4wlIwJaGlnaF9yZXBylIwDMS4wlIwKX25wX3JhbmRvbZSMFG51bXB5LnJhbmRvbS5fcGlja2xllIwQX19nZW5lcmF0b3JfY3RvcpSTlGgyjBRfX2JpdF9nZW5lcmF0b3JfY3RvcpSTlIwTbnVtcHkucmFuZG9tLl9wY2c2NJSMBVBDRzY0lJOUhZRSlH2UKIwNYml0X2dlbmVyYXRvcpSMBVBDRzY0lIwFc3RhdGWUfZQoaD+KEEriVSlwqOzXxi1zIFl2Y0iMA2luY5SKEfHEtne+FXjAYDPZiktEv/kAdYwKaGFzX3VpbnQzMpRLAIwIdWludGVnZXKUSwB1jBpudW1weS5yYW5kb20uYml0X2dlbmVyYXRvcpSMG19fcHl4X3VucGlja2xlX1NlZWRTZXF1ZW5jZZSTlGhEjAxTZWVkU2VxdWVuY2WUk5RKIqLqA06HlFKUKIoQhaAkgiRLBjsR6u3PH8Oqf0sAaBMolhAAAAAAAAAA0T03a/v5L4vGQdSIDnQ+IJRoCIwCdTSUiYiHlFKUKEsDaAxOTk5K/////0r/////SwB0lGJLBIWUaBZ0lFKUSwQpdJRihpRihZRSlHViLg==",
96
  "dtype": "float32",
97
  "_shape": [
98
  4
 
4
  ":serialized:": "gAWVNwAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLnNhYy5wb2xpY2llc5SMEE11bHRpSW5wdXRQb2xpY3mUk5Qu",
5
  "__module__": "stable_baselines3.sac.policies",
6
  "__doc__": "\n Policy class (with both actor and critic) for SAC.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param use_expln: Use ``expln()`` function instead of ``exp()`` when using gSDE to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param clip_mean: Clip the mean output when using gSDE to avoid numerical instability.\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n :param n_critics: Number of critic networks to create.\n :param share_features_extractor: Whether to share or not the features extractor\n between the actor and the critic (this saves computation time)\n ",
7
+ "__init__": "<function MultiInputPolicy.__init__ at 0x7ffa4746f240>",
8
  "__abstractmethods__": "frozenset()",
9
+ "_abc_impl": "<_abc._abc_data object at 0x7ffa4747e0c0>"
10
  },
11
  "verbose": 1,
12
  "policy_kwargs": {
13
  "use_sde": false
14
  },
15
+ "num_timesteps": 1000,
16
+ "_total_timesteps": 1000,
17
  "_num_timesteps_at_start": 0,
18
  "seed": null,
19
  "action_noise": null,
20
+ "start_time": 1760748840173752029,
21
  "learning_rate": 0.0003,
22
  "tensorboard_log": null,
23
  "_last_obs": {
24
  ":type:": "<class 'collections.OrderedDict'>",
25
+ ":serialized:": "gAWVjAIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYwAAAAAAAAAC2WqD9Xo8W/4ghbvdqjKT91ttO/4ghbvdKYnj85tJc/4ghbvadG1z/b7ty+4ghbvZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLBEsDhpSMAUOUdJRSlIwMZGVzaXJlZF9nb2FslGgHKJYwAAAAAAAAAAdnO79TlZQ/z+mFvkqh1j9vnYM/e1lSPxe2gz7kz6g/HB6OP7dU4r+QRaG/4jhLvZRoDksESwOGlGgSdJRSlIwLb2JzZXJ2YXRpb26UaAcoljABAAAAAAAAE/LSPt01Tj4NyxJAz0q5PQUiOTxrrwI+OiGWvy2WqD9Xo8W/4ghbvR18ETwiFli+QY6kPsQSxD3pY7u9FXnVPAR3Eb11ayK99MoCPhPy0j7dNU4+DcsSQM9KuT0FIjk8a68CPjohlr/aoyk/dbbTv+IIW70dfBE8IhZYvkGOpD7EEsQ96WO7vRV51TwEdxG9dWsivfTKAj4T8tI+3TVOPg3LEkDPSrk9BSI5PGuvAj46IZa/0piePzm0lz/iCFu9HXwRPCIWWL5BjqQ+xBLEPelju70VedU8BHcRvXVrIr30ygI+E/LSPt01Tj4NyxJAz0q5PQUiOTxrrwI+OiGWv6dG1z/b7ty+4ghbvR18ETwiFli+QY6kPsQSxD3pY7u9FXnVPAR3Eb11ayK99MoCPpRoDksESxOGlGgSdJRSlHUu",
26
+ "achieved_goal": "[[ 1.317083 -1.5440472 -0.05347527]\n [ 0.6626564 -1.6540056 -0.05347527]\n [ 1.2390387 1.1851875 -0.05347527]\n [ 1.6818436 -0.43150982 -0.05347527]]",
27
+ "desired_goal": "[[-0.7320408 1.160807 -0.26154944]\n [ 1.6767972 1.028242 0.82167786]\n [ 0.2572486 1.3188443 1.1102939 ]\n [-1.7682103 -1.2599354 -0.04961479]]",
28
+ "observation": "[[ 0.41200313 0.20137735 2.2936432 0.09047472 0.01129961 0.12762229\n -1.172889 1.317083 -1.5440472 -0.05347527 0.00887969 -0.21102193\n 0.3213978 0.09573892 -0.09149916 0.02605871 -0.03551389 -0.03965326\n 0.12772733]\n [ 0.41200313 0.20137735 2.2936432 0.09047472 0.01129961 0.12762229\n -1.172889 0.6626564 -1.6540056 -0.05347527 0.00887969 -0.21102193\n 0.3213978 0.09573892 -0.09149916 0.02605871 -0.03551389 -0.03965326\n 0.12772733]\n [ 0.41200313 0.20137735 2.2936432 0.09047472 0.01129961 0.12762229\n -1.172889 1.2390387 1.1851875 -0.05347527 0.00887969 -0.21102193\n 0.3213978 0.09573892 -0.09149916 0.02605871 -0.03551389 -0.03965326\n 0.12772733]\n [ 0.41200313 0.20137735 2.2936432 0.09047472 0.01129961 0.12762229\n -1.172889 1.6818436 -0.43150982 -0.05347527 0.00887969 -0.21102193\n 0.3213978 0.09573892 -0.09149916 0.02605871 -0.03551389 -0.03965326\n 0.12772733]]"
29
  },
30
  "_last_episode_starts": {
31
  ":type:": "<class 'numpy.ndarray'>",
 
33
  },
34
  "_last_original_obs": {
35
  ":type:": "<class 'collections.OrderedDict'>",
36
+ ":serialized:": "gAWVjAIAAAAAAACMC2NvbGxlY3Rpb25zlIwLT3JkZXJlZERpY3SUk5QpUpQojA1hY2hpZXZlZF9nb2FslIwTbnVtcHkuX2NvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYwAAAAAAAAACpPxT09cOG9CtejPJdxET3b7/S9CtejPJJztj1aQwE+CtejPCNgBT55QGG8CtejPJSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLBEsDhpSMAUOUdJRSlIwMZGVzaXJlZF9nb2FslGgHKJYwAAAAAAAAAIhQhL3eW8Y9BjSqPS/5/j2VMLA9+N8jPjTuVTypyeA9Ad04PjF0Fb5gd869AgfJPZRoDksESwOGlGgSdJRSlIwLb2JzZXJ2YXRpb26UaAcoljABAAAAAAAA6nIdPRlsGqxDI0o+AAAAAAAAAIAAAAAAAAAAACpPxT09cOG9CtejPAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAOpyHT0ZbBqsQyNKPgAAAAAAAACAAAAAAAAAAACXcRE92+/0vQrXozwAAAAAAAAAgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAADqch09GWwarEMjSj4AAAAAAAAAgAAAAAAAAAAAknO2PVpDAT4K16M8AAAAAAAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA6nIdPRlsGqxDI0o+AAAAAAAAAIAAAAAAAAAAACNgBT55QGG8CtejPAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJRoDksESxOGlGgSdJRSlHUu",
37
+ "achieved_goal": "[[ 0.0963424 -0.11007736 0.02 ]\n [ 0.03550872 -0.11959811 0.02 ]\n [ 0.08908762 0.12623349 0.02 ]\n [ 0.13024954 -0.01374828 0.02 ]]",
38
+ "desired_goal": "[[-0.06460673 0.09685491 0.08310704]\n [ 0.12449872 0.08603016 0.16003406]\n [ 0.01305728 0.10975964 0.18053056]\n [-0.14595105 -0.10081363 0.0981579 ]]",
39
+ "observation": "[[ 3.8439669e-02 -2.1944723e-12 1.9740014e-01 0.0000000e+00\n -0.0000000e+00 0.0000000e+00 0.0000000e+00 9.6342400e-02\n -1.1007736e-01 2.0000000e-02 0.0000000e+00 -0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00]\n [ 3.8439669e-02 -2.1944723e-12 1.9740014e-01 0.0000000e+00\n -0.0000000e+00 0.0000000e+00 0.0000000e+00 3.5508718e-02\n -1.1959811e-01 2.0000000e-02 0.0000000e+00 -0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00]\n [ 3.8439669e-02 -2.1944723e-12 1.9740014e-01 0.0000000e+00\n -0.0000000e+00 0.0000000e+00 0.0000000e+00 8.9087620e-02\n 1.2623349e-01 2.0000000e-02 0.0000000e+00 -0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00]\n [ 3.8439669e-02 -2.1944723e-12 1.9740014e-01 0.0000000e+00\n -0.0000000e+00 0.0000000e+00 0.0000000e+00 1.3024954e-01\n -1.3748282e-02 2.0000000e-02 0.0000000e+00 -0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00 0.0000000e+00\n 0.0000000e+00 0.0000000e+00 0.0000000e+00]]"
40
  },
41
+ "_episode_num": 20,
42
  "use_sde": false,
43
  "sde_sample_freq": -1,
44
  "_current_progress_remaining": 0.0,
45
  "_stats_window_size": 100,
46
  "ep_info_buffer": {
47
  ":type:": "<class 'collections.deque'>",
48
+ ":serialized:": "gAWVgAIAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwEkAAAAAAACMAWyUSzKMAXSUR0AQeaF23azvdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AOzJlrdnCgdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AMfGff4yoGdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AKSB5HEuQIdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AciyD7IkqudX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AbeCiAUcn3dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AaTgxagVXWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AZNAQg9vCNdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AiP3A2ycCpdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AhtbypaRp2dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AhIK0lZ5iWdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Agk7rcCYCydX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AmPrUsnRb9dX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AltPC2tuDSdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AlH/xUedTYdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Akk3YL9deIdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0AqUgGKQ7tBdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0ApyIhQm/nGdX2UKGgGR8BJAAAAAAAAaAdLMmgIR0ApM+oLofSydX2UKGgGR8BJAAAAAAAAaAdLMmgIR0Aop4hUzbeudWUu"
49
  },
50
  "ep_success_buffer": {
51
  ":type:": "<class 'collections.deque'>",
52
+ ":serialized:": "gAWVNgAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKImJiYmJiYmJiYmJiYmJiYmJiYmJZS4="
53
  },
54
+ "_n_updates": 225,
55
  "buffer_size": 1000000,
56
  "batch_size": 256,
57
  "learning_starts": 100,
 
65
  "__module__": "stable_baselines3.common.buffers",
66
  "__annotations__": "{'observation_space': <class 'gymnasium.spaces.dict.Dict'>, 'obs_shape': dict[str, tuple[int, ...]], 'observations': dict[str, numpy.ndarray], 'next_observations': dict[str, numpy.ndarray]}",
67
  "__doc__": "\n Dict Replay buffer used in off-policy algorithms like SAC/TD3.\n Extends the ReplayBuffer to use dictionary observations\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
68
+ "__init__": "<function DictReplayBuffer.__init__ at 0x7ffa475cc900>",
69
+ "add": "<function DictReplayBuffer.add at 0x7ffa475cca40>",
70
+ "sample": "<function DictReplayBuffer.sample at 0x7ffa475ccae0>",
71
+ "_get_samples": "<function DictReplayBuffer._get_samples at 0x7ffa475ccb80>",
72
  "__abstractmethods__": "frozenset()",
73
+ "_abc_impl": "<_abc._abc_data object at 0x7ffa47579e80>"
74
  },
75
  "replay_buffer_kwargs": {},
76
  "n_steps": 1,
 
92
  },
93
  "action_space": {
94
  ":type:": "<class 'gymnasium.spaces.box.Box'>",
95
+ ":serialized:": "gAWVSgMAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLBIWUjANsb3eUjBNudW1weS5fY29yZS5udW1lcmljlIwLX2Zyb21idWZmZXKUk5QolhAAAAAAAAAAAACAvwAAgL8AAIC/AACAv5RoC0sEhZSMAUOUdJRSlIwNYm91bmRlZF9iZWxvd5RoEyiWBAAAAAAAAAABAQEBlGgIjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwSFlGgWdJRSlIwEaGlnaJRoEyiWEAAAAAAAAAAAAIA/AACAPwAAgD8AAIA/lGgLSwSFlGgWdJRSlIwNYm91bmRlZF9hYm92ZZRoEyiWBAAAAAAAAAABAQEBlGgdSwSFlGgWdJRSlIwIbG93X3JlcHKUjAQtMS4wlIwJaGlnaF9yZXBylIwDMS4wlIwKX25wX3JhbmRvbZSMFG51bXB5LnJhbmRvbS5fcGlja2xllIwQX19nZW5lcmF0b3JfY3RvcpSTlGgyjBRfX2JpdF9nZW5lcmF0b3JfY3RvcpSTlIwTbnVtcHkucmFuZG9tLl9wY2c2NJSMBVBDRzY0lJOUhZRSlH2UKIwNYml0X2dlbmVyYXRvcpSMBVBDRzY0lIwFc3RhdGWUfZQoaD+KEOu4EztTgI4GSV8TIgT+uDSMA2luY5SKEMsFRWPEy4EeUByoeFDlRF91jApoYXNfdWludDMylEsAjAh1aW50ZWdlcpRLAHWMGm51bXB5LnJhbmRvbS5iaXRfZ2VuZXJhdG9ylIwbX19weXhfdW5waWNrbGVfU2VlZFNlcXVlbmNllJOUaESMDFNlZWRTZXF1ZW5jZZSTlEoiouoDToeUUpQoihHx1TnGMbnkBZpEVOfll2G3AEsAaBMolhAAAAAAAAAAsfq4KIuFWt9iUeSGGy5hupRoCIwCdTSUiYiHlFKUKEsDaAxOTk5K/////0r/////SwB0lGJLBIWUaBZ0lFKUSwQpdJRihpRihZRSlHViLg==",
96
  "dtype": "float32",
97
  "_shape": [
98
  4
sac-PandaPickAndPlace-v3/ent_coef_optimizer.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d63e2993384291c4f78dc618589f5e19ad8dfaa3304d1d993cbee0a0f3da1aee
3
  size 2401
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:649320b5bba9cab1d8c102e3f336cd7b06cdc0b303ea5bd9df550f5feb940261
3
  size 2401
sac-PandaPickAndPlace-v3/policy.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2198abc4713a92e60db71b107ba5265d807bb7e2abd616045810e8bd8e0b9afa
3
  size 1489603
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c8ab7bde251b1d8270beff0cad9e2df43ec65beb2bc553e38a14132789c06a0
3
  size 1489603
sac-PandaPickAndPlace-v3/pytorch_variables.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97394119d28daead4eef43c0d5cc16bf66d4f65dc6c641e4e3a6e5457166973a
3
  size 1577
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:692afe983c6c3894750e1a53fc5ac6303b1200d7e0ee2367ed3c112836944b04
3
  size 1577
vec_normalize.pkl CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:defda930915fe2ab61079939a431e6100f0325c71f8a01eef8032751c2fe3e08
3
  size 3473
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09e03c1f8c0846c0aa2882678b2961f91fd02577d42bf46db4a5747227fef2b1
3
  size 3473