Test commit
Browse files- README.md +1 -1
- config.json +1 -1
- ppo-LunarLander-v2.zip +1 -1
- ppo-LunarLander-v2/data +16 -16
- ppo-LunarLander-v2/policy.optimizer.pth +1 -1
- ppo-LunarLander-v2/policy.pth +1 -1
- replay.mp4 +0 -0
- results.json +1 -1
README.md
CHANGED
|
@@ -16,7 +16,7 @@ model-index:
|
|
| 16 |
type: LunarLander-v2
|
| 17 |
metrics:
|
| 18 |
- type: mean_reward
|
| 19 |
-
value: -
|
| 20 |
name: mean_reward
|
| 21 |
verified: false
|
| 22 |
---
|
|
|
|
| 16 |
type: LunarLander-v2
|
| 17 |
metrics:
|
| 18 |
- type: mean_reward
|
| 19 |
+
value: -452.92 +/- 339.69
|
| 20 |
name: mean_reward
|
| 21 |
verified: false
|
| 22 |
---
|
config.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==", "__module__": "stable_baselines3.common.policies", "__doc__": "\n Policy class for actor-critic algorithms (has both policy and value prediction).\n Used by A2C, PPO and the likes.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param ortho_init: Whether to use or not orthogonal initialization\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ", "__init__": "<function ActorCriticPolicy.__init__ at 0x7aa4555da710>", "_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x7aa4555da7a0>", "reset_noise": "<function ActorCriticPolicy.reset_noise at 0x7aa4555da830>", "_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x7aa4555da8c0>", "_build": "<function ActorCriticPolicy._build at 0x7aa4555da950>", "forward": "<function ActorCriticPolicy.forward at 0x7aa4555da9e0>", "extract_features": "<function ActorCriticPolicy.extract_features at 0x7aa4555daa70>", "_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x7aa4555dab00>", "_predict": "<function ActorCriticPolicy._predict at 0x7aa4555dab90>", "evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x7aa4555dac20>", "get_distribution": "<function ActorCriticPolicy.get_distribution at 0x7aa4555dacb0>", "predict_values": "<function ActorCriticPolicy.predict_values at 0x7aa4555dad40>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x7aa455589100>"}, "verbose": 1, "policy_kwargs": {}, "num_timesteps": 6144, "_total_timesteps": 5000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1729161582106002499, "learning_rate": 0.0003, "tensorboard_log": null, "_last_obs": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAKx6Jr86cla9OJVFv0ngn7+VSlc8kxkOPwAAgD8AAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdAAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYBAAAAAAAAAACUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwGFlIwBQ5R0lFKULg=="}, "_last_original_obs": null, "_episode_num": 0, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": -0.2287999999999999, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVqAcAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwGF5XdCVrymMAWyUS3uMAXSURz/CgDA8B+4LdX2UKGgGR8BYLqvFFUhnaAdLaWgIRz/Rhxo7FKkEdX2UKGgGR8BuNjQqqfe2aAdLcWgIRz/aPtD2JzkqdX2UKGgGR8Bjcrjebd8BaAdLYGgIRz/guI68xsVMdX2UKGgGR8BcfxacI7eVaAdLU2gIRz/jxDLKV6eHdX2UKGgGR8B0GIL9deIEaAdLVWgIRz/mxA8jiXIEdX2UKGgGR8BZXo46wMYuaAdLamgIRz/q6NEPUaybdX2UKGgGR8BZx2fbsWweaAdLO2gIRz/tCi7CiyprdX2UKGgGR8By7osAeaKDaAdLbWgIRz/wtQCSzPa+dX2UKGgGR8BfrPTodMkAaAdLZWgIRz/yUqQRwqAjdX2UKGgGR8Bhe6EUTL4faAdLYmgIRz/z3Lidat9ydX2UKGgGR8Bje0vmHP/raAdLUWgIRz/1JSiudPLxdX2UKGgGR8B232hIvrWzaAdLWWgIRz/2g5eZ5Rj0dX2UKGgGR8BZW2K/EfknaAdLR2gIRz/3l5fMOf/WdX2UKGgGR8BWfamXPZ7HaAdLTmgIRz/4xciW3Sa3dX2UKGgGR8BiKcQGwA2iaAdLSGgIRz/58E7nxJ/YdX2UKGgGR8BrhIjGDL8raAdLVWgIRz/7PrGBFuvVdX2UKGgGR8BgrR1oxpL3aAdLU2gIRz/8fViF0xM4dX2UKGgGR8BuHG7tiQT3aAdLPWgIRz/9Z2dNFjNIdX2UKGgGR8BVi28dxQzlaAdLZWgIRz//AZOzposadX2UKGgGR8BhLHgpBomHaAdLfmgIR0AAnWOIZZSvdX2UKGgGR8Bf4AavRqoIaAdLVmgIR0ABSWVu76HkdX2UKGgGR8BgCmPPszEaaAdLdWgIR0ANPtv4ubqhdX2UKGgGR8BNzzaCcwxnaAdLWWgIR0AOGtyPuG9IdX2UKGgGR8BZNfR3NcGDaAdLamgIR0APGxrzoUzsdX2UKGgGR8Bdyjst03fiaAdLWWgIR0AP8EcKgIyCdX2UKGgGR8A2W2y9mHxjaAdLY2gIR0AQaIEbHZK4dX2UKGgGR8Bpxe2JBPbgaAdLfWgIR0AQ7U6PsAvMdX2UKGgGR8BfaK19fCyhaAdLZWgIR0ARVtZV4oqkdX2UKGgGR8Bfjz8xbjcVaAdLaWgIR0ARwk2P1ct5dX2UKGgGR8BnRNr433pOaAdLS2gIR0ASDbSJCSiedX2UKGgGR8BYl0YTCcgAaAdLUGgIR0ASXObAk9lmdX2UKGgGR8BnQQAQxvehaAdLYWgIR0ASvCGetjkNdX2UKGgGR0A0N5Zr56+naAdLVmgIR0ATFYA80UGndX2UKGgGR8BaxM5jpcHGaAdLYWgIR0ATfc580DU3dX2UKGgGR8BO41PFefI0aAdLTGgIR0AT1RIjGDL9dX2UKGgGR8BKILFOwgTzaAdLRWgIR0AUJSOzY287dX2UKGgGR8BpJQzP8hs7aAdLYGgIR0AUjJEH+qBFdX2UKGgGR8BWp8rd30PIaAdLPmgIR0AUyfywwCbMdX2UKGgGR8BsCqQ5myxBaAdLcGgIR0AVP3TNMXabdX2UKGgGR8BgRNT72tdSaAdLTWgIR0AViSA6Mir1dX2UKGgGR8B1Sjru6VdHaAdLWmgIR0AV4ydnTRYzdX2UKGgGR8ByRx7ojfNzaAdLcWgIR0AWVMM7U5MldX2UKGgGR8BT3gXMyJsPaAdLbGgIR0AWv/GVAzHkdX2UKGgGR8BmIyA4GUwBaAdLfGgIR0AcC1NQCSzPdX2UKGgGR8B1uvhP0qYraAdLdmgIR0AchX5nDiwTdX2UKGgGR8B07DFDOTq0aAdLnWgIR0AdNaxHG0eEdX2UKGgGR8B2iYj8k2P1aAdLgGgIR0AduQFLWZqmdX2UKGgGR8BlQEB2fTTfaAdLZGgIR0AeHbBXS0BwdX2UKGgGR8BZLh3A2ycDaAdLUmgIR0AebpdKNAC5dX2UKGgGR8BkqkXtShrWaAdLSmgIR0AeuIUJv5xjdX2UKGgGR8BimuQdS2piaAdLR2gIR0Ae/ag2606YdX2UKGgGR8BogGRcNYr8aAdLfmgIR0AfgLVnVXmvdX2UKGgGR8BhgqlchTwVaAdLS2gIR0AfzaoMrmQsdX2UKGgGR8BHVVjy4FzNaAdLRGgIR0AgDWDHwPRRdX2UKGgGR8BkEmOZLIxQaAdLamgIR0AgQUsWfseGdX2UKGgGR8BxIpg3Lmp3aAdLR2gIR0AgaF9KEnLJdX2UKGgGR8BqwxrnDBM0aAdLTWgIR0Agj7BwdbPhdX2UKGgGR8BGutRvWH1waAdLOmgIR0AgrCa7VawEdX2UKGgGR8BkEjIikftAaAdLcmgIR0Ag5doFmnO0dX2UKGgGR8BLY7kGRmseaAdLe2gIR0AhIqebutwKdX2UKGgGR8Be/3I6r/83aAdLZWgIR0AhVLxI8QqadX2UKGgGR8B2QiG8EmpmaAdLXmgIR0Ahi0hvBJqZdX2UKGgGR8BZ4KJqIrOJaAdLcWgIR0Ahy/FBIFvAdWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="}, "_n_updates": 30, "observation_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVdgIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWCAAAAAAAAAABAQEBAQEBAZRoCIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksIhZSMAUOUdJRSlIwNYm91bmRlZF9hYm92ZZRoESiWCAAAAAAAAAABAQEBAQEBAZRoFUsIhZRoGXSUUpSMBl9zaGFwZZRLCIWUjANsb3eUaBEoliAAAAAAAAAAAAC0wgAAtMIAAKDAAACgwNsPScAAAKDAAAAAgAAAAICUaAtLCIWUaBl0lFKUjARoaWdolGgRKJYgAAAAAAAAAAAAtEIAALRCAACgQAAAoEDbD0lAAACgQAAAgD8AAIA/lGgLSwiFlGgZdJRSlIwIbG93X3JlcHKUjFtbLTkwLiAgICAgICAgLTkwLiAgICAgICAgIC01LiAgICAgICAgIC01LiAgICAgICAgIC0zLjE0MTU5MjcgIC01LgogIC0wLiAgICAgICAgIC0wLiAgICAgICBdlIwJaGlnaF9yZXBylIxTWzkwLiAgICAgICAgOTAuICAgICAgICAgNS4gICAgICAgICA1LiAgICAgICAgIDMuMTQxNTkyNyAgNS4KICAxLiAgICAgICAgIDEuICAgICAgIF2UjApfbnBfcmFuZG9tlE51Yi4=", "dtype": "float32", "bounded_below": "[ True True True True True True True True]", "bounded_above": "[ True True True True True True True True]", "_shape": [8], "low": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "low_repr": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high_repr": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.discrete.Discrete'>", ":serialized:": "gAWV2wAAAAAAAACMGWd5bW5hc2l1bS5zcGFjZXMuZGlzY3JldGWUjAhEaXNjcmV0ZZSTlCmBlH2UKIwBbpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIBAAAAAAAAACUhpRSlIwFc3RhcnSUaAhoDkMIAAAAAAAAAACUhpRSlIwGX3NoYXBllCmMBWR0eXBllGgOjApfbnBfcmFuZG9tlE51Yi4=", "n": "4", "start": "0", "_shape": [], "dtype": "int64", "_np_random": null}, "n_envs": 1, "n_steps": 2048, "gamma": 0.99, "gae_lambda": 0.95, "ent_coef": 0.0, "vf_coef": 0.5, "max_grad_norm": 0.5, "batch_size": 64, "n_epochs": 10, "clip_range": {":type:": "<class 'function'>", ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS91c3IvbG9jYWwvbGliL3B5dGhvbjMuMTAvZGlzdC1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuEQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz/JmZmZmZmahZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"}, "clip_range_vf": null, "normalize_advantage": true, "target_kl": null, "lr_schedule": {":type:": "<class 'function'>", ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS91c3IvbG9jYWwvbGliL3B5dGhvbjMuMTAvZGlzdC1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuEQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz8zqSowVTJhhZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"}, "system_info": {"OS": "Linux-6.1.85+-x86_64-with-glibc2.35 # 1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024", "Python": "3.10.12", "Stable-Baselines3": "2.0.0a5", "PyTorch": "2.4.1+cu121", "GPU Enabled": "False", "Numpy": "1.26.4", "Cloudpickle": "2.2.1", "Gymnasium": "0.28.1", "OpenAI Gym": "0.25.2"}}
|
|
|
|
| 1 |
+
{"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==", "__module__": "stable_baselines3.common.policies", "__doc__": "\n Policy class for actor-critic algorithms (has both policy and value prediction).\n Used by A2C, PPO and the likes.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param ortho_init: Whether to use or not orthogonal initialization\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ", "__init__": "<function ActorCriticPolicy.__init__ at 0x79ec585489d0>", "_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x79ec58548a60>", "reset_noise": "<function ActorCriticPolicy.reset_noise at 0x79ec58548af0>", "_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x79ec58548b80>", "_build": "<function ActorCriticPolicy._build at 0x79ec58548c10>", "forward": "<function ActorCriticPolicy.forward at 0x79ec58548ca0>", "extract_features": "<function ActorCriticPolicy.extract_features at 0x79ec58548d30>", "_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x79ec58548dc0>", "_predict": "<function ActorCriticPolicy._predict at 0x79ec58548e50>", "evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x79ec58548ee0>", "get_distribution": "<function ActorCriticPolicy.get_distribution at 0x79ec58548f70>", "predict_values": "<function ActorCriticPolicy.predict_values at 0x79ec58549000>", "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x79ec584ea0c0>"}, "verbose": 1, "policy_kwargs": {}, "num_timesteps": 6144, "_total_timesteps": 5000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1729166522809859021, "learning_rate": 0.0003, "tensorboard_log": null, "_last_obs": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAACgrjssZbM/gfeMPj4QY77IiQK8MAXKvQAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdAAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYBAAAAAAAAAACUjAVudW1weZSMBWR0eXBllJOUjAJiMZSJiIeUUpQoSwOMAXyUTk5OSv////9K/////0sAdJRiSwGFlIwBQ5R0lFKULg=="}, "_last_original_obs": null, "_episode_num": 0, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": -0.2287999999999999, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVqAcAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwFjfOTaCcwyMAWyUS0uMAXSURz+3jn3cpLEldX2UKGgGR8Bh/WstCiRGaAdLYmgIRz/Ib/ffoA4odX2UKGgGR8BZ79DhLoOhaAdLSmgIRz/QyYG+sYEXdX2UKGgGR8Birlvl2eQNaAdLUGgIRz/Vp22Xsw+MdX2UKGgGR8BaKgEdNnGsaAdLTmgIRz/a003wTdtVdX2UKGgGR8BwAExpL26DaAdLaWgIRz/g87hegL7XdX2UKGgGR8Bhr6jzqbBoaAdLimgIRz/lsxO+IuXedX2UKGgGR8BQ1Os5n13/aAdLQmgIRz/ns+/xlQMydX2UKGgGR8BdUb3wkPc0aAdLQWgIRz/pu9vjwQUYdX2UKGgGR8BqbP531SOzaAdLUWgIRz/sLQgLZzxPdX2UKGgGR8B9jNkNFz+4aAdLaGgIRz/vV0tAcDKYdX2UKGgGR8ByvsrI5o4/aAdLW2gIRz/xGT5ftx+8dX2UKGgGR8BYAEvTPSlWaAdLbWgIRz/y0X+ERJ2/dX2UKGgGR8BSooL1EmY0aAdLSmgIRz/0KAjIJZ4fdX2UKGgGR8BWqqE8JUo8aAdLSGgIRz/1Wv4dp7C0dX2UKGgGR8Bv1wevIOpbaAdLemgIRz/3M4xUNrj6dX2UKGgGR8BzDu7kGRmsaAdLWGgIRz/4g0O3DvVmdX2UKGgGR8BzpdvS+g14aAdLYmgIRz/6Ib83uNPydX2UKGgGR8BiCeWWyC4CaAdLfGgIRz/7/t2LYPGydX2UKGgGR8Bda5rpJPIoaAdLO2gIRz/82svIwM6SdX2UKGgGR8Bgu63AmAskaAdLSWgIRz/96wY+B6KMdX2UKGgGR8BgQOC/XXiBaAdLTmgIRz//DkIX0oSddX2UKGgGR8Ble0BjnV5KaAdLcGgIR0AJBClabF0gdX2UKGgGR8BTkJmdy1eCaAdLQmgIR0AJhKJ2t+1CdX2UKGgGR8Bdg3oLXtjTaAdLY2gIR0AKSLAHmig1dX2UKGgGR8B33z/yXlbNaAdLeGgIR0ALT1f3N9pidX2UKGgGR8B10OeK8+RpaAdLYmgIR0AMNqi48U22dX2UKGgGR8Bb+joyKvV3aAdLU2gIR0AM+FnIyTIOdX2UKGgGR8Bna0JF9a2XaAdLaWgIR0AN4mLLpzLfdX2UKGgGR8B07SI/JNj9aAdLXGgIR0AOkXxe9i+ddX2UKGgGR8BSZ7WI42jxaAdLSGgIR0APF3yI55qudX2UKGgGR8B1SHKU3XI2aAdLWWgIR0APwEpy6tkndX2UKGgGR8BhfUolUp/gaAdLZGgIR0AQPeO4oZyddX2UKGgGR8BdtlYZEUj+aAdLU2gIR0AQj0Yj0L+hdX2UKGgGR8Bh0PT7VJ+VaAdLcGgIR0ARBKoQ4CIUdX2UKGgGR8BixP4sVclgaAdLYGgIR0ARYir1dxACdX2UKGgGR8BbASONo8ISaAdLTWgIR0ARqjmCAc1gdX2UKGgGR8BfcCjDbah6aAdLZWgIR0ASIVDa4+bFdX2UKGgGR8Bx0v4ubqhUaAdLa2gIR0ASn1g6U7jldX2UKGgGR8B1MFr56+nJaAdLZ2gIR0ATIV+I/JNkdX2UKGgGR8B2FnAckt2+aAdLV2gIR0ATh9oexOcldX2UKGgGR8BK5Kur6tT2aAdLQWgIR0AT0e5nUUfxdX2UKGgGR8BumC+WWyC4aAdLY2gIR0AUQNd7fHghdX2UKGgGR8Biold5Y5ktaAdLRGgIR0AUgsXizcASdX2UKGgGR8B3pGHEdeY2aAdLjGgIR0AZZHZsbedkdX2UKGgGR8BQobEcbR4RaAdLPGgIR0AZnO6d1+y7dX2UKGgGR8BWQAHiWE9MaAdLVGgIR0AZ7U8V58jSdX2UKGgGR8BjK7DEWIoFaAdLfmgIR0Aaal3yI55rdX2UKGgGR8BdS6CtihFmaAdLaGgIR0Aazwb2lEZ0dX2UKGgGR8BpD4RoRIz4aAdLfWgIR0AbUkgOjIq9dX2UKGgGR8BnV/oJRfnfaAdLbWgIR0AbusySFGoadX2UKGgGR8BU21oHs1KoaAdLTGgIR0AcApnYg7o0dX2UKGgGR8BpwiIk7fYSaAdLl2gIR0AclZbILgGbdX2UKGgGR8BlP93IMjNZaAdLeWgIR0AdCzY287IUdX2UKGgGR8BpkwdU83dcaAdLZmgIR0Adck2P1ct5dX2UKGgGR8Biy1xOtW+5aAdLVGgIR0AdxaTwDvE1dX2UKGgGR8BXAS+Yc/+saAdLfGgIR0AeQP6KtPpIdX2UKGgGR0BDxaxHG0eEaAdLWGgIR0Ael0U47zTXdX2UKGgGR8BgOvMnqmj1aAdLgmgIR0AfI/lhgE2YdX2UKGgGR8BiT0dPtUn5aAdLgWgIR0AfqI7/4qPPdX2UKGgGR8BZthLXcxj8aAdLWmgIR0Af/Hp8neBQdX2UKGgGR8Bi1fvKEFnqaAdLa2gIR0AgMuoxYaHcdX2UKGgGR8BUccPJ7sv7aAdLSmgIR0AgVw1ivxH5dX2UKGgGR8BnTvKp1ie/aAdLhWgIR0AgmfjCHh0hdWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="}, "_n_updates": 30, "observation_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVdgIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMDWJvdW5kZWRfYmVsb3eUjBJudW1weS5jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWCAAAAAAAAAABAQEBAQEBAZRoCIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksIhZSMAUOUdJRSlIwNYm91bmRlZF9hYm92ZZRoESiWCAAAAAAAAAABAQEBAQEBAZRoFUsIhZRoGXSUUpSMBl9zaGFwZZRLCIWUjANsb3eUaBEoliAAAAAAAAAAAAC0wgAAtMIAAKDAAACgwNsPScAAAKDAAAAAgAAAAICUaAtLCIWUaBl0lFKUjARoaWdolGgRKJYgAAAAAAAAAAAAtEIAALRCAACgQAAAoEDbD0lAAACgQAAAgD8AAIA/lGgLSwiFlGgZdJRSlIwIbG93X3JlcHKUjFtbLTkwLiAgICAgICAgLTkwLiAgICAgICAgIC01LiAgICAgICAgIC01LiAgICAgICAgIC0zLjE0MTU5MjcgIC01LgogIC0wLiAgICAgICAgIC0wLiAgICAgICBdlIwJaGlnaF9yZXBylIxTWzkwLiAgICAgICAgOTAuICAgICAgICAgNS4gICAgICAgICA1LiAgICAgICAgIDMuMTQxNTkyNyAgNS4KICAxLiAgICAgICAgIDEuICAgICAgIF2UjApfbnBfcmFuZG9tlE51Yi4=", "dtype": "float32", "bounded_below": "[ True True True True True True True True]", "bounded_above": "[ True True True True True True True True]", "_shape": [8], "low": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "low_repr": "[-90. -90. -5. -5. -3.1415927 -5.\n -0. -0. ]", "high_repr": "[90. 90. 5. 5. 3.1415927 5.\n 1. 1. ]", "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.discrete.Discrete'>", ":serialized:": "gAWV2wAAAAAAAACMGWd5bW5hc2l1bS5zcGFjZXMuZGlzY3JldGWUjAhEaXNjcmV0ZZSTlCmBlH2UKIwBbpSMFW51bXB5LmNvcmUubXVsdGlhcnJheZSMBnNjYWxhcpSTlIwFbnVtcHmUjAVkdHlwZZSTlIwCaTiUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYkMIBAAAAAAAAACUhpRSlIwFc3RhcnSUaAhoDkMIAAAAAAAAAACUhpRSlIwGX3NoYXBllCmMBWR0eXBllGgOjApfbnBfcmFuZG9tlE51Yi4=", "n": "4", "start": "0", "_shape": [], "dtype": "int64", "_np_random": null}, "n_envs": 1, "n_steps": 2048, "gamma": 0.99, "gae_lambda": 0.95, "ent_coef": 0.0, "vf_coef": 0.5, "max_grad_norm": 0.5, "batch_size": 64, "n_epochs": 10, "clip_range": {":type:": "<class 'function'>", ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS91c3IvbG9jYWwvbGliL3B5dGhvbjMuMTAvZGlzdC1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuEQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz/JmZmZmZmahZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"}, "clip_range_vf": null, "normalize_advantage": true, "target_kl": null, "lr_schedule": {":type:": "<class 'function'>", ":serialized:": "gAWVxQIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMSS91c3IvbG9jYWwvbGliL3B5dGhvbjMuMTAvZGlzdC1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuEQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjEkvdXNyL2xvY2FsL2xpYi9weXRob24zLjEwL2Rpc3QtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUjBxjbG91ZHBpY2tsZS5jbG91ZHBpY2tsZV9mYXN0lIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaB99lH2UKGgWaA2MDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz8zqSowVTJhhZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"}, "system_info": {"OS": "Linux-6.1.85+-x86_64-with-glibc2.35 # 1 SMP PREEMPT_DYNAMIC Thu Jun 27 21:05:47 UTC 2024", "Python": "3.10.12", "Stable-Baselines3": "2.0.0a5", "PyTorch": "2.4.1+cu121", "GPU Enabled": "False", "Numpy": "1.26.4", "Cloudpickle": "2.2.1", "Gymnasium": "0.28.1", "OpenAI Gym": "0.25.2"}}
|
ppo-LunarLander-v2.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 145330
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:539d9385f69a92d6447fdcfbb19214ae72896a75cf04e2519f881f23519f52b5
|
| 3 |
size 145330
|
ppo-LunarLander-v2/data
CHANGED
|
@@ -4,20 +4,20 @@
|
|
| 4 |
":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==",
|
| 5 |
"__module__": "stable_baselines3.common.policies",
|
| 6 |
"__doc__": "\n Policy class for actor-critic algorithms (has both policy and value prediction).\n Used by A2C, PPO and the likes.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param ortho_init: Whether to use or not orthogonal initialization\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
| 7 |
-
"__init__": "<function ActorCriticPolicy.__init__ at
|
| 8 |
-
"_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at
|
| 9 |
-
"reset_noise": "<function ActorCriticPolicy.reset_noise at
|
| 10 |
-
"_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at
|
| 11 |
-
"_build": "<function ActorCriticPolicy._build at
|
| 12 |
-
"forward": "<function ActorCriticPolicy.forward at
|
| 13 |
-
"extract_features": "<function ActorCriticPolicy.extract_features at
|
| 14 |
-
"_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at
|
| 15 |
-
"_predict": "<function ActorCriticPolicy._predict at
|
| 16 |
-
"evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at
|
| 17 |
-
"get_distribution": "<function ActorCriticPolicy.get_distribution at
|
| 18 |
-
"predict_values": "<function ActorCriticPolicy.predict_values at
|
| 19 |
"__abstractmethods__": "frozenset()",
|
| 20 |
-
"_abc_impl": "<_abc._abc_data object at
|
| 21 |
},
|
| 22 |
"verbose": 1,
|
| 23 |
"policy_kwargs": {},
|
|
@@ -26,12 +26,12 @@
|
|
| 26 |
"_num_timesteps_at_start": 0,
|
| 27 |
"seed": null,
|
| 28 |
"action_noise": null,
|
| 29 |
-
"start_time":
|
| 30 |
"learning_rate": 0.0003,
|
| 31 |
"tensorboard_log": null,
|
| 32 |
"_last_obs": {
|
| 33 |
":type:": "<class 'numpy.ndarray'>",
|
| 34 |
-
":serialized:": "
|
| 35 |
},
|
| 36 |
"_last_episode_starts": {
|
| 37 |
":type:": "<class 'numpy.ndarray'>",
|
|
@@ -45,7 +45,7 @@
|
|
| 45 |
"_stats_window_size": 100,
|
| 46 |
"ep_info_buffer": {
|
| 47 |
":type:": "<class 'collections.deque'>",
|
| 48 |
-
":serialized:": "
|
| 49 |
},
|
| 50 |
"ep_success_buffer": {
|
| 51 |
":type:": "<class 'collections.deque'>",
|
|
|
|
| 4 |
":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==",
|
| 5 |
"__module__": "stable_baselines3.common.policies",
|
| 6 |
"__doc__": "\n Policy class for actor-critic algorithms (has both policy and value prediction).\n Used by A2C, PPO and the likes.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param ortho_init: Whether to use or not orthogonal initialization\n :param use_sde: Whether to use State Dependent Exploration or not\n :param log_std_init: Initial value for the log standard deviation\n :param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n :param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n :param features_extractor_class: Features extractor to use.\n :param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n :param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
| 7 |
+
"__init__": "<function ActorCriticPolicy.__init__ at 0x79ec585489d0>",
|
| 8 |
+
"_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x79ec58548a60>",
|
| 9 |
+
"reset_noise": "<function ActorCriticPolicy.reset_noise at 0x79ec58548af0>",
|
| 10 |
+
"_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x79ec58548b80>",
|
| 11 |
+
"_build": "<function ActorCriticPolicy._build at 0x79ec58548c10>",
|
| 12 |
+
"forward": "<function ActorCriticPolicy.forward at 0x79ec58548ca0>",
|
| 13 |
+
"extract_features": "<function ActorCriticPolicy.extract_features at 0x79ec58548d30>",
|
| 14 |
+
"_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x79ec58548dc0>",
|
| 15 |
+
"_predict": "<function ActorCriticPolicy._predict at 0x79ec58548e50>",
|
| 16 |
+
"evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x79ec58548ee0>",
|
| 17 |
+
"get_distribution": "<function ActorCriticPolicy.get_distribution at 0x79ec58548f70>",
|
| 18 |
+
"predict_values": "<function ActorCriticPolicy.predict_values at 0x79ec58549000>",
|
| 19 |
"__abstractmethods__": "frozenset()",
|
| 20 |
+
"_abc_impl": "<_abc._abc_data object at 0x79ec584ea0c0>"
|
| 21 |
},
|
| 22 |
"verbose": 1,
|
| 23 |
"policy_kwargs": {},
|
|
|
|
| 26 |
"_num_timesteps_at_start": 0,
|
| 27 |
"seed": null,
|
| 28 |
"action_noise": null,
|
| 29 |
+
"start_time": 1729166522809859021,
|
| 30 |
"learning_rate": 0.0003,
|
| 31 |
"tensorboard_log": null,
|
| 32 |
"_last_obs": {
|
| 33 |
":type:": "<class 'numpy.ndarray'>",
|
| 34 |
+
":serialized:": "gAWVlQAAAAAAAACMEm51bXB5LmNvcmUubnVtZXJpY5SMC19mcm9tYnVmZmVylJOUKJYgAAAAAAAAAACgrjssZbM/gfeMPj4QY77IiQK8MAXKvQAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCZjSUiYiHlFKUKEsDjAE8lE5OTkr/////Sv////9LAHSUYksBSwiGlIwBQ5R0lFKULg=="
|
| 35 |
},
|
| 36 |
"_last_episode_starts": {
|
| 37 |
":type:": "<class 'numpy.ndarray'>",
|
|
|
|
| 45 |
"_stats_window_size": 100,
|
| 46 |
"ep_info_buffer": {
|
| 47 |
":type:": "<class 'collections.deque'>",
|
| 48 |
+
":serialized:": "gAWVqAcAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwFjfOTaCcwyMAWyUS0uMAXSURz+3jn3cpLEldX2UKGgGR8Bh/WstCiRGaAdLYmgIRz/Ib/ffoA4odX2UKGgGR8BZ79DhLoOhaAdLSmgIRz/QyYG+sYEXdX2UKGgGR8Birlvl2eQNaAdLUGgIRz/Vp22Xsw+MdX2UKGgGR8BaKgEdNnGsaAdLTmgIRz/a003wTdtVdX2UKGgGR8BwAExpL26DaAdLaWgIRz/g87hegL7XdX2UKGgGR8Bhr6jzqbBoaAdLimgIRz/lsxO+IuXedX2UKGgGR8BQ1Os5n13/aAdLQmgIRz/ns+/xlQMydX2UKGgGR8BdUb3wkPc0aAdLQWgIRz/pu9vjwQUYdX2UKGgGR8BqbP531SOzaAdLUWgIRz/sLQgLZzxPdX2UKGgGR8B9jNkNFz+4aAdLaGgIRz/vV0tAcDKYdX2UKGgGR8ByvsrI5o4/aAdLW2gIRz/xGT5ftx+8dX2UKGgGR8BYAEvTPSlWaAdLbWgIRz/y0X+ERJ2/dX2UKGgGR8BSooL1EmY0aAdLSmgIRz/0KAjIJZ4fdX2UKGgGR8BWqqE8JUo8aAdLSGgIRz/1Wv4dp7C0dX2UKGgGR8Bv1wevIOpbaAdLemgIRz/3M4xUNrj6dX2UKGgGR8BzDu7kGRmsaAdLWGgIRz/4g0O3DvVmdX2UKGgGR8BzpdvS+g14aAdLYmgIRz/6Ib83uNPydX2UKGgGR8BiCeWWyC4CaAdLfGgIRz/7/t2LYPGydX2UKGgGR8Bda5rpJPIoaAdLO2gIRz/82svIwM6SdX2UKGgGR8Bgu63AmAskaAdLSWgIRz/96wY+B6KMdX2UKGgGR8BgQOC/XXiBaAdLTmgIRz//DkIX0oSddX2UKGgGR8Ble0BjnV5KaAdLcGgIR0AJBClabF0gdX2UKGgGR8BTkJmdy1eCaAdLQmgIR0AJhKJ2t+1CdX2UKGgGR8Bdg3oLXtjTaAdLY2gIR0AKSLAHmig1dX2UKGgGR8B33z/yXlbNaAdLeGgIR0ALT1f3N9pidX2UKGgGR8B10OeK8+RpaAdLYmgIR0AMNqi48U22dX2UKGgGR8Bb+joyKvV3aAdLU2gIR0AM+FnIyTIOdX2UKGgGR8Bna0JF9a2XaAdLaWgIR0AN4mLLpzLfdX2UKGgGR8B07SI/JNj9aAdLXGgIR0AOkXxe9i+ddX2UKGgGR8BSZ7WI42jxaAdLSGgIR0APF3yI55qudX2UKGgGR8B1SHKU3XI2aAdLWWgIR0APwEpy6tkndX2UKGgGR8BhfUolUp/gaAdLZGgIR0AQPeO4oZyddX2UKGgGR8BdtlYZEUj+aAdLU2gIR0AQj0Yj0L+hdX2UKGgGR8Bh0PT7VJ+VaAdLcGgIR0ARBKoQ4CIUdX2UKGgGR8BixP4sVclgaAdLYGgIR0ARYir1dxACdX2UKGgGR8BbASONo8ISaAdLTWgIR0ARqjmCAc1gdX2UKGgGR8BfcCjDbah6aAdLZWgIR0ASIVDa4+bFdX2UKGgGR8Bx0v4ubqhUaAdLa2gIR0ASn1g6U7jldX2UKGgGR8B1MFr56+nJaAdLZ2gIR0ATIV+I/JNkdX2UKGgGR8B2FnAckt2+aAdLV2gIR0ATh9oexOcldX2UKGgGR8BK5Kur6tT2aAdLQWgIR0AT0e5nUUfxdX2UKGgGR8BumC+WWyC4aAdLY2gIR0AUQNd7fHghdX2UKGgGR8Biold5Y5ktaAdLRGgIR0AUgsXizcASdX2UKGgGR8B3pGHEdeY2aAdLjGgIR0AZZHZsbedkdX2UKGgGR8BQobEcbR4RaAdLPGgIR0AZnO6d1+y7dX2UKGgGR8BWQAHiWE9MaAdLVGgIR0AZ7U8V58jSdX2UKGgGR8BjK7DEWIoFaAdLfmgIR0Aaal3yI55rdX2UKGgGR8BdS6CtihFmaAdLaGgIR0Aazwb2lEZ0dX2UKGgGR8BpD4RoRIz4aAdLfWgIR0AbUkgOjIq9dX2UKGgGR8BnV/oJRfnfaAdLbWgIR0AbusySFGoadX2UKGgGR8BU21oHs1KoaAdLTGgIR0AcApnYg7o0dX2UKGgGR8BpwiIk7fYSaAdLl2gIR0AclZbILgGbdX2UKGgGR8BlP93IMjNZaAdLeWgIR0AdCzY287IUdX2UKGgGR8BpkwdU83dcaAdLZmgIR0Adck2P1ct5dX2UKGgGR8Biy1xOtW+5aAdLVGgIR0AdxaTwDvE1dX2UKGgGR8BXAS+Yc/+saAdLfGgIR0AeQP6KtPpIdX2UKGgGR0BDxaxHG0eEaAdLWGgIR0Ael0U47zTXdX2UKGgGR8BgOvMnqmj1aAdLgmgIR0AfI/lhgE2YdX2UKGgGR8BiT0dPtUn5aAdLgWgIR0AfqI7/4qPPdX2UKGgGR8BZthLXcxj8aAdLWmgIR0Af/Hp8neBQdX2UKGgGR8Bi1fvKEFnqaAdLa2gIR0AgMuoxYaHcdX2UKGgGR8BUccPJ7sv7aAdLSmgIR0AgVw1ivxH5dX2UKGgGR8BnTvKp1ie/aAdLhWgIR0AgmfjCHh0hdWUu"
|
| 49 |
},
|
| 50 |
"ep_success_buffer": {
|
| 51 |
":type:": "<class 'collections.deque'>",
|
ppo-LunarLander-v2/policy.optimizer.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 87978
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2c839c83a18765db0fe97b8ff7d813b2bc577e71561c15686f6ca28a5dd2e45
|
| 3 |
size 87978
|
ppo-LunarLander-v2/policy.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 43634
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:82c2c2c616f5c615dc346922e1672753fcb57d328bd27c4865a731f238e013b7
|
| 3 |
size 43634
|
replay.mp4
CHANGED
|
Binary files a/replay.mp4 and b/replay.mp4 differ
|
|
|
results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"mean_reward": -
|
|
|
|
| 1 |
+
{"mean_reward": -452.91611059999997, "std_reward": 339.68845440777, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2024-10-17T12:02:14.714279"}
|