OdedKBio committed (verified)
Commit d4ce52e · 1 Parent(s): 92f896a

Test commit
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
       type: LunarLander-v3
     metrics:
     - type: mean_reward
-      value: -1149.29 +/- 829.81
+      value: -198.68 +/- 121.66
       name: mean_reward
     verified: false
 ---
config.json CHANGED
@@ -1 +1 @@
- {"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==", "__module__": "stable_baselines3.common.policies", "__firstlineno__": 416, "__doc__": "\nPolicy class for actor-critic algorithms (has both policy and value prediction).\nUsed by A2C, PPO and the likes.\n\n:param observation_space: Observation space\n:param action_space: Action space\n:param lr_schedule: Learning rate schedule (could be constant)\n:param net_arch: The specification of the policy and value networks.\n:param activation_fn: Activation function\n:param ortho_init: Whether to use or not orthogonal initialization\n:param use_sde: Whether to use State Dependent Exploration or not\n:param log_std_init: Initial value for the log standard deviation\n:param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n:param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n:param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n:param features_extractor_class: Features extractor to use.\n:param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n:param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n:param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n:param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n:param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n", "__init__": "<function ActorCriticPolicy.__init__ at 0x118dedb20>", "_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x118dedbc0>", "reset_noise": "<function ActorCriticPolicy.reset_noise at 0x118dedc60>", "_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x118dedd00>", "_build": "<function ActorCriticPolicy._build at 0x118dedda0>", "forward": "<function ActorCriticPolicy.forward at 0x118dede40>", "extract_features": "<function ActorCriticPolicy.extract_features at 0x118dedee0>", "_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x118dedf80>", "_predict": "<function ActorCriticPolicy._predict at 0x118dee020>", "evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x118dee0c0>", "get_distribution": "<function ActorCriticPolicy.get_distribution at 0x118dee160>", "predict_values": "<function ActorCriticPolicy.predict_values at 0x118dee200>", "__static_attributes__": ["action_dist", "action_net", "activation_fn", "dist_kwargs", "features_dim", "features_extractor", "log_std", "log_std_init", "mlp_extractor", "net_arch", "optimizer", "ortho_init", "pi_features_extractor", "share_features_extractor", "use_sde", "value_net", "vf_features_extractor"], "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x118df97c0>"}, "verbose": 1, "policy_kwargs": {}, "num_timesteps": 6144, "_total_timesteps": 5000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1755601290765069000, "learning_rate": 0.0003, "tensorboard_log": null, "_last_obs": {":type:": "<class 
'numpy.ndarray'>", ":serialized:": "gAWVlgAAAAAAAACME251bXB5Ll9jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWIAAAAAAAAAC6DVY+B6qRPycDHD8PwwS/Jek/vgDgbL0AAAAAAAAAAJSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLAUsIhpSMAUOUdJRSlC4="}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdQAAAAAAAACME251bXB5Ll9jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWAQAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksBhZSMAUOUdJRSlC4="}, "_last_original_obs": null, "_episode_num": 0, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": -0.2287999999999999, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVbAcAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwE8uDVYp2EGMAWyUS1CMAXSURz/V6F/QSi/PdX2UKGgGR8BQ2GIj4YaYaAdLPmgIRz/WgTAWSEDhdX2UKGgGR8BlqUIcBEKFaAdLXWgIRz/Xg2606YE4dX2UKGgGR8B2BjReC04SaAdLXmgIRz/YgDzRQaaTdX2UKGgGR8BPOGOlwcYJaAdLR2gIRz/ZQxesxO+JdX2UKGgGR8BXcNNSIgvEaAdLdWgIRz/afReC04R3dX2UKGgGR8BvY+V7hNucaAdLU2gIRz/bXZXdTHbRdX2UKGgGR8Bo3BE0BOpLaAdLeGgIRz/cqmj0th/idX2UKGgGR8Bh/zUwztTlaAdLg2gIRz/eHjIaLn9vdX2UKGgGR8BWbc/UvwmWaAdLaGgIRz/fNS619fCzdX2UKGgGR8B0Wr3L3bmEaAdLYWgIRz/gGWldkauPdX2UKGgGR8BxXl4LThHcaAdLZ2gIRz/gpA+pwS8KdX2UKGgGR8B8n9EWqLjxaAdLYWgIRz/hJosZpBX0dX2UKGgGR8BsPdtALRa5aAdLUGgIRz/hkKmbb1yvdX2UKGgGR8Bhbx8YyfthaAdLaGgIRz/iGdAgPmPpdX2UKGgGR8BgsInjQzDXaAdLU2gIRz/ihQN0/4ZddX2UKGgGR8BqmwhhYvFnaAdLbWgIRz/jFLWZqmCRdX2UKGgGR8BeYkx20Re1aAdLW2gIRz/jjLB9Cu2adX2UKGgGR8BcX7R0EHMVaAdLQGgIRz/j4EOiFj/ddX2UKGgGR8BzYr3nIQvpaAdLc2gIRz/kf/3nIQvpdX2UKGgGR8BvVy3d9Dx9aAdLfWgIRz/lNBOYYzi0dX2UKGgGR8BxDNzfaYeDaAdLgWgIRz/xtadMCcPOdX2UKGgGR8BqYk1TBInSaAdLZmgIRz/x+14Pf8/EdX2UKGgGR8BZ9iK77Kq5aAdLRGgIRz/yJhvze40/dX2UKGgGR8BHzVR+BpYcaAdLRWgIRz/yUnogV45cdX2UKGgGR8BS4XQtz0YkaAdLVWgIRz/yiTpxFRYSdX2UKGgGR8BWM8R6F/QTaAdLYmgIRz/yyjxkNFz/dX2UKGgGR8BYJWFi8WbgaAdLVmgIRz/zA4GUwBYFdX2UKGgGR8B0vvrPdEb6aAdLaWgIRz/zTFqBVdX1dX2UKGgGR8ByYNfpljEvaAdLcmgIRz/zmBSUC7sfdX2UKGgGR8BRLyhJyyUtaAdLQmgIRz/zwPNFBppOdX2UKGgGR8BjBgna37UHaAdLf2gIRz/0HAmAskIHdX2UKGgGR7+QbEP1+RYBaAdLd2gIRz/0b/XGwRoRdX2UKGgGR8BcSqoZQ53laAdLUGgIRz/0ovrWy1NQdX2UKGgGR8BGo8+A3DNyaAdLWGgIRz/03wTdtVJddX2UKGgGR8BIDsOG0u14aAdLW2gIRz/1GahHskY5dX2UKGgGR8BVABNVR1oyaAdLXWgIRz/1Vpj+aScLdX2UKGgGR8BNNjaXa8HwaAdLQ2gIRz/1gmeDnNgSdX2UKGgGR8By9aRfWtlqaAdLYGgIRz/1xQaaTfSAdX2UKGgGR8BybtsuWa+faAdLYmgIRz/2BkiD/VAidX2UKGgGR8BiA9w97ngYaAdLbmgIRz/2UNSZSeiBdX2UKGgGR8BrTaCrcTJyaAdLcmgIRz/2nNLUTcqOdX2UKGgGR8Bj1yhN/OMVaAdLRGgIRz/2yQtBfKISdX2UKGgGR8BhNe0eEIw/aAdLY2gIRz/9WHck+otMdX2UKGgGR8BclTxoZhrnaAdLX2gIRz/9l+Vkc0cfdX2UKGgGR8BjbMf9xZMdaAdLi2gIRz/9+9rXUYsNdX2UKGgGR8BRa0tI065oaAdLXGgIRz/+Oez2OAAidX2UKGgGR8BtjW8RL9MsaAdLbWgIRz/+hLbpNbkfdX2UKGgGR8BfbruYx+KCaAdLRGgIRz/+sABDG96DdX2UKGgGR8BgkqqABkqdaAdLWmgIRz/+7QC0WuYAdX2UKGgGR8BsCF0DEFW5aAdLd2gIRz//QDmr8zhxdX2UKGgGR8Btbv/zasZHaAdLgGgIRz//miQDFId3dX2UKGgGR8BySE1vVEuyaAdLZWgIRz//3/YJ3PiUdX2UKGgGR8BV7k1uR9w4aAdLeGgIR0AAGlwcYIjXdX2UKGgGR8BUhrPhQ3xXaAdLRGgIR0AAL9sJpnHvdX2UKGgGR8BqF8HIIWxhaAdLcGgIR0AAVea8Yht+dX2UKGgGR8ByqeQ0XP7faAdLfGgIR0AAgfU4JeE7dX2UKGgGR8BbanSro4dZaAdLU2gIR0AAnlXA/LTydX2UKGgGR8BmQNX5nDiwaAdLYmgIR0AAwCnxaxHHdX2UKGgGR8BusIg1WKdhaAdLiWgIR0AA8XpGFzuGdX2UKGgGR8Bu/ybBoEjgaAdLe2gIR0ABHO6d1+y7dX2UKGgGR8BzGfcqOLiuaAdLa2gIR0ABQp2ECeVcdWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="}, "_n_updates": 30, "observation_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": 
"gAWVdwIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLCIWUjANsb3eUjBNudW1weS5fY29yZS5udW1lcmljlIwLX2Zyb21idWZmZXKUk5QoliAAAAAAAAAAAAAgwAAAIMAAACDBAAAgwdsPycAAACDBAAAAgAAAAICUaAtLCIWUjAFDlHSUUpSMDWJvdW5kZWRfYmVsb3eUaBMolggAAAAAAAAAAQEBAQEBAQGUaAiMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGJLCIWUaBZ0lFKUjARoaWdolGgTKJYgAAAAAAAAAAAAIEAAACBAAAAgQQAAIEHbD8lAAAAgQQAAgD8AAIA/lGgLSwiFlGgWdJRSlIwNYm91bmRlZF9hYm92ZZRoEyiWCAAAAAAAAAABAQEBAQEBAZRoHUsIhZRoFnSUUpSMCGxvd19yZXBylIxbWyAtMi41ICAgICAgICAtMi41ICAgICAgIC0xMC4gICAgICAgIC0xMC4gICAgICAgICAtNi4yODMxODU1IC0xMC4KICAtMC4gICAgICAgICAtMC4gICAgICAgXZSMCWhpZ2hfcmVwcpSMU1sgMi41ICAgICAgICAyLjUgICAgICAgMTAuICAgICAgICAxMC4gICAgICAgICA2LjI4MzE4NTUgMTAuCiAgMS4gICAgICAgICAxLiAgICAgICBdlIwKX25wX3JhbmRvbZROdWIu", "dtype": "float32", "_shape": [8], "low": "[ -2.5 -2.5 -10. -10. -6.2831855 -10.\n -0. -0. ]", "bounded_below": "[ True True True True True True True True]", "high": "[ 2.5 2.5 10. 10. 6.2831855 10.\n 1. 1. ]", "bounded_above": "[ True True True True True True True True]", "low_repr": "[ -2.5 -2.5 -10. -10. -6.2831855 -10.\n -0. -0. ]", "high_repr": "[ 2.5 2.5 10. 10. 6.2831855 10.\n 1. 1. ]", "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.discrete.Discrete'>", ":serialized:": "gAWV3AAAAAAAAACMGWd5bW5hc2l1bS5zcGFjZXMuZGlzY3JldGWUjAhEaXNjcmV0ZZSTlCmBlH2UKIwBbpSMFm51bXB5Ll9jb3JlLm11bHRpYXJyYXmUjAZzY2FsYXKUk5SMBW51bXB5lIwFZHR5cGWUk5SMAmk4lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJDCAQAAAAAAAAAlIaUUpSMBXN0YXJ0lGgIaA5DCAAAAAAAAAAAlIaUUpSMBl9zaGFwZZQpjAVkdHlwZZRoDowKX25wX3JhbmRvbZROdWIu", "n": "4", "start": "0", "_shape": [], "dtype": "int64", "_np_random": null}, "n_envs": 1, "n_steps": 2048, "gamma": 0.99, "gae_lambda": 0.95, "ent_coef": 0.0, "vf_coef": 0.5, "max_grad_norm": 0.5, "rollout_buffer_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVNgAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwNUm9sbG91dEJ1ZmZlcpSTlC4=", "__module__": "stable_baselines3.common.buffers", "__firstlineno__": 343, "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'advantages': <class 'numpy.ndarray'>, 'returns': <class 'numpy.ndarray'>, 'episode_starts': <class 'numpy.ndarray'>, 'log_probs': <class 'numpy.ndarray'>, 'values': <class 'numpy.ndarray'>}", "__doc__": "\nRollout buffer used in on-policy algorithms like A2C/PPO.\nIt corresponds to ``buffer_size`` transitions collected\nusing the current policy.\nThis experience will be discarded after the policy update.\nIn order to use PPO objective, we also store the current value of each state\nand the log probability of each taken action.\n\nThe term rollout here refers to the model-free notion and should not\nbe used with the concept of rollout used in model-based RL or planning.\nHence, it is only involved in policy and value function training but not action selection.\n\n:param buffer_size: Max number of element in the buffer\n:param observation_space: Observation space\n:param action_space: Action space\n:param device: PyTorch device\n:param gae_lambda: Factor for trade-off of bias vs variance for Generalized Advantage Estimator\n Equivalent to classic advantage when set to 1.\n:param gamma: Discount factor\n:param n_envs: Number of parallel environments\n", "__init__": "<function RolloutBuffer.__init__ at 0x1189b1d00>", "reset": "<function RolloutBuffer.reset at 0x1189b1da0>", 
"compute_returns_and_advantage": "<function RolloutBuffer.compute_returns_and_advantage at 0x1189b1e40>", "add": "<function RolloutBuffer.add at 0x1189b1f80>", "get": "<function RolloutBuffer.get at 0x1189b2020>", "_get_samples": "<function RolloutBuffer._get_samples at 0x1189b20c0>", "__static_attributes__": ["actions", "advantages", "episode_starts", "full", "gae_lambda", "gamma", "generator_ready", "log_probs", "observations", "returns", "rewards", "values"], "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x1189a1380>"}, "rollout_buffer_kwargs": {}, "batch_size": 64, "n_epochs": 10, "clip_range": {":type:": "<class 'stable_baselines3.common.utils.FloatSchedule'>", ":serialized:": "gAWVeQAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMDUZsb2F0U2NoZWR1bGWUk5QpgZR9lIwOdmFsdWVfc2NoZWR1bGWUaACMEENvbnN0YW50U2NoZWR1bGWUk5QpgZR9lIwDdmFslEc/yZmZmZmZmnNic2Iu", "value_schedule": "ConstantSchedule(val=0.2)"}, "clip_range_vf": null, "normalize_advantage": true, "target_kl": null, "lr_schedule": {":type:": "<class 'stable_baselines3.common.utils.FloatSchedule'>", ":serialized:": "gAWVeQAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMDUZsb2F0U2NoZWR1bGWUk5QpgZR9lIwOdmFsdWVfc2NoZWR1bGWUaACMEENvbnN0YW50U2NoZWR1bGWUk5QpgZR9lIwDdmFslEc/M6kqMFUyYXNic2Iu", "value_schedule": "ConstantSchedule(val=0.0003)"}, "system_info": {"OS": "macOS-15.6-arm64-arm-64bit-Mach-O Darwin Kernel Version 24.6.0: Mon Jul 14 11:30:55 PDT 2025; root:xnu-11417.140.69~1/RELEASE_ARM64_T6031", "Python": "3.13.4", "Stable-Baselines3": "2.7.0", "PyTorch": "2.8.0", "GPU Enabled": "False", "Numpy": "2.2.6", "Cloudpickle": "3.1.1", "Gymnasium": "1.2.0"}}
+ {"policy_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVOwAAAAAAAACMIXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5wb2xpY2llc5SMEUFjdG9yQ3JpdGljUG9saWN5lJOULg==", "__module__": "stable_baselines3.common.policies", "__firstlineno__": 416, "__doc__": "\nPolicy class for actor-critic algorithms (has both policy and value prediction).\nUsed by A2C, PPO and the likes.\n\n:param observation_space: Observation space\n:param action_space: Action space\n:param lr_schedule: Learning rate schedule (could be constant)\n:param net_arch: The specification of the policy and value networks.\n:param activation_fn: Activation function\n:param ortho_init: Whether to use or not orthogonal initialization\n:param use_sde: Whether to use State Dependent Exploration or not\n:param log_std_init: Initial value for the log standard deviation\n:param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n:param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n:param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n:param features_extractor_class: Features extractor to use.\n:param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n:param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n:param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n:param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n:param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n", "__init__": "<function ActorCriticPolicy.__init__ at 0x1215edb20>", "_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x1215edbc0>", "reset_noise": "<function ActorCriticPolicy.reset_noise at 0x1215edc60>", "_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x1215edd00>", "_build": "<function ActorCriticPolicy._build at 0x1215edda0>", "forward": "<function ActorCriticPolicy.forward at 0x1215ede40>", "extract_features": "<function ActorCriticPolicy.extract_features at 0x1215edee0>", "_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x1215edf80>", "_predict": "<function ActorCriticPolicy._predict at 0x1215ee020>", "evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x1215ee0c0>", "get_distribution": "<function ActorCriticPolicy.get_distribution at 0x1215ee160>", "predict_values": "<function ActorCriticPolicy.predict_values at 0x1215ee200>", "__static_attributes__": ["action_dist", "action_net", "activation_fn", "dist_kwargs", "features_dim", "features_extractor", "log_std", "log_std_init", "mlp_extractor", "net_arch", "optimizer", "ortho_init", "pi_features_extractor", "share_features_extractor", "use_sde", "value_net", "vf_features_extractor"], "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x1215f9880>"}, "verbose": 1, "policy_kwargs": {}, "num_timesteps": 6144, "_total_timesteps": 5000, "_num_timesteps_at_start": 0, "seed": null, "action_noise": null, "start_time": 1755601405810613000, "learning_rate": 0.0003, "tensorboard_log": null, "_last_obs": {":type:": "<class 
'numpy.ndarray'>", ":serialized:": "gAWVlgAAAAAAAACME251bXB5Ll9jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWIAAAAAAAAABa5yS/zkrUPe43bL8hUbK/W4moPkS5Mb4AAAAAAAAAAJSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLAUsIhpSMAUOUdJRSlC4="}, "_last_episode_starts": {":type:": "<class 'numpy.ndarray'>", ":serialized:": "gAWVdQAAAAAAAACME251bXB5Ll9jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWAQAAAAAAAAAAlIwFbnVtcHmUjAVkdHlwZZSTlIwCYjGUiYiHlFKUKEsDjAF8lE5OTkr/////Sv////9LAHSUYksBhZSMAUOUdJRSlC4="}, "_last_original_obs": null, "_episode_num": 0, "use_sde": false, "sde_sample_freq": -1, "_current_progress_remaining": -0.2287999999999999, "_stats_window_size": 100, "ep_info_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVqAcAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwEdm0w8GLUGMAWyUS0WMAXSURz/X8q4H5aePdX2UKGgGR8Bw6GVPepGXaAdLT2gIRz/Y2uPmxMWXdX2UKGgGR8BZbT8gpz91aAdLY2gIRz/Z6ciGFi8WdX2UKGgGR8BtxshkiD/VaAdLf2gIRz/bWu5jH4oJdX2UKGgGR8BgCsenyd4FaAdLVmgIRz/cQQcxTKkmdX2UKGgGR8BwYf56+nIiaAdLX2gIRz/dPxx1gYxddX2UKGgGR8BZ9UdV/+bWaAdLS2gIRz/d9kBjnV5KdX2UKGgGR8BsiL3oLXtjaAdLP2gIRz/ejcVQAMlUdX2UKGgGR8Biq0SVW0Z4aAdLSmgIRz/fRhttQ9A5dX2UKGgGR8BX64ZdfLLZaAdLWWgIRz/gGE4//vORdX2UKGgGR8BYsbxd6cAjaAdLcGgIRz/gssYl6Z6VdX2UKGgGR8BNpl6qsEJTaAdLRGgIRz/hBOP/7zkIdX2UKGgGR8Bg/6TlkpZwaAdLaGgIRz/hjlo11nuidX2UKGgGR8BXZIMjNY8uaAdLRmgIRz/h5L7Gecx1dX2UKGgGR8BdJtitq59WaAdLgGgIRz/ilAE+xGDudX2UKGgGR8Bjux+MIeHSaAdLVWgIRz/jAGjbi6xxdX2UKGgGR8Bg2I8+zMRpaAdLd2gIRz/jnq/ub7TEdX2UKGgGR8BRsHwTdtVJaAdLU2gIRz/kCfHxSYPYdX2UKGgGR8BpvBakhzNmaAdLPmgIRz/kWBreqJdjdX2UKGgGR8Bslsrf+CK8aAdLamgIRz/k5HmRvFWGdX2UKGgGR8BhirtG/etTaAdLQmgIRz/lOHerMkhSdX2UKGgGR8BYnwK8cuJ2aAdLU2gIRz/lphvze40/dX2UKGgGR8BcvLaZhKDkaAdLT2gIRz/mD9GZuyeJdX2UKGgGR8BMBZEDyOJdaAdLWWgIRz/yRJiAlOXWdX2UKGgGR8BuBBeqrBCVaAdLb2gIRz/ykPpY9xIbdX2UKGgGR8BL7XcYZVGTaAdLQmgIRz/yuwkgOjIrdX2UKGgGR8B2TVpcophGaAdLi2gIRz/zHpnpSrHVdX2UKGgGR8BlMnXkHUtqaAdLUGgIRz/zUJKJ2t+1dX2UKGgGR8BmWLoQnQY2aAdLWGgIRz/zimuTzND/dX2UKGgGR8BkjqbSZ0CBaAdLQWgIRz/zs+JP69CedX2UKGgGR8BnIBFw1ivxaAdLSWgIRz/z4jW07bL2dX2UKGgGR8BRXiiudPLxaAdLc2gIRz/0LpNbkfcOdX2UKGgGR8B0NidXko4NaAdLfGgIRz/0hiTdLxqgdX2UKGgGR8BkSIKv3ai9aAdLd2gIRz/01Sn+AEt/dX2UKGgGR8BdLYkmhM8HaAdLh2gIRz/1MynDR+jNdX2UKGgGR8B5RL0oScslaAdLW2gIRz/1bw8W9DhMdX2UKGgGR8BAQIVmBe5XaAdLlGgIRz/12sV+I/JOdX2UKGgGR8BTjy6pYLb6aAdLZ2gIRz/2Hu/k/8l5dX2UKGgGR8Bb1Ck0rK/3aAdLdGgIRz/2brLQokRjdX2UKGgGR8BiLkkjX4CZaAdLV2gIRz/2ql54W1twdX2UKGgGR8BorAoCuEElaAdLSGgIRz/22iQDFId3dX2UKGgGR8BxFG+7Dl5oaAdLcGgIRz/3JlJ6IFeOdX2UKGgGRz/+SHuZ1FH8aAdLfmgIRz/3e6qbSZ0CdX2UKGgGR8B3EMm7aqS6aAdLgmgIRz/+IYWLxZuAdX2UKGgGR8BhOBw0fozOaAdLRmgIRz/+TXSSeRPodX2UKGgGR8Bc5dld1MdtaAdLXmgIRz/+ihN/OMVDdX2UKGgGR8BjA3kFOfukaAdLTGgIRz/+ufRNRFZxdX2UKGgGR8BclOsT37DVaAdLV2gIRz/+8IzFdcB2dX2UKGgGR8BzlaemNzbOaAdLdmgIRz//P5tWMju8dX2UKGgGR8BrFJcLSeAeaAdLYWgIRz//fkeZG8VYdX2UKGgGR8Bb8n84xUNsaAdLTGgIRz//rXDm8ujAdX2UKGgGR8BTAg2l2vB8aAdLO2gIRz//0cbR4QjEdX2UKGgGR8Ba1piiItUXaAdLbGgIR0AADNbC79Q5dX2UKGgGR8BkK6DRMN+caAdLiGgIR0AAO7Dl5nlGdX2UKGgGR8BibPXCj1wpaAdLNmgIR0AAS33Hq/ucdX2UKGgGR8B0HDLbHp8naAdLXWgIR0AAaebutwJgdX2UKGgGR8BbKqlLvkR0aAdLPmgIR0AAfOt4iX6ZdX2UKGgGR8BaXBKHwgDBaAdLaWgIR0AAn0qYqoZRdX2UKGgGR8BknuKhtcfOaAdLdmgIR0AAxgAp8WsSdX2UKGgGR8BYO9UGVzIWaAdLfGgIR0AA8bJfYzzmdX2UKGgGR8BJgo42jwhGaAdLSGgIR0ABCgTRIBikdX2UKGgGR8BxjJUjs2NvaAdLgWgIR0ABNkQPI4lydX2UKGgGR8Bt0FY+0PYnaAdLaGgIR0ABWIqLCN0edX2UKGgGR8BcAUfxMFlkaAdLTWgIR0ABcImgJ1JUdWUu"}, "ep_success_buffer": {":type:": "<class 'collections.deque'>", ":serialized:": "gAWVIAAAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKULg=="}, "_n_updates": 30, 
"observation_space": {":type:": "<class 'gymnasium.spaces.box.Box'>", ":serialized:": "gAWVdwIAAAAAAACMFGd5bW5hc2l1bS5zcGFjZXMuYm94lIwDQm94lJOUKYGUfZQojAVkdHlwZZSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGKMBl9zaGFwZZRLCIWUjANsb3eUjBNudW1weS5fY29yZS5udW1lcmljlIwLX2Zyb21idWZmZXKUk5QoliAAAAAAAAAAAAAgwAAAIMAAACDBAAAgwdsPycAAACDBAAAAgAAAAICUaAtLCIWUjAFDlHSUUpSMDWJvdW5kZWRfYmVsb3eUaBMolggAAAAAAAAAAQEBAQEBAQGUaAiMAmIxlImIh5RSlChLA4wBfJROTk5K/////0r/////SwB0lGJLCIWUaBZ0lFKUjARoaWdolGgTKJYgAAAAAAAAAAAAIEAAACBAAAAgQQAAIEHbD8lAAAAgQQAAgD8AAIA/lGgLSwiFlGgWdJRSlIwNYm91bmRlZF9hYm92ZZRoEyiWCAAAAAAAAAABAQEBAQEBAZRoHUsIhZRoFnSUUpSMCGxvd19yZXBylIxbWyAtMi41ICAgICAgICAtMi41ICAgICAgIC0xMC4gICAgICAgIC0xMC4gICAgICAgICAtNi4yODMxODU1IC0xMC4KICAtMC4gICAgICAgICAtMC4gICAgICAgXZSMCWhpZ2hfcmVwcpSMU1sgMi41ICAgICAgICAyLjUgICAgICAgMTAuICAgICAgICAxMC4gICAgICAgICA2LjI4MzE4NTUgMTAuCiAgMS4gICAgICAgICAxLiAgICAgICBdlIwKX25wX3JhbmRvbZROdWIu", "dtype": "float32", "_shape": [8], "low": "[ -2.5 -2.5 -10. -10. -6.2831855 -10.\n -0. -0. ]", "bounded_below": "[ True True True True True True True True]", "high": "[ 2.5 2.5 10. 10. 6.2831855 10.\n 1. 1. ]", "bounded_above": "[ True True True True True True True True]", "low_repr": "[ -2.5 -2.5 -10. -10. -6.2831855 -10.\n -0. -0. ]", "high_repr": "[ 2.5 2.5 10. 10. 6.2831855 10.\n 1. 1. ]", "_np_random": null}, "action_space": {":type:": "<class 'gymnasium.spaces.discrete.Discrete'>", ":serialized:": "gAWV3AAAAAAAAACMGWd5bW5hc2l1bS5zcGFjZXMuZGlzY3JldGWUjAhEaXNjcmV0ZZSTlCmBlH2UKIwBbpSMFm51bXB5Ll9jb3JlLm11bHRpYXJyYXmUjAZzY2FsYXKUk5SMBW51bXB5lIwFZHR5cGWUk5SMAmk4lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJDCAQAAAAAAAAAlIaUUpSMBXN0YXJ0lGgIaA5DCAAAAAAAAAAAlIaUUpSMBl9zaGFwZZQpjAVkdHlwZZRoDowKX25wX3JhbmRvbZROdWIu", "n": "4", "start": "0", "_shape": [], "dtype": "int64", "_np_random": null}, "n_envs": 1, "n_steps": 2048, "gamma": 0.99, "gae_lambda": 0.95, "ent_coef": 0.0, "vf_coef": 0.5, "max_grad_norm": 0.5, "rollout_buffer_class": {":type:": "<class 'abc.ABCMeta'>", ":serialized:": "gAWVNgAAAAAAAACMIHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi5idWZmZXJzlIwNUm9sbG91dEJ1ZmZlcpSTlC4=", "__module__": "stable_baselines3.common.buffers", "__firstlineno__": 343, "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'advantages': <class 'numpy.ndarray'>, 'returns': <class 'numpy.ndarray'>, 'episode_starts': <class 'numpy.ndarray'>, 'log_probs': <class 'numpy.ndarray'>, 'values': <class 'numpy.ndarray'>}", "__doc__": "\nRollout buffer used in on-policy algorithms like A2C/PPO.\nIt corresponds to ``buffer_size`` transitions collected\nusing the current policy.\nThis experience will be discarded after the policy update.\nIn order to use PPO objective, we also store the current value of each state\nand the log probability of each taken action.\n\nThe term rollout here refers to the model-free notion and should not\nbe used with the concept of rollout used in model-based RL or planning.\nHence, it is only involved in policy and value function training but not action selection.\n\n:param buffer_size: Max number of element in the buffer\n:param observation_space: Observation space\n:param action_space: Action space\n:param device: PyTorch device\n:param gae_lambda: Factor for trade-off of bias vs variance for Generalized Advantage Estimator\n Equivalent to classic advantage when set to 1.\n:param gamma: Discount factor\n:param n_envs: Number of parallel environments\n", "__init__": "<function 
RolloutBuffer.__init__ at 0x1212b1d00>", "reset": "<function RolloutBuffer.reset at 0x1212b1da0>", "compute_returns_and_advantage": "<function RolloutBuffer.compute_returns_and_advantage at 0x1212b1e40>", "add": "<function RolloutBuffer.add at 0x1212b1f80>", "get": "<function RolloutBuffer.get at 0x1212b2020>", "_get_samples": "<function RolloutBuffer._get_samples at 0x1212b20c0>", "__static_attributes__": ["actions", "advantages", "episode_starts", "full", "gae_lambda", "gamma", "generator_ready", "log_probs", "observations", "returns", "rewards", "values"], "__abstractmethods__": "frozenset()", "_abc_impl": "<_abc._abc_data object at 0x1212a1900>"}, "rollout_buffer_kwargs": {}, "batch_size": 64, "n_epochs": 10, "clip_range": {":type:": "<class 'stable_baselines3.common.utils.FloatSchedule'>", ":serialized:": "gAWVeQAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMDUZsb2F0U2NoZWR1bGWUk5QpgZR9lIwOdmFsdWVfc2NoZWR1bGWUaACMEENvbnN0YW50U2NoZWR1bGWUk5QpgZR9lIwDdmFslEc/yZmZmZmZmnNic2Iu", "value_schedule": "ConstantSchedule(val=0.2)"}, "clip_range_vf": null, "normalize_advantage": true, "target_kl": null, "lr_schedule": {":type:": "<class 'stable_baselines3.common.utils.FloatSchedule'>", ":serialized:": "gAWVeQAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMDUZsb2F0U2NoZWR1bGWUk5QpgZR9lIwOdmFsdWVfc2NoZWR1bGWUaACMEENvbnN0YW50U2NoZWR1bGWUk5QpgZR9lIwDdmFslEc/M6kqMFUyYXNic2Iu", "value_schedule": "ConstantSchedule(val=0.0003)"}, "system_info": {"OS": "macOS-15.6-arm64-arm-64bit-Mach-O Darwin Kernel Version 24.6.0: Mon Jul 14 11:30:55 PDT 2025; root:xnu-11417.140.69~1/RELEASE_ARM64_T6031", "Python": "3.13.4", "Stable-Baselines3": "2.7.0", "PyTorch": "2.8.0", "GPU Enabled": "False", "Numpy": "2.2.6", "Cloudpickle": "3.1.1", "Gymnasium": "1.2.0"}}
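
For reference, the PPO hyperparameters serialized in this config map directly onto the Stable-Baselines3 constructor. The sketch below is a hypothetical reconstruction of the training call (the actual script is not part of this commit); every value is read from config.json, and the environment id is inferred from the LunarLander-v3 observation/action spaces it records.

# Hypothetical reconstruction of the run behind this config; the actual
# training script is not in the commit. Hyperparameter values are taken
# from config.json above.
import gymnasium as gym
from stable_baselines3 import PPO

env = gym.make("LunarLander-v3")

model = PPO(
    "MlpPolicy",
    env,
    learning_rate=3e-4,   # ConstantSchedule(val=0.0003)
    n_steps=2048,         # transitions collected per rollout
    batch_size=64,
    n_epochs=10,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.2,       # ConstantSchedule(val=0.2)
    ent_coef=0.0,
    vf_coef=0.5,
    max_grad_norm=0.5,
    verbose=1,
)

# "_total_timesteps" is 5000 but "num_timesteps" ends at 6144: PPO only
# stops after complete n_steps rollouts, and 3 * 2048 = 6144 is the
# first multiple of 2048 past 5000.
model.learn(total_timesteps=5000)
model.save("ppo-LunarLander-v2")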
ppo-LunarLander-v2.zip CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:14c868f8ec98f08581149226a1121243419d38d52a02be6e44e86eb23cace294
- size 147972
+ oid sha256:e198ce1bfa572dfc95bb694b6da46bf0b69953736e94d1012890342ddca0f0b5
+ size 148052
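
This file (like the two .pth files further down) is stored with Git LFS: the repository itself only versions a pointer holding the payload's "oid sha256:" digest and its byte "size". A generic integrity check, not tied to any tooling in this repo, is to recompute both fields locally:

# Generic Git LFS sanity check: recompute a downloaded file's sha256
# digest and size, then compare against the pointer shown above. The
# expected values come from the new (+) pointer.
import hashlib
import os

def matches_lfs_pointer(path: str, oid: str, size: int) -> bool:
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid and os.path.getsize(path) == size

print(matches_lfs_pointer(
    "ppo-LunarLander-v2.zip",
    "e198ce1bfa572dfc95bb694b6da46bf0b69953736e94d1012890342ddca0f0b5",
    148052,
))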
ppo-LunarLander-v2/data CHANGED
@@ -5,18 +5,18 @@
  "__module__": "stable_baselines3.common.policies",
  "__firstlineno__": 416,
  "__doc__": "\nPolicy class for actor-critic algorithms (has both policy and value prediction).\nUsed by A2C, PPO and the likes.\n\n:param observation_space: Observation space\n:param action_space: Action space\n:param lr_schedule: Learning rate schedule (could be constant)\n:param net_arch: The specification of the policy and value networks.\n:param activation_fn: Activation function\n:param ortho_init: Whether to use or not orthogonal initialization\n:param use_sde: Whether to use State Dependent Exploration or not\n:param log_std_init: Initial value for the log standard deviation\n:param full_std: Whether to use (n_features x n_actions) parameters\n for the std instead of only (n_features,) when using gSDE\n:param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure\n a positive standard deviation (cf paper). It allows to keep variance\n above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.\n:param squash_output: Whether to squash the output using a tanh function,\n this allows to ensure boundaries when using gSDE.\n:param features_extractor_class: Features extractor to use.\n:param features_extractor_kwargs: Keyword arguments\n to pass to the features extractor.\n:param share_features_extractor: If True, the features extractor is shared between the policy and value networks.\n:param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n:param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n:param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n",
- "__init__": "<function ActorCriticPolicy.__init__ at 0x118dedb20>",
- "_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x118dedbc0>",
- "reset_noise": "<function ActorCriticPolicy.reset_noise at 0x118dedc60>",
- "_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x118dedd00>",
- "_build": "<function ActorCriticPolicy._build at 0x118dedda0>",
- "forward": "<function ActorCriticPolicy.forward at 0x118dede40>",
- "extract_features": "<function ActorCriticPolicy.extract_features at 0x118dedee0>",
- "_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x118dedf80>",
- "_predict": "<function ActorCriticPolicy._predict at 0x118dee020>",
- "evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x118dee0c0>",
- "get_distribution": "<function ActorCriticPolicy.get_distribution at 0x118dee160>",
- "predict_values": "<function ActorCriticPolicy.predict_values at 0x118dee200>",
+ "__init__": "<function ActorCriticPolicy.__init__ at 0x1215edb20>",
+ "_get_constructor_parameters": "<function ActorCriticPolicy._get_constructor_parameters at 0x1215edbc0>",
+ "reset_noise": "<function ActorCriticPolicy.reset_noise at 0x1215edc60>",
+ "_build_mlp_extractor": "<function ActorCriticPolicy._build_mlp_extractor at 0x1215edd00>",
+ "_build": "<function ActorCriticPolicy._build at 0x1215edda0>",
+ "forward": "<function ActorCriticPolicy.forward at 0x1215ede40>",
+ "extract_features": "<function ActorCriticPolicy.extract_features at 0x1215edee0>",
+ "_get_action_dist_from_latent": "<function ActorCriticPolicy._get_action_dist_from_latent at 0x1215edf80>",
+ "_predict": "<function ActorCriticPolicy._predict at 0x1215ee020>",
+ "evaluate_actions": "<function ActorCriticPolicy.evaluate_actions at 0x1215ee0c0>",
+ "get_distribution": "<function ActorCriticPolicy.get_distribution at 0x1215ee160>",
+ "predict_values": "<function ActorCriticPolicy.predict_values at 0x1215ee200>",
  "__static_attributes__": [
21
  "action_dist",
22
  "action_net",
@@ -37,7 +37,7 @@
  "vf_features_extractor"
  ],
  "__abstractmethods__": "frozenset()",
- "_abc_impl": "<_abc._abc_data object at 0x118df97c0>"
+ "_abc_impl": "<_abc._abc_data object at 0x1215f9880>"
  },
  "verbose": 1,
  "policy_kwargs": {},
@@ -46,12 +46,12 @@
  "_num_timesteps_at_start": 0,
  "seed": null,
  "action_noise": null,
- "start_time": 1755601290765069000,
+ "start_time": 1755601405810613000,
  "learning_rate": 0.0003,
  "tensorboard_log": null,
  "_last_obs": {
  ":type:": "<class 'numpy.ndarray'>",
- ":serialized:": "gAWVlgAAAAAAAACME251bXB5Ll9jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWIAAAAAAAAAC6DVY+B6qRPycDHD8PwwS/Jek/vgDgbL0AAAAAAAAAAJSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLAUsIhpSMAUOUdJRSlC4="
+ ":serialized:": "gAWVlgAAAAAAAACME251bXB5Ll9jb3JlLm51bWVyaWOUjAtfZnJvbWJ1ZmZlcpSTlCiWIAAAAAAAAABa5yS/zkrUPe43bL8hUbK/W4moPkS5Mb4AAAAAAAAAAJSMBW51bXB5lIwFZHR5cGWUk5SMAmY0lImIh5RSlChLA4wBPJROTk5K/////0r/////SwB0lGJLAUsIhpSMAUOUdJRSlC4="
  },
  "_last_episode_starts": {
  ":type:": "<class 'numpy.ndarray'>",
@@ -65,7 +65,7 @@
  "_stats_window_size": 100,
  "ep_info_buffer": {
  ":type:": "<class 'collections.deque'>",
- ":serialized:": "gAWVbAcAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwE8uDVYp2EGMAWyUS1CMAXSURz/V6F/QSi/PdX2UKGgGR8BQ2GIj4YaYaAdLPmgIRz/WgTAWSEDhdX2UKGgGR8BlqUIcBEKFaAdLXWgIRz/Xg2606YE4dX2UKGgGR8B2BjReC04SaAdLXmgIRz/YgDzRQaaTdX2UKGgGR8BPOGOlwcYJaAdLR2gIRz/ZQxesxO+JdX2UKGgGR8BXcNNSIgvEaAdLdWgIRz/afReC04R3dX2UKGgGR8BvY+V7hNucaAdLU2gIRz/bXZXdTHbRdX2UKGgGR8Bo3BE0BOpLaAdLeGgIRz/cqmj0th/idX2UKGgGR8Bh/zUwztTlaAdLg2gIRz/eHjIaLn9vdX2UKGgGR8BWbc/UvwmWaAdLaGgIRz/fNS619fCzdX2UKGgGR8B0Wr3L3bmEaAdLYWgIRz/gGWldkauPdX2UKGgGR8BxXl4LThHcaAdLZ2gIRz/gpA+pwS8KdX2UKGgGR8B8n9EWqLjxaAdLYWgIRz/hJosZpBX0dX2UKGgGR8BsPdtALRa5aAdLUGgIRz/hkKmbb1yvdX2UKGgGR8Bhbx8YyfthaAdLaGgIRz/iGdAgPmPpdX2UKGgGR8BgsInjQzDXaAdLU2gIRz/ihQN0/4ZddX2UKGgGR8BqmwhhYvFnaAdLbWgIRz/jFLWZqmCRdX2UKGgGR8BeYkx20Re1aAdLW2gIRz/jjLB9Cu2adX2UKGgGR8BcX7R0EHMVaAdLQGgIRz/j4EOiFj/ddX2UKGgGR8BzYr3nIQvpaAdLc2gIRz/kf/3nIQvpdX2UKGgGR8BvVy3d9Dx9aAdLfWgIRz/lNBOYYzi0dX2UKGgGR8BxDNzfaYeDaAdLgWgIRz/xtadMCcPOdX2UKGgGR8BqYk1TBInSaAdLZmgIRz/x+14Pf8/EdX2UKGgGR8BZ9iK77Kq5aAdLRGgIRz/yJhvze40/dX2UKGgGR8BHzVR+BpYcaAdLRWgIRz/yUnogV45cdX2UKGgGR8BS4XQtz0YkaAdLVWgIRz/yiTpxFRYSdX2UKGgGR8BWM8R6F/QTaAdLYmgIRz/yyjxkNFz/dX2UKGgGR8BYJWFi8WbgaAdLVmgIRz/zA4GUwBYFdX2UKGgGR8B0vvrPdEb6aAdLaWgIRz/zTFqBVdX1dX2UKGgGR8ByYNfpljEvaAdLcmgIRz/zmBSUC7sfdX2UKGgGR8BRLyhJyyUtaAdLQmgIRz/zwPNFBppOdX2UKGgGR8BjBgna37UHaAdLf2gIRz/0HAmAskIHdX2UKGgGR7+QbEP1+RYBaAdLd2gIRz/0b/XGwRoRdX2UKGgGR8BcSqoZQ53laAdLUGgIRz/0ovrWy1NQdX2UKGgGR8BGo8+A3DNyaAdLWGgIRz/03wTdtVJddX2UKGgGR8BIDsOG0u14aAdLW2gIRz/1GahHskY5dX2UKGgGR8BVABNVR1oyaAdLXWgIRz/1Vpj+aScLdX2UKGgGR8BNNjaXa8HwaAdLQ2gIRz/1gmeDnNgSdX2UKGgGR8By9aRfWtlqaAdLYGgIRz/1xQaaTfSAdX2UKGgGR8BybtsuWa+faAdLYmgIRz/2BkiD/VAidX2UKGgGR8BiA9w97ngYaAdLbmgIRz/2UNSZSeiBdX2UKGgGR8BrTaCrcTJyaAdLcmgIRz/2nNLUTcqOdX2UKGgGR8Bj1yhN/OMVaAdLRGgIRz/2yQtBfKISdX2UKGgGR8BhNe0eEIw/aAdLY2gIRz/9WHck+otMdX2UKGgGR8BclTxoZhrnaAdLX2gIRz/9l+Vkc0cfdX2UKGgGR8BjbMf9xZMdaAdLi2gIRz/9+9rXUYsNdX2UKGgGR8BRa0tI065oaAdLXGgIRz/+Oez2OAAidX2UKGgGR8BtjW8RL9MsaAdLbWgIRz/+hLbpNbkfdX2UKGgGR8BfbruYx+KCaAdLRGgIRz/+sABDG96DdX2UKGgGR8BgkqqABkqdaAdLWmgIRz/+7QC0WuYAdX2UKGgGR8BsCF0DEFW5aAdLd2gIRz//QDmr8zhxdX2UKGgGR8Btbv/zasZHaAdLgGgIRz//miQDFId3dX2UKGgGR8BySE1vVEuyaAdLZWgIRz//3/YJ3PiUdX2UKGgGR8BV7k1uR9w4aAdLeGgIR0AAGlwcYIjXdX2UKGgGR8BUhrPhQ3xXaAdLRGgIR0AAL9sJpnHvdX2UKGgGR8BqF8HIIWxhaAdLcGgIR0AAVea8Yht+dX2UKGgGR8ByqeQ0XP7faAdLfGgIR0AAgfU4JeE7dX2UKGgGR8BbanSro4dZaAdLU2gIR0AAnlXA/LTydX2UKGgGR8BmQNX5nDiwaAdLYmgIR0AAwCnxaxHHdX2UKGgGR8BusIg1WKdhaAdLiWgIR0AA8XpGFzuGdX2UKGgGR8Bu/ybBoEjgaAdLe2gIR0ABHO6d1+y7dX2UKGgGR8BzGfcqOLiuaAdLa2gIR0ABQp2ECeVcdWUu"
+ ":serialized:": "gAWVqAcAAAAAAACMC2NvbGxlY3Rpb25zlIwFZGVxdWWUk5QpS2SGlFKUKH2UKIwBcpRHwEdm0w8GLUGMAWyUS0WMAXSURz/X8q4H5aePdX2UKGgGR8Bw6GVPepGXaAdLT2gIRz/Y2uPmxMWXdX2UKGgGR8BZbT8gpz91aAdLY2gIRz/Z6ciGFi8WdX2UKGgGR8BtxshkiD/VaAdLf2gIRz/bWu5jH4oJdX2UKGgGR8BgCsenyd4FaAdLVmgIRz/cQQcxTKkmdX2UKGgGR8BwYf56+nIiaAdLX2gIRz/dPxx1gYxddX2UKGgGR8BZ9UdV/+bWaAdLS2gIRz/d9kBjnV5KdX2UKGgGR8BsiL3oLXtjaAdLP2gIRz/ejcVQAMlUdX2UKGgGR8Biq0SVW0Z4aAdLSmgIRz/fRhttQ9A5dX2UKGgGR8BX64ZdfLLZaAdLWWgIRz/gGE4//vORdX2UKGgGR8BYsbxd6cAjaAdLcGgIRz/gssYl6Z6VdX2UKGgGR8BNpl6qsEJTaAdLRGgIRz/hBOP/7zkIdX2UKGgGR8Bg/6TlkpZwaAdLaGgIRz/hjlo11nuidX2UKGgGR8BXZIMjNY8uaAdLRmgIRz/h5L7Gecx1dX2UKGgGR8BdJtitq59WaAdLgGgIRz/ilAE+xGDudX2UKGgGR8Bjux+MIeHSaAdLVWgIRz/jAGjbi6xxdX2UKGgGR8Bg2I8+zMRpaAdLd2gIRz/jnq/ub7TEdX2UKGgGR8BRsHwTdtVJaAdLU2gIRz/kCfHxSYPYdX2UKGgGR8BpvBakhzNmaAdLPmgIRz/kWBreqJdjdX2UKGgGR8Bslsrf+CK8aAdLamgIRz/k5HmRvFWGdX2UKGgGR8BhirtG/etTaAdLQmgIRz/lOHerMkhSdX2UKGgGR8BYnwK8cuJ2aAdLU2gIRz/lphvze40/dX2UKGgGR8BcvLaZhKDkaAdLT2gIRz/mD9GZuyeJdX2UKGgGR8BMBZEDyOJdaAdLWWgIRz/yRJiAlOXWdX2UKGgGR8BuBBeqrBCVaAdLb2gIRz/ykPpY9xIbdX2UKGgGR8BL7XcYZVGTaAdLQmgIRz/yuwkgOjIrdX2UKGgGR8B2TVpcophGaAdLi2gIRz/zHpnpSrHVdX2UKGgGR8BlMnXkHUtqaAdLUGgIRz/zUJKJ2t+1dX2UKGgGR8BmWLoQnQY2aAdLWGgIRz/zimuTzND/dX2UKGgGR8BkjqbSZ0CBaAdLQWgIRz/zs+JP69CedX2UKGgGR8BnIBFw1ivxaAdLSWgIRz/z4jW07bL2dX2UKGgGR8BRXiiudPLxaAdLc2gIRz/0LpNbkfcOdX2UKGgGR8B0NidXko4NaAdLfGgIRz/0hiTdLxqgdX2UKGgGR8BkSIKv3ai9aAdLd2gIRz/01Sn+AEt/dX2UKGgGR8BdLYkmhM8HaAdLh2gIRz/1MynDR+jNdX2UKGgGR8B5RL0oScslaAdLW2gIRz/1bw8W9DhMdX2UKGgGR8BAQIVmBe5XaAdLlGgIRz/12sV+I/JOdX2UKGgGR8BTjy6pYLb6aAdLZ2gIRz/2Hu/k/8l5dX2UKGgGR8Bb1Ck0rK/3aAdLdGgIRz/2brLQokRjdX2UKGgGR8BiLkkjX4CZaAdLV2gIRz/2ql54W1twdX2UKGgGR8BorAoCuEElaAdLSGgIRz/22iQDFId3dX2UKGgGR8BxFG+7Dl5oaAdLcGgIRz/3JlJ6IFeOdX2UKGgGRz/+SHuZ1FH8aAdLfmgIRz/3e6qbSZ0CdX2UKGgGR8B3EMm7aqS6aAdLgmgIRz/+IYWLxZuAdX2UKGgGR8BhOBw0fozOaAdLRmgIRz/+TXSSeRPodX2UKGgGR8Bc5dld1MdtaAdLXmgIRz/+ihN/OMVDdX2UKGgGR8BjA3kFOfukaAdLTGgIRz/+ufRNRFZxdX2UKGgGR8BclOsT37DVaAdLV2gIRz/+8IzFdcB2dX2UKGgGR8BzlaemNzbOaAdLdmgIRz//P5tWMju8dX2UKGgGR8BrFJcLSeAeaAdLYWgIRz//fkeZG8VYdX2UKGgGR8Bb8n84xUNsaAdLTGgIRz//rXDm8ujAdX2UKGgGR8BTAg2l2vB8aAdLO2gIRz//0cbR4QjEdX2UKGgGR8Ba1piiItUXaAdLbGgIR0AADNbC79Q5dX2UKGgGR8BkK6DRMN+caAdLiGgIR0AAO7Dl5nlGdX2UKGgGR8BibPXCj1wpaAdLNmgIR0AAS33Hq/ucdX2UKGgGR8B0HDLbHp8naAdLXWgIR0AAaebutwJgdX2UKGgGR8BbKqlLvkR0aAdLPmgIR0AAfOt4iX6ZdX2UKGgGR8BaXBKHwgDBaAdLaWgIR0AAn0qYqoZRdX2UKGgGR8BknuKhtcfOaAdLdmgIR0AAxgAp8WsSdX2UKGgGR8BYO9UGVzIWaAdLfGgIR0AA8bJfYzzmdX2UKGgGR8BJgo42jwhGaAdLSGgIR0ABCgTRIBikdX2UKGgGR8BxjJUjs2NvaAdLgWgIR0ABNkQPI4lydX2UKGgGR8Bt0FY+0PYnaAdLaGgIR0ABWIqLCN0edX2UKGgGR8BcAUfxMFlkaAdLTWgIR0ABcImgJ1JUdWUu"
  },
  "ep_success_buffer": {
  ":type:": "<class 'collections.deque'>",
@@ -110,12 +110,12 @@
  "__firstlineno__": 343,
  "__annotations__": "{'observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'advantages': <class 'numpy.ndarray'>, 'returns': <class 'numpy.ndarray'>, 'episode_starts': <class 'numpy.ndarray'>, 'log_probs': <class 'numpy.ndarray'>, 'values': <class 'numpy.ndarray'>}",
  "__doc__": "\nRollout buffer used in on-policy algorithms like A2C/PPO.\nIt corresponds to ``buffer_size`` transitions collected\nusing the current policy.\nThis experience will be discarded after the policy update.\nIn order to use PPO objective, we also store the current value of each state\nand the log probability of each taken action.\n\nThe term rollout here refers to the model-free notion and should not\nbe used with the concept of rollout used in model-based RL or planning.\nHence, it is only involved in policy and value function training but not action selection.\n\n:param buffer_size: Max number of element in the buffer\n:param observation_space: Observation space\n:param action_space: Action space\n:param device: PyTorch device\n:param gae_lambda: Factor for trade-off of bias vs variance for Generalized Advantage Estimator\n Equivalent to classic advantage when set to 1.\n:param gamma: Discount factor\n:param n_envs: Number of parallel environments\n",
- "__init__": "<function RolloutBuffer.__init__ at 0x1189b1d00>",
- "reset": "<function RolloutBuffer.reset at 0x1189b1da0>",
- "compute_returns_and_advantage": "<function RolloutBuffer.compute_returns_and_advantage at 0x1189b1e40>",
- "add": "<function RolloutBuffer.add at 0x1189b1f80>",
- "get": "<function RolloutBuffer.get at 0x1189b2020>",
- "_get_samples": "<function RolloutBuffer._get_samples at 0x1189b20c0>",
+ "__init__": "<function RolloutBuffer.__init__ at 0x1212b1d00>",
+ "reset": "<function RolloutBuffer.reset at 0x1212b1da0>",
+ "compute_returns_and_advantage": "<function RolloutBuffer.compute_returns_and_advantage at 0x1212b1e40>",
+ "add": "<function RolloutBuffer.add at 0x1212b1f80>",
+ "get": "<function RolloutBuffer.get at 0x1212b2020>",
+ "_get_samples": "<function RolloutBuffer._get_samples at 0x1212b20c0>",
  "__static_attributes__": [
  "actions",
  "advantages",
@@ -131,7 +131,7 @@
  "values"
  ],
  "__abstractmethods__": "frozenset()",
- "_abc_impl": "<_abc._abc_data object at 0x1189a1380>"
+ "_abc_impl": "<_abc._abc_data object at 0x1212a1900>"
  },
  "rollout_buffer_kwargs": {},
  "batch_size": 64,
ppo-LunarLander-v2/policy.optimizer.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:8c74c81803a98d1bcee0f730716e82aefc492a5dff44dbcb6d84d38f24c5ff27
+ oid sha256:9b86869a613eb7a0e01bd35ee81323f4527936395aa1bfe15ea4bf27e0342bdd
  size 88375
ppo-LunarLander-v2/policy.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:537f243dc6b4567559fd3478f873945016ef19d7b0ea176606c853ccd44dbdf4
+ oid sha256:c2f859f6dabe7e89ed43b9821fd4911a84d737d667f49b6d14c654ee570792ae
  size 43967
results.json CHANGED
@@ -1 +1 @@
- {"mean_reward": -1149.2852482, "std_reward": 829.8063976940232, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2025-08-19T14:01:33.958021"}
+ {"mean_reward": -198.6814756598789, "std_reward": 121.65680407173441, "is_deterministic": true, "n_eval_episodes": 10, "eval_datetime": "2025-08-19T14:03:28.461023"}