loisonchambers committed on Apr 28

Commit

12d0891

verified ·

1 Parent(s): 0e5daae

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +2 -0
.python-version +1 -0
.vscode/settings.json +10 -0
README.md +0 -0
poetry.lock +0 -0
ppo-LunarLander-v2.pt +3 -0
ppo.py +133 -0
ppo_old.py +790 -0
pyproject.toml +23 -0
ruff.toml +104 -0
runs/CartPole-v1_ppo_1_1776758028/events.out.tfevents.1776758028.BL-LFERNANDEZ.local.eurecat.org.10534.0 +3 -0
runs/CartPole-v1_ppo_1_1776759190/events.out.tfevents.1776759190.BL-LFERNANDEZ.local.eurecat.org.13333.0 +3 -0
runs/CartPole-v1_ppo_1_1776759930/events.out.tfevents.1776759930.BL-LFERNANDEZ.local.eurecat.org.16675.0 +3 -0
runs/CartPole-v1_ppo_1_1776760089/events.out.tfevents.1776760089.BL-LFERNANDEZ.local.eurecat.org.17273.0 +3 -0
runs/CartPole-v1_ppo_1_1776760573/events.out.tfevents.1776760573.BL-LFERNANDEZ.local.eurecat.org.19521.0 +3 -0
runs/CartPole-v1_ppo_1_1776760603/events.out.tfevents.1776760800.BL-LFERNANDEZ.local.eurecat.org.19762.0 +3 -0
runs/CartPole-v1_ppo_1_1776761629/events.out.tfevents.1776761629.BL-LFERNANDEZ.local.eurecat.org.40907.0 +3 -0
runs/CartPole-v1_ppo_1_1776764954/events.out.tfevents.1776764954.BL-LFERNANDEZ.local.eurecat.org.45905.0 +3 -0
runs/CartPole-v1_ppo_1_1776765078/events.out.tfevents.1776765078.BL-LFERNANDEZ.local.eurecat.org.46277.0 +3 -0
runs/CartPole-v1_ppo_1_1776765121/events.out.tfevents.1776765121.BL-LFERNANDEZ.local.eurecat.org.46547.0 +3 -0
runs/CartPole-v1_ppo_1_1776765317/events.out.tfevents.1776765317.BL-LFERNANDEZ.local.eurecat.org.47512.0 +3 -0
runs/CartPole-v1_ppo_1_1776765438/events.out.tfevents.1776765438.BL-LFERNANDEZ.local.eurecat.org.47896.0 +3 -0
runs/CartPole-v1_ppo_1_1776765498/events.out.tfevents.1776765498.BL-LFERNANDEZ.local.eurecat.org.48020.0 +3 -0
runs/CartPole-v1_ppo_1_1776765547/events.out.tfevents.1776765547.BL-LFERNANDEZ.local.eurecat.org.48309.0 +3 -0
runs/CartPole-v1_ppo_1_1776765580/events.out.tfevents.1776765580.BL-LFERNANDEZ.local.eurecat.org.48524.0 +3 -0
runs/CartPole-v1_ppo_1_1776765943/events.out.tfevents.1776765945.BL-LFERNANDEZ.local.eurecat.org.49910.0 +3 -0
runs/CartPole-v1_ppo_1_1776766122/events.out.tfevents.1776766124.BL-LFERNANDEZ.local.eurecat.org.50518.0 +3 -0
runs/CartPole-v1_ppo_1_1776766281/events.out.tfevents.1776766283.BL-LFERNANDEZ.local.eurecat.org.51127.0 +3 -0
runs/CartPole-v1_ppo_1_1776766423/events.out.tfevents.1776766423.BL-LFERNANDEZ.local.eurecat.org.51653.0 +3 -0
runs/CartPole-v1_ppo_1_1776766445/events.out.tfevents.1776766445.BL-LFERNANDEZ.local.eurecat.org.51862.0 +3 -0
runs/CartPole-v1_ppo_1_1776767609/events.out.tfevents.1776767609.BL-LFERNANDEZ.local.eurecat.org.55609.0 +3 -0
runs/CartPole-v1_ppo_1_1776767664/events.out.tfevents.1776767664.BL-LFERNANDEZ.local.eurecat.org.55869.0 +3 -0
runs/CartPole-v1_ppo_1_1776767691/events.out.tfevents.1776767691.BL-LFERNANDEZ.local.eurecat.org.55998.0 +3 -0
runs/CartPole-v1_ppo_1_1776767743/events.out.tfevents.1776767743.BL-LFERNANDEZ.local.eurecat.org.56120.0 +3 -0
runs/CartPole-v1_ppo_1_1776767808/events.out.tfevents.1776767808.BL-LFERNANDEZ.local.eurecat.org.56378.0 +3 -0
runs/CartPole-v1_ppo_1_1776767863/events.out.tfevents.1776767863.BL-LFERNANDEZ.local.eurecat.org.56483.0 +3 -0
runs/CartPole-v1_ppo_1_1776768348/events.out.tfevents.1776768348.BL-LFERNANDEZ.local.eurecat.org.57900.0 +3 -0
runs/CartPole-v1_ppo_1_1776768490/events.out.tfevents.1776768490.BL-LFERNANDEZ.local.eurecat.org.58209.0 +3 -0
runs/CartPole-v1_ppo_1_1776768658/events.out.tfevents.1776768658.BL-LFERNANDEZ.local.eurecat.org.58694.0 +3 -0
runs/CartPole-v1_ppo_1_1776768770/events.out.tfevents.1776768770.BL-LFERNANDEZ.local.eurecat.org.59216.0 +3 -0
runs/CartPole-v1_ppo_1_1776768821/events.out.tfevents.1776768821.BL-LFERNANDEZ.local.eurecat.org.59474.0 +3 -0
runs/CartPole-v1_ppo_1_1776769606/events.out.tfevents.1776769606.BL-LFERNANDEZ.local.eurecat.org.62559.0 +3 -0
runs/CartPole-v1_ppo_1_1776769691/events.out.tfevents.1776769691.BL-LFERNANDEZ.local.eurecat.org.62771.0 +3 -0
runs/CartPole-v1_ppo_1_1776770441/events.out.tfevents.1776770441.BL-LFERNANDEZ.local.eurecat.org.64367.0 +3 -0
runs/CartPole-v1_ppo_1_1776770456/events.out.tfevents.1776770456.BL-LFERNANDEZ.local.eurecat.org.64462.0 +3 -0
runs/CartPole-v1_ppo_1_1776779529/events.out.tfevents.1776779529.BL-LFERNANDEZ.local.eurecat.org.84023.0 +3 -0
runs/CartPole-v1_ppo_1_1776779596/events.out.tfevents.1776779596.BL-LFERNANDEZ.local.eurecat.org.106839.0 +3 -0
runs/CartPole-v1_ppo_1_1776779670/events.out.tfevents.1776779672.BL-LFERNANDEZ.local.eurecat.org.209000.0 +3 -0
runs/CartPole-v1_ppo_1_1776779742/events.out.tfevents.1776779745.BL-LFERNANDEZ.local.eurecat.org.209486.0 +3 -0
runs/CartPole-v1_ppo_1_1776779888/events.out.tfevents.1776779890.BL-LFERNANDEZ.local.eurecat.org.210132.0 +3 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+wandb/run-20260421_160121-lnmebn7s/run-lnmebn7s.wandb filter=lfs diff=lfs merge=lfs -text
+wandb/run-20260421_165557-ngbvlof1/run-ngbvlof1.wandb filter=lfs diff=lfs merge=lfs -text

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.9.5

.vscode/settings.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+    "editor.formatOnSave": true,
+    "editor.codeActionsOnSave": {
+        "source.fixAll": "explicit",
+        "source.organizeImports": "explicit"
+    },
+    "[python]": {
+        "editor.defaultFormatter": "charliermarsh.ruff"
+    }
+}

README.md ADDED Viewed

File without changes

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

ppo-LunarLander-v2.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae83d259ed07741805748cf65c6a7c0b1279ccf2fa154cac1d305453e0becfc8
+size 44165

ppo.py ADDED Viewed

	@@ -0,0 +1,133 @@

+import os
+import gymnasium as gym
+import numpy as np  # Ensure numpy is imported
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.distributions.categorical import Categorical
+# --- Hyperparameters ---
+# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Environment id handed to gym.make for every sub-environment.
+env_id = "LunarLander-v2"
+# Total environment steps; divided by batch_size to get the iteration count.
+total_timesteps = 500000
+# Learning rate given to the Adam optimizer.
+learning_rate = 2.5e-4
+# Number of sub-environments in the SyncVectorEnv.
+num_envs = 4
+# Steps collected from each sub-environment per rollout.
+num_steps = 128
+# Transitions gathered per iteration (all sub-environments, all steps).
+batch_size = num_envs * num_steps
+# NOTE(review): the four values below are not referenced by the visible
+# training loop, whose update phase is still a placeholder.
+minibatch_size = 32
+update_epochs = 4
+clip_coef = 0.2
+ent_coef = 0.01
+# --- Model Architecture ---
+def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
+    """Initialise ``layer`` in place and return it.
+
+    :param layer: module exposing ``weight`` and ``bias`` tensors
+    :param std: gain passed to the orthogonal weight initialiser
+    :param bias_const: constant written into every bias entry
+    :return: the same ``layer`` object, so the call can be nested inline
+    """
+    torch.nn.init.orthogonal_(layer.weight, std)
+    torch.nn.init.constant_(layer.bias, bias_const)
+    return layer
+class Agent(nn.Module):
+    """Actor-critic model with two separate three-layer MLPs.
+
+    Both networks take the flattened observation size as input width and use
+    two hidden layers of 64 units with Tanh activations. The critic outputs a
+    single value; the actor outputs one logit per discrete action.
+    """
+
+    def __init__(self, envs):
+        """Build the networks from the vector env's single-env spaces.
+
+        :param envs: object exposing ``single_observation_space.shape`` and
+            ``single_action_space.n``
+        """
+        super().__init__()
+        self.critic = nn.Sequential(
+            layer_init(
+                nn.Linear(
+                    np.array(envs.single_observation_space.shape).prod(), 64
+                )
+            ),
+            nn.Tanh(),
+            layer_init(nn.Linear(64, 64)),
+            nn.Tanh(),
+            # Value head uses gain 1.0 instead of the default sqrt(2).
+            layer_init(nn.Linear(64, 1), std=1.0),
+        )
+        self.actor = nn.Sequential(
+            layer_init(
+                nn.Linear(
+                    np.array(envs.single_observation_space.shape).prod(), 64
+                )
+            ),
+            nn.Tanh(),
+            layer_init(nn.Linear(64, 64)),
+            nn.Tanh(),
+            # Small gain (0.01) on the logit layer.
+            layer_init(nn.Linear(64, envs.single_action_space.n), std=0.01),
+        )
+    def get_value(self, x):
+        """Return the critic output for observations ``x``."""
+        return self.critic(x)
+    def get_action_and_value(self, x, action=None):
+        """Evaluate the policy and the critic on ``x``.
+
+        :param x: observations fed to both networks
+        :param action: actions to score; when ``None`` an action is sampled
+            from the categorical distribution defined by the actor logits
+        :return: tuple ``(action, log_prob(action), entropy, critic(x))``
+        """
+        logits = self.actor(x)
+        probs = Categorical(logits=logits)
+        if action is None:
+            action = probs.sample()
+        return action, probs.log_prob(action), probs.entropy(), self.critic(x)
+# --- Training Loop Setup ---
+if __name__ == "__main__":
+    envs = gym.vector.SyncVectorEnv(
+        [lambda: gym.make(env_id) for _ in range(num_envs)]
+    )
+    agent = Agent(envs).to(device)
+    optimizer = optim.Adam(agent.parameters(), lr=learning_rate, eps=1e-5)
+    # Storage setup
+    obs = torch.zeros(
+        (num_steps, num_envs) + envs.single_observation_space.shape
+    ).to(device)
+    actions = torch.zeros(
+        (num_steps, num_envs) + envs.single_action_space.shape
+    ).to(device)
+    logprobs = torch.zeros((num_steps, num_envs)).to(device)
+    rewards = torch.zeros((num_steps, num_envs)).to(device)
+    dones = torch.zeros((num_steps, num_envs)).to(device)
+    values = torch.zeros((num_steps, num_envs)).to(device)
+    global_step = 0
+    next_obs, _ = envs.reset()
+    next_obs = torch.Tensor(next_obs).to(device)
+    next_done = torch.zeros(num_envs).to(device)
+    for iteration in range(1, total_timesteps // batch_size + 1):
+        # 1. Rollout phase
+        for step in range(num_steps):
+            global_step += num_envs
+            obs[step] = next_obs
+            dones[step] = next_done
+            with torch.no_grad():
+                action, logprob, _, value = agent.get_action_and_value(
+                    next_obs
+                )
+                values[step] = value.flatten()
+            actions[step] = action
+            logprobs[step] = logprob
+            next_obs, reward, terminations, activations, infos = envs.step(
+                action.cpu().numpy()
+            )
+            next_done = np.logical_or(terminations, activations)
+            rewards[step] = torch.tensor(reward).to(device).view(-1)
+            next_obs, next_done = (
+                torch.Tensor(next_obs).to(device),
+                torch.Tensor(next_done).to(device),
+            )
+        # 2. Advantage Calculation (GAE can be added here, simplified for brevity)
+        # 3. PPO Update Logic (Actor and Critic Loss)
+        # ... [Policy Update Logic goes here] ...
+        print(f"Step: {global_step} | Training...")
+    # --- Save and Upload ---
+    model_name = "ppo-LunarLander-v2"
+    torch.save(agent.state_dict(), f"{model_name}.pt")
+    # Upload to Hub
+    # Replace 'your-username' with your actual HF username
+    repo_id = f"your-username/{model_name}"
+    push_to_hub_fast(
+        repo_id=repo_id, folder_path="./", token=os.getenv("HF_TOKEN")
+    )

ppo_old.py ADDED Viewed

	@@ -0,0 +1,790 @@

+import argparse
+import os
+import random
+import time
+from distutils.util import strtobool
+import gym
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.optim as optim
+from torch.distributions.categorical import Categorical
+from torch.utils.tensorboard import SummaryWriter
+def make_env(gym_id, seed, idx, capture_video, run_name):
+    """Return a zero-argument factory that builds one seeded environment.
+
+    :param gym_id: environment id passed to ``gym.make``
+    :param seed: seed applied to the env and to its action/observation spaces
+    :param idx: index of this env; only index 0 is ever video-recorded
+    :param capture_video: when true, env 0 is wrapped with ``RecordVideo``
+    :param run_name: sub-folder of ``videos/`` that receives the recordings
+    :return: ``thunk``, a callable creating and returning the wrapped env
+    """
+    def thunk():
+        env = gym.make(gym_id)
+        env = gym.wrappers.RecordEpisodeStatistics(env)
+        if capture_video:
+            if idx == 0:
+                # The trigger fires whenever its argument is a multiple of 1000.
+                env = gym.wrappers.RecordVideo(
+                    env,
+                    f"videos/{run_name}",
+                    episode_trigger=lambda t: t % 1000 == 0,
+                )
+        env.seed(seed)
+        env.action_space.seed(seed)
+        env.observation_space.seed(seed)
+        return env
+    return thunk
+def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
+    """Initialise ``layer`` in place and return it.
+
+    :param layer: module exposing ``weight`` and ``bias`` tensors
+    :param std: gain passed to the orthogonal weight initialiser
+    :param bias_const: constant written into every bias entry
+    :return: the same ``layer`` object, so the call can be nested inline
+    """
+    torch.nn.init.orthogonal_(layer.weight, std)
+    torch.nn.init.constant_(layer.bias, bias_const)
+    return layer
+class Agent(nn.Module):
+    """Actor-critic model with two separate three-layer MLPs.
+
+    Both networks take the flattened observation size as input width and use
+    two hidden layers of 64 units with Tanh activations. The critic outputs a
+    single value; the actor outputs one logit per discrete action.
+    """
+
+    def __init__(self, envs):
+        """Build the networks from the vector env's single-env spaces.
+
+        :param envs: object exposing ``single_observation_space.shape`` and
+            ``single_action_space.n``
+        """
+        super(Agent, self).__init__()
+        self.critic = nn.Sequential(
+            layer_init(
+                nn.Linear(
+                    np.array(envs.single_observation_space.shape).prod(), 64
+                )
+            ),
+            nn.Tanh(),
+            layer_init(nn.Linear(64, 64)),
+            nn.Tanh(),
+            # Value head uses gain 1.0 instead of the default sqrt(2).
+            layer_init(nn.Linear(64, 1), std=1.0),
+        )
+        self.actor = nn.Sequential(
+            layer_init(
+                nn.Linear(
+                    np.array(envs.single_observation_space.shape).prod(), 64
+                )
+            ),
+            nn.Tanh(),
+            layer_init(nn.Linear(64, 64)),
+            nn.Tanh(),
+            # Small gain (0.01) on the logit layer.
+            layer_init(nn.Linear(64, envs.single_action_space.n), std=0.01),
+        )
+    def get_value(self, x):
+        """Return the critic output for observations ``x``."""
+        return self.critic(x)
+    def get_action_and_value(self, x, action=None):
+        """Evaluate the policy and the critic on ``x``.
+
+        :param x: observations fed to both networks
+        :param action: actions to score; when ``None`` an action is sampled
+            from the categorical distribution defined by the actor logits
+        :return: tuple ``(action, log_prob(action), entropy, critic(x))``
+        """
+        logits = self.actor(x)
+        probs = Categorical(logits=logits)
+        if action is None:
+            action = probs.sample()
+        return action, probs.log_prob(action), probs.entropy(), self.critic(x)
+def parse_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--exp-name",
+        type=str,
+        default=os.path.basename(__file__).rstrip(".py"),
+        help="the name of this experiment",
+    )
+    parser.add_argument(
+        "--gym-id",
+        type=str,
+        default="CartPole-v1",
+        help="the id of the gym environment",
+    )
+    parser.add_argument(
+        "--learning-rate",
+        type=float,
+        default=2.5e-4,
+        help="the learning rate of the optimizer",
+    )
+    parser.add_argument(
+        "--seed", type=int, default=1, help="seed of the experiment"
+    )
+    parser.add_argument(
+        "--total-timesteps",
+        type=int,
+        default=25000,
+        help="total timesteps of the experiments",
+    )
+    parser.add_argument(
+        "--torch-deterministic",
+        type=lambda x: bool(strtobool(x)),
+        default=True,
+        nargs="?",
+        const=True,
+        help="if toggled, `torch.backeds.cudnn.deterministic=False`",
+    )
+    parser.add_argument(
+        "--cuda",
+        type=lambda x: bool(strtobool(x)),
+        default=True,
+        nargs="?",
+        const=True,
+        help="if toggled, cuda will not be enabled by default",
+    )
+    parser.add_argument(
+        "--track",
+        type=lambda x: bool(strtobool(x)),
+        default=False,
+        nargs="?",
+        const=True,
+        help="if toggled, this experiment will be tracked with Weights and Biases",
+    )
+    parser.add_argument(
+        "--wandb-project-name",
+        type=str,
+        default="cleanRL",
+        help="the wandb's project name",
+    )
+    parser.add_argument(
+        "--wandb-entity",
+        type=str,
+        default=None,
+        help="the entity (team) of wandb's project",
+    )
+    parser.add_argument(
+        "--num-envs",
+        type=int,
+        default=4,
+        help="the number of parallel game environments",
+    )
+    parser.add_argument(
+        "--capture-video",
+        type=lambda x: bool(strtobool(x)),
+        default=False,
+        nargs="?",
+        const=True,
+        help="whether to capture videos of the agent performances (check out `videos` folder)",
+    )
+    parser.add_argument(
+        "--num-steps",
+        type=int,
+        default=128,
+        help="the number of steps to run in each environment per policy rollout",
+    )
+    parser.add_argument(
+        "--anneal-lr",
+        type=lambda x: bool(strtobool(x)),
+        default=True,
+        nargs="?",
+        const=True,
+        help="Toggle learning rate annealing for policy and value networks",
+    )
+    parser.add_argument(
+        "--gae",
+        type=lambda x: bool(strtobool(x)),
+        default=True,
+        nargs="?",
+        const=True,
+        help="Use GAE for advantage computation",
+    )
+    parser.add_argument(
+        "--gamma", type=float, default=0.99, help="the discount factor gamma"
+    )
+    parser.add_argument(
+        "--gae-lambda",
+        type=float,
+        default=0.95,
+        help="the lambda for the general advantage estimation",
+    )
+    parser.add_argument(
+        "--num-minibatches",
+        type=int,
+        default=4,
+        help="the number of mini-batches",
+    )
+    parser.add_argument(
+        "--update-epochs",
+        type=int,
+        default=4,
+        help="the K epochs to update the policy",
+    )
+    parser.add_argument(
+        "--norm-adv",
+        type=lambda x: bool(strtobool(x)),
+        default=True,
+        nargs="?",
+        const=True,
+        help="Toggles advantages normalization",
+    )
+    parser.add_argument(
+        "--clip-coef",
+        type=float,
+        default=0.2,
+        help="the surrogate clipping coefficient",
+    )
+    parser.add_argument(
+        "--clip-vloss",
+        type=lambda x: bool(strtobool(x)),
+        default=True,
+        nargs="?",
+        const=True,
+        help="Toggles wheter or not to use a clipped loss for the value function, as per the paper",
+    )
+    parser.add_argument(
+        "--ent-coef",
+        type=float,
+        default=0.01,
+        help="coefficient of the entropy",
+    )
+    parser.add_argument(
+        "--vf-coef",
+        type=float,
+        default=0.5,
+        help="coefficient of the value function",
+    )
+    parser.add_argument(
+        "--max-grad-norm",
+        type=float,
+        default=0.5,
+        help="the maximum norm for the gradient clipping",
+    )
+    parser.add_argument(
+        "--target-kl",
+        type=float,
+        default=None,
+        help="the target KL divergence threshold",
+    )
+    args = parser.parse_args()
+    args.batch_size = int(args.num_envs * args.num_steps)
+    args.minibatch_size = int(args.batch_size // args.num_minibatches)
+    return args
+if __name__ == "__main__":
+    args = parse_args()
+    print(args)
+    run_name = f"{args.gym_id}_{args.exp_name}_{args.seed}_{int(time.time())}"
+    if args.track:
+        import wandb
+        wandb.init(
+            project=args.wandb_project_name,
+            entity=args.wandb_entity,
+            sync_tensorboard=True,
+            config=vars(args),
+            name=run_name,
+            monitor_gym=True,
+            save_code=True,
+        )
+    writer = SummaryWriter(f"runs/{run_name}")
+    writer.add_text(
+        "hyperparameters",
+        "|param|value|\n|-|-|\n%s"
+        % (
+            "\n".join(
+                [f"|{key}|{value}|" for key, value in vars(args).items()]
+            )
+        ),
+    )
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    torch.backends.cudnn.deterministic = args.torch_deterministic
+    device = torch.device(
+        "cuda" if torch.cuda.is_available() and args.cuda else "cpu"
+    )
+    envs = gym.vector.SyncVectorEnv(
+        [
+            make_env(
+                args.gym_id, args.seed + i, i, args.capture_video, run_name
+            )
+            for i in range(args.num_envs)
+        ]
+    )
+    assert isinstance(envs.single_action_space, gym.spaces.Discrete), (
+        "only discrete action space is supported"
+    )
+    agent = Agent(envs).to(device)
+    optimizer = optim.Adam(agent.parameters(), lr=args.learning_rate, eps=1e-5)
+    # ALGO Logic: Storage setup
+    obs = torch.zeros(
+        (args.num_steps, args.num_envs) + envs.single_observation_space.shape
+    ).to(device)
+    actions = torch.zeros(
+        (args.num_steps, args.num_envs) + envs.single_action_space.shape
+    ).to(device)
+    logprobs = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    rewards = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    dones = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    values = torch.zeros((args.num_steps, args.num_envs)).to(device)
+    # TRY NOT TO MODIFY: start the game
+    global_step = 0
+    start_time = time.time()
+    next_obs = torch.Tensor(envs.reset()).to(device)
+    next_done = torch.zeros(args.num_envs).to(device)
+    num_updates = args.total_timesteps // args.batch_size
+    for update in range(1, num_updates + 1):
+        # Annealing the rate if instructed to do so.abs
+        if args.anneal_lr:
+            frac = 1.0 - (update - 1.0) / num_updates
+            lrnow = frac * args.learning_rate
+            optimizer.param_groups[0]["lr"] = lrnow
+        for step in range(args.num_steps):
+            global_step += 1 * args.num_envs
+            obs[step] = next_obs
+            dones[step] = next_done
+            # ALGO LOGIC : action logic
+            with torch.no_grad():
+                action, logprob, _, value = agent.get_action_and_value(
+                    next_obs
+                )
+                values[step] = value.flatten()
+            actions[step] = action
+            logprobs[step] = logprob
+            # TRY NOT TO MODIFY: execute the game and log data.abs
+            next_obs, reward, done, info = envs.step(action.cpu().numpy())
+            rewards[step] = torch.tensor(reward).to(device).view(-1)
+            next_obs, next_done = (
+                torch.Tensor(next_obs).to(device),
+                torch.Tensor(done).to(device),
+            )
+            if isinstance(info, dict) and "episode" in info:
+                for item in info["episode"]:
+                    if item is not None:
+                        print(
+                            f"global_step={global_step}, episodic_return={item['r']}"
+                        )
+                        writer.add_scalar(
+                            "charts/episodic_return", item["r"], global_step
+                        )
+                        writer.add_scalar(
+                            "charts/episodic_length", item["l"], global_step
+                        )
+                        break
+            # bootstrap reward if not done
+            with torch.no_grad():
+                next_value = agent.get_value(next_obs).reshape(1, -1)
+                if args.gae:
+                    advantages = torch.zeros_like(rewards).to(device)
+                    lastgaelam = 0
+                    for t in reversed(range(args.num_steps)):
+                        if t == args.num_steps - 1:
+                            nextnonterminal = 1.0 - next_done
+                            nextvalues = next_value
+                        else:
+                            nextnonterminal = 1.0 - dones[t + 1]
+                            nextvalues = values[t + 1]
+                        delta = (
+                            rewards[t]
+                            + args.gamma * nextvalues * nextnonterminal
+                            - values[t]
+                        )
+                        advantages[t] = lastgaelam = (
+                            delta
+                            + args.gamma
+                            * args.gae_lambda
+                            * nextnonterminal
+                            * lastgaelam
+                        )
+                    returns = advantages + values
+                else:
+                    returns = torch.zeros_like(rewards).to(device)
+                    for t in reversed(range(args.num_steps)):
+                        if t == args.num_steps - 1:
+                            nextnonterminal = 1.0 - next_done
+                            next_return = next_value
+                        else:
+                            nextnonterminal = 1.0 - dones[t + 1]
+                            next_return = returns[t + 1]
+                        returns[t] = (
+                            rewards[t]
+                            + args.gamma * nextnonterminal * next_return
+                        )
+                    advantages = returns - values
+            b_obs = obs.reshape((-1,) + envs.single_observation_space.shape)
+            b_logprobs = logprobs.reshape(-1)
+            b_actions = actions.reshape((-1,) + envs.single_action_space.shape)
+            b_advantages = advantages.reshape(-1)
+            b_returns = returns.reshape(-1)
+            b_values = values.reshape(-1)
+            # Optimizaing the policy and value network
+            b_inds = np.arange(args.batch_size)
+            clipfracs = []
+            for epoch in range(args.update_epochs):
+                np.random.shuffle(b_inds)
+                for start in range(0, args.batch_size, args.minibatch_size):
+                    end = start + args.minibatch_size
+                    mb_inds = b_inds[start:end]
+                    _, newlogprob, entropy, newvalue = (
+                        agent.get_action_and_value(  # POSIBLE ERROR AQUI
+                            b_obs[mb_inds], b_actions.long()[mb_inds]
+                        )
+                    )
+                    logratio = newlogprob - b_logprobs[mb_inds]
+                    ratio = logratio.exp()
+                    with torch.no_grad():
+                        # calculate approx kl as in http://joschu.net/blog/kl-aprox.html
+                        old_approx_kl = (-logratio).mean()
+                        approx_kl = ((ratio - 1) - logratio).mean()
+                        clipfracs += [
+                            ((ratio - 1.0).abs() > args.clip_coef)
+                            .float()
+                            .mean()
+                        ]
+                    mb_advantages = b_advantages[mb_inds]
+                    if args.norm_adv:
+                        mb_advantages = (
+                            mb_advantages - mb_advantages.mean()
+                        ) / (mb_advantages.std() + 1e-8)
+                    # Policy loss
+                    pg_loss1 = -mb_advantages * ratio
+                    pg_loss2 = -mb_advantages * torch.clamp(
+                        ratio, 1 - args.clip_coef, 1 + args.clip_coef
+                    )
+                    pg_loss = torch.max(pg_loss1, pg_loss2).mean()
+                    # Value loss
+                    newvalue = newvalue.view(-1)
+                    if args.clip_vloss:
+                        v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2
+                        v_clipped = b_values[mb_inds] + torch.clamp(
+                            newvalue - b_values[mb_inds],
+                            -args.clip_coef,
+                            args.clip_coef,
+                        )
+                        v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2
+                        v_loss_max = torch.max(
+                            v_loss_unclipped, v_loss_clipped
+                        )
+                        v_loss = 0.5 * v_loss_max.mean()
+                    else:
+                        v_loss = (
+                            0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean()
+                        )
+                    # Entropy loss
+                    entropy_loss = entropy.mean()
+                    loss = (
+                        pg_loss
+                        - args.ent_coef * entropy_loss
+                        + v_loss * args.vf_coef
+                    )
+                    optimizer.zero_grad()
+                    loss.backward()
+                    nn.utils.clip_grad_norm_(
+                        agent.parameters(), args.max_grad_norm
+                    )
+                    optimizer.step()
+                if args.target_kl is not None:
+                    if approx_kl > args.target_kl:
+                        break
+        y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy()
+        var_y = np.var(y_true)
+        explained_var = (
+            np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y
+        )
+        # TRY NOT TO MODIFY: record rewards for plotting purposes
+        writer.add_scalar(
+            "charts/learning_rate",
+            optimizer.param_groups[0]["lr"],
+            global_step,
+        )
+        writer.add_scalar("losses/value_loss", v_loss.item(), global_step)
+        writer.add_scalar("losses/policy_loss", pg_loss.item(), global_step)
+        writer.add_scalar("losses/entropy", entropy_loss.item(), global_step)
+        writer.add_scalar("losses/approx_kl", approx_kl.item(), global_step)
+        writer.add_scalar("losses/clipfrac", np.mean(clipfracs), global_step)
+        writer.add_scalar(
+            "losses/explained_variance", explained_var, global_step
+        )
+        print("SPS:", int(global_step / (time.time() - start_time)))
+        writer.add_scalar(
+            "charts/SPS",
+            int(global_step / (time.time() - start_time)),
+            global_step,
+        )
+    envs.close()
+    writer.close()
+##############################################################################
+############################## Huggingface ###################################
+##############################################################################
+import datetime
+import json
+import shutil
+import tempfile
+from pathlib import Path
+import imageio
+from huggingface_hub import HfApi, upload_folder
+from huggingface_hub.repocard import metadata_eval_result, metadata_save
+from wasabi import Printer
+msg = Printer()
+def package_to_hub(
+    repo_id,
+    model,
+    hyperparameters,
+    eval_env,
+    video_fps=30,
+    commit_message="Push agent to the Hub",
+    token=None,
+    logs=None,
+):
+    """
+    Evaluate, Generate a video and Upload a model to Hugging Face Hub.
+    This method does the complete pipeline:
+    - It evaluates the model
+    - It generates the model card
+    - It generates a replay video of the agent
+    - It pushes everything to the hub
+    :param repo_id: id of the model repository from the Hugging Face Hub
+    :param model: trained model; its ``state_dict()`` is saved as ``model.pt``
+    :param hyperparameters: object exposing ``env_id``; also forwarded to the
+        model card generator
+    :param eval_env: environment used to evaluate the agent
+    :param video_fps: number of fps for rendering the video
+    :param commit_message: commit message
+    :param token: token forwarded to ``create_repo`` and ``upload_folder``
+    :param logs: directory on local machine of tensorboard logs you'd like to upload
+    :return: the value returned by ``upload_folder``
+    """
+    msg.info(
+        "This function will save, evaluate, generate a video of your agent, "
+        "create a model card and push everything to the hub. "
+        "It might take up to 1min. \n "
+        "This is a work in progress: if you encounter a bug, please open an issue."
+    )
+    # Step 1: Clone or create the repo
+    repo_url = HfApi().create_repo(
+        repo_id=repo_id,
+        token=token,
+        private=False,
+        exist_ok=True,
+    )
+    # Everything is staged in a temporary directory that is removed when the
+    # ``with`` block exits, i.e. after the upload has been issued.
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        tmpdirname = Path(tmpdirname)
+        # Step 2: Save the model
+        torch.save(model.state_dict(), tmpdirname / "model.pt")
+        # Step 3: Evaluate the model and build JSON
+        mean_reward, std_reward = _evaluate_agent(eval_env, 10, model)
+        # First get datetime
+        eval_datetime = datetime.datetime.now()
+        eval_form_datetime = eval_datetime.isoformat()
+        evaluate_data = {
+            "env_id": hyperparameters.env_id,
+            "mean_reward": mean_reward,
+            "std_reward": std_reward,
+            "n_evaluation_episodes": 10,
+            "eval_datetime": eval_form_datetime,
+        }
+        # Write a JSON file
+        with open(tmpdirname / "results.json", "w") as outfile:
+            json.dump(evaluate_data, outfile)
+        # Step 4: Generate a video
+        video_path = tmpdirname / "replay.mp4"
+        record_video(eval_env, model, video_path, video_fps)
+        # Step 5: Generate the model card
+        generated_model_card, metadata = _generate_model_card(
+            "PPO",
+            hyperparameters.env_id,
+            mean_reward,
+            std_reward,
+            hyperparameters,
+        )
+        _save_model_card(tmpdirname, generated_model_card, metadata)
+        # Step 6: Add logs if needed
+        if logs:
+            _add_logdir(tmpdirname, Path(logs))
+        msg.info(f"Pushing repo {repo_id} to the Hugging Face Hub")
+        # Overwrites the value returned by create_repo above.
+        repo_url = upload_folder(
+            repo_id=repo_id,
+            folder_path=tmpdirname,
+            path_in_repo="",
+            commit_message=commit_message,
+            token=token,
+        )
+        msg.info(
+            f"Your model is pushed to the Hub. You can view your model here: {repo_url}"
+        )
+    return repo_url
+def _evaluate_agent(env, n_eval_episodes, policy):
+    """
+    Evaluate the agent for ``n_eval_episodes`` episodes and returns average reward and std of reward.
+    :param env: The evaluation environment
+    :param n_eval_episodes: Number of episode to evaluate the agent
+    :param policy: The agent
+    """
+    episode_rewards = []
+    for episode in range(n_eval_episodes):
+        state = env.reset()
+        step = 0
+        done = False
+        total_rewards_ep = 0
+        while done is False:
+            state = torch.Tensor(state).to(device)
+            action, _, _, _ = policy.get_action_and_value(state)
+            new_state, reward, done, info = env.step(action.cpu().numpy())
+            total_rewards_ep += reward
+            if done:
+                break
+            state = new_state
+        episode_rewards.append(total_rewards_ep)
+    mean_reward = np.mean(episode_rewards)
+    std_reward = np.std(episode_rewards)
+    return mean_reward, std_reward
+def record_video(env, policy, out_directory, fps=30):
+    images = []
+    done = False
+    state = env.reset()
+    img = env.render(mode="rgb_array")
+    images.append(img)
+    while not done:
+        state = torch.Tensor(state).to(device)
+        # Take the action (index) that have the maximum expected future reward given that state
+        action, _, _, _ = policy.get_action_and_value(state)
+        state, reward, done, info = env.step(
+            action.cpu().numpy()
+        )  # We directly put next_state = state for recording logic
+        img = env.render(mode="rgb_array")
+        images.append(img)
+    imageio.mimsave(
+        out_directory, [np.array(img) for i, img in enumerate(images)], fps=fps
+    )
+def _generate_model_card(
+    model_name, env_id, mean_reward, std_reward, hyperparameters
+):
+    """
+    Generate the model card for the Hub
+    :param model_name: name of the model
+    :env_id: name of the environment
+    :mean_reward: mean reward of the agent
+    :std_reward: standard deviation of the mean reward of the agent
+    :hyperparameters: training arguments
+    """
+    # Step 1: Select the tags
+    metadata = generate_metadata(model_name, env_id, mean_reward, std_reward)
+    # Transform the hyperparams namespace to string
+    converted_dict = vars(hyperparameters)
+    converted_str = str(converted_dict)
+    converted_str = converted_str.split(", ")
+    converted_str = "\n".join(converted_str)
+    # Step 2: Generate the model card
+    model_card = f"""
+  # PPO Agent Playing {env_id}
+  This is a trained model of a PPO agent playing {env_id}.
+  # Hyperparameters
+  ```python
+  {converted_str}
+  ```
+  """
+    return model_card, metadata
+def generate_metadata(model_name, env_id, mean_reward, std_reward):
+    """
+    Define the tags for the model card
+    :param model_name: name of the model
+    :param env_id: name of the environment
+    :mean_reward: mean reward of the agent
+    :std_reward: standard deviation of the mean reward of the agent
+    """
+    metadata = {}
+    metadata["tags"] = [
+        env_id,
+        "ppo",
+        "deep-reinforcement-learning",
+        "reinforcement-learning",
+        "custom-implementation",
+        "deep-rl-course",
+    ]
+    # Add metrics
+    eval = metadata_eval_result(
+        model_pretty_name=model_name,
+        task_pretty_name="reinforcement-learning",
+        task_id="reinforcement-learning",
+        metrics_pretty_name="mean_reward",
+        metrics_id="mean_reward",
+        metrics_value=f"{mean_reward:.2f} +/- {std_reward:.2f}",
+        dataset_pretty_name=env_id,
+        dataset_id=env_id,
+    )
+    # Merges both dictionaries
+    metadata = {**metadata, **eval}
+    return metadata
+def _save_model_card(local_path, generated_model_card, metadata):
+    """Saves a model card for the repository.
+    :param local_path: repository directory
+    :param generated_model_card: model card generated by _generate_model_card()
+    :param metadata: metadata
+    """
+    readme_path = local_path / "README.md"
+    readme = ""
+    if readme_path.exists():
+        with readme_path.open("r", encoding="utf8") as f:
+            readme = f.read()
+    else:
+        readme = generated_model_card
+    with readme_path.open("w", encoding="utf-8") as f:
+        f.write(readme)
+    # Save our metrics to Readme metadata
+    metadata_save(readme_path, metadata)
+def _add_logdir(local_path: Path, logdir: Path):
+    """Adds a logdir to the repository.
+    :param local_path: repository directory
+    :param logdir: logdir directory
+    """
+    if logdir.exists() and logdir.is_dir():
+        # Add the logdir to the repository under new dir called logs
+        repo_logdir = local_path / "logs"
+        # Delete current logs if they exist
+        if repo_logdir.exists():
+            shutil.rmtree(repo_logdir)
+        # Copy logdir into repo logdir
+        shutil.copytree(logdir, repo_logdir)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,23 @@

+[tool.poetry]
+name = "lunar2"
+version = "0.1.0"
+description = "Training a model on Python 3.10 for HF Hub"
+authors = ["Lois Fernandez <loisfernandezcmp@gmail.com>"]
+[tool.poetry.dependencies]
+# Restricting to Python 3.10 specifically
+python = "~3.10"
+# Core ML stack
+torch = "^2.0.0"
+transformers = {extras = ["torch"], version = "^4.35.0"}
+datasets = "^2.15.0"
+accelerate = "^0.24.0"
+# For uploading to the Hugging Face Hub
+huggingface-hub = "^0.19.0"
+gymnasium = {version = "0.28.1", extras = ["box2d"]}
+[build-system]
+requires = ["poetry-core>=1.0.0"]
+build-backend = "poetry.core.masonry.api"

ruff.toml ADDED Viewed

	@@ -0,0 +1,104 @@

+# Exclude a variety of commonly ignored directories.
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv",
+    "*/test/*",
+    "*/tests/*",
+    "*/tools/*",
+]
+# PEP 8 line length (note: Black's own default is 88, not 79).
+line-length = 79
+indent-width = 4
+# Assume Python 3.10
+target-version = "py310"
+[lint]
+# Enable these specific rules and rule-subsets.
+select = [
+  "E4",
+  "E7",
+  "E9",
+  "F",
+  "ERA",
+  "FAST",
+  "ANN",
+  "ASYNC",
+  "S303",
+  "S304",
+  "FBT",
+  "B",
+  "A",
+  "COM818",
+  "C4",
+  "FA",
+  "ICN",
+  "PIE",
+  "Q",
+  "RET",
+  "SIM",
+  "ARG",
+  "PTH",
+  "FLY",
+  "C90",
+  "NPY",
+  "PD",
+  "PLE03",
+  "D101",
+  "D102",
+  "D103",
+  "D105",
+  "D107",
+  "D201",
+  "D403",
+  "D419"
+]
+ignore = ["ANN101", "ANN102","ANN002", "ANN003", "ANN401", "PIE790", "RET504"]
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"]
+unfixable = []
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+[lint.per-file-ignores]
+"__init__.py" = ["F401"]
+[format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"

runs/CartPole-v1_ppo_1_1776758028/events.out.tfevents.1776758028.BL-LFERNANDEZ.local.eurecat.org.10534.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9afa36760505e109225f5bbf594139ad908be8c8f69f10b1ef0e19c3837e4a32
+size 4505

runs/CartPole-v1_ppo_1_1776759190/events.out.tfevents.1776759190.BL-LFERNANDEZ.local.eurecat.org.13333.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:316b4b1d7aa14b1ea6a647b2beca1a6538747c49ec61f8b8066bbeba867e431c
+size 4505

runs/CartPole-v1_ppo_1_1776759930/events.out.tfevents.1776759930.BL-LFERNANDEZ.local.eurecat.org.16675.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:782ac37f9222ea7332327d7ba3f67dae23036fbbedfddcdef39f55160dab559e
+size 4646

runs/CartPole-v1_ppo_1_1776760089/events.out.tfevents.1776760089.BL-LFERNANDEZ.local.eurecat.org.17273.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bccf2c3eea4f07985090378e500d218d96ec681ada92a09b4a9ee726a233091c
+size 4505

runs/CartPole-v1_ppo_1_1776760573/events.out.tfevents.1776760573.BL-LFERNANDEZ.local.eurecat.org.19521.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:03821d01fb60402b8552445a0198fbb23ea001cf127dd5bdaa3692d968fcff93
+size 4518

runs/CartPole-v1_ppo_1_1776760603/events.out.tfevents.1776760800.BL-LFERNANDEZ.local.eurecat.org.19762.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b556eab8c87d3450e4b7e3d8b474d2181818544a1d345d5d9baeac21dbec3022
+size 5084

runs/CartPole-v1_ppo_1_1776761629/events.out.tfevents.1776761629.BL-LFERNANDEZ.local.eurecat.org.40907.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c874ddd1e6790112cb2546a418e40ad0444439dd6c1cd2f92237416a779759a8
+size 4615

runs/CartPole-v1_ppo_1_1776764954/events.out.tfevents.1776764954.BL-LFERNANDEZ.local.eurecat.org.45905.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:05a6c46f9c4c3503dd8dbc83d546033ed94f2a32a6269c270b1d0f7ec5d1a3e6
+size 387

runs/CartPole-v1_ppo_1_1776765078/events.out.tfevents.1776765078.BL-LFERNANDEZ.local.eurecat.org.46277.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da4a24a12acecc6744ffec1840df82d7d0a43e52af6e5508049146dd8d01ca37
+size 387

runs/CartPole-v1_ppo_1_1776765121/events.out.tfevents.1776765121.BL-LFERNANDEZ.local.eurecat.org.46547.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1c828152286330150089d2a3aacb183e30671ddb0128011b758cb1e635a08598
+size 387

runs/CartPole-v1_ppo_1_1776765317/events.out.tfevents.1776765317.BL-LFERNANDEZ.local.eurecat.org.47512.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2972bec30717f8de533164b54173c35d83ff6bb92090c9972fee9be0d2586536
+size 387

runs/CartPole-v1_ppo_1_1776765438/events.out.tfevents.1776765438.BL-LFERNANDEZ.local.eurecat.org.47896.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb9a29012db8c3f257843008c0b6fe5eef5838c00232ea123a25b3fb6fbbe6a1
+size 387

runs/CartPole-v1_ppo_1_1776765498/events.out.tfevents.1776765498.BL-LFERNANDEZ.local.eurecat.org.48020.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:60d1d932ada826bcd927f5fea551571925735ce2151711fcf01557d4d2fb10c3
+size 387

runs/CartPole-v1_ppo_1_1776765547/events.out.tfevents.1776765547.BL-LFERNANDEZ.local.eurecat.org.48309.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7bed5acfede6c85caf003f79714c91c2d43b2a46bc61ed935565435406348d35
+size 387

runs/CartPole-v1_ppo_1_1776765580/events.out.tfevents.1776765580.BL-LFERNANDEZ.local.eurecat.org.48524.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf8a7c7dc0e56bc55a48ba5cb5574c7395b57948b3134839937b32fe27742727
+size 387

runs/CartPole-v1_ppo_1_1776765943/events.out.tfevents.1776765945.BL-LFERNANDEZ.local.eurecat.org.49910.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe38d20ce1da58e3db7c61d6e468f058e96899863695087cdf1342a10ff67936
+size 386

runs/CartPole-v1_ppo_1_1776766122/events.out.tfevents.1776766124.BL-LFERNANDEZ.local.eurecat.org.50518.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6982a170b017fbca8e61516594b134ddacbe16dcb14fbb58a8addca9e036da11
+size 386

runs/CartPole-v1_ppo_1_1776766281/events.out.tfevents.1776766283.BL-LFERNANDEZ.local.eurecat.org.51127.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:618af812c17147f953e1e421178ad38fb7c7d50e52e322e77c8e3dd9cec8ce20
+size 386

runs/CartPole-v1_ppo_1_1776766423/events.out.tfevents.1776766423.BL-LFERNANDEZ.local.eurecat.org.51653.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e42b720566506a92475a44ef430dbb18a65b5ce7389e9d2f85ccc019ca218fb2
+size 387

runs/CartPole-v1_ppo_1_1776766445/events.out.tfevents.1776766445.BL-LFERNANDEZ.local.eurecat.org.51862.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:544211ff4e7dccf156b57657f28c98e9047e5ee09d242efb47211d1dcf6bcf6a
+size 387

runs/CartPole-v1_ppo_1_1776767609/events.out.tfevents.1776767609.BL-LFERNANDEZ.local.eurecat.org.55609.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18697544a26000a565d90b8c95bec475ec6037305456213f73867826b558bb8f
+size 387

runs/CartPole-v1_ppo_1_1776767664/events.out.tfevents.1776767664.BL-LFERNANDEZ.local.eurecat.org.55869.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:721804f39bacafaa16df395e991a020282a21377b0c4a85d5d65fabe2c146d49
+size 387

runs/CartPole-v1_ppo_1_1776767691/events.out.tfevents.1776767691.BL-LFERNANDEZ.local.eurecat.org.55998.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5a8c1b7da7653289620f6915d27331744b2b5de6abde50d15052b9c0eecf3ea
+size 400

runs/CartPole-v1_ppo_1_1776767743/events.out.tfevents.1776767743.BL-LFERNANDEZ.local.eurecat.org.56120.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7410d303a58d15dfa53fd49591350c0e6d746ab775dda23a7dbeb86d5ceac169
+size 438

runs/CartPole-v1_ppo_1_1776767808/events.out.tfevents.1776767808.BL-LFERNANDEZ.local.eurecat.org.56378.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a6dea466e45090c43781b14cd8a849364bc234c6cf14714b8a4865bafed15c89
+size 438

runs/CartPole-v1_ppo_1_1776767863/events.out.tfevents.1776767863.BL-LFERNANDEZ.local.eurecat.org.56483.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:daecbdc1192a33fc47cbdcf5cebe2a7c73bb9c27376705e35835a1bae7e59768
+size 438

runs/CartPole-v1_ppo_1_1776768348/events.out.tfevents.1776768348.BL-LFERNANDEZ.local.eurecat.org.57900.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42db5c74403b1efa7b85919ffcbbb9090373fe612df1fb272719837d4ad24d4a
+size 438

runs/CartPole-v1_ppo_1_1776768490/events.out.tfevents.1776768490.BL-LFERNANDEZ.local.eurecat.org.58209.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ed66b6a9f6ce678547c16073df52d6cfd5c5d1bd5f85f060df01e71ce223ed9
+size 455

runs/CartPole-v1_ppo_1_1776768658/events.out.tfevents.1776768658.BL-LFERNANDEZ.local.eurecat.org.58694.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6705c8526854d2808b45af2747bff6840ef0998c53234d6eb99dd9e7329c79ef
+size 455

runs/CartPole-v1_ppo_1_1776768770/events.out.tfevents.1776768770.BL-LFERNANDEZ.local.eurecat.org.59216.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3cba2f3486e16cd82a5c35b88c9392b0d0743d4ac84958dd37d1169855f57f1e
+size 455

runs/CartPole-v1_ppo_1_1776768821/events.out.tfevents.1776768821.BL-LFERNANDEZ.local.eurecat.org.59474.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:812c752d707e26387dc00a0c9b9c4153b88d4a6665813068336e8d87c0c62fe7
+size 455

runs/CartPole-v1_ppo_1_1776769606/events.out.tfevents.1776769606.BL-LFERNANDEZ.local.eurecat.org.62559.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:96c693f2d3db465b461568c35031823ae50200e5ebfd0905f7073bc4c73c5e58
+size 472

runs/CartPole-v1_ppo_1_1776769691/events.out.tfevents.1776769691.BL-LFERNANDEZ.local.eurecat.org.62771.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95ba12311c3d22a90a0a994044ac36b7604e9701a796d03ed7eeac3e358678f9
+size 472

runs/CartPole-v1_ppo_1_1776770441/events.out.tfevents.1776770441.BL-LFERNANDEZ.local.eurecat.org.64367.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a34c5c92ebefdfe16a1abecd6fa0827772eed1522bdd5ed9eac3ec08033d199e
+size 472

runs/CartPole-v1_ppo_1_1776770456/events.out.tfevents.1776770456.BL-LFERNANDEZ.local.eurecat.org.64462.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95d7c0875736b536bf71649f27bf8af2205663198c362e7c09ba0db53d3a53e1
+size 129276

runs/CartPole-v1_ppo_1_1776779529/events.out.tfevents.1776779529.BL-LFERNANDEZ.local.eurecat.org.84023.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d2c15bf74d464fba25c98d4c8b5e0db41a073f997d78f72526a002979274917a
+size 80573

runs/CartPole-v1_ppo_1_1776779596/events.out.tfevents.1776779596.BL-LFERNANDEZ.local.eurecat.org.106839.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:42220ac9036b3328c2fde82e3e4115e5e6cc70475fe9ecfb7606272b7df61cc9
+size 8246

runs/CartPole-v1_ppo_1_1776779670/events.out.tfevents.1776779672.BL-LFERNANDEZ.local.eurecat.org.209000.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2eaca50079d50ce8a79530f7267e983cbdd1e253749c8aee2e57230e6894ff9b
+size 688

runs/CartPole-v1_ppo_1_1776779742/events.out.tfevents.1776779745.BL-LFERNANDEZ.local.eurecat.org.209486.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cf0bd5e4e5ca016f6f0f843bcf53caec5ac7738c358312f90e988b79c6ff5c97
+size 688

runs/CartPole-v1_ppo_1_1776779888/events.out.tfevents.1776779890.BL-LFERNANDEZ.local.eurecat.org.210132.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8388b360350a0ae8c2c01d265393be1f572f5a175c57d4ad14569208ba8869f1
+size 688