qgallouedec HF Staff committed on
Commit
8c59eac
·
1 Parent(s): b10c50d

Upload folder using huggingface_hub

Browse files
.summary/0/events.out.tfevents.1688734393.qgallouedec-MS-7C84 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0b898d838d0a199bdd913bf2154e8b21ca5352eb3ad3435190d6af5f94fa653
3
+ size 1857931
README.md CHANGED
@@ -15,7 +15,7 @@ model-index:
15
  type: assembly-v2
16
  metrics:
17
  - type: mean_reward
18
- value: 1399.65 +/- 454.30
19
  name: mean_reward
20
  verified: false
21
  ---
 
15
  type: assembly-v2
16
  metrics:
17
  - type: mean_reward
18
+ value: 3573.76 +/- 25.55
19
  name: mean_reward
20
  verified: false
21
  ---
checkpoint_p0/best_000027640_14151680_reward_3579.182.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a20e0e28635ec1b4b5d4d088604336b27e5b1e19b76bc97aa73f6b5b0f5563c7
3
+ size 98239
checkpoint_p0/checkpoint_000040920_20951040.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c41fdb482a72ae49440291297a26598aadcedd9aae920d1377b8dd8564b917
3
+ size 98567
checkpoint_p0/checkpoint_000041120_21053440.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d52d5bfa040564246c7bf2ef7e97bab5903feba7a8ca604acca87734666dd0de
3
+ size 98567
config.json CHANGED
@@ -3,37 +3,37 @@
3
  "algo": "APPO",
4
  "env": "assembly-v2",
5
  "experiment": "assembly-v2",
6
- "train_dir": "/home/qgallouedec/gia/data/envs/metaworld/train_dir",
7
- "restart_behavior": "resume",
8
- "device": "gpu",
9
  "seed": null,
10
  "num_policies": 1,
11
- "async_rl": true,
12
  "serial_mode": false,
13
  "batched_sampling": false,
14
  "num_batches_to_accumulate": 2,
15
  "worker_num_splits": 2,
16
  "policy_workers_per_policy": 1,
17
  "max_policy_lag": 1000,
18
- "num_workers": 32,
19
- "num_envs_per_worker": 2,
20
  "batch_size": 1024,
21
- "num_batches_per_epoch": 1,
22
- "num_epochs": 1,
23
- "rollout": 32,
24
- "recurrence": 32,
25
  "shuffle_minibatches": false,
26
  "gamma": 0.99,
27
- "reward_scale": 0.01,
28
  "reward_clip": 1000.0,
29
- "value_bootstrap": false,
30
  "normalize_returns": true,
31
- "exploration_loss_coeff": 0.003,
32
- "value_loss_coeff": 0.5,
33
- "kl_loss_coeff": 0.0,
34
  "exploration_loss": "entropy",
35
  "gae_lambda": 0.95,
36
- "ppo_clip_ratio": 0.1,
37
  "ppo_clip_value": 1.0,
38
  "with_vtrace": false,
39
  "vtrace_rho": 1.0,
@@ -42,9 +42,9 @@
42
  "adam_eps": 1e-06,
43
  "adam_beta1": 0.9,
44
  "adam_beta2": 0.999,
45
- "max_grad_norm": 4.0,
46
- "learning_rate": 0.0001,
47
- "lr_schedule": "constant",
48
  "lr_schedule_kl_threshold": 0.008,
49
  "lr_adaptive_min": 1e-06,
50
  "lr_adaptive_max": 0.01,
@@ -59,7 +59,7 @@
59
  "force_envs_single_thread": false,
60
  "default_niceness": 0,
61
  "log_to_file": true,
62
- "experiment_summaries_interval": 10,
63
  "flush_summaries_interval": 30,
64
  "stats_avg": 100,
65
  "summaries_use_frameskip": true,
@@ -67,7 +67,7 @@
67
  "heartbeat_reporting_interval": 180,
68
  "train_for_env_steps": 100000000,
69
  "train_for_seconds": 10000000000,
70
- "save_every_sec": 120,
71
  "keep_checkpoints": 2,
72
  "load_checkpoint_kind": "latest",
73
  "save_milestones_sec": -1,
@@ -76,23 +76,23 @@
76
  "save_best_after": 100000,
77
  "benchmark": false,
78
  "encoder_mlp_layers": [
79
- 512,
80
- 512
81
  ],
82
  "encoder_conv_architecture": "convnet_simple",
83
  "encoder_conv_mlp_layers": [
84
  512
85
  ],
86
- "use_rnn": true,
87
  "rnn_size": 512,
88
  "rnn_type": "gru",
89
  "rnn_num_layers": 1,
90
  "decoder_mlp_layers": [],
91
- "nonlinearity": "elu",
92
- "policy_initialization": "orthogonal",
93
  "policy_init_gain": 1.0,
94
  "actor_critic_share_weights": true,
95
- "adaptive_stddev": true,
96
  "continuous_tanh_scale": 0.0,
97
  "initial_stddev": 1.0,
98
  "use_env_info_cache": false,
@@ -120,17 +120,15 @@
120
  "pbt_target_objective": "true_objective",
121
  "pbt_perturb_min": 1.1,
122
  "pbt_perturb_max": 1.5,
123
- "command_line": "--env assembly-v2 --experiment assembly-v2 --with_wandb True --wandb_user qgallouedec --wandb_project sample_facotry_metaworld --train_for_env_steps 100000000 --reward_scale 0.01",
124
  "cli_args": {
125
  "env": "assembly-v2",
126
  "experiment": "assembly-v2",
127
- "reward_scale": 0.01,
128
- "train_for_env_steps": 100000000,
129
  "with_wandb": true,
130
  "wandb_user": "qgallouedec",
131
  "wandb_project": "sample_facotry_metaworld"
132
  },
133
- "git_hash": "7b1b73793fe4678a4aabf1f5d02745737a790d27",
134
  "git_repo_name": "https://github.com/huggingface/gia",
135
- "wandb_unique_id": "assembly-v2_20230306_142706_299273"
136
  }
 
3
  "algo": "APPO",
4
  "env": "assembly-v2",
5
  "experiment": "assembly-v2",
6
+ "train_dir": "/home/qgallouedec/data/gia/train_dir",
7
+ "restart_behavior": "restart",
8
+ "device": "cpu",
9
  "seed": null,
10
  "num_policies": 1,
11
+ "async_rl": false,
12
  "serial_mode": false,
13
  "batched_sampling": false,
14
  "num_batches_to_accumulate": 2,
15
  "worker_num_splits": 2,
16
  "policy_workers_per_policy": 1,
17
  "max_policy_lag": 1000,
18
+ "num_workers": 8,
19
+ "num_envs_per_worker": 8,
20
  "batch_size": 1024,
21
+ "num_batches_per_epoch": 4,
22
+ "num_epochs": 2,
23
+ "rollout": 64,
24
+ "recurrence": 1,
25
  "shuffle_minibatches": false,
26
  "gamma": 0.99,
27
+ "reward_scale": 0.1,
28
  "reward_clip": 1000.0,
29
+ "value_bootstrap": true,
30
  "normalize_returns": true,
31
+ "exploration_loss_coeff": 0.0,
32
+ "value_loss_coeff": 1.3,
33
+ "kl_loss_coeff": 0.1,
34
  "exploration_loss": "entropy",
35
  "gae_lambda": 0.95,
36
+ "ppo_clip_ratio": 0.2,
37
  "ppo_clip_value": 1.0,
38
  "with_vtrace": false,
39
  "vtrace_rho": 1.0,
 
42
  "adam_eps": 1e-06,
43
  "adam_beta1": 0.9,
44
  "adam_beta2": 0.999,
45
+ "max_grad_norm": 3.5,
46
+ "learning_rate": 0.00295,
47
+ "lr_schedule": "linear_decay",
48
  "lr_schedule_kl_threshold": 0.008,
49
  "lr_adaptive_min": 1e-06,
50
  "lr_adaptive_max": 0.01,
 
59
  "force_envs_single_thread": false,
60
  "default_niceness": 0,
61
  "log_to_file": true,
62
+ "experiment_summaries_interval": 3,
63
  "flush_summaries_interval": 30,
64
  "stats_avg": 100,
65
  "summaries_use_frameskip": true,
 
67
  "heartbeat_reporting_interval": 180,
68
  "train_for_env_steps": 100000000,
69
  "train_for_seconds": 10000000000,
70
+ "save_every_sec": 15,
71
  "keep_checkpoints": 2,
72
  "load_checkpoint_kind": "latest",
73
  "save_milestones_sec": -1,
 
76
  "save_best_after": 100000,
77
  "benchmark": false,
78
  "encoder_mlp_layers": [
79
+ 64,
80
+ 64
81
  ],
82
  "encoder_conv_architecture": "convnet_simple",
83
  "encoder_conv_mlp_layers": [
84
  512
85
  ],
86
+ "use_rnn": false,
87
  "rnn_size": 512,
88
  "rnn_type": "gru",
89
  "rnn_num_layers": 1,
90
  "decoder_mlp_layers": [],
91
+ "nonlinearity": "tanh",
92
+ "policy_initialization": "torch_default",
93
  "policy_init_gain": 1.0,
94
  "actor_critic_share_weights": true,
95
+ "adaptive_stddev": false,
96
  "continuous_tanh_scale": 0.0,
97
  "initial_stddev": 1.0,
98
  "use_env_info_cache": false,
 
120
  "pbt_target_objective": "true_objective",
121
  "pbt_perturb_min": 1.1,
122
  "pbt_perturb_max": 1.5,
123
+ "command_line": "--env assembly-v2 --experiment assembly-v2 --with_wandb True --wandb_user qgallouedec --wandb_project sample_facotry_metaworld",
124
  "cli_args": {
125
  "env": "assembly-v2",
126
  "experiment": "assembly-v2",
 
 
127
  "with_wandb": true,
128
  "wandb_user": "qgallouedec",
129
  "wandb_project": "sample_facotry_metaworld"
130
  },
131
+ "git_hash": "aed90d9e164e44f91bab1d70c09fac4dee064031",
132
  "git_repo_name": "https://github.com/huggingface/gia",
133
+ "wandb_unique_id": "assembly-v2_20230707_145309_221542"
134
  }
git.diff CHANGED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ diff --git a/gia/eval/evaluator.py b/gia/eval/evaluator.py
2
+ index 91b645c..196a601 100644
3
+ --- a/gia/eval/evaluator.py
4
+ +++ b/gia/eval/evaluator.py
5
+ @@ -2,14 +2,16 @@ import torch
6
+
7
+ from gia.config.arguments import Arguments
8
+ from gia.model import GiaModel
9
+ +from typing import Optional
10
+
11
+
12
+ class Evaluator:
13
+ - def __init__(self, args: Arguments, task: str) -> None:
14
+ + def __init__(self, args: Arguments, task: str, mean_random: Optional[float] = None) -> None:
15
+ self.args = args
16
+ self.task = task
17
+ + self.mean_random = mean_random
18
+
19
+ - @torch.no_grad()
20
+ + @torch.inference_mode()
21
+ def evaluate(self, model: GiaModel) -> float:
22
+ return self._evaluate(model)
23
+
24
+ diff --git a/gia/eval/mappings.py b/gia/eval/mappings.py
25
+ deleted file mode 100644
26
+ index e7ba9d3..0000000
27
+ --- a/gia/eval/mappings.py
28
+ +++ /dev/null
29
+ @@ -1,11 +0,0 @@
30
+ -TASK_TO_ENV_MAPPING = {
31
+ - "mujoco-ant": "Ant-v4",
32
+ - "mujoco-halfcheetah": "HalfCheetah-v4",
33
+ - "mujoco-hopper": "Hopper-v4",
34
+ - "mujoco-doublependulum": "InvertedDoublePendulum-v4",
35
+ - "mujoco-pendulum": "InvertedPendulum-v4",
36
+ - "mujoco-reacher": "Reacher-v4",
37
+ - "mujoco-swimmer": "Swimmer-v4",
38
+ - "mujoco-walker": "Walker2d-v4",
39
+ - # Atari etc...
40
+ -}
41
+ diff --git a/gia/eval/rl/__init__.py b/gia/eval/rl/__init__.py
42
+ index 36d890b..85a788d 100644
43
+ --- a/gia/eval/rl/__init__.py
44
+ +++ b/gia/eval/rl/__init__.py
45
+ @@ -1,4 +1,4 @@
46
+ from .gym_evaluator import GymEvaluator
47
+ +from .envs.core import make
48
+
49
+ -
50
+ -__all__ = ["GymEvaluator"]
51
+ +__all__ = ["GymEvaluator", "make"]
52
+ diff --git a/gia/eval/rl/gia_agent.py b/gia/eval/rl/gia_agent.py
53
+ index f0d0b9b..04b9637 100644
54
+ --- a/gia/eval/rl/gia_agent.py
55
+ +++ b/gia/eval/rl/gia_agent.py
56
+ @@ -75,6 +75,11 @@ class GiaAgent:
57
+ ) -> Tuple[Tuple[Tensor, Tensor], ...]:
58
+ return tuple((k[:, :, -self._max_length :], v[:, :, -self._max_length :]) for (k, v) in past_key_values)
59
+
60
+ + def set_model(self, model: GiaModel) -> None:
61
+ + self.model = model
62
+ + self.device = next(model.parameters()).device
63
+ + self._max_length = self.model.config.max_position_embeddings
64
+ +
65
+ def reset(self, num_envs: int = 1) -> None:
66
+ if self.prompter is not None:
67
+ prompts = self.prompter.generate_prompts(num_envs)
68
+ diff --git a/gia/eval/rl/gym_evaluator.py b/gia/eval/rl/gym_evaluator.py
69
+ index f8531ee..754c05d 100644
70
+ --- a/gia/eval/rl/gym_evaluator.py
71
+ +++ b/gia/eval/rl/gym_evaluator.py
72
+ @@ -1,7 +1,7 @@
73
+ import gym
74
+ from gym.vector.vector_env import VectorEnv
75
+
76
+ -from gia.eval.mappings import TASK_TO_ENV_MAPPING
77
+ +# from gia.eval.rl.envs.mappings import TASK_TO_ENV_MAPPING
78
+ from gia.eval.rl.rl_evaluator import RLEvaluator
79
+
80
+
81
+ diff --git a/gia/eval/rl/rl_evaluator.py b/gia/eval/rl/rl_evaluator.py
82
+ index c5cc423..ca0c7da 100644
83
+ --- a/gia/eval/rl/rl_evaluator.py
84
+ +++ b/gia/eval/rl/rl_evaluator.py
85
+ @@ -8,6 +8,9 @@ from gia.eval.rl.gia_agent import GiaAgent
86
+
87
+
88
+ class RLEvaluator(Evaluator):
89
+ + def __init__(self, args, task):
90
+ + super().__init__(args, task)
91
+ + self.agent = GiaAgent()
92
+ def _build_env(self) -> VectorEnv: # TODO: maybe just a gym.Env ?
93
+ raise NotImplementedError
94
+
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30ec8024a7d641ac74f8937619fc2cca868374e55bd04a361742f47f5ffa920a
3
- size 3262990
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17026132b0314e76610a1c13739735dfc1dc3f5fd690048a892acb490f7ac397
3
+ size 2748151
sf_log.txt CHANGED
The diff for this file is too large to render. See raw diff