mmorales34 committed on
Commit
b783ef5
·
1 Parent(s): fbbe24c

pushing model

Browse files
README.md CHANGED
@@ -16,7 +16,7 @@ model-index:
16
  type: Pong-v4
17
  metrics:
18
  - type: mean_reward
19
- value: 12.60 +/- 3.77
20
  name: mean_reward
21
  verified: false
22
  ---
@@ -46,7 +46,7 @@ curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/dqpn_atari.py
46
  curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/pyproject.toml
47
  curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/poetry.lock
48
  poetry install --all-extras
49
- python dqpn_atari.py --end-policy-f=2000 --env-id=Pong-v4 --evaluation-fraction=1 --exp-name=sweep --hf-entity=pfunk --policy-tau=0.1 --save-model=true --seed=1 --start-policy-f=50000 --target-tau=1 --total-timesteps=25000000 --track=true --upload-model=true --wandb-entity=pfunk --wandb-project-name=dqpn
50
  ```
51
 
52
  # Hyperparameters
@@ -56,7 +56,7 @@ python dqpn_atari.py --end-policy-f=2000 --env-id=Pong-v4 --evaluation-fraction=
56
  'capture_video': False,
57
  'cuda': True,
58
  'end_e': 0.01,
59
- 'end_policy_f': 2000,
60
  'env_id': 'Pong-v4',
61
  'evaluation_fraction': 1.0,
62
  'exp_name': 'sweep',
@@ -65,11 +65,11 @@ python dqpn_atari.py --end-policy-f=2000 --env-id=Pong-v4 --evaluation-fraction=
65
  'hf_entity': 'pfunk',
66
  'learning_rate': 0.0001,
67
  'learning_starts': 80000,
68
- 'policy_tau': 0.1,
69
  'save_model': True,
70
  'seed': 1,
71
  'start_e': 1,
72
- 'start_policy_f': 50000,
73
  'target_network_frequency': 1000,
74
  'target_tau': 1.0,
75
  'torch_deterministic': True,
 
16
  type: Pong-v4
17
  metrics:
18
  - type: mean_reward
19
+ value: 9.60 +/- 3.32
20
  name: mean_reward
21
  verified: false
22
  ---
 
46
  curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/pyproject.toml
47
  curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/poetry.lock
48
  poetry install --all-extras
49
+ python dqpn_atari.py --end-policy-f=1000 --env-id=Pong-v4 --evaluation-fraction=1 --exp-name=sweep --hf-entity=pfunk --policy-tau=1 --save-model=true --seed=1 --start-policy-f=1000 --target-tau=1 --total-timesteps=25000000 --track=true --upload-model=true --wandb-entity=pfunk --wandb-project-name=dqpn
50
  ```
51
 
52
  # Hyperparameters
 
56
  'capture_video': False,
57
  'cuda': True,
58
  'end_e': 0.01,
59
+ 'end_policy_f': 1000,
60
  'env_id': 'Pong-v4',
61
  'evaluation_fraction': 1.0,
62
  'exp_name': 'sweep',
 
65
  'hf_entity': 'pfunk',
66
  'learning_rate': 0.0001,
67
  'learning_starts': 80000,
68
+ 'policy_tau': 1.0,
69
  'save_model': True,
70
  'seed': 1,
71
  'start_e': 1,
72
+ 'start_policy_f': 1000,
73
  'target_network_frequency': 1000,
74
  'target_tau': 1.0,
75
  'torch_deterministic': True,
events.out.tfevents.1676737042.redi.2946417.0 → events.out.tfevents.1676803801.redi.176333.0 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:15b3678d2ad38ef6f47e8aa6e9d326a5344197e38d5d82dd296c219ea8782e61
3
- size 45937938
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49ebb6d40f71add7376559ca924e535b85aeb426b35df862efcb70dd59ee90dd
3
+ size 44758975
sweep.cleanrl_model CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2051d6e95bfd512150d6074c5f202d5d4268425d4f05227557ad74f3fa9227e0
3
  size 6751791
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e28a795ad9dd7ef4e9ecebe33f3028e6b39392c48ab572004e865d389e3ee451
3
  size 6751791
videos/{Pong-v4__sweep__1__1676737039-eval → Pong-v4__sweep__1__1676803797-eval}/rl-video-episode-0.mp4 RENAMED
File without changes
videos/{Pong-v4__sweep__1__1676737039-eval → Pong-v4__sweep__1__1676803797-eval}/rl-video-episode-1.mp4 RENAMED
File without changes
videos/{Pong-v4__sweep__1__1676737039-eval → Pong-v4__sweep__1__1676803797-eval}/rl-video-episode-8.mp4 RENAMED
File without changes