pushing model

Files changed (6)

README.md CHANGED Viewed

@@ -16,7 +16,7 @@ model-index:
       type: Pong-v4
     metrics:
     - type: mean_reward
-      value: 12.60 +/- 3.77
       name: mean_reward
       verified: false
 ---
@@ -46,7 +46,7 @@ curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/dqpn_atari.py
 curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/pyproject.toml
 curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/poetry.lock
 poetry install --all-extras
-python dqpn_atari.py --end-policy-f=2000 --env-id=Pong-v4 --evaluation-fraction=1 --exp-name=sweep --hf-entity=pfunk --policy-tau=0.1 --save-model=true --seed=1 --start-policy-f=50000 --target-tau=1 --total-timesteps=25000000 --track=true --upload-model=true --wandb-entity=pfunk --wandb-project-name=dqpn
 ```
 # Hyperparameters
@@ -56,7 +56,7 @@ python dqpn_atari.py --end-policy-f=2000 --env-id=Pong-v4 --evaluation-fraction=
  'capture_video': False,
  'cuda': True,
  'end_e': 0.01,
- 'end_policy_f': 2000,
  'env_id': 'Pong-v4',
  'evaluation_fraction': 1.0,
  'exp_name': 'sweep',
@@ -65,11 +65,11 @@ python dqpn_atari.py --end-policy-f=2000 --env-id=Pong-v4 --evaluation-fraction=
  'hf_entity': 'pfunk',
  'learning_rate': 0.0001,
  'learning_starts': 80000,
- 'policy_tau': 0.1,
  'save_model': True,
  'seed': 1,
  'start_e': 1,
- 'start_policy_f': 50000,
  'target_network_frequency': 1000,
  'target_tau': 1.0,
  'torch_deterministic': True,

       type: Pong-v4
     metrics:
     - type: mean_reward
+      value: 9.60 +/- 3.32
       name: mean_reward
       verified: false
 ---
 curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/pyproject.toml
 curl -OL https://huggingface.co/pfunk/Pong-v4-sweep-seed1/raw/main/poetry.lock
 poetry install --all-extras
+python dqpn_atari.py --end-policy-f=1000 --env-id=Pong-v4 --evaluation-fraction=1 --exp-name=sweep --hf-entity=pfunk --policy-tau=1 --save-model=true --seed=1 --start-policy-f=1000 --target-tau=1 --total-timesteps=25000000 --track=true --upload-model=true --wandb-entity=pfunk --wandb-project-name=dqpn
 ```
 # Hyperparameters
  'capture_video': False,
  'cuda': True,
  'end_e': 0.01,
+ 'end_policy_f': 1000,
  'env_id': 'Pong-v4',
  'evaluation_fraction': 1.0,
  'exp_name': 'sweep',
  'hf_entity': 'pfunk',
  'learning_rate': 0.0001,
  'learning_starts': 80000,
+ 'policy_tau': 1.0,
  'save_model': True,
  'seed': 1,
  'start_e': 1,
+ 'start_policy_f': 1000,
  'target_network_frequency': 1000,
  'target_tau': 1.0,
  'torch_deterministic': True,

events.out.tfevents.1676737042.redi.2946417.0 → events.out.tfevents.1676803801.redi.176333.0 RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:15b3678d2ad38ef6f47e8aa6e9d326a5344197e38d5d82dd296c219ea8782e61
-size 45937938

 version https://git-lfs.github.com/spec/v1
+oid sha256:49ebb6d40f71add7376559ca924e535b85aeb426b35df862efcb70dd59ee90dd
+size 44758975

sweep.cleanrl_model CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2051d6e95bfd512150d6074c5f202d5d4268425d4f05227557ad74f3fa9227e0
 size 6751791

 version https://git-lfs.github.com/spec/v1
+oid sha256:e28a795ad9dd7ef4e9ecebe33f3028e6b39392c48ab572004e865d389e3ee451
 size 6751791

videos/{Pong-v4__sweep__1__1676737039-eval → Pong-v4__sweep__1__1676803797-eval}/rl-video-episode-0.mp4 RENAMED Viewed

File without changes

videos/{Pong-v4__sweep__1__1676737039-eval → Pong-v4__sweep__1__1676803797-eval}/rl-video-episode-1.mp4 RENAMED Viewed

File without changes

videos/{Pong-v4__sweep__1__1676737039-eval → Pong-v4__sweep__1__1676803797-eval}/rl-video-episode-8.mp4 RENAMED Viewed

File without changes