Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

.summary/0/events.out.tfevents.1688756054.qgallouedec-MS-7C84 +3 -0
README.md +1 -1
checkpoint_p0/best_000002424_1241088_reward_382.329.pth +3 -0
checkpoint_p0/checkpoint_000019384_9924608.pth +3 -0
checkpoint_p0/checkpoint_000019544_10006528.pth +1 -1
config.json +2 -2
git.diff +20 -613
replay.mp4 +2 -2
sf_log.txt +0 -0

.summary/0/events.out.tfevents.1688756054.qgallouedec-MS-7C84 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f955a333d2694bd73fce39d6adf1e859ef640cffa250ec50cd7eea5692842928
+size 633300

README.md CHANGED Viewed

@@ -15,7 +15,7 @@ model-index:
       type: button-press-topdown-v2
     metrics:
     - type: mean_reward
-      value: 3890.30 +/- 51.21
       name: mean_reward
       verified: false
 ---

       type: button-press-topdown-v2
     metrics:
     - type: mean_reward
+      value: 387.36 +/- 89.98
       name: mean_reward
       verified: false
 ---

checkpoint_p0/best_000002424_1241088_reward_382.329.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e9f733094387689e99e7156c1e8c680629de6768ef86bf0fe73a8aff9102ddfb
+size 98239

checkpoint_p0/checkpoint_000019384_9924608.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:00fd37a13cfb4d079002c9883ada30945bbab1ace439db2deb6afe2a92b2281c
+size 98567

checkpoint_p0/checkpoint_000019544_10006528.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e61ff6325962e55bb0c272781ad9a174cce159c6c121a99764b9c39289af80be
 size 98567

 version https://git-lfs.github.com/spec/v1
+oid sha256:0fe4f65573b839d032f4a7d6f0a0b9d87baf4f0c43ffcb0d3527c79e9bc11fa7
 size 98567

config.json CHANGED Viewed

@@ -128,7 +128,7 @@
     "wandb_user": "qgallouedec",
     "wandb_project": "sample_facotry_metaworld"
   },
-  "git_hash": "aed90d9e164e44f91bab1d70c09fac4dee064031",
   "git_repo_name": "https://github.com/huggingface/gia",
-  "wandb_unique_id": "button-press-topdown-v2_20230707_171420_473912"
 }

     "wandb_user": "qgallouedec",
     "wandb_project": "sample_facotry_metaworld"
   },
+  "git_hash": "66db1b7a27030aa65fcfa2d6e3503089a7cff207",
   "git_repo_name": "https://github.com/huggingface/gia",
+  "wandb_unique_id": "button-press-topdown-v2_20230707_205412_434545"
 }

git.diff CHANGED Viewed

@@ -1,589 +1,3 @@
-diff --git a/data/envs/download_expert_scores.py b/data/envs/download_expert_scores.py
-index 4c3f06b..88b6c45 100644
---- a/data/envs/download_expert_scores.py
-+++ b/data/envs/download_expert_scores.py
-@@ -12,162 +12,162 @@ from tqdm import tqdm
- ENV_NAMES = [
--    "atari-alien",
--    "atari-amidar",
--    "atari-assault",
--    "atari-asterix",
--    "atari-asteroids",
--    "atari-atlantis",
--    "atari-bankheist",
--    "atari-battlezone",
--    "atari-beamrider",
--    "atari-berzerk",
--    "atari-bowling",
--    "atari-boxing",
--    "atari-breakout",
--    "atari-centipede",
--    "atari-choppercommand",
--    "atari-crazyclimber",
--    "atari-defender",
--    "atari-demonattack",
--    "atari-doubledunk",
--    "atari-enduro",
--    "atari-fishingderby",
--    "atari-freeway",
--    "atari-frostbite",
--    "atari-gopher",
--    "atari-gravitar",
--    "atari-hero",
--    "atari-icehockey",
--    "atari-jamesbond",
--    "atari-kangaroo",
--    "atari-krull",
--    "atari-kungfumaster",
--    "atari-montezumarevenge",
--    "atari-mspacman",
--    "atari-namethisgame",
--    "atari-phoenix",
--    "atari-pitfall",
--    "atari-pong",
--    "atari-privateeye",
--    "atari-qbert",
--    "atari-riverraid",
--    "atari-roadrunner",
--    "atari-robotank",
--    "atari-seaquest",
--    "atari-skiing",
--    "atari-solaris",
--    "atari-spaceinvaders",
--    "atari-stargunner",
--    # "atari-surround", # Not in the dataset
--    "atari-tennis",
--    "atari-timepilot",
--    "atari-tutankham",
--    "atari-upndown",
--    "atari-venture",
--    "atari-videopinball",
--    "atari-wizardofwor",
--    "atari-yarsrevenge",
--    "atari-zaxxon",
--    "babyai-action-obj-door",
--    "babyai-blocked-unlock-pickup",
--    "babyai-boss-level-no-unlock",
--    "babyai-boss-level",
--    "babyai-find-obj-s5",
--    "babyai-go-to-door",
--    # "babyai-go-to-imp-unlock",  # Not in the dataset
--    "babyai-go-to-local",
--    "babyai-go-to-obj-door",
--    "babyai-go-to-obj",
--    "babyai-go-to-red-ball-grey",
--    "babyai-go-to-red-ball-no-dists",
--    "babyai-go-to-red-ball",
--    "babyai-go-to-red-blue-ball",
--    "babyai-go-to-seq",
--    "babyai-go-to",
--    "babyai-key-corridor",
--    "babyai-key-in-box",
--    "babyai-mini-boss-level",
--    "babyai-move-two-across",
--    "babyai-one-room-s8",
--    "babyai-open-door",
--    "babyai-open-doors-order",
--    "babyai-open-red-door",
--    "babyai-open-two-doors",
--    "babyai-open",
--    "babyai-pickup-above",
--    "babyai-pickup-dist",
--    "babyai-pickup-loc",
--    "babyai-pickup",
--    "babyai-synth-loc",
--    "babyai-synth-seq",
--    "babyai-synth",
--    "babyai-unblock-pickup",
--    "babyai-unlock-local",
--    "babyai-unlock-pickup",
--    # "babyai-unlock-to-unlock",  # Not in the dataset
--    # "babyai-unlock",  # Not in the dataset
-+    # "atari-alien",
-+    # "atari-amidar",
-+    # "atari-assault",
-+    # "atari-asterix",
-+    # "atari-asteroids",
-+    # "atari-atlantis",
-+    # "atari-bankheist",
-+    # "atari-battlezone",
-+    # "atari-beamrider",
-+    # "atari-berzerk",
-+    # "atari-bowling",
-+    # "atari-boxing",
-+    # "atari-breakout",
-+    # "atari-centipede",
-+    # "atari-choppercommand",
-+    # "atari-crazyclimber",
-+    # "atari-defender",
-+    # "atari-demonattack",
-+    # "atari-doubledunk",
-+    # "atari-enduro",
-+    # "atari-fishingderby",
-+    # "atari-freeway",
-+    # "atari-frostbite",
-+    # "atari-gopher",
-+    # "atari-gravitar",
-+    # "atari-hero",
-+    # "atari-icehockey",
-+    # "atari-jamesbond",
-+    # "atari-kangaroo",
-+    # "atari-krull",
-+    # "atari-kungfumaster",
-+    # "atari-montezumarevenge",
-+    # "atari-mspacman",
-+    # "atari-namethisgame",
-+    # "atari-phoenix",
-+    # "atari-pitfall",
-+    # "atari-pong",
-+    # "atari-privateeye",
-+    # "atari-qbert",
-+    # "atari-riverraid",
-+    # "atari-roadrunner",
-+    # "atari-robotank",
-+    # "atari-seaquest",
-+    # "atari-skiing",
-+    # "atari-solaris",
-+    # "atari-spaceinvaders",
-+    # "atari-stargunner",
-+    # # "atari-surround", # Not in the dataset
-+    # "atari-tennis",
-+    # "atari-timepilot",
-+    # "atari-tutankham",
-+    # "atari-upndown",
-+    # "atari-venture",
-+    # "atari-videopinball",
-+    # "atari-wizardofwor",
-+    # "atari-yarsrevenge",
-+    # "atari-zaxxon",
-+    # "babyai-action-obj-door",
-+    # "babyai-blocked-unlock-pickup",
-+    # "babyai-boss-level-no-unlock",
-+    # "babyai-boss-level",
-+    # "babyai-find-obj-s5",
-+    # "babyai-go-to-door",
-+    # # "babyai-go-to-imp-unlock",  # Not in the dataset
-+    # "babyai-go-to-local",
-+    # "babyai-go-to-obj-door",
-+    # "babyai-go-to-obj",
-+    # "babyai-go-to-red-ball-grey",
-+    # "babyai-go-to-red-ball-no-dists",
-+    # "babyai-go-to-red-ball",
-+    # "babyai-go-to-red-blue-ball",
-+    # "babyai-go-to-seq",
-+    # "babyai-go-to",
-+    # "babyai-key-corridor",
-+    # "babyai-key-in-box",
-+    # "babyai-mini-boss-level",
-+    # "babyai-move-two-across",
-+    # "babyai-one-room-s8",
-+    # "babyai-open-door",
-+    # "babyai-open-doors-order",
-+    # "babyai-open-red-door",
-+    # "babyai-open-two-doors",
-+    # "babyai-open",
-+    # "babyai-pickup-above",
-+    # "babyai-pickup-dist",
-+    # "babyai-pickup-loc",
-+    # "babyai-pickup",
-+    # "babyai-synth-loc",
-+    # "babyai-synth-seq",
-+    # "babyai-synth",
-+    # "babyai-unblock-pickup",
-+    # "babyai-unlock-local",
-+    # "babyai-unlock-pickup",
-+    # # "babyai-unlock-to-unlock",  # Not in the dataset
-+    # # "babyai-unlock",  # Not in the dataset
-     "metaworld-assembly",
--    "metaworld-basketball",
--    "metaworld-bin-picking",
--    "metaworld-box-close",
--    "metaworld-button-press-topdown-wall",
--    "metaworld-button-press-topdown",
--    "metaworld-button-press-wall",
--    "metaworld-button-press",
--    "metaworld-coffee-button",
--    "metaworld-coffee-pull",
--    "metaworld-coffee-push",
--    "metaworld-dial-turn",
--    "metaworld-disassemble",
--    "metaworld-door-close",
--    "metaworld-door-lock",
--    "metaworld-door-open",
--    "metaworld-door-unlock",
--    "metaworld-drawer-close",
--    "metaworld-drawer-open",
--    "metaworld-faucet-close",
--    "metaworld-faucet-open",
--    "metaworld-hammer",
--    "metaworld-hand-insert",
--    "metaworld-handle-press-side",
--    "metaworld-handle-press",
--    "metaworld-handle-pull-side",
--    "metaworld-handle-pull",
--    "metaworld-lever-pull",
--    "metaworld-peg-insert-side",
--    "metaworld-peg-unplug-side",
--    "metaworld-pick-out-of-hole",
--    "metaworld-pick-place-wall",
--    "metaworld-pick-place",
--    "metaworld-plate-slide-back-side",
--    "metaworld-plate-slide-back",
--    "metaworld-plate-slide-side",
--    "metaworld-plate-slide",
--    "metaworld-push-back",
--    "metaworld-push-wall",
--    "metaworld-push",
--    "metaworld-reach-wall",
--    "metaworld-reach",
--    "metaworld-shelf-place",
--    "metaworld-soccer",
--    "metaworld-stick-pull",
--    "metaworld-stick-push",
--    "metaworld-sweep-into",
--    "metaworld-sweep",
--    "metaworld-window-close",
--    "metaworld-window-open",
--    "mujoco-ant",
--    "mujoco-doublependulum",
--    "mujoco-halfcheetah",
--    "mujoco-hopper",
-+    # "metaworld-basketball",
-+    # "metaworld-bin-picking",
-+    # "metaworld-box-close",
-+    # "metaworld-button-press-topdown-wall",
-+    # "metaworld-button-press-topdown",
-+    # "metaworld-button-press-wall",
-+    # "metaworld-button-press",
-+    # "metaworld-coffee-button",
-+    # "metaworld-coffee-pull",
-+    # "metaworld-coffee-push",
-+    # "metaworld-dial-turn",
-+    # "metaworld-disassemble",
-+    # "metaworld-door-close",
-+    # "metaworld-door-lock",
-+    # "metaworld-door-open",
-+    # "metaworld-door-unlock",
-+    # "metaworld-drawer-close",
-+    # "metaworld-drawer-open",
-+    # "metaworld-faucet-close",
-+    # "metaworld-faucet-open",
-+    # "metaworld-hammer",
-+    # "metaworld-hand-insert",
-+    # "metaworld-handle-press-side",
-+    # "metaworld-handle-press",
-+    # "metaworld-handle-pull-side",
-+    # "metaworld-handle-pull",
-+    # "metaworld-lever-pull",
-+    # "metaworld-peg-insert-side",
-+    # "metaworld-peg-unplug-side",
-+    # "metaworld-pick-out-of-hole",
-+    # "metaworld-pick-place-wall",
-+    # "metaworld-pick-place",
-+    # "metaworld-plate-slide-back-side",
-+    # "metaworld-plate-slide-back",
-+    # "metaworld-plate-slide-side",
-+    # "metaworld-plate-slide",
-+    # "metaworld-push-back",
-+    # "metaworld-push-wall",
-+    # "metaworld-push",
-+    # "metaworld-reach-wall",
-+    # "metaworld-reach",
-+    # "metaworld-shelf-place",
-+    # "metaworld-soccer",
-+    # "metaworld-stick-pull",
-+    # "metaworld-stick-push",
-+    # "metaworld-sweep-into",
-+    # "metaworld-sweep",
-+    # "metaworld-window-close",
-+    # "metaworld-window-open",
-+    # "mujoco-ant",
-+    # "mujoco-doublependulum",
-+    # "mujoco-halfcheetah",
-+    # "mujoco-hopper",
-     # "mujoco-humanoid",  # Not in the dataset
--    "mujoco-pendulum",
--    # "mujoco-pusher",  # Not in the dataset
--    "mujoco-reacher",
-+    # "mujoco-pendulum",
-+    # # "mujoco-pusher",  # Not in the dataset
-+    # "mujoco-reacher",
-     # "mujoco-standup",  # Not in the dataset
--    "mujoco-swimmer",
--    "mujoco-walker",
-+    # "mujoco-swimmer",
-+    # "mujoco-walker",
- ]
-diff --git a/data/envs/metaworld/generate_dataset.py b/data/envs/metaworld/generate_dataset.py
-index e21b237..c2b1907 100644
---- a/data/envs/metaworld/generate_dataset.py
-+++ b/data/envs/metaworld/generate_dataset.py
-@@ -142,7 +142,8 @@ def create_dataset(cfg: Config, dataset_size: int = 100_000, split: str = "train
-             # Actions shape should be [num_agents, num_actions] even if it's [1, 1]
-             actions = preprocess_actions(env_info, actions)
--
-+            # Clamp actions to be in the range of the action space
-+            actions = np.clip(actions, env.action_space.low, env.action_space.high)
-             rnn_states = policy_outputs["new_rnn_states"]
-             dataset["continuous_observations"][-1].append(observations["obs"].cpu().numpy()[0])
-             dataset["continuous_actions"][-1].append(actions[0])
-diff --git a/data/envs/metaworld/generate_dataset_all.sh b/data/envs/metaworld/generate_dataset_all.sh
-index cfdae2f..5db8c4b 100755
---- a/data/envs/metaworld/generate_dataset_all.sh
-+++ b/data/envs/metaworld/generate_dataset_all.sh
-@@ -2,58 +2,58 @@
- ENVS=(
-     assembly
--    basketball
--    bin-picking
--    box-close
--    button-press-topdown
--    button-press-topdown-wall
--    button-press
--    button-press-wall
--    coffee-button
--    coffee-pull
--    coffee-push
--    dial-turn
--    disassemble
--    door-close
--    door-lock
--    door-open
--    door-unlock
--    drawer-close
--    drawer-open
--    faucet-close
--    faucet-open
--    hammer
--    hand-insert
--    handle-press-side
--    handle-press
--    handle-pull-side
--    handle-pull
--    lever-pull
--    peg-insert-side
--    peg-unplug-side
--    pick-out-of-hole
--    pick-place
--    pick-place-wall
--    plate-slide-back-side
--    plate-slide-back
--    plate-slide-side
--    plate-slide
--    push-back
--    push
--    push-wall
--    reach
--    reach-wall
--    shelf-place
--    soccer
--    stick-pull
--    stick-push
--    sweep-into
--    sweep
--    window-close
--    window-open
-+    # basketball
-+    # bin-picking
-+    # box-close
-+    # button-press-topdown
-+    # button-press-topdown-wall
-+    # button-press
-+    # button-press-wall
-+    # coffee-button
-+    # coffee-pull
-+    # coffee-push
-+    # dial-turn
-+    # disassemble
-+    # door-close
-+    # door-lock
-+    # door-open
-+    # door-unlock
-+    # drawer-close
-+    # drawer-open
-+    # faucet-close
-+    # faucet-open
-+    # hammer
-+    # hand-insert
-+    # handle-press-side
-+    # handle-press
-+    # handle-pull-side
-+    # handle-pull
-+    # lever-pull
-+    # peg-insert-side
-+    # peg-unplug-side
-+    # pick-out-of-hole
-+    # pick-place
-+    # pick-place-wall
-+    # plate-slide-back-side
-+    # plate-slide-back
-+    # plate-slide-side
-+    # plate-slide
-+    # push-back
-+    # push
-+    # push-wall
-+    # reach
-+    # reach-wall
-+    # shelf-place
-+    # soccer
-+    # stick-pull
-+    # stick-push
-+    # sweep-into
-+    # sweep
-+    # window-close
-+    # window-open
- )
- for ENV in "${ENVS[@]}"; do
--    python -m sample_factory.huggingface.load_from_hub -r qgallouedec/sample-factory-$ENV-v2
--    python generate_dataset.py --env $ENV-v2 --experiment sample-factory-$ENV-v2 --train_dir=./train_dir
-+    python -m sample_factory.huggingface.load_from_hub -r qgallouedec/$ENV-v2
-+    python generate_dataset.py --env $ENV-v2 --experiment $ENV-v2 --train_dir=./train_dir
- done
-diff --git a/data/envs/metaworld/push_all.sh b/data/envs/metaworld/push_all.sh
-index 9d71467..5b05c6d 100755
---- a/data/envs/metaworld/push_all.sh
-+++ b/data/envs/metaworld/push_all.sh
-@@ -2,57 +2,57 @@
- ENVS=(
-     assembly
--    basketball
--    bin-picking
--    box-close
--    button-press-topdown
--    button-press-topdown-wall
--    button-press
--    button-press-wall
--    coffee-button
--    coffee-pull
--    coffee-push
--    dial-turn
--    disassemble
--    door-close
--    door-lock
--    door-open
--    door-unlock
--    drawer-close
--    drawer-open
--    faucet-close
--    faucet-open
--    hammer
--    hand-insert
--    handle-press-side
--    handle-press
--    handle-pull-side
--    handle-pull
--    lever-pull
--    peg-insert-side
--    peg-unplug-side
--    pick-out-of-hole
--    pick-place
--    pick-place-wall
--    plate-slide-back-side
--    plate-slide-back
--    plate-slide-side
--    plate-slide
--    push-back
--    push
--    push-wall
--    reach
--    reach-wall
--    shelf-place
--    soccer
--    stick-pull
--    stick-push
--    sweep-into
--    sweep
--    window-close
--    window-open
-+    # basketball
-+    # bin-picking
-+    # box-close
-+    # button-press-topdown
-+    # button-press-topdown-wall
-+    # button-press
-+    # button-press-wall
-+    # coffee-button
-+    # coffee-pull
-+    # coffee-push
-+    # dial-turn
-+    # disassemble
-+    # door-close
-+    # door-lock
-+    # door-open
-+    # door-unlock
-+    # drawer-close
-+    # drawer-open
-+    # faucet-close
-+    # faucet-open
-+    # hammer
-+    # hand-insert
-+    # handle-press-side
-+    # handle-press
-+    # handle-pull-side
-+    # handle-pull
-+    # lever-pull
-+    # peg-insert-side
-+    # peg-unplug-side
-+    # pick-out-of-hole
-+    # pick-place
-+    # pick-place-wall
-+    # plate-slide-back-side
-+    # plate-slide-back
-+    # plate-slide-side
-+    # plate-slide
-+    # push-back
-+    # push
-+    # push-wall
-+    # reach
-+    # reach-wall
-+    # shelf-place
-+    # soccer
-+    # stick-pull
-+    # stick-push
-+    # sweep-into
-+    # sweep
-+    # window-close
-+    # window-open
- )
- for ENV in "${ENVS[@]}"; do
--    python enjoy.py --algo=APPO --env $ENV-v2 --experiment $ENV-v2 --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/sample-factory-$ENV-v2 --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
-+    python enjoy.py --algo=APPO --env $ENV-v2 --experiment $ENV-v2 --train_dir=./train_dir --max_num_episodes=10 --push_to_hub --hf_repository=qgallouedec/$ENV-v2 --save_video --no_render --enjoy_script=enjoy --train_script=train --load_checkpoint_kind best
- done
-diff --git a/data/envs/metaworld/train.py b/data/envs/metaworld/train.py
-index 46dc581..c72f289 100644
---- a/data/envs/metaworld/train.py
-+++ b/data/envs/metaworld/train.py
-@@ -79,7 +79,7 @@ def override_defaults(parser: argparse.ArgumentParser) -> argparse.ArgumentParse
-         num_workers=8,
-         num_envs_per_worker=8,
-         worker_num_splits=2,
--        train_for_env_steps=100_000_000,
-+        train_for_env_steps=10_000_000,
-         encoder_mlp_layers=[64, 64],
-         env_frameskip=1,
-         nonlinearity="tanh",
-diff --git a/data/envs/metaworld/train_all.sh b/data/envs/metaworld/train_all.sh
-index dbf328a..1b3c4c8 100755
---- a/data/envs/metaworld/train_all.sh
-+++ b/data/envs/metaworld/train_all.sh
-@@ -1,7 +1,7 @@
- #!/bin/bash
- ENVS=(
--    assembly
-+    # assembly
-     basketball
-     bin-picking
-     box-close
 diff --git a/gia/eval/callback.py b/gia/eval/callback.py
 index 5c3a080..4b6198f 100644
 --- a/gia/eval/callback.py
@@ -625,38 +39,31 @@ index 91b645c..3e2cae7 100644
      def evaluate(self, model: GiaModel) -> float:
          return self._evaluate(model)
-diff --git a/gia/eval/mappings.py b/gia/eval/mappings.py
-deleted file mode 100644
-index e7ba9d3..0000000
---- a/gia/eval/mappings.py
-+++ /dev/null
-@@ -1,11 +0,0 @@
--TASK_TO_ENV_MAPPING = {
--    "mujoco-ant": "Ant-v4",
--    "mujoco-halfcheetah": "HalfCheetah-v4",
--    "mujoco-hopper": "Hopper-v4",
--    "mujoco-doublependulum": "InvertedDoublePendulum-v4",
--    "mujoco-pendulum": "InvertedPendulum-v4",
--    "mujoco-reacher": "Reacher-v4",
--    "mujoco-swimmer": "Swimmer-v4",
--    "mujoco-walker": "Walker2d-v4",
--    # Atari etc...
--}
-diff --git a/gia/eval/rl/__init__.py b/gia/eval/rl/__init__.py
-index 36d890b..da5e0c7 100644
---- a/gia/eval/rl/__init__.py
-+++ b/gia/eval/rl/__init__.py
-@@ -1,4 +1,5 @@
-+from .envs.core import make
- from .gym_evaluator import GymEvaluator
--__all__ = ["GymEvaluator"]
-+__all__ = ["GymEvaluator", "make"]
 diff --git a/gia/eval/rl/gia_agent.py b/gia/eval/rl/gia_agent.py
-index f0d0b9b..04b9637 100644
 --- a/gia/eval/rl/gia_agent.py
 +++ b/gia/eval/rl/gia_agent.py
@@ -75,6 +75,11 @@ class GiaAgent:
      ) -> Tuple[Tuple[Tensor, Tensor], ...]:
          return tuple((k[:, :, -self._max_length :], v[:, :, -self._max_length :]) for (k, v) in past_key_values)

 diff --git a/gia/eval/callback.py b/gia/eval/callback.py
 index 5c3a080..4b6198f 100644
 --- a/gia/eval/callback.py
      def evaluate(self, model: GiaModel) -> float:
          return self._evaluate(model)
+diff --git a/gia/eval/rl/envs/core.py b/gia/eval/rl/envs/core.py
+index ec5e5b2..eeaf7cb 100644
+--- a/gia/eval/rl/envs/core.py
++++ b/gia/eval/rl/envs/core.py
+@@ -177,7 +177,6 @@ def make(task_name: str, num_envs: int = 1):
+     elif task_name.startswith("metaworld"):
+         import gymnasium as gym
+-        import metaworld
+         env_id = TASK_TO_ENV_MAPPING[task_name]
+         env = gym.vector.SyncVectorEnv([lambda: gym.make(env_id)] * num_envs)
 diff --git a/gia/eval/rl/gia_agent.py b/gia/eval/rl/gia_agent.py
+index f0d0b9b..39dc0d2 100644
 --- a/gia/eval/rl/gia_agent.py
 +++ b/gia/eval/rl/gia_agent.py
+@@ -54,7 +54,7 @@ class GiaAgent:
+         self.action_space = action_space
+         self.deterministic = deterministic
+         self.device = next(model.parameters()).device
+-        self._max_length = self.model.config.max_position_embeddings - 10
++        self._max_length = self.model.config.max_position_embeddings - 100  # TODO: fix this
+         if isinstance(observation_space, spaces.Box):
+             self._observation_key = "continuous_observations"
@@ -75,6 +75,11 @@ class GiaAgent:
      ) -> Tuple[Tuple[Tensor, Tensor], ...]:
          return tuple((k[:, :, -self._max_length :], v[:, :, -self._max_length :]) for (k, v) in past_key_values)

replay.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:31b62ded5953611137a27c496f79eff9f15e9bc547305dfb49bc17e9bb659576
-size 2554290

 version https://git-lfs.github.com/spec/v1
+oid sha256:fc16ed266b6919026009eeadb788164a801028a41cf0a802e75a4c9874866967
+size 790342

sf_log.txt CHANGED Viewed

The diff for this file is too large to render. See raw diff