chenhaojun commited on
Commit
c8144dc
·
verified ·
1 Parent(s): b1c8657

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +2 -0
  2. Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_dial.xml +25 -0
  3. Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_door_lock.xml +26 -0
  4. Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_door_pull.xml +23 -0
  5. Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_faucet.xml +35 -0
  6. Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_laptop.xml +22 -0
  7. Metaworld/zarr_path: data/metaworld_door-close_expert.zarr/data/point_cloud/12.0.0 +3 -0
  8. Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/point_cloud/7.0.0 +3 -0
  9. dexart-release/assets/sapien/102697/cues.txt +5 -0
  10. dexart-release/assets/sapien/102697/meta.json +1 -0
  11. dexart-release/assets/sapien/102697/mobility.urdf +502 -0
  12. dexart-release/assets/sapien/102697/mobility_v2.json +1 -0
  13. dexart-release/assets/sapien/102697/new_objs/102697_link_1_12.mtl +12 -0
  14. dexart-release/assets/sapien/102697/new_objs/102697_link_1_14.mtl +12 -0
  15. dexart-release/assets/sapien/102697/new_objs/102697_link_1_14.obj +109 -0
  16. dexart-release/assets/sapien/102697/new_objs/102697_link_1_5.mtl +12 -0
  17. dexart-release/assets/sapien/102697/new_objs/102697_link_3_0.obj +217 -0
  18. dexart-release/assets/sapien/102697/new_objs/102697_link_3_5.obj +250 -0
  19. dexart-release/assets/sapien/102697/new_objs/102697_link_3_6.obj +169 -0
  20. dexart-release/assets/sapien/102697/new_objs/102697_link_4_11.obj +113 -0
  21. dexart-release/assets/sapien/102697/new_objs/102697_link_4_19.obj +105 -0
  22. dexart-release/assets/sapien/102697/new_objs/102697_link_4_3.mtl +12 -0
  23. dexart-release/assets/sapien/102697/new_objs/102697_link_4_33.mtl +12 -0
  24. dexart-release/assets/sapien/102697/new_objs/102697_link_4_4.mtl +12 -0
  25. dexart-release/assets/sapien/102697/new_objs/102697_link_4_8.mtl +12 -0
  26. dexart-release/assets/sapien/102697/result.json +1 -0
  27. dexart-release/assets/sapien/102697/result_original.json +1 -0
  28. dexart-release/assets/sapien/102697/semantics.txt +5 -0
  29. dexart-release/dexart.egg-info/PKG-INFO +12 -0
  30. dexart-release/dexart.egg-info/SOURCES.txt +51 -0
  31. dexart-release/dexart.egg-info/dependency_links.txt +1 -0
  32. dexart-release/dexart.egg-info/requires.txt +4 -0
  33. dexart-release/dexart.egg-info/top_level.txt +1 -0
  34. dexart-release/examples/gen_demonstration_expert.py +238 -0
  35. dexart-release/examples/train.py +124 -0
  36. dexart-release/examples/utils.py +66 -0
  37. dexart-release/stable_baselines3/a2c/__init__.py +2 -0
  38. dexart-release/stable_baselines3/a2c/a2c.py +207 -0
  39. dexart-release/stable_baselines3/a2c/policies.py +7 -0
  40. dexart-release/stable_baselines3/common/__init__.py +0 -0
  41. dexart-release/stable_baselines3/common/base_class.py +835 -0
  42. dexart-release/stable_baselines3/common/buffers.py +1010 -0
  43. dexart-release/stable_baselines3/common/callbacks.py +602 -0
  44. dexart-release/stable_baselines3/common/distributions.py +699 -0
  45. dexart-release/stable_baselines3/common/env_util.py +104 -0
  46. dexart-release/stable_baselines3/common/evaluation.py +131 -0
  47. dexart-release/stable_baselines3/common/logger.py +644 -0
  48. dexart-release/stable_baselines3/common/monitor.py +239 -0
  49. dexart-release/stable_baselines3/common/noise.py +167 -0
  50. dexart-release/stable_baselines3/common/on_policy_algorithm.py +320 -0
.gitattributes CHANGED
@@ -43,3 +43,5 @@ Metaworld/zarr_path:[[:space:]]data/metaworld_disassemble_expert.zarr/data/point
43
  Metaworld/zarr_path:[[:space:]]/data/haojun/datasets/3d-dp/metaworld_hammer_expert.zarr/data/depth/0.0.0 filter=lfs diff=lfs merge=lfs -text
44
  Metaworld/zarr_path:[[:space:]]data/metaworld_door-lock_expert.zarr/data/point_cloud/16.0.0 filter=lfs diff=lfs merge=lfs -text
45
  Metaworld/zarr_path:[[:space:]]/data/haojun/datasets/3d-dp/metaworld_drawer-open_expert.zarr/data/point_cloud/5.0.0 filter=lfs diff=lfs merge=lfs -text
 
 
 
43
  Metaworld/zarr_path:[[:space:]]/data/haojun/datasets/3d-dp/metaworld_hammer_expert.zarr/data/depth/0.0.0 filter=lfs diff=lfs merge=lfs -text
44
  Metaworld/zarr_path:[[:space:]]data/metaworld_door-lock_expert.zarr/data/point_cloud/16.0.0 filter=lfs diff=lfs merge=lfs -text
45
  Metaworld/zarr_path:[[:space:]]/data/haojun/datasets/3d-dp/metaworld_drawer-open_expert.zarr/data/point_cloud/5.0.0 filter=lfs diff=lfs merge=lfs -text
46
+ Metaworld/zarr_path:[[:space:]]data/metaworld_door-close_expert.zarr/data/point_cloud/12.0.0 filter=lfs diff=lfs merge=lfs -text
47
+ Metaworld/zarr_path:[[:space:]]data/metaworld_door-lock_expert.zarr/data/point_cloud/7.0.0 filter=lfs diff=lfs merge=lfs -text
Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_dial.xml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <mujoco>
2
+ <include file="../scene/basic_scene.xml"/>
3
+ <include file="../objects/assets/dial_dependencies.xml"/>
4
+ <include file="../objects/assets/xyz_base_dependencies.xml"/>
5
+ <worldbody>
6
+ <include file="../objects/assets/xyz_base.xml"/>
7
+
8
+ <body name="dial" pos="0 0.7 0.">
9
+ <include file="../objects/assets/dial.xml"/>
10
+ <site name="dialStart" pos="0 -0.05 0.035" size="0.005" rgba="0 0 1 1"/>
11
+ </body>
12
+
13
+ <site name="goal" pos="0. 0.74 0.07" size="0.02"
14
+ rgba=".8 0 0 1"/>
15
+
16
+ --> </worldbody>
17
+
18
+ <actuator>
19
+ <position ctrllimited="true" ctrlrange="-1 1" joint="r_close" kp="400" user="1"/>
20
+ <position ctrllimited="true" ctrlrange="-1 1" joint="l_close" kp="400" user="1"/>
21
+ </actuator>
22
+ <equality>
23
+ <weld body1="mocap" body2="hand" solref="0.02 1"></weld>
24
+ </equality>
25
+ </mujoco>
Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_door_lock.xml ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <mujoco>
2
+ <include file="../scene/basic_scene.xml"/>
3
+ <include file="../objects/assets/doorlock_dependencies.xml"/>
4
+ <include file="../objects/assets/xyz_base_dependencies.xml"/>
5
+
6
+ <worldbody>
7
+
8
+ <include file="../objects/assets/xyz_base.xml"/>
9
+
10
+ <body name="door" pos="0 0.9 0.15">
11
+ <include file="../objects/assets/doorlockA.xml"/>
12
+ </body>
13
+
14
+ <site name="goal_lock" pos="0 0.74 0.12" size="0.01"
15
+ rgba="0 0.8 0 1"/>
16
+ <site name="goal_unlock" pos="0.09 0.74 0.211" size="0.01"
17
+ rgba="0 0 0.8 1"/>
18
+ </worldbody>
19
+ <actuator>
20
+ <position ctrllimited="true" ctrlrange="-1 1" joint="r_close" kp="400" user="1"/>
21
+ <position ctrllimited="true" ctrlrange="-1 1" joint="l_close" kp="400" user="1"/>
22
+ </actuator>
23
+ <equality>
24
+ <weld body1="mocap" body2="hand" solref="0.02 1"/>
25
+ </equality>
26
+ </mujoco>
Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_door_pull.xml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <mujoco>
2
+ <include file="../scene/basic_scene.xml"/>
3
+ <include file="../objects/assets/doorlock_dependencies.xml"/>
4
+ <include file="../objects/assets/xyz_base_dependencies.xml"/>
5
+
6
+ <worldbody>
7
+ <include file="../objects/assets/xyz_base.xml"/>
8
+
9
+ <body name="door" pos="0 0.9 0.15">
10
+ <include file="../objects/assets/doorlockB.xml"/>
11
+ </body>
12
+
13
+ <site name="goal" pos="-0.49 0.46 0.15" size="0.02"
14
+ rgba="0 0.8 0 1"/>
15
+ </worldbody>
16
+ <actuator>
17
+ <position ctrllimited="true" ctrlrange="-1 1" joint="r_close" kp="400" user="1"/>
18
+ <position ctrllimited="true" ctrlrange="-1 1" joint="l_close" kp="400" user="1"/>
19
+ </actuator>
20
+ <equality>
21
+ <weld body1="mocap" body2="hand" solref="0.02 1"/>
22
+ </equality>
23
+ </mujoco>
Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_faucet.xml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <mujoco>
2
+ <include file="../scene/basic_scene.xml"/>
3
+ <include file="../objects/assets/faucet_dependencies.xml"/>
4
+ <include file="../objects/assets/xyz_base_dependencies.xml"/>
5
+ <worldbody>
6
+ <include file="../objects/assets/xyz_base.xml"/>
7
+
8
+ <body name="faucetBase" pos="0 0.8 0">
9
+ <include file="../objects/assets/faucet.xml"/>
10
+
11
+ </body>
12
+
13
+
14
+ <site name="goal_open" pos="0.175 0.785 0.125" size="0.005"
15
+ rgba="0 .8 0 1"/>
16
+ <site name="goal_close" pos="-0.175 0.785 0.125" size="0.005"
17
+ rgba="0 0 0.8 1"/>
18
+
19
+ <!-- <body name="box" pos="0 0.8 0.05">
20
+ <geom rgba="0.3 0.3 1 1" type="box" contype="1" size="0.1 0.05 0.05" name="box_left" conaffinity="1" pos="0 0 0" mass="1000" solimp="0.99 0.99 0.01" solref="0.01 1"/>
21
+ <geom rgba="0.3 0.3 1 1" type="box" contype="1" size="0.1 0.05 0.05" name="box_right" conaffinity="1" pos="0 0.16 0" mass="1000" solimp="0.99 0.99 0.01" solref="0.01 1"/>
22
+ <geom rgba="0.3 0.3 1 1" type="box" contype="1" size="0.035 0.03 0.05" name="box_front" conaffinity="1" pos="0.065 0.08 0" mass="1000" solimp="0.99 0.99 0.01" solref="0.01 1"/>
23
+ <geom rgba="0.3 0.3 1 1" type="box" contype="1" size="0.035 0.03 0.05" name="box_behind" conaffinity="1" pos="-0.065 0.08 0" mass="1000" solimp="0.99 0.99 0.01" solref="0.01 1"/>
24
+ <joint type="slide" range="-0.2 0." axis="0 1 0" name="goal_slidey" pos="0 0 0" damping="1.0"/>
25
+ </body>
26
+ --> </worldbody>
27
+
28
+ <actuator>
29
+ <position ctrllimited="true" ctrlrange="-1 1" joint="r_close" kp="400" user="1"/>
30
+ <position ctrllimited="true" ctrlrange="-1 1" joint="l_close" kp="400" user="1"/>
31
+ </actuator>
32
+ <equality>
33
+ <weld body1="mocap" body2="hand" solref="0.02 1"></weld>
34
+ </equality>
35
+ </mujoco>
Metaworld/metaworld/envs/assets_v2/sawyer_xyz/sawyer_laptop.xml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <mujoco>
2
+ <include file="../scene/basic_scene.xml"/>
3
+ <include file="../objects/assets/laptop_dependencies.xml"/>
4
+ <include file="../objects/assets/xyz_base_dependencies.xml"/>
5
+
6
+ <worldbody>
7
+ <include file="../objects/assets/xyz_base.xml"/>
8
+
9
+ <body name="laptop" pos="0 0.8 0">
10
+ <include file="../objects/assets/laptop.xml"/>
11
+
12
+ </body>
13
+ </worldbody>
14
+
15
+ <actuator>
16
+ <position ctrllimited="true" ctrlrange="-1 1" joint="r_close" kp="400" user="1"/>
17
+ <position ctrllimited="true" ctrlrange="-1 1" joint="l_close" kp="400" user="1"/>
18
+ </actuator>
19
+ <equality>
20
+ <weld body1="mocap" body2="hand" solref="0.02 1"></weld>
21
+ </equality>
22
+ </mujoco>
Metaworld/zarr_path: data/metaworld_door-close_expert.zarr/data/point_cloud/12.0.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:902da15cc5607a827c4a6cf9b7c396bacd2bd244963aa4e36f400f543b23497b
3
+ size 1231019
Metaworld/zarr_path: data/metaworld_door-lock_expert.zarr/data/point_cloud/7.0.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32e76433f75e66cccd7b4db7962d3fc1ae4fe8915409efc427235456347323c4
3
+ size 1213234
dexart-release/assets/sapien/102697/cues.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ link_0 hinge lever button
2
+ link_1 slider pump_lid lid
3
+ link_2 hinge lid lid
4
+ link_3 hinge seat seat
5
+ link_4 static base_body base_body
dexart-release/assets/sapien/102697/meta.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"user_id": "haozhu", "model_cat": "Toilet", "model_id": "db252ecd6286a334733badcb2e574996-0", "version": "1", "anno_id": "2697", "time_in_sec": "31"}
dexart-release/assets/sapien/102697/mobility.urdf ADDED
@@ -0,0 +1,502 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <robot name="partnet_db252ecd6286a334733badcb2e574996-0">
2
+ <link name="base" />
3
+ <link name="link_0">
4
+ <collision>
5
+ <origin xyz="0.387179130380233 -0.5229989412652956 0.2586584187924479" />
6
+ <geometry>
7
+ <mesh filename="new_objs/102697_link_0_2.obj" />
8
+ </geometry>
9
+ </collision>
10
+ <collision>
11
+ <origin xyz="0.387179130380233 -0.5229989412652956 0.2586584187924479" />
12
+ <geometry>
13
+ <mesh filename="new_objs/102697_link_0_0.obj" />
14
+ </geometry>
15
+ </collision>
16
+ <collision>
17
+ <origin xyz="0.387179130380233 -0.5229989412652956 0.2586584187924479" />
18
+ <geometry>
19
+ <mesh filename="new_objs/102697_link_0_1.obj" />
20
+ </geometry>
21
+ </collision>
22
+ <visual name="button-part[&quot;id]">
23
+ <origin xyz="0.387179130380233 -0.5229989412652956 0.2586584187924479" />
24
+ <geometry>
25
+ <mesh filename="textured_objs/original-8.obj" />
26
+ </geometry>
27
+ </visual>
28
+ </link>
29
+ <joint name="joint_0" type="revolute">
30
+ <origin xyz="-0.387179130380233 0.5229989412652956 -0.2586584187924479" />
31
+ <axis xyz="-0.91844856752952 -6.467369864908537e-05 0.39554042096893877" />
32
+ <child link="link_0" />
33
+ <parent link="link_4" />
34
+ <limit lower="0.0" upper="0.5235987755982988" />
35
+ </joint>
36
+ <link name="link_1">
37
+ <collision>
38
+ <origin xyz="0 0 0" />
39
+ <geometry>
40
+ <mesh filename="new_objs/102697_link_1_6.obj" />
41
+ </geometry>
42
+ </collision>
43
+ <collision>
44
+ <origin xyz="0 0 0" />
45
+ <geometry>
46
+ <mesh filename="new_objs/102697_link_1_14.obj" />
47
+ </geometry>
48
+ </collision>
49
+ <collision>
50
+ <origin xyz="0 0 0" />
51
+ <geometry>
52
+ <mesh filename="new_objs/102697_link_1_11.obj" />
53
+ </geometry>
54
+ </collision>
55
+ <collision>
56
+ <origin xyz="0 0 0" />
57
+ <geometry>
58
+ <mesh filename="new_objs/102697_link_1_4.obj" />
59
+ </geometry>
60
+ </collision>
61
+ <collision>
62
+ <origin xyz="0 0 0" />
63
+ <geometry>
64
+ <mesh filename="new_objs/102697_link_1_8.obj" />
65
+ </geometry>
66
+ </collision>
67
+ <collision>
68
+ <origin xyz="0 0 0" />
69
+ <geometry>
70
+ <mesh filename="new_objs/102697_link_1_3.obj" />
71
+ </geometry>
72
+ </collision>
73
+ <collision>
74
+ <origin xyz="0 0 0" />
75
+ <geometry>
76
+ <mesh filename="new_objs/102697_link_1_1.obj" />
77
+ </geometry>
78
+ </collision>
79
+ <collision>
80
+ <origin xyz="0 0 0" />
81
+ <geometry>
82
+ <mesh filename="new_objs/102697_link_1_12.obj" />
83
+ </geometry>
84
+ </collision>
85
+ <collision>
86
+ <origin xyz="0 0 0" />
87
+ <geometry>
88
+ <mesh filename="new_objs/102697_link_1_5.obj" />
89
+ </geometry>
90
+ </collision>
91
+ <collision>
92
+ <origin xyz="0 0 0" />
93
+ <geometry>
94
+ <mesh filename="new_objs/102697_link_1_13.obj" />
95
+ </geometry>
96
+ </collision>
97
+ <collision>
98
+ <origin xyz="0 0 0" />
99
+ <geometry>
100
+ <mesh filename="new_objs/102697_link_1_0.obj" />
101
+ </geometry>
102
+ </collision>
103
+ <collision>
104
+ <origin xyz="0 0 0" />
105
+ <geometry>
106
+ <mesh filename="new_objs/102697_link_1_10.obj" />
107
+ </geometry>
108
+ </collision>
109
+ <collision>
110
+ <origin xyz="0 0 0" />
111
+ <geometry>
112
+ <mesh filename="new_objs/102697_link_1_2.obj" />
113
+ </geometry>
114
+ </collision>
115
+ <collision>
116
+ <origin xyz="0 0 0" />
117
+ <geometry>
118
+ <mesh filename="new_objs/102697_link_1_9.obj" />
119
+ </geometry>
120
+ </collision>
121
+ <collision>
122
+ <origin xyz="0 0 0" />
123
+ <geometry>
124
+ <mesh filename="new_objs/102697_link_1_7.obj" />
125
+ </geometry>
126
+ </collision>
127
+ <visual name="lid-part[&quot;id]">
128
+ <origin xyz="0 0 0" />
129
+ <geometry>
130
+ <mesh filename="textured_objs/original-4.obj" />
131
+ </geometry>
132
+ </visual>
133
+ <visual name="lid-part[&quot;id]">
134
+ <origin xyz="0 0 0" />
135
+ <geometry>
136
+ <mesh filename="textured_objs/original-3.obj" />
137
+ </geometry>
138
+ </visual>
139
+ </link>
140
+ <joint name="joint_1" type="prismatic">
141
+ <origin xyz="0 0 0" />
142
+ <axis xyz="0 1 0" />
143
+ <child link="link_1" />
144
+ <parent link="link_4" />
145
+ <limit lower="0" upper="0.02" />
146
+ </joint>
147
+ <link name="link_2">
148
+ <collision>
149
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
150
+ <geometry>
151
+ <mesh filename="new_objs/102697_link_2_0.obj" />
152
+ </geometry>
153
+ </collision>
154
+ <visual name="lid-part[&quot;id]">
155
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
156
+ <geometry>
157
+ <mesh filename="textured_objs/original-21.obj" />
158
+ </geometry>
159
+ </visual>
160
+ <visual name="lid-part[&quot;id]">
161
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
162
+ <geometry>
163
+ <mesh filename="textured_objs/original-20.obj" />
164
+ </geometry>
165
+ </visual>
166
+ </link>
167
+ <joint name="joint_2" type="revolute">
168
+ <origin xyz="0 0.05794601786221419 -0.06205458525801076" />
169
+ <axis xyz="-1 0 0" />
170
+ <child link="link_2" />
171
+ <parent link="link_4" />
172
+ <limit lower="-0.0" upper="1.6406094968746698" />
173
+ </joint>
174
+ <link name="link_3">
175
+ <collision>
176
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
177
+ <geometry>
178
+ <mesh filename="new_objs/102697_link_3_2.obj" />
179
+ </geometry>
180
+ </collision>
181
+ <collision>
182
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
183
+ <geometry>
184
+ <mesh filename="new_objs/102697_link_3_6.obj" />
185
+ </geometry>
186
+ </collision>
187
+ <collision>
188
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
189
+ <geometry>
190
+ <mesh filename="new_objs/102697_link_3_1.obj" />
191
+ </geometry>
192
+ </collision>
193
+ <collision>
194
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
195
+ <geometry>
196
+ <mesh filename="new_objs/102697_link_3_0.obj" />
197
+ </geometry>
198
+ </collision>
199
+ <collision>
200
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
201
+ <geometry>
202
+ <mesh filename="new_objs/102697_link_3_4.obj" />
203
+ </geometry>
204
+ </collision>
205
+ <collision>
206
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
207
+ <geometry>
208
+ <mesh filename="new_objs/102697_link_3_5.obj" />
209
+ </geometry>
210
+ </collision>
211
+ <collision>
212
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
213
+ <geometry>
214
+ <mesh filename="new_objs/102697_link_3_7.obj" />
215
+ </geometry>
216
+ </collision>
217
+ <collision>
218
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
219
+ <geometry>
220
+ <mesh filename="new_objs/102697_link_3_3.obj" />
221
+ </geometry>
222
+ </collision>
223
+ <visual name="seat-part[&quot;id]">
224
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
225
+ <geometry>
226
+ <mesh filename="textured_objs/original-18.obj" />
227
+ </geometry>
228
+ </visual>
229
+ <visual name="seat-part[&quot;id]">
230
+ <origin xyz="0 -0.05794601786221419 0.06205458525801076" />
231
+ <geometry>
232
+ <mesh filename="textured_objs/original-17.obj" />
233
+ </geometry>
234
+ </visual>
235
+ </link>
236
+ <joint name="joint_3" type="revolute">
237
+ <origin xyz="0 0.05794601786221419 -0.06205458525801076" />
238
+ <axis xyz="-1 0 0" />
239
+ <child link="link_3" />
240
+ <parent link="link_4" />
241
+ <limit lower="-0.0" upper="1.6406094968746698" />
242
+ </joint>
243
+ <link name="link_4">
244
+ <collision>
245
+ <origin xyz="0 0 0" />
246
+ <geometry>
247
+ <mesh filename="new_objs/102697_link_4_0.obj" />
248
+ </geometry>
249
+ </collision>
250
+ <collision>
251
+ <origin xyz="0 0 0" />
252
+ <geometry>
253
+ <mesh filename="new_objs/102697_link_4_18.obj" />
254
+ </geometry>
255
+ </collision>
256
+ <collision>
257
+ <origin xyz="0 0 0" />
258
+ <geometry>
259
+ <mesh filename="new_objs/102697_link_4_22.obj" />
260
+ </geometry>
261
+ </collision>
262
+ <collision>
263
+ <origin xyz="0 0 0" />
264
+ <geometry>
265
+ <mesh filename="new_objs/102697_link_4_12.obj" />
266
+ </geometry>
267
+ </collision>
268
+ <collision>
269
+ <origin xyz="0 0 0" />
270
+ <geometry>
271
+ <mesh filename="new_objs/102697_link_4_27.obj" />
272
+ </geometry>
273
+ </collision>
274
+ <collision>
275
+ <origin xyz="0 0 0" />
276
+ <geometry>
277
+ <mesh filename="new_objs/102697_link_4_23.obj" />
278
+ </geometry>
279
+ </collision>
280
+ <collision>
281
+ <origin xyz="0 0 0" />
282
+ <geometry>
283
+ <mesh filename="new_objs/102697_link_4_33.obj" />
284
+ </geometry>
285
+ </collision>
286
+ <collision>
287
+ <origin xyz="0 0 0" />
288
+ <geometry>
289
+ <mesh filename="new_objs/102697_link_4_2.obj" />
290
+ </geometry>
291
+ </collision>
292
+ <collision>
293
+ <origin xyz="0 0 0" />
294
+ <geometry>
295
+ <mesh filename="new_objs/102697_link_4_13.obj" />
296
+ </geometry>
297
+ </collision>
298
+ <collision>
299
+ <origin xyz="0 0 0" />
300
+ <geometry>
301
+ <mesh filename="new_objs/102697_link_4_28.obj" />
302
+ </geometry>
303
+ </collision>
304
+ <collision>
305
+ <origin xyz="0 0 0" />
306
+ <geometry>
307
+ <mesh filename="new_objs/102697_link_4_34.obj" />
308
+ </geometry>
309
+ </collision>
310
+ <collision>
311
+ <origin xyz="0 0 0" />
312
+ <geometry>
313
+ <mesh filename="new_objs/102697_link_4_7.obj" />
314
+ </geometry>
315
+ </collision>
316
+ <collision>
317
+ <origin xyz="0 0 0" />
318
+ <geometry>
319
+ <mesh filename="new_objs/102697_link_4_1.obj" />
320
+ </geometry>
321
+ </collision>
322
+ <collision>
323
+ <origin xyz="0 0 0" />
324
+ <geometry>
325
+ <mesh filename="new_objs/102697_link_4_10.obj" />
326
+ </geometry>
327
+ </collision>
328
+ <collision>
329
+ <origin xyz="0 0 0" />
330
+ <geometry>
331
+ <mesh filename="new_objs/102697_link_4_6.obj" />
332
+ </geometry>
333
+ </collision>
334
+ <collision>
335
+ <origin xyz="0 0 0" />
336
+ <geometry>
337
+ <mesh filename="new_objs/102697_link_4_19.obj" />
338
+ </geometry>
339
+ </collision>
340
+ <collision>
341
+ <origin xyz="0 0 0" />
342
+ <geometry>
343
+ <mesh filename="new_objs/102697_link_4_3.obj" />
344
+ </geometry>
345
+ </collision>
346
+ <collision>
347
+ <origin xyz="0 0 0" />
348
+ <geometry>
349
+ <mesh filename="new_objs/102697_link_4_26.obj" />
350
+ </geometry>
351
+ </collision>
352
+ <collision>
353
+ <origin xyz="0 0 0" />
354
+ <geometry>
355
+ <mesh filename="new_objs/102697_link_4_8.obj" />
356
+ </geometry>
357
+ </collision>
358
+ <collision>
359
+ <origin xyz="0 0 0" />
360
+ <geometry>
361
+ <mesh filename="new_objs/102697_link_4_25.obj" />
362
+ </geometry>
363
+ </collision>
364
+ <collision>
365
+ <origin xyz="0 0 0" />
366
+ <geometry>
367
+ <mesh filename="new_objs/102697_link_4_11.obj" />
368
+ </geometry>
369
+ </collision>
370
+ <collision>
371
+ <origin xyz="0 0 0" />
372
+ <geometry>
373
+ <mesh filename="new_objs/102697_link_4_21.obj" />
374
+ </geometry>
375
+ </collision>
376
+ <collision>
377
+ <origin xyz="0 0 0" />
378
+ <geometry>
379
+ <mesh filename="new_objs/102697_link_4_9.obj" />
380
+ </geometry>
381
+ </collision>
382
+ <collision>
383
+ <origin xyz="0 0 0" />
384
+ <geometry>
385
+ <mesh filename="new_objs/102697_link_4_17.obj" />
386
+ </geometry>
387
+ </collision>
388
+ <collision>
389
+ <origin xyz="0 0 0" />
390
+ <geometry>
391
+ <mesh filename="new_objs/102697_link_4_30.obj" />
392
+ </geometry>
393
+ </collision>
394
+ <collision>
395
+ <origin xyz="0 0 0" />
396
+ <geometry>
397
+ <mesh filename="new_objs/102697_link_4_16.obj" />
398
+ </geometry>
399
+ </collision>
400
+ <collision>
401
+ <origin xyz="0 0 0" />
402
+ <geometry>
403
+ <mesh filename="new_objs/102697_link_4_31.obj" />
404
+ </geometry>
405
+ </collision>
406
+ <collision>
407
+ <origin xyz="0 0 0" />
408
+ <geometry>
409
+ <mesh filename="new_objs/102697_link_4_20.obj" />
410
+ </geometry>
411
+ </collision>
412
+ <collision>
413
+ <origin xyz="0 0 0" />
414
+ <geometry>
415
+ <mesh filename="new_objs/102697_link_4_5.obj" />
416
+ </geometry>
417
+ </collision>
418
+ <collision>
419
+ <origin xyz="0 0 0" />
420
+ <geometry>
421
+ <mesh filename="new_objs/102697_link_4_29.obj" />
422
+ </geometry>
423
+ </collision>
424
+ <collision>
425
+ <origin xyz="0 0 0" />
426
+ <geometry>
427
+ <mesh filename="new_objs/102697_link_4_15.obj" />
428
+ </geometry>
429
+ </collision>
430
+ <collision>
431
+ <origin xyz="0 0 0" />
432
+ <geometry>
433
+ <mesh filename="new_objs/102697_link_4_4.obj" />
434
+ </geometry>
435
+ </collision>
436
+ <collision>
437
+ <origin xyz="0 0 0" />
438
+ <geometry>
439
+ <mesh filename="new_objs/102697_link_4_24.obj" />
440
+ </geometry>
441
+ </collision>
442
+ <collision>
443
+ <origin xyz="0 0 0" />
444
+ <geometry>
445
+ <mesh filename="new_objs/102697_link_4_32.obj" />
446
+ </geometry>
447
+ </collision>
448
+ <collision>
449
+ <origin xyz="0 0 0" />
450
+ <geometry>
451
+ <mesh filename="new_objs/102697_link_4_14.obj" />
452
+ </geometry>
453
+ </collision>
454
+ <visual name="base_body-part[&quot;id]">
455
+ <origin xyz="0 0 0" />
456
+ <geometry>
457
+ <mesh filename="textured_objs/original-13.obj" />
458
+ </geometry>
459
+ </visual>
460
+ <visual name="base_body-part[&quot;id]">
461
+ <origin xyz="0 0 0" />
462
+ <geometry>
463
+ <mesh filename="textured_objs/original-6.obj" />
464
+ </geometry>
465
+ </visual>
466
+ <visual name="base_body-part[&quot;id]">
467
+ <origin xyz="0 0 0" />
468
+ <geometry>
469
+ <mesh filename="textured_objs/original-15.obj" />
470
+ </geometry>
471
+ </visual>
472
+ <visual name="base_body-part[&quot;id]">
473
+ <origin xyz="0 0 0" />
474
+ <geometry>
475
+ <mesh filename="textured_objs/original-11.obj" />
476
+ </geometry>
477
+ </visual>
478
+ <visual name="base_body-part[&quot;id]">
479
+ <origin xyz="0 0 0" />
480
+ <geometry>
481
+ <mesh filename="textured_objs/original-7.obj" />
482
+ </geometry>
483
+ </visual>
484
+ <visual name="base_body-part[&quot;id]">
485
+ <origin xyz="0 0 0" />
486
+ <geometry>
487
+ <mesh filename="textured_objs/original-10.obj" />
488
+ </geometry>
489
+ </visual>
490
+ <visual name="base_body-part[&quot;id]">
491
+ <origin xyz="0 0 0" />
492
+ <geometry>
493
+ <mesh filename="textured_objs/original-14.obj" />
494
+ </geometry>
495
+ </visual>
496
+ </link>
497
+ <joint name="joint_4" type="fixed">
498
+ <origin rpy="1.570796326794897 0 -1.570796326794897" xyz="0 0 0" />
499
+ <child link="link_4" />
500
+ <parent link="base" />
501
+ </joint>
502
+ </robot>
dexart-release/assets/sapien/102697/mobility_v2.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"id":0,"parent":4,"joint":"hinge","name":"lever","parts":[{"id":1,"name":"button"}],"jointData":{"axis":{"origin":[-0.387179130380233,0.5229989412652956,-0.2586584187924479],"direction":[-0.91844856752952,-0.00006467369864908537,0.39554042096893877]},"limit":{"a":0,"b":30,"noLimit":false}}},{"id":1,"parent":4,"joint":"slider","name":"pump_lid","parts":[{"id":2,"name":"lid"}],"jointData":{"axis":{"origin":[0,0,0],"direction":[0,1,0]},"limit":{"a":0,"b":0.02,"noLimit":false,"rotates":false,"noRotationLimit":false,"rotationLimit":0}}},{"id":2,"parent":4,"joint":"hinge","name":"lid","parts":[{"id":3,"name":"lid"}],"jointData":{"axis":{"origin":[0,0.05794601786221419,-0.06205458525801076],"direction":[1,0,0]},"limit":{"a":0,"b":-94,"noLimit":false}}},{"id":3,"parent":4,"joint":"hinge","name":"seat","parts":[{"id":4,"name":"seat"}],"jointData":{"axis":{"origin":[0,0.05794601786221419,-0.06205458525801076],"direction":[1,0,0]},"limit":{"a":0,"b":-94,"noLimit":false}}},{"id":4,"parent":-1,"joint":"static","name":"base_body","parts":[{"id":5,"name":"base_body"}],"jointData":{}}]
dexart-release/assets/sapien/102697/new_objs/102697_link_1_12.mtl ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender MTL File: 'None'
2
+ # Material Count: 1
3
+
4
+ newmtl Shape.14067
5
+ Ns 400.000000
6
+ Ka 0.400000 0.400000 0.400000
7
+ Kd 0.336000 0.232000 0.584000
8
+ Ks 0.250000 0.250000 0.250000
9
+ Ke 0.000000 0.000000 0.000000
10
+ Ni 1.000000
11
+ d 0.500000
12
+ illum 2
dexart-release/assets/sapien/102697/new_objs/102697_link_1_14.mtl ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender MTL File: 'None'
2
+ # Material Count: 1
3
+
4
+ newmtl Shape.14069
5
+ Ns 400.000000
6
+ Ka 0.400000 0.400000 0.400000
7
+ Kd 0.296000 0.784000 0.192000
8
+ Ks 0.250000 0.250000 0.250000
9
+ Ke 0.000000 0.000000 0.000000
10
+ Ni 1.000000
11
+ d 0.500000
12
+ illum 2
dexart-release/assets/sapien/102697/new_objs/102697_link_1_14.obj ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender v2.79 (sub 0) OBJ File: ''
2
+ # www.blender.org
3
+ mtllib 102697_link_1_14.mtl
4
+ o Shape_IndexedFaceSet.014_Shape_IndexedFaceSet.14061
5
+ v 0.381286 0.716318 -0.278277
6
+ v 0.467366 0.669121 -0.458755
7
+ v 0.467366 0.666342 -0.458755
8
+ v 0.314641 0.669121 -0.486500
9
+ v 0.314641 0.666342 -0.275488
10
+ v 0.314641 0.716318 -0.486500
11
+ v 0.442356 0.716318 -0.486520
12
+ v 0.406252 0.666342 -0.275488
13
+ v 0.314641 0.716318 -0.275488
14
+ v 0.453478 0.702433 -0.433757
15
+ v 0.464367 0.666416 -0.486207
16
+ v 0.397925 0.702433 -0.275488
17
+ v 0.386833 0.666342 -0.486500
18
+ v 0.447917 0.713539 -0.453197
19
+ v 0.433456 0.670746 -0.350149
20
+ v 0.411813 0.705207 -0.311600
21
+ v 0.453478 0.710760 -0.486520
22
+ v 0.459025 0.702433 -0.455986
23
+ v 0.397925 0.713539 -0.300506
24
+ v 0.409033 0.674673 -0.283855
25
+ v 0.430634 0.693883 -0.355457
26
+ v 0.461805 0.669121 -0.436525
27
+ v 0.447917 0.710760 -0.439314
28
+ vn 0.6201 0.7564 0.2082
29
+ vn -1.0000 0.0000 0.0000
30
+ vn 0.0000 1.0000 0.0000
31
+ vn -0.0002 0.0000 -1.0000
32
+ vn 0.0000 -1.0000 0.0000
33
+ vn 0.0000 0.0000 1.0000
34
+ vn 0.9941 0.0000 -0.1086
35
+ vn 0.0406 0.2433 0.9691
36
+ vn -0.0385 -0.9992 -0.0132
37
+ vn 0.0010 -1.0000 -0.0028
38
+ vn 0.1781 0.9826 0.0522
39
+ vn -0.0001 -0.0002 -1.0000
40
+ vn 0.9633 0.2356 -0.1285
41
+ vn -0.0000 -0.0004 -1.0000
42
+ vn 0.0038 -0.0061 -1.0000
43
+ vn 0.4470 0.8945 -0.0000
44
+ vn 0.7133 0.6982 0.0608
45
+ vn 0.9470 0.2175 0.2363
46
+ vn 0.9624 0.2498 -0.1067
47
+ vn 0.5705 0.7506 0.3332
48
+ vn 0.2834 0.9545 0.0928
49
+ vn 0.6574 0.6887 0.3057
50
+ vn 0.8546 0.1972 0.4804
51
+ vn 0.9385 0.0321 0.3438
52
+ vn 0.8973 0.2493 0.3642
53
+ vn 0.9155 0.2217 0.3357
54
+ vn 0.8943 0.3344 0.2974
55
+ vn 0.9251 0.1885 0.3296
56
+ vn 0.9350 0.1837 0.3034
57
+ vn 0.9701 0.0000 0.2427
58
+ vn 0.8018 -0.5344 0.2674
59
+ vn 0.9470 0.2170 0.2369
60
+ vn 0.8672 -0.4031 0.2922
61
+ vn 0.9325 0.2086 0.2948
62
+ vn 0.7291 0.6431 0.2341
63
+ vn 0.7174 0.6831 0.1368
64
+ vn 0.7541 0.6292 0.1882
65
+ vn 0.5142 0.8410 0.1683
66
+ usemtl Shape.14069
67
+ s off
68
+ f 19//1 16//1 23//1
69
+ f 4//2 5//2 6//2
70
+ f 6//3 1//3 7//3
71
+ f 4//4 6//4 7//4
72
+ f 5//5 3//5 8//5
73
+ f 5//6 8//6 9//6
74
+ f 1//3 6//3 9//3
75
+ f 6//2 5//2 9//2
76
+ f 2//7 3//7 11//7
77
+ f 9//6 8//6 12//6
78
+ f 1//8 9//8 12//8
79
+ f 5//9 4//9 13//9
80
+ f 3//5 5//5 13//5
81
+ f 11//10 3//10 13//10
82
+ f 7//11 1//11 14//11
83
+ f 4//12 7//12 17//12
84
+ f 2//13 11//13 17//13
85
+ f 13//14 4//14 17//14
86
+ f 11//15 13//15 17//15
87
+ f 7//16 14//16 17//16
88
+ f 17//17 14//17 18//17
89
+ f 10//18 2//18 18//18
90
+ f 2//19 17//19 18//19
91
+ f 1//20 12//20 19//20
92
+ f 14//21 1//21 19//21
93
+ f 12//22 16//22 19//22
94
+ f 12//23 8//23 20//23
95
+ f 8//24 15//24 20//24
96
+ f 16//25 12//25 20//25
97
+ f 16//26 20//26 21//26
98
+ f 10//27 16//27 21//27
99
+ f 20//28 15//28 21//28
100
+ f 21//29 15//29 22//29
101
+ f 3//30 2//30 22//30
102
+ f 8//31 3//31 22//31
103
+ f 2//32 10//32 22//32
104
+ f 15//33 8//33 22//33
105
+ f 10//34 21//34 22//34
106
+ f 16//35 10//35 23//35
107
+ f 18//36 14//36 23//36
108
+ f 10//37 18//37 23//37
109
+ f 14//38 19//38 23//38
dexart-release/assets/sapien/102697/new_objs/102697_link_1_5.mtl ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender MTL File: 'None'
2
+ # Material Count: 1
3
+
4
+ newmtl Shape.14060
5
+ Ns 400.000000
6
+ Ka 0.400000 0.400000 0.400000
7
+ Kd 0.576000 0.288000 0.088000
8
+ Ks 0.250000 0.250000 0.250000
9
+ Ke 0.000000 0.000000 0.000000
10
+ Ni 1.000000
11
+ d 0.500000
12
+ illum 2
dexart-release/assets/sapien/102697/new_objs/102697_link_3_0.obj ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender v2.79 (sub 0) OBJ File: ''
2
+ # www.blender.org
3
+ mtllib 102697_link_3_0.mtl
4
+ o Shape_IndexedFaceSet_Shape_IndexedFaceSet.16309
5
+ v -0.318776 0.060655 0.107693
6
+ v -0.063870 0.047615 -0.076335
7
+ v -0.006959 0.042434 0.076198
8
+ v -0.006959 0.088979 -0.019441
9
+ v -0.309458 0.088979 0.114981
10
+ v -0.182789 0.086387 -0.042715
11
+ v -0.257765 0.042434 0.006414
12
+ v -0.312052 0.042434 0.114981
13
+ v -0.167256 0.073461 0.114981
14
+ v -0.006959 0.042434 -0.068570
15
+ v -0.006959 0.073461 0.060669
16
+ v -0.006959 0.088979 -0.065989
17
+ v -0.262922 0.086387 0.008996
18
+ v -0.151785 0.042434 0.114981
19
+ v -0.154809 0.063235 -0.062488
20
+ v -0.255171 0.088979 0.114981
21
+ v -0.265516 0.065707 -0.001350
22
+ v -0.058683 0.083799 -0.073734
23
+ v -0.155045 0.046362 -0.055734
24
+ v -0.006959 0.055369 0.078780
25
+ v -0.317239 0.052781 0.091707
26
+ v -0.309458 0.083799 0.086544
27
+ v -0.069027 0.088979 -0.065989
28
+ v -0.006959 0.070873 -0.078916
29
+ v -0.159536 0.083799 -0.058243
30
+ v -0.107812 0.042434 -0.063407
31
+ v -0.257765 0.045027 -0.001350
32
+ v -0.159811 0.051020 -0.059069
33
+ v -0.306895 0.088979 0.096890
34
+ v -0.149191 0.060536 0.114981
35
+ v -0.056120 0.078632 -0.076335
36
+ v -0.314646 0.045027 0.096890
37
+ v -0.006959 0.047615 -0.076335
38
+ v -0.319802 0.076049 0.114981
39
+ v -0.260328 0.078632 -0.001350
40
+ v -0.312052 0.068285 0.081361
41
+ v -0.250915 0.087266 0.009661
42
+ v -0.006959 0.070873 0.065852
43
+ v -0.304301 0.050193 0.068434
44
+ v -0.268110 0.070873 0.003833
45
+ v -0.006959 0.060536 0.076198
46
+ v -0.006959 0.086387 -0.071152
47
+ v -0.006959 0.078632 0.034833
48
+ v -0.167107 0.053534 -0.056233
49
+ v -0.162264 0.065707 -0.059140
50
+ v -0.317239 0.045027 0.114981
51
+ v -0.198290 0.081220 -0.037551
52
+ v -0.247420 0.042434 -0.001350
53
+ vn -0.2189 -0.8734 -0.4350
54
+ vn 0.0000 -1.0000 0.0000
55
+ vn 0.0000 0.0000 1.0000
56
+ vn 1.0000 0.0000 0.0000
57
+ vn -0.0000 1.0000 0.0000
58
+ vn 0.2540 -0.1893 0.9485
59
+ vn -0.0785 0.9576 -0.2772
60
+ vn -0.0984 0.7614 -0.6407
61
+ vn -0.0223 -0.8995 -0.4364
62
+ vn -0.1804 -0.1959 -0.9639
63
+ vn -0.1633 -0.5893 -0.7912
64
+ vn -0.1721 -0.6838 -0.7091
65
+ vn -0.3065 -0.7412 -0.5972
66
+ vn -0.5117 0.8123 -0.2799
67
+ vn 0.2435 0.3404 0.9082
68
+ vn 0.2453 -0.0352 0.9688
69
+ vn -0.1444 0.0361 -0.9889
70
+ vn -0.0504 0.0126 -0.9986
71
+ vn -0.1621 0.1636 -0.9731
72
+ vn -0.1399 0.3890 -0.9106
73
+ vn -0.2162 -0.9704 -0.1081
74
+ vn -0.4800 -0.8321 -0.2779
75
+ vn 0.0000 -0.8318 -0.5550
76
+ vn 0.0000 -0.1103 -0.9939
77
+ vn -0.9962 -0.0274 -0.0823
78
+ vn -0.7761 0.6209 -0.1100
79
+ vn -0.7791 0.6162 -0.1155
80
+ vn -0.3952 0.6848 -0.6123
81
+ vn -0.9554 0.1496 -0.2548
82
+ vn -0.9305 0.2462 -0.2713
83
+ vn -0.0669 0.9924 -0.1037
84
+ vn -0.0356 0.9974 -0.0631
85
+ vn -0.0693 0.9955 -0.0640
86
+ vn -0.0240 0.9991 -0.0350
87
+ vn 0.1545 0.8754 0.4580
88
+ vn 0.1519 0.8843 0.4415
89
+ vn -0.8035 -0.3012 -0.5135
90
+ vn -0.7750 -0.5093 -0.3742
91
+ vn -0.6037 -0.7165 -0.3495
92
+ vn -0.8716 -0.0235 -0.4897
93
+ vn -0.8750 -0.0297 -0.4832
94
+ vn -0.7861 0.4152 -0.4579
95
+ vn -0.7546 0.4201 -0.5041
96
+ vn -0.7121 0.2858 -0.6413
97
+ vn -0.8694 0.0560 -0.4909
98
+ vn -0.8355 0.2946 -0.4637
99
+ vn 0.2448 0.4334 0.8673
100
+ vn 0.2223 0.6897 0.6891
101
+ vn 0.0000 0.8937 -0.4487
102
+ vn -0.0128 0.8231 -0.5677
103
+ vn 0.0237 0.4474 -0.8940
104
+ vn 0.0214 0.4580 -0.8887
105
+ vn 0.1009 0.9773 0.1863
106
+ vn 0.1037 0.9753 0.1952
107
+ vn -0.4960 -0.1859 -0.8482
108
+ vn -0.3894 -0.0969 -0.9159
109
+ vn -0.4480 -0.3961 -0.8015
110
+ vn -0.3790 0.1027 -0.9197
111
+ vn -0.4224 -0.0481 -0.9051
112
+ vn -0.4884 -0.0141 -0.8725
113
+ vn -0.9926 -0.1156 -0.0385
114
+ vn -0.4462 -0.8926 -0.0640
115
+ vn -0.9107 -0.3918 -0.1305
116
+ vn -0.9960 -0.0823 0.0336
117
+ vn -0.4229 0.5449 -0.7240
118
+ vn -0.4253 0.5896 -0.6867
119
+ vn -0.5000 0.2007 -0.8425
120
+ vn -0.4868 0.0799 -0.8698
121
+ vn -0.4738 0.1147 -0.8731
122
+ vn -0.1247 -0.9517 -0.2806
123
+ vn -0.2313 -0.9228 -0.3082
124
+ usemtl Shape.16319
125
+ s off
126
+ f 27//1 19//1 48//1
127
+ f 7//2 3//2 8//2
128
+ f 5//3 8//3 9//3
129
+ f 4//4 3//4 10//4
130
+ f 3//2 7//2 10//2
131
+ f 3//4 4//4 11//4
132
+ f 5//5 4//5 12//5
133
+ f 4//4 10//4 12//4
134
+ f 8//2 3//2 14//2
135
+ f 9//3 8//3 14//3
136
+ f 4//5 5//5 16//5
137
+ f 5//3 9//3 16//3
138
+ f 3//4 11//4 20//4
139
+ f 14//6 3//6 20//6
140
+ f 5//5 12//5 23//5
141
+ f 12//4 10//4 24//4
142
+ f 6//7 23//7 25//7
143
+ f 23//8 18//8 25//8
144
+ f 2//9 10//9 26//9
145
+ f 10//2 7//2 26//2
146
+ f 15//10 2//10 28//10
147
+ f 2//11 26//11 28//11
148
+ f 26//12 19//12 28//12
149
+ f 19//13 27//13 28//13
150
+ f 13//14 22//14 29//14
151
+ f 5//5 23//5 29//5
152
+ f 9//3 14//3 30//3
153
+ f 20//15 9//15 30//15
154
+ f 14//16 20//16 30//16
155
+ f 2//17 15//17 31//17
156
+ f 24//18 2//18 31//18
157
+ f 15//19 25//19 31//19
158
+ f 25//20 18//20 31//20
159
+ f 7//21 8//21 32//21
160
+ f 27//22 7//22 32//22
161
+ f 10//23 2//23 33//23
162
+ f 2//24 24//24 33//24
163
+ f 24//4 10//4 33//4
164
+ f 8//3 5//3 34//3
165
+ f 21//25 1//25 34//25
166
+ f 5//26 29//26 34//26
167
+ f 29//27 22//27 34//27
168
+ f 13//28 6//28 35//28
169
+ f 21//29 34//29 36//29
170
+ f 34//30 22//30 36//30
171
+ f 6//31 13//31 37//31
172
+ f 23//32 6//32 37//32
173
+ f 13//33 29//33 37//33
174
+ f 29//34 23//34 37//34
175
+ f 16//35 9//35 38//35
176
+ f 11//36 16//36 38//36
177
+ f 20//4 11//4 38//4
178
+ f 17//37 27//37 39//37
179
+ f 32//38 21//38 39//38
180
+ f 27//39 32//39 39//39
181
+ f 36//40 17//40 39//40
182
+ f 21//41 36//41 39//41
183
+ f 22//42 13//42 40//42
184
+ f 13//43 35//43 40//43
185
+ f 35//44 17//44 40//44
186
+ f 17//45 36//45 40//45
187
+ f 36//46 22//46 40//46
188
+ f 9//47 20//47 41//47
189
+ f 38//48 9//48 41//48
190
+ f 20//4 38//4 41//4
191
+ f 23//49 12//49 42//49
192
+ f 18//50 23//50 42//50
193
+ f 12//4 24//4 42//4
194
+ f 24//51 31//51 42//51
195
+ f 31//52 18//52 42//52
196
+ f 11//4 4//4 43//4
197
+ f 4//53 16//53 43//53
198
+ f 16//54 11//54 43//54
199
+ f 27//55 17//55 44//55
200
+ f 15//56 28//56 44//56
201
+ f 28//57 27//57 44//57
202
+ f 25//58 15//58 45//58
203
+ f 15//59 44//59 45//59
204
+ f 44//60 17//60 45//60
205
+ f 1//61 21//61 46//61
206
+ f 32//62 8//62 46//62
207
+ f 21//63 32//63 46//63
208
+ f 34//64 1//64 46//64
209
+ f 8//3 34//3 46//3
210
+ f 6//65 25//65 47//65
211
+ f 35//66 6//66 47//66
212
+ f 17//67 35//67 47//67
213
+ f 45//68 17//68 47//68
214
+ f 25//69 45//69 47//69
215
+ f 26//2 7//2 48//2
216
+ f 19//70 26//70 48//70
217
+ f 7//71 27//71 48//71
dexart-release/assets/sapien/102697/new_objs/102697_link_3_5.obj ADDED
@@ -0,0 +1,250 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender v2.79 (sub 0) OBJ File: ''
2
+ # www.blender.org
3
+ mtllib 102697_link_3_5.mtl
4
+ o Shape_IndexedFaceSet.005_Shape_IndexedFaceSet.16314
5
+ v -0.006959 0.055369 0.663225
6
+ v -0.035413 0.042434 0.789889
7
+ v -0.006985 0.088979 0.766613
8
+ v -0.224145 0.088979 0.575274
9
+ v -0.112985 0.042434 0.575296
10
+ v -0.244834 0.042434 0.624424
11
+ v -0.216380 0.083799 0.676129
12
+ v -0.128515 0.070873 0.575274
13
+ v -0.154338 0.047615 0.743337
14
+ v -0.061288 0.088979 0.779539
15
+ v -0.006959 0.042434 0.792466
16
+ v -0.006985 0.073461 0.683903
17
+ v -0.006959 0.042434 0.668378
18
+ v -0.260364 0.088979 0.590820
19
+ v -0.030254 0.068285 0.800239
20
+ v -0.221565 0.065707 0.678706
21
+ v -0.268103 0.042434 0.575274
22
+ v -0.146599 0.081220 0.745913
23
+ v -0.081951 0.055369 0.784714
24
+ v -0.006985 0.088979 0.789889
25
+ v -0.270683 0.060536 0.598550
26
+ v -0.102641 0.042434 0.764037
27
+ v -0.206061 0.055369 0.696785
28
+ v -0.150775 0.086767 0.728688
29
+ v -0.087137 0.076049 0.779539
30
+ v -0.154338 0.065707 0.745913
31
+ v -0.262944 0.081220 0.603725
32
+ v -0.273262 0.081220 0.575274
33
+ v -0.262944 0.047615 0.606323
34
+ v -0.032834 0.047615 0.797640
35
+ v -0.006985 0.068294 0.800239
36
+ v -0.244834 0.088979 0.619249
37
+ v -0.030254 0.083799 0.795064
38
+ v -0.006985 0.070878 0.676129
39
+ v -0.087137 0.050193 0.782138
40
+ v -0.006985 0.081215 0.722659
41
+ v -0.213800 0.078632 0.683881
42
+ v -0.242254 0.052781 0.645102
43
+ v -0.180186 0.081220 0.575274
44
+ v -0.143993 0.083799 0.745913
45
+ v -0.079372 0.083799 0.779539
46
+ v -0.209848 0.046828 0.679494
47
+ v -0.262944 0.088979 0.575274
48
+ v -0.009565 0.047615 0.660627
49
+ v -0.092296 0.068285 0.779539
50
+ v -0.159523 0.070873 0.740739
51
+ v -0.275868 0.068285 0.583047
52
+ v -0.273262 0.045027 0.575274
53
+ v -0.032834 0.055369 0.800239
54
+ v -0.125910 0.068285 0.575274
55
+ v -0.006985 0.063123 0.668378
56
+ v -0.074186 0.045027 0.782138
57
+ v -0.204443 0.044694 0.679584
58
+ vn -0.3528 -0.8802 0.3176
59
+ vn 0.0000 -1.0000 -0.0000
60
+ vn -0.0000 1.0000 -0.0000
61
+ vn 1.0000 0.0000 0.0000
62
+ vn 0.0000 0.0000 -1.0000
63
+ vn 1.0000 0.0008 0.0000
64
+ vn 1.0000 0.0006 0.0001
65
+ vn -0.6739 -0.1041 0.7314
66
+ vn -0.8426 0.1891 0.5042
67
+ vn -0.7059 0.6605 0.2560
68
+ vn -0.0513 -0.8223 0.5667
69
+ vn 1.0000 0.0006 0.0013
70
+ vn -0.1903 0.9645 0.1830
71
+ vn -0.0704 0.9943 0.0806
72
+ vn -0.5935 0.7366 0.3242
73
+ vn -0.6160 0.6947 0.3714
74
+ vn -0.0901 0.8767 0.4726
75
+ vn 0.0988 0.4453 0.8899
76
+ vn -0.0001 0.3164 0.9486
77
+ vn 1.0000 0.0023 -0.0008
78
+ vn 0.2531 0.9181 -0.3050
79
+ vn -0.3097 -0.7486 0.5862
80
+ vn -0.4927 -0.1227 0.8615
81
+ vn 1.0000 0.0017 -0.0003
82
+ vn 0.1515 0.9734 -0.1719
83
+ vn 1.0000 0.0019 -0.0004
84
+ vn 0.1659 0.9670 -0.1935
85
+ vn -0.7073 0.1481 0.6912
86
+ vn -0.7933 0.3506 0.4977
87
+ vn -0.8130 0.2852 0.5077
88
+ vn -0.8528 0.0077 0.5221
89
+ vn -0.7975 -0.2017 0.5686
90
+ vn -0.6081 -0.6770 0.4146
91
+ vn -0.8486 -0.2184 0.4819
92
+ vn 0.1702 0.9644 -0.2025
93
+ vn 0.1908 0.9528 -0.2362
94
+ vn -0.2498 0.9330 0.2591
95
+ vn -0.1520 0.9623 0.2257
96
+ vn -0.5596 0.5915 0.5805
97
+ vn -0.5675 0.5734 0.5909
98
+ vn -0.2916 0.2921 0.9108
99
+ vn -0.4189 0.4197 0.8052
100
+ vn -0.2076 0.7248 0.6569
101
+ vn -0.2872 0.3031 0.9086
102
+ vn -0.2434 0.8497 0.4677
103
+ vn -0.4183 0.4227 0.8039
104
+ vn -0.5264 -0.7109 0.4664
105
+ vn -0.5764 -0.6997 0.4220
106
+ vn -0.6036 -0.6544 0.4554
107
+ vn -0.5981 0.7953 0.0993
108
+ vn 0.6344 0.0453 -0.7717
109
+ vn 0.8968 -0.1637 -0.4110
110
+ vn 0.5171 -0.6211 -0.5890
111
+ vn -0.3140 0.1256 0.9411
112
+ vn -0.3097 0.2058 0.9283
113
+ vn -0.4499 0.2990 0.8416
114
+ vn -0.4707 0.2348 0.8505
115
+ vn -0.4460 0.0014 0.8950
116
+ vn -0.4762 -0.0095 0.8793
117
+ vn -0.5346 0.2667 0.8020
118
+ vn -0.6914 0.0290 0.7219
119
+ vn -0.6165 0.4454 0.6493
120
+ vn -0.7048 0.1501 0.6933
121
+ vn -0.8834 0.2281 0.4094
122
+ vn -0.8746 0.3668 0.3172
123
+ vn -0.9481 0.0000 -0.3179
124
+ vn -0.4393 -0.8740 0.2080
125
+ vn -0.4713 -0.8521 0.2276
126
+ vn -0.8850 -0.3363 0.3221
127
+ vn -0.9562 -0.1834 0.2281
128
+ vn -0.3008 0.0601 0.9518
129
+ vn 0.1250 -0.3153 0.9407
130
+ vn 0.1424 -0.2848 0.9480
131
+ vn 0.0000 0.0000 1.0000
132
+ vn -0.2970 -0.1701 0.9396
133
+ vn -0.2748 -0.3056 0.9117
134
+ vn 0.5887 0.2937 -0.7531
135
+ vn 0.0001 -0.0008 -1.0000
136
+ vn 0.9999 0.0100 -0.0100
137
+ vn 0.5062 0.6097 -0.6100
138
+ vn 0.5596 0.4600 -0.6894
139
+ vn 0.5341 0.5376 -0.6524
140
+ vn -0.1298 -0.9323 0.3377
141
+ vn -0.1701 -0.7922 0.5861
142
+ vn -0.3013 -0.7554 0.5819
143
+ vn -0.2439 -0.6115 0.7527
144
+ vn -0.1402 -0.9798 0.1428
145
+ vn -0.1678 -0.9699 0.1763
146
+ vn -0.3550 -0.8867 0.2963
147
+ usemtl Shape.16324
148
+ s off
149
+ f 9//1 42//1 53//1
150
+ f 5//2 2//2 6//2
151
+ f 3//3 4//3 10//3
152
+ f 2//2 5//2 11//2
153
+ f 1//4 11//4 13//4
154
+ f 11//2 5//2 13//2
155
+ f 10//3 4//3 14//3
156
+ f 5//2 6//2 17//2
157
+ f 4//5 8//5 17//5
158
+ f 1//6 3//6 20//6
159
+ f 3//3 10//3 20//3
160
+ f 11//7 1//7 20//7
161
+ f 6//2 2//2 22//2
162
+ f 23//8 9//8 26//8
163
+ f 21//9 16//9 27//9
164
+ f 4//5 17//5 28//5
165
+ f 27//10 14//10 28//10
166
+ f 2//11 11//11 30//11
167
+ f 11//12 20//12 31//12
168
+ f 10//3 14//3 32//3
169
+ f 7//13 24//13 32//13
170
+ f 24//14 10//14 32//14
171
+ f 14//15 27//15 32//15
172
+ f 27//16 7//16 32//16
173
+ f 20//17 10//17 33//17
174
+ f 31//18 20//18 33//18
175
+ f 15//19 31//19 33//19
176
+ f 12//20 1//20 34//20
177
+ f 8//21 12//21 34//21
178
+ f 9//22 22//22 35//22
179
+ f 26//23 9//23 35//23
180
+ f 3//24 1//24 36//24
181
+ f 4//25 3//25 36//25
182
+ f 1//26 12//26 36//26
183
+ f 12//27 4//27 36//27
184
+ f 16//28 23//28 37//28
185
+ f 7//29 27//29 37//29
186
+ f 27//30 16//30 37//30
187
+ f 16//31 21//31 38//31
188
+ f 23//32 16//32 38//32
189
+ f 29//33 6//33 38//33
190
+ f 21//34 29//34 38//34
191
+ f 8//5 4//5 39//5
192
+ f 4//35 12//35 39//35
193
+ f 12//36 8//36 39//36
194
+ f 24//37 7//37 40//37
195
+ f 10//38 24//38 40//38
196
+ f 7//39 37//39 40//39
197
+ f 37//40 18//40 40//40
198
+ f 25//41 15//41 41//41
199
+ f 18//42 25//42 41//42
200
+ f 33//43 10//43 41//43
201
+ f 15//44 33//44 41//44
202
+ f 10//45 40//45 41//45
203
+ f 40//46 18//46 41//46
204
+ f 9//47 23//47 42//47
205
+ f 38//48 6//48 42//48
206
+ f 23//49 38//49 42//49
207
+ f 14//3 4//3 43//3
208
+ f 4//5 28//5 43//5
209
+ f 28//50 14//50 43//50
210
+ f 5//51 1//51 44//51
211
+ f 1//52 13//52 44//52
212
+ f 13//53 5//53 44//53
213
+ f 19//54 15//54 45//54
214
+ f 15//55 25//55 45//55
215
+ f 25//56 18//56 45//56
216
+ f 18//57 26//57 45//57
217
+ f 35//58 19//58 45//58
218
+ f 26//59 35//59 45//59
219
+ f 26//60 18//60 46//60
220
+ f 23//61 26//61 46//61
221
+ f 18//62 37//62 46//62
222
+ f 37//63 23//63 46//63
223
+ f 21//64 27//64 47//64
224
+ f 27//65 28//65 47//65
225
+ f 47//66 28//66 48//66
226
+ f 17//67 6//67 48//67
227
+ f 28//5 17//5 48//5
228
+ f 6//68 29//68 48//68
229
+ f 29//69 21//69 48//69
230
+ f 21//70 47//70 48//70
231
+ f 15//71 19//71 49//71
232
+ f 30//72 11//72 49//72
233
+ f 11//73 31//73 49//73
234
+ f 31//74 15//74 49//74
235
+ f 19//75 35//75 49//75
236
+ f 35//76 30//76 49//76
237
+ f 1//77 5//77 50//77
238
+ f 5//78 17//78 50//78
239
+ f 17//5 8//5 50//5
240
+ f 34//79 1//79 51//79
241
+ f 8//80 34//80 51//80
242
+ f 1//81 50//81 51//81
243
+ f 50//82 8//82 51//82
244
+ f 22//83 2//83 52//83
245
+ f 2//84 30//84 52//84
246
+ f 35//85 22//85 52//85
247
+ f 30//86 35//86 52//86
248
+ f 6//87 22//87 53//87
249
+ f 22//88 9//88 53//88
250
+ f 42//89 6//89 53//89
dexart-release/assets/sapien/102697/new_objs/102697_link_3_6.obj ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender v2.79 (sub 0) OBJ File: ''
2
+ # www.blender.org
3
+ mtllib 102697_link_3_6.mtl
4
+ o Shape_IndexedFaceSet.006_Shape_IndexedFaceSet.16315
5
+ v 0.267139 0.042434 0.557173
6
+ v 0.174056 0.055369 0.316720
7
+ v 0.220597 0.088979 0.557173
8
+ v 0.117181 0.047615 0.557173
9
+ v 0.321419 0.088979 0.316720
10
+ v 0.176650 0.042434 0.316720
11
+ v 0.192149 0.073461 0.316720
12
+ v 0.272305 0.086387 0.557173
13
+ v 0.324013 0.042434 0.316720
14
+ v 0.132681 0.070873 0.549382
15
+ v 0.311086 0.052781 0.466641
16
+ v 0.272305 0.088979 0.316720
17
+ v 0.124942 0.042434 0.557173
18
+ v 0.308492 0.081220 0.461463
19
+ v 0.137869 0.073461 0.557173
20
+ v 0.282638 0.068285 0.549382
21
+ v 0.298159 0.088979 0.469231
22
+ v 0.334346 0.068290 0.319286
23
+ v 0.308492 0.045027 0.458873
24
+ v 0.124942 0.065707 0.554560
25
+ v 0.334346 0.052781 0.316697
26
+ v 0.184389 0.070873 0.321875
27
+ v 0.285232 0.050193 0.539048
28
+ v 0.324013 0.083799 0.358079
29
+ v 0.267139 0.088979 0.557173
30
+ v 0.137869 0.073461 0.551971
31
+ v 0.119775 0.057957 0.554560
32
+ v 0.174056 0.045027 0.316720
33
+ v 0.316253 0.042434 0.383948
34
+ v 0.331774 0.047610 0.321875
35
+ v 0.329180 0.083803 0.316697
36
+ v 0.272305 0.045027 0.557173
37
+ v 0.220597 0.088979 0.551971
38
+ v 0.119775 0.045027 0.551971
39
+ v 0.313680 0.088979 0.386514
40
+ v 0.282638 0.042434 0.520946
41
+ v 0.179222 0.063128 0.316720
42
+ vn -0.0002 0.0002 -1.0000
43
+ vn 0.0000 0.0000 1.0000
44
+ vn 0.0000 -1.0000 -0.0000
45
+ vn 0.0000 1.0000 0.0000
46
+ vn 0.8767 0.3663 0.3117
47
+ vn 0.9441 0.1404 0.2983
48
+ vn 0.6868 0.6920 0.2223
49
+ vn 0.9787 0.1197 0.1671
50
+ vn -0.3480 0.2786 0.8951
51
+ vn -0.5230 0.8497 0.0660
52
+ vn 0.0000 -0.0022 -1.0000
53
+ vn 0.9879 -0.0256 0.1532
54
+ vn -0.6125 0.7781 -0.1392
55
+ vn 0.9409 -0.0559 0.3340
56
+ vn 0.8012 -0.5355 0.2670
57
+ vn 0.9537 0.2610 0.1497
58
+ vn 0.4429 0.8828 0.1562
59
+ vn -0.1899 0.9808 -0.0438
60
+ vn -0.1844 0.9829 0.0000
61
+ vn -0.4464 0.8948 0.0000
62
+ vn -0.3652 0.9271 -0.0843
63
+ vn -0.4071 0.9087 -0.0925
64
+ vn -0.9578 0.1845 -0.2206
65
+ vn -0.4906 0.3271 0.8076
66
+ vn -0.9731 0.0000 -0.2302
67
+ vn -0.0001 -0.0001 -1.0000
68
+ vn 0.8869 -0.4394 0.1424
69
+ vn 0.6665 -0.6663 -0.3344
70
+ vn 0.9361 -0.3202 0.1452
71
+ vn 0.5256 -0.8486 0.0607
72
+ vn 0.6247 -0.7755 0.0915
73
+ vn 0.0000 0.0044 -1.0000
74
+ vn -0.0003 0.0014 -1.0000
75
+ vn 0.7031 0.1171 -0.7014
76
+ vn 0.9363 0.3313 0.1169
77
+ vn -0.0001 0.0000 -1.0000
78
+ vn 0.6020 0.0000 0.7985
79
+ vn 0.8242 -0.1871 0.5345
80
+ vn -0.1842 0.9821 -0.0405
81
+ vn -0.5501 -0.8240 0.1356
82
+ vn -0.3801 -0.9213 -0.0817
83
+ vn -0.8628 -0.4647 -0.1991
84
+ vn -0.6977 -0.6980 -0.1610
85
+ vn 0.6532 0.7472 0.1226
86
+ vn 0.6915 0.7120 0.1216
87
+ vn 0.5539 0.8303 0.0614
88
+ vn 0.6054 0.7923 0.0757
89
+ vn 0.6261 -0.7451 0.2297
90
+ vn 0.2367 -0.9698 0.0581
91
+ vn 0.4406 -0.8777 0.1885
92
+ vn 0.6228 -0.7475 0.2311
93
+ vn -0.5476 0.6851 -0.4804
94
+ vn -0.7588 0.6260 -0.1800
95
+ vn -0.8168 0.5439 -0.1923
96
+ vn -0.8165 0.5444 -0.1922
97
+ vn -0.0002 0.0001 -1.0000
98
+ usemtl Shape.16325
99
+ s off
100
+ f 7//1 31//1 37//1
101
+ f 1//2 3//2 4//2
102
+ f 3//2 1//2 8//2
103
+ f 1//3 6//3 9//3
104
+ f 3//4 5//4 12//4
105
+ f 1//2 4//2 13//2
106
+ f 6//3 1//3 13//3
107
+ f 4//2 3//2 15//2
108
+ f 14//5 8//5 16//5
109
+ f 11//6 14//6 16//6
110
+ f 5//4 3//4 17//4
111
+ f 8//7 14//7 17//7
112
+ f 14//8 11//8 18//8
113
+ f 4//9 15//9 20//9
114
+ f 15//10 10//10 20//10
115
+ f 9//11 6//11 21//11
116
+ f 18//12 11//12 21//12
117
+ f 20//13 10//13 22//13
118
+ f 11//14 16//14 23//14
119
+ f 19//15 11//15 23//15
120
+ f 14//16 18//16 24//16
121
+ f 3//2 8//2 25//2
122
+ f 17//4 3//4 25//4
123
+ f 8//17 17//17 25//17
124
+ f 12//18 7//18 26//18
125
+ f 15//19 3//19 26//19
126
+ f 10//20 15//20 26//20
127
+ f 7//21 22//21 26//21
128
+ f 22//22 10//22 26//22
129
+ f 2//23 4//23 27//23
130
+ f 4//24 20//24 27//24
131
+ f 4//25 2//25 28//25
132
+ f 21//26 6//26 28//26
133
+ f 1//3 9//3 29//3
134
+ f 11//27 19//27 30//27
135
+ f 9//28 21//28 30//28
136
+ f 21//29 11//29 30//29
137
+ f 29//30 9//30 30//30
138
+ f 19//31 29//31 30//31
139
+ f 12//32 5//32 31//32
140
+ f 7//33 12//33 31//33
141
+ f 18//34 21//34 31//34
142
+ f 24//35 18//35 31//35
143
+ f 28//36 2//36 31//36
144
+ f 21//36 28//36 31//36
145
+ f 8//2 1//2 32//2
146
+ f 16//37 8//37 32//37
147
+ f 23//38 16//38 32//38
148
+ f 3//4 12//4 33//4
149
+ f 26//19 3//19 33//19
150
+ f 12//39 26//39 33//39
151
+ f 13//40 4//40 34//40
152
+ f 6//41 13//41 34//41
153
+ f 4//42 28//42 34//42
154
+ f 28//43 6//43 34//43
155
+ f 5//4 17//4 35//4
156
+ f 17//44 14//44 35//44
157
+ f 14//45 24//45 35//45
158
+ f 31//46 5//46 35//46
159
+ f 24//47 31//47 35//47
160
+ f 19//48 23//48 36//48
161
+ f 1//3 29//3 36//3
162
+ f 29//49 19//49 36//49
163
+ f 32//50 1//50 36//50
164
+ f 23//51 32//51 36//51
165
+ f 22//52 7//52 37//52
166
+ f 20//53 22//53 37//53
167
+ f 2//54 27//54 37//54
168
+ f 27//55 20//55 37//55
169
+ f 31//56 2//56 37//56
dexart-release/assets/sapien/102697/new_objs/102697_link_4_11.obj ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender v2.79 (sub 0) OBJ File: ''
2
+ # www.blender.org
3
+ mtllib 102697_link_4_11.mtl
4
+ o Shape_IndexedFaceSet.011_Shape_IndexedFaceSet.7497
5
+ v -0.323117 -0.045453 0.089563
6
+ v -0.106232 0.036714 0.031262
7
+ v -0.106232 -0.014976 0.031262
8
+ v -0.207356 0.064630 -0.188472
9
+ v -0.342601 0.058882 0.201139
10
+ v -0.106232 -0.022362 -0.168161
11
+ v -0.269912 -0.053693 0.229603
12
+ v -0.099529 0.061840 -0.178611
13
+ v -0.268728 0.058882 0.223327
14
+ v -0.217054 -0.014976 -0.168161
15
+ v -0.114215 -0.048395 -0.086531
16
+ v -0.354324 -0.050570 0.218569
17
+ v -0.106232 -0.051916 0.009150
18
+ v -0.349976 0.051485 0.178989
19
+ v -0.239154 0.021942 0.215931
20
+ v -0.224454 0.036714 -0.168161
21
+ v -0.170368 -0.047738 -0.078248
22
+ v -0.331607 -0.019798 0.104076
23
+ v -0.246554 -0.022362 0.223327
24
+ v -0.202304 -0.022362 -0.168161
25
+ v -0.126343 0.050010 0.012102
26
+ v -0.246554 0.044099 0.223327
27
+ v -0.224454 -0.000205 -0.168161
28
+ vn -0.9305 0.0000 -0.3663
29
+ vn 0.9995 0.0000 0.0319
30
+ vn 0.0870 -0.1295 -0.9878
31
+ vn -0.0040 0.9999 0.0134
32
+ vn 0.0243 0.9996 0.0176
33
+ vn -0.2829 0.1803 0.9420
34
+ vn -0.1273 0.0565 0.9902
35
+ vn 0.9995 -0.0157 0.0262
36
+ vn 0.9966 -0.0810 -0.0135
37
+ vn 0.8380 -0.5383 -0.0897
38
+ vn -0.4515 0.8806 -0.1437
39
+ vn -0.9523 0.1448 0.2687
40
+ vn 0.8116 0.0000 0.5842
41
+ vn -0.8922 0.3024 -0.3355
42
+ vn -0.0793 -0.9951 -0.0587
43
+ vn -0.0330 -0.9990 -0.0300
44
+ vn -0.0269 -0.9992 -0.0280
45
+ vn -0.0169 -0.9992 -0.0354
46
+ vn -0.9542 -0.1811 -0.2380
47
+ vn -0.9782 -0.0375 -0.2042
48
+ vn -0.9321 0.1193 -0.3421
49
+ vn 0.7252 -0.4335 0.5349
50
+ vn 0.7686 -0.3286 0.5489
51
+ vn 0.8076 -0.0366 0.5886
52
+ vn -0.0000 -0.2274 -0.9738
53
+ vn -0.4300 -0.8588 -0.2785
54
+ vn -0.1163 -0.2322 -0.9657
55
+ vn 0.0000 -0.9527 -0.3038
56
+ vn -0.2235 -0.9559 -0.1904
57
+ vn -0.0489 -0.9657 -0.2552
58
+ vn 0.4655 0.8769 0.1198
59
+ vn 0.3830 0.8970 0.2205
60
+ vn 0.1907 0.9778 0.0875
61
+ vn 0.5197 0.7795 0.3497
62
+ vn 0.0368 0.0552 0.9978
63
+ vn 0.8066 0.0736 0.5865
64
+ vn 0.2595 0.0000 0.9657
65
+ vn 0.7069 0.0000 0.7073
66
+ vn -0.8241 -0.4128 -0.3879
67
+ vn -0.3724 -0.1866 -0.9091
68
+ vn -0.7650 0.0000 -0.6440
69
+ vn -0.9238 -0.0961 -0.3705
70
+ usemtl Shape.7503
71
+ s off
72
+ f 18//1 16//1 23//1
73
+ f 2//2 3//2 8//2
74
+ f 6//3 4//3 8//3
75
+ f 4//4 5//4 9//4
76
+ f 8//5 4//5 9//5
77
+ f 9//6 5//6 12//6
78
+ f 7//7 9//7 12//7
79
+ f 8//8 3//8 13//8
80
+ f 6//9 8//9 13//9
81
+ f 11//10 6//10 13//10
82
+ f 5//11 4//11 14//11
83
+ f 12//12 5//12 14//12
84
+ f 3//13 2//13 15//13
85
+ f 14//14 4//14 16//14
86
+ f 12//15 1//15 17//15
87
+ f 7//16 12//16 17//16
88
+ f 13//17 7//17 17//17
89
+ f 11//18 13//18 17//18
90
+ f 1//19 12//19 18//19
91
+ f 12//20 14//20 18//20
92
+ f 14//21 16//21 18//21
93
+ f 7//22 13//22 19//22
94
+ f 13//23 3//23 19//23
95
+ f 3//24 15//24 19//24
96
+ f 4//25 6//25 20//25
97
+ f 1//26 10//26 20//26
98
+ f 10//27 4//27 20//27
99
+ f 6//28 11//28 20//28
100
+ f 17//29 1//29 20//29
101
+ f 11//30 17//30 20//30
102
+ f 2//31 8//31 21//31
103
+ f 9//32 2//32 21//32
104
+ f 8//33 9//33 21//33
105
+ f 2//34 9//34 22//34
106
+ f 9//35 7//35 22//35
107
+ f 15//36 2//36 22//36
108
+ f 7//37 19//37 22//37
109
+ f 19//38 15//38 22//38
110
+ f 10//39 1//39 23//39
111
+ f 4//40 10//40 23//40
112
+ f 16//41 4//41 23//41
113
+ f 1//42 18//42 23//42
dexart-release/assets/sapien/102697/new_objs/102697_link_4_19.obj ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender v2.79 (sub 0) OBJ File: ''
2
+ # www.blender.org
3
+ mtllib 102697_link_4_19.mtl
4
+ o Shape_IndexedFaceSet.019_Shape_IndexedFaceSet.7505
5
+ v 0.418781 0.627924 -0.508767
6
+ v 0.428328 0.622978 -0.508489
7
+ v -0.056475 0.145332 -0.517066
8
+ v -0.076686 0.620168 -0.567029
9
+ v 0.366468 0.620168 -0.567029
10
+ v -0.076686 0.125411 -0.567029
11
+ v -0.054501 0.599577 -0.517235
12
+ v 0.320187 0.115689 -0.504922
13
+ v 0.322183 0.125411 -0.567029
14
+ v 0.418075 0.269311 -0.508853
15
+ v 0.411683 0.362817 -0.537486
16
+ v -0.067659 0.626136 -0.530364
17
+ v 0.388636 0.635034 -0.507943
18
+ v 0.389069 0.133686 -0.508902
19
+ v 0.351689 0.214056 -0.567029
20
+ v 0.314606 0.110688 -0.506236
21
+ v 0.413343 0.608120 -0.537486
22
+ v 0.390810 0.209582 -0.537486
23
+ v 0.432230 0.376647 -0.508872
24
+ v 0.373857 0.605402 -0.567029
25
+ v 0.359079 0.620168 -0.507943
26
+ v -0.067415 0.625895 -0.551722
27
+ vn 0.0041 0.9694 -0.2455
28
+ vn 0.4154 0.7751 -0.4762
29
+ vn 0.0000 0.0000 -1.0000
30
+ vn -0.0322 0.0005 0.9995
31
+ vn -0.9254 -0.0111 0.3788
32
+ vn -0.9710 -0.0000 0.2391
33
+ vn -0.7030 0.0033 0.7112
34
+ vn -0.0527 0.4214 0.9053
35
+ vn 0.0729 0.1956 0.9780
36
+ vn 0.2100 0.9266 -0.3119
37
+ vn 0.0149 0.0038 0.9999
38
+ vn 0.0612 -0.0134 0.9980
39
+ vn -0.0967 -0.9105 0.4021
40
+ vn -0.0476 -0.2038 0.9778
41
+ vn 0.0000 -0.9719 -0.2354
42
+ vn 0.2833 -0.9396 -0.1922
43
+ vn 0.1599 -0.4139 0.8961
44
+ vn 0.5278 0.6137 -0.5872
45
+ vn 0.8262 -0.1125 -0.5520
46
+ vn 0.6491 -0.2811 -0.7069
47
+ vn 0.8763 -0.1873 -0.4438
48
+ vn 0.5947 -0.0810 -0.7998
49
+ vn 0.5773 -0.1922 -0.7936
50
+ vn 0.0375 -0.0010 0.9993
51
+ vn 0.0502 -0.0064 0.9987
52
+ vn 0.8317 -0.1098 -0.5442
53
+ vn 0.8852 0.0147 -0.4650
54
+ vn 0.8135 -0.0055 -0.5815
55
+ vn 0.4969 -0.0281 -0.8673
56
+ vn 0.5624 0.2814 -0.7775
57
+ vn 0.5992 -0.0041 -0.8006
58
+ vn -0.0228 0.0077 0.9997
59
+ vn -0.0249 0.0495 0.9985
60
+ vn -0.0031 0.0062 1.0000
61
+ vn 0.0000 0.9366 -0.3504
62
+ vn -0.5068 0.8619 -0.0155
63
+ vn -0.0189 0.9998 -0.0115
64
+ usemtl Shape.7511
65
+ s off
66
+ f 13//1 5//1 22//1
67
+ f 1//2 2//2 5//2
68
+ f 4//3 5//3 6//3
69
+ f 7//4 3//4 8//4
70
+ f 6//3 5//3 9//3
71
+ f 6//5 3//5 12//5
72
+ f 4//6 6//6 12//6
73
+ f 3//7 7//7 12//7
74
+ f 12//8 7//8 13//8
75
+ f 2//9 1//9 13//9
76
+ f 1//10 5//10 13//10
77
+ f 8//11 2//11 13//11
78
+ f 10//12 8//12 14//12
79
+ f 9//3 5//3 15//3
80
+ f 3//13 6//13 16//13
81
+ f 8//14 3//14 16//14
82
+ f 6//15 9//15 16//15
83
+ f 9//16 14//16 16//16
84
+ f 14//17 8//17 16//17
85
+ f 5//18 2//18 17//18
86
+ f 11//19 10//19 18//19
87
+ f 14//20 9//20 18//20
88
+ f 10//21 14//21 18//21
89
+ f 15//22 11//22 18//22
90
+ f 9//23 15//23 18//23
91
+ f 2//24 8//24 19//24
92
+ f 8//25 10//25 19//25
93
+ f 10//26 11//26 19//26
94
+ f 17//27 2//27 19//27
95
+ f 11//28 17//28 19//28
96
+ f 11//29 15//29 20//29
97
+ f 15//3 5//3 20//3
98
+ f 5//30 17//30 20//30
99
+ f 17//31 11//31 20//31
100
+ f 7//32 8//32 21//32
101
+ f 13//33 7//33 21//33
102
+ f 8//34 13//34 21//34
103
+ f 5//35 4//35 22//35
104
+ f 4//36 12//36 22//36
105
+ f 12//37 13//37 22//37
dexart-release/assets/sapien/102697/new_objs/102697_link_4_3.mtl ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender MTL File: 'None'
2
+ # Material Count: 1
3
+
4
+ newmtl Shape.7495
5
+ Ns 400.000000
6
+ Ka 0.400000 0.400000 0.400000
7
+ Kd 0.168000 0.496000 0.216000
8
+ Ks 0.250000 0.250000 0.250000
9
+ Ke 0.000000 0.000000 0.000000
10
+ Ni 1.000000
11
+ d 0.500000
12
+ illum 2
dexart-release/assets/sapien/102697/new_objs/102697_link_4_33.mtl ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender MTL File: 'None'
2
+ # Material Count: 1
3
+
4
+ newmtl Shape.7525
5
+ Ns 400.000000
6
+ Ka 0.400000 0.400000 0.400000
7
+ Kd 0.272000 0.624000 0.536000
8
+ Ks 0.250000 0.250000 0.250000
9
+ Ke 0.000000 0.000000 0.000000
10
+ Ni 1.000000
11
+ d 0.500000
12
+ illum 2
dexart-release/assets/sapien/102697/new_objs/102697_link_4_4.mtl ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender MTL File: 'None'
2
+ # Material Count: 1
3
+
4
+ newmtl Shape.7496
5
+ Ns 400.000000
6
+ Ka 0.400000 0.400000 0.400000
7
+ Kd 0.720000 0.472000 0.504000
8
+ Ks 0.250000 0.250000 0.250000
9
+ Ke 0.000000 0.000000 0.000000
10
+ Ni 1.000000
11
+ d 0.500000
12
+ illum 2
dexart-release/assets/sapien/102697/new_objs/102697_link_4_8.mtl ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Blender MTL File: 'None'
2
+ # Material Count: 1
3
+
4
+ newmtl Shape.7500
5
+ Ns 400.000000
6
+ Ka 0.400000 0.400000 0.400000
7
+ Kd 0.184000 0.536000 0.280000
8
+ Ks 0.250000 0.250000 0.250000
9
+ Ke 0.000000 0.000000 0.000000
10
+ Ni 1.000000
11
+ d 0.500000
12
+ illum 2
dexart-release/assets/sapien/102697/result.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"text": "Toilet", "name": "Toilet", "id": 0, "children": [{"text": "button", "name": "button", "id": 1, "objs": ["original-8"]}, {"text": "lid", "name": "lid", "id": 2, "objs": ["original-4", "original-3"]}, {"text": "lid", "name": "lid", "id": 3, "objs": ["original-21", "original-20"]}, {"text": "seat", "name": "seat", "id": 4, "objs": ["original-18", "original-17"]}, {"text": "base_body", "name": "base_body", "id": 5, "objs": ["original-13", "original-6", "original-15", "original-11", "original-7", "original-10", "original-14"]}]}]
dexart-release/assets/sapien/102697/result_original.json ADDED
@@ -0,0 +1 @@
 
 
1
+ [{"id": 0, "name": "Toilet", "text": "Toilet", "children": [{"id": 1, "name": "button", "text": "button", "objs": ["original-8"]}, {"id": 2, "name": "lid", "text": "lid", "objs": ["original-4", "original-3"]}, {"id": 3, "name": "lid", "text": "lid", "objs": ["original-21", "original-20"]}, {"id": 4, "name": "seat", "text": "seat", "objs": ["original-18", "original-17"]}]}]
dexart-release/assets/sapien/102697/semantics.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ link_0 hinge lever
2
+ link_1 slider pump_lid
3
+ link_2 hinge lid
4
+ link_3 hinge seat
5
+ link_4 static toilet_body
dexart-release/dexart.egg-info/PKG-INFO ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Metadata-Version: 2.1
2
+ Name: dexart
3
+ Version: 0.1.0
4
+ Summary: UNKNOWN
5
+ Home-page: https://github.com/Kami-code/dexart-release
6
+ Author: Xiaolong Wang's Lab
7
+ License: UNKNOWN
8
+ Platform: UNKNOWN
9
+ License-File: LICENSE
10
+
11
+ UNKNOWN
12
+
dexart-release/dexart.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LICENSE
2
+ README.md
3
+ setup.py
4
+ dexart.egg-info/PKG-INFO
5
+ dexart.egg-info/SOURCES.txt
6
+ dexart.egg-info/dependency_links.txt
7
+ dexart.egg-info/requires.txt
8
+ dexart.egg-info/top_level.txt
9
+ stable_baselines3/__init__.py
10
+ stable_baselines3/pickle_utils.py
11
+ stable_baselines3/a2c/__init__.py
12
+ stable_baselines3/a2c/a2c.py
13
+ stable_baselines3/a2c/policies.py
14
+ stable_baselines3/common/__init__.py
15
+ stable_baselines3/common/base_class.py
16
+ stable_baselines3/common/buffers.py
17
+ stable_baselines3/common/callbacks.py
18
+ stable_baselines3/common/distributions.py
19
+ stable_baselines3/common/env_util.py
20
+ stable_baselines3/common/evaluation.py
21
+ stable_baselines3/common/logger.py
22
+ stable_baselines3/common/monitor.py
23
+ stable_baselines3/common/noise.py
24
+ stable_baselines3/common/on_policy_algorithm.py
25
+ stable_baselines3/common/policies.py
26
+ stable_baselines3/common/preprocessing.py
27
+ stable_baselines3/common/running_mean_std.py
28
+ stable_baselines3/common/save_util.py
29
+ stable_baselines3/common/torch_layers.py
30
+ stable_baselines3/common/type_aliases.py
31
+ stable_baselines3/common/utils.py
32
+ stable_baselines3/common/vec_env/__init__.py
33
+ stable_baselines3/common/vec_env/base_vec_env.py
34
+ stable_baselines3/common/vec_env/dummy_vec_env.py
35
+ stable_baselines3/common/vec_env/maniskill2_utils_common.py
36
+ stable_baselines3/common/vec_env/maniskill2_utils_wrappers_obs.py
37
+ stable_baselines3/common/vec_env/maniskill2_vec_env.py
38
+ stable_baselines3/common/vec_env/maniskill2_wrapper_obs.py
39
+ stable_baselines3/common/vec_env/stacked_observations.py
40
+ stable_baselines3/common/vec_env/subproc_vec_env.py
41
+ stable_baselines3/common/vec_env/util.py
42
+ stable_baselines3/common/vec_env/vec_check_nan.py
43
+ stable_baselines3/common/vec_env/vec_extract_dict_obs.py
44
+ stable_baselines3/common/vec_env/vec_frame_stack.py
45
+ stable_baselines3/common/vec_env/vec_monitor.py
46
+ stable_baselines3/common/vec_env/vec_normalize.py
47
+ stable_baselines3/common/vec_env/vec_transpose.py
48
+ stable_baselines3/common/vec_env/vec_video_recorder.py
49
+ stable_baselines3/ppo/__init__.py
50
+ stable_baselines3/ppo/policies.py
51
+ stable_baselines3/ppo/ppo.py
dexart-release/dexart.egg-info/dependency_links.txt ADDED
@@ -0,0 +1 @@
 
 
1
+
dexart-release/dexart.egg-info/requires.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transforms3d
2
+ sapien==2.2.1
3
+ numpy
4
+ open3d>=0.15.1
dexart-release/dexart.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ stable_baselines3
dexart-release/examples/gen_demonstration_expert.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
5
+
6
+ import argparse
7
+ import zarr
8
+ import torch
9
+ import numpy as np
10
+ import torch.nn.functional as F
11
+ import pytorch3d.ops as torch3d_ops
12
+ from dexart.env.task_setting import TRAIN_CONFIG, RANDOM_CONFIG
13
+ from dexart.env.create_env import create_env
14
+ from stable_baselines3 import PPO
15
+ # from examples.train import get_3d_policy_kwargs
16
+ from train import get_3d_policy_kwargs
17
+ from tqdm import tqdm
18
+ from termcolor import cprint
19
+
20
+
21
+ def parse_args():
22
+ parser = argparse.ArgumentParser()
23
+ parser.add_argument('--task_name', type=str, required=True)
24
+ parser.add_argument('--checkpoint_path', type=str, required=True)
25
+ parser.add_argument('--num_episodes', type=int, default=10, help='number of total episodes')
26
+ parser.add_argument('--use_test_set', dest='use_test_set', action='store_true', default=False)
27
+ parser.add_argument('--root_dir', type=str, default='data', help='directory to save data')
28
+ parser.add_argument('--img_size', type=int, default=84, help='image size')
29
+ parser.add_argument('--num_points', type=int, default=1024, help='number of points in point cloud')
30
+ args = parser.parse_args()
31
+ return args
32
+
33
+ def downsample_with_fps(points: np.ndarray, num_points: int = 512):
34
+ # fast point cloud sampling using torch3d
35
+ points = torch.from_numpy(points).unsqueeze(0).cuda()
36
+ num_points = torch.tensor([num_points]).cuda()
37
+ # remember to only use coord to sample
38
+ _, sampled_indices = torch3d_ops.sample_farthest_points(points=points[...,:3], K=num_points)
39
+ points = points.squeeze(0).cpu().numpy()
40
+ points = points[sampled_indices.squeeze(0).cpu().numpy()]
41
+ return points
42
+
43
+ def main():
44
+ args = parse_args()
45
+ task_name = args.task_name
46
+ use_test_set = args.use_test_set
47
+ checkpoint_path = args.checkpoint_path
48
+
49
+
50
+ save_dir = os.path.join(args.root_dir, 'dexart_'+args.task_name+'_expert.zarr')
51
+ if os.path.exists(save_dir):
52
+ cprint('Data already exists at {}'.format(save_dir), 'red')
53
+ cprint("If you want to overwrite, delete the existing directory first.", "red")
54
+ cprint("Do you want to overwrite? (y/n)", "red")
55
+ # user_input = input()
56
+ user_input = 'y'
57
+ if user_input == 'y':
58
+ cprint('Overwriting {}'.format(save_dir), 'red')
59
+ os.system('rm -rf {}'.format(save_dir))
60
+ else:
61
+ cprint('Exiting', 'red')
62
+ return
63
+ os.makedirs(save_dir, exist_ok=True)
64
+
65
+
66
+ if use_test_set:
67
+ indeces = TRAIN_CONFIG[task_name]['unseen']
68
+ cprint(f"using unseen instances {indeces}", 'yellow')
69
+ else:
70
+ indeces = TRAIN_CONFIG[task_name]['seen']
71
+ cprint(f"using seen instances {indeces}", 'yellow')
72
+
73
+ rand_pos = RANDOM_CONFIG[task_name]['rand_pos']
74
+ rand_degree = RANDOM_CONFIG[task_name]['rand_degree']
75
+ env = create_env(task_name=task_name,
76
+ use_visual_obs=True,
77
+ use_gui=False,
78
+ is_eval=True,
79
+ pc_noise=True,
80
+ pc_seg=True,
81
+ index=indeces,
82
+ img_type='robot',
83
+ rand_pos=rand_pos,
84
+ rand_degree=rand_degree)
85
+
86
+ policy = PPO.load(checkpoint_path, env, 'cuda:0',
87
+ policy_kwargs=get_3d_policy_kwargs(extractor_name='smallpn'),
88
+ check_obs_space=False, force_load=True)
89
+
90
+ eval_instances = len(env.instance_list)
91
+ num_episodes = args.num_episodes
92
+ cprint(f"generate {num_episodes} episodes in total", 'yellow')
93
+
94
+ success_list = []
95
+ reward_list = []
96
+
97
+ total_count = 0
98
+ img_arrays = []
99
+ point_cloud_arrays = []
100
+ depth_arrays = []
101
+ state_arrays = []
102
+ imagin_robot_arrays = []
103
+ action_arrays = []
104
+ episode_ends_arrays = []
105
+
106
+
107
+ with tqdm(total=num_episodes) as pbar:
108
+ num_success = 0
109
+ while num_success < num_episodes:
110
+
111
+ # obs dict keys: 'instance_1-seg_gt', 'instance_1-point_cloud',
112
+ # 'instance_1-rgb', 'imagination_robot', 'state', 'oracle_state'
113
+ obs = env.reset()
114
+ eval_success = False
115
+ reward_sum = 0
116
+
117
+ img_arrays_sub = []
118
+ point_cloud_arrays_sub = []
119
+ depth_arrays_sub = []
120
+ state_arrays_sub = []
121
+ imagin_robot_arrays_sub = []
122
+ action_arrays_sub = []
123
+ total_count_sub = 0
124
+ for j in range(env.horizon):
125
+
126
+ if isinstance(obs, dict):
127
+ for key, value in obs.items():
128
+ obs[key] = value[np.newaxis, :]
129
+ else:
130
+ obs = obs[np.newaxis, :]
131
+ action = policy.predict(observation=obs, deterministic=True)[0]
132
+
133
+ # fetch data
134
+ total_count_sub += 1
135
+ obs_state = obs['state'][0] # (32)
136
+ obs_imagin_robot = obs['imagination_robot'][0] # (96,7)
137
+ obs_point_cloud = obs['instance_1-point_cloud'][0] # (1024,3)
138
+ obs_depth = obs['instance_1-depth'][0] # (84,84)
139
+
140
+ if obs_point_cloud.shape[0] > args.num_points:
141
+ obs_point_cloud = downsample_with_fps(obs_point_cloud, num_points=args.num_points)
142
+ obs_image = obs['instance_1-rgb'][0] # (84,84,3), [0,1]
143
+
144
+
145
+ # to 0-255
146
+ obs_image = (obs_image*255).astype(np.uint8)
147
+
148
+ # interpolate to target image size
149
+ if obs_image.shape[0] != args.img_size:
150
+ obs_image = F.interpolate(torch.from_numpy(obs_image).permute(2,0,1).unsqueeze(0),
151
+ size=args.img_size).squeeze().permute(1,2,0).numpy()
152
+ # save data
153
+ img_arrays_sub.append(obs_image)
154
+ imagin_robot_arrays_sub.append(obs_imagin_robot)
155
+ point_cloud_arrays_sub.append(obs_point_cloud)
156
+ depth_arrays_sub.append(obs_depth)
157
+ state_arrays_sub.append(obs_state)
158
+ action_arrays_sub.append(action)
159
+
160
+ # step
161
+ obs, reward, done, _ = env.step(action)
162
+ reward_sum += reward
163
+ if env.is_eval_done:
164
+ eval_success = True
165
+ if done:
166
+ break
167
+
168
+ if eval_success:
169
+ total_count += total_count_sub
170
+ episode_ends_arrays.append(total_count) # the index of the last step of the episode
171
+ reward_list.append(reward_sum)
172
+ success_list.append(int(eval_success))
173
+
174
+ img_arrays.extend(img_arrays_sub)
175
+ imagin_robot_arrays.extend(imagin_robot_arrays_sub)
176
+ point_cloud_arrays.extend(point_cloud_arrays_sub)
177
+ depth_arrays.extend(depth_arrays_sub)
178
+ state_arrays.extend(state_arrays_sub)
179
+ action_arrays.extend(action_arrays_sub)
180
+
181
+ num_success += 1
182
+
183
+ pbar.update(1)
184
+ pbar.set_description(f"reward = {reward_sum}, success = {eval_success}")
185
+ else:
186
+ print("episode failed. continue.")
187
+ continue
188
+
189
+
190
+ cprint(f"reward_mean = {np.mean(reward_list)}, success rate = {np.mean(success_list)}", 'yellow')
191
+
192
+ ###############################
193
+ # save data
194
+ ###############################
195
+ # create zarr file
196
+ zarr_root = zarr.group(save_dir)
197
+ zarr_data = zarr_root.create_group('data')
198
+ zarr_meta = zarr_root.create_group('meta')
199
+ # save img, state, action arrays into data, and episode ends arrays into meta
200
+ img_arrays = np.stack(img_arrays, axis=0)
201
+ if img_arrays.shape[1] == 3: # make channel last
202
+ img_arrays = np.transpose(img_arrays, (0,2,3,1))
203
+ state_arrays = np.stack(state_arrays, axis=0)
204
+ imagin_robot_arrays = np.stack(imagin_robot_arrays, axis=0)
205
+ point_cloud_arrays = np.stack(point_cloud_arrays, axis=0)
206
+ depth_arrays = np.stack(depth_arrays, axis=0)
207
+ action_arrays = np.stack(action_arrays, axis=0)
208
+ episode_ends_arrays = np.array(episode_ends_arrays)
209
+
210
+ compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=1)
211
+ img_chunk_size = (env.horizon, img_arrays.shape[1], img_arrays.shape[2], img_arrays.shape[3])
212
+ imagin_robot_chunk_size = (env.horizon, imagin_robot_arrays.shape[1], imagin_robot_arrays.shape[2])
213
+ point_cloud_chunk_size = (env.horizon, point_cloud_arrays.shape[1], point_cloud_arrays.shape[2])
214
+ depth_chunk_size = (env.horizon, depth_arrays.shape[1], depth_arrays.shape[2])
215
+ state_chunk_size = (env.horizon, state_arrays.shape[1])
216
+ action_chunk_size = (env.horizon, action_arrays.shape[1])
217
+ zarr_data.create_dataset('img', data=img_arrays, chunks=img_chunk_size, dtype='uint8', overwrite=True, compressor=compressor)
218
+ zarr_data.create_dataset('state', data=state_arrays, chunks=state_chunk_size, dtype='float32', overwrite=True, compressor=compressor)
219
+ zarr_data.create_dataset('imagin_robot', data=imagin_robot_arrays, chunks=imagin_robot_chunk_size, dtype='float32', overwrite=True, compressor=compressor)
220
+ zarr_data.create_dataset('point_cloud', data=point_cloud_arrays, chunks=point_cloud_chunk_size, dtype='float32', overwrite=True, compressor=compressor)
221
+ zarr_data.create_dataset('depth', data=depth_arrays, chunks=depth_chunk_size, dtype='float32', overwrite=True, compressor=compressor)
222
+ zarr_data.create_dataset('action', data=action_arrays, chunks=action_chunk_size, dtype='float32', overwrite=True, compressor=compressor)
223
+ zarr_meta.create_dataset('episode_ends', data=episode_ends_arrays, dtype='int64', overwrite=True, compressor=compressor)
224
+
225
+ # print shape
226
+ cprint(f'img shape: {img_arrays.shape}, range: [{np.min(img_arrays)}, {np.max(img_arrays)}]', 'green')
227
+ cprint(f'imagin_robot shape: {imagin_robot_arrays.shape}, range: [{np.min(imagin_robot_arrays)}, {np.max(imagin_robot_arrays)}]', 'green')
228
+ cprint(f'point_cloud shape: {point_cloud_arrays.shape}, range: [{np.min(point_cloud_arrays)}, {np.max(point_cloud_arrays)}]', 'green')
229
+ cprint(f'depth shape: {depth_arrays.shape}, range: [{np.min(depth_arrays)}, {np.max(depth_arrays)}]', 'green')
230
+ cprint(f'state shape: {state_arrays.shape}, range: [{np.min(state_arrays)}, {np.max(state_arrays)}]', 'green')
231
+ cprint(f'action shape: {action_arrays.shape}, range: [{np.min(action_arrays)}, {np.max(action_arrays)}]', 'green')
232
+ cprint(f'Saved zarr file to {save_dir}', 'green')
233
+
234
+
235
+ if __name__ == "__main__":
236
+ main()
237
+
238
+
dexart-release/examples/train.py ADDED
@@ -0,0 +1,124 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
4
+ import random
5
+ from collections import OrderedDict
6
+ import torch.nn as nn
7
+ import argparse
8
+ from dexart.env.create_env import create_env
9
+ from dexart.env.task_setting import TRAIN_CONFIG, IMG_CONFIG, RANDOM_CONFIG
10
+ from stable_baselines3.common.torch_layers import PointNetImaginationExtractorGP
11
+ from stable_baselines3.common.vec_env.subproc_vec_env import SubprocVecEnv
12
+ from stable_baselines3.ppo import PPO
13
+ import torch
14
+
15
+ BASE_DIR = os.path.dirname(os.path.abspath(__file__))
16
+
17
+
18
+ def get_3d_policy_kwargs(extractor_name):
19
+ feature_extractor_class = PointNetImaginationExtractorGP
20
+ feature_extractor_kwargs = {"pc_key": "instance_1-point_cloud", "gt_key": "instance_1-seg_gt",
21
+ "extractor_name": extractor_name,
22
+ "imagination_keys": [f'imagination_{key}' for key in IMG_CONFIG['robot'].keys()],
23
+ "state_key": "state"}
24
+
25
+ policy_kwargs = {
26
+ "features_extractor_class": feature_extractor_class,
27
+ "features_extractor_kwargs": feature_extractor_kwargs,
28
+ "net_arch": [dict(pi=[64, 64], vf=[64, 64])],
29
+ "activation_fn": nn.ReLU,
30
+ }
31
+ return policy_kwargs
32
+
33
+
34
+ if __name__ == '__main__':
35
+ parser = argparse.ArgumentParser()
36
+ parser.add_argument('--n', type=int, default=10)
37
+ parser.add_argument('--workers', type=int, default=1)
38
+ parser.add_argument('--lr', type=float, default=3e-4)
39
+ parser.add_argument('--ep', type=int, default=10)
40
+ parser.add_argument('--bs', type=int, default=10)
41
+ parser.add_argument('--seed', type=int, default=100)
42
+ parser.add_argument('--iter', type=int, default=1000)
43
+ parser.add_argument('--freeze', dest='freeze', action='store_true', default=False)
44
+ parser.add_argument('--task_name', type=str, default="laptop")
45
+ parser.add_argument('--extractor_name', type=str, default="smallpn")
46
+ parser.add_argument('--pretrain_path', type=str, default=None)
47
+ args = parser.parse_args()
48
+
49
+ task_name = args.task_name
50
+ extractor_name = args.extractor_name
51
+ seed = args.seed if args.seed >= 0 else random.randint(0, 100000)
52
+ pretrain_path = args.pretrain_path
53
+ horizon = 200
54
+ env_iter = args.iter * horizon * args.n
55
+ print(f"freeze: {args.freeze}")
56
+
57
+ rand_pos = RANDOM_CONFIG[task_name]['rand_pos']
58
+ rand_degree = RANDOM_CONFIG[task_name]['rand_degree']
59
+
60
+
61
+ def create_env_fn():
62
+ seen_indeces = TRAIN_CONFIG[task_name]['seen']
63
+ environment = create_env(task_name=task_name,
64
+ use_visual_obs=True,
65
+ use_gui=False,
66
+ is_eval=False,
67
+ pc_noise=True,
68
+ index=seen_indeces,
69
+ img_type='robot',
70
+ rand_pos=rand_pos,
71
+ rand_degree=rand_degree
72
+ )
73
+ return environment
74
+
75
+
76
+ def create_eval_env_fn():
77
+ unseen_indeces = TRAIN_CONFIG[task_name]['unseen']
78
+ environment = create_env(task_name=task_name,
79
+ use_visual_obs=True,
80
+ use_gui=False,
81
+ is_eval=True,
82
+ pc_noise=True,
83
+ index=unseen_indeces,
84
+ img_type='robot',
85
+ rand_pos=rand_pos,
86
+ rand_degree=rand_degree)
87
+ return environment
88
+
89
+
90
+ env = SubprocVecEnv([create_env_fn] * args.workers, "spawn") # train on a list of envs.
91
+
92
+ model = PPO("PointCloudPolicy", env, verbose=1,
93
+ n_epochs=args.ep,
94
+ n_steps=(args.n // args.workers) * horizon,
95
+ learning_rate=args.lr,
96
+ batch_size=args.bs,
97
+ seed=seed,
98
+ policy_kwargs=get_3d_policy_kwargs(extractor_name=extractor_name),
99
+ min_lr=args.lr,
100
+ max_lr=args.lr,
101
+ adaptive_kl=0.02,
102
+ target_kl=0.2,
103
+ )
104
+
105
+ if pretrain_path is not None:
106
+ state_dict: OrderedDict = torch.load(pretrain_path)
107
+ model.policy.features_extractor.extractor.load_state_dict(state_dict, strict=False)
108
+ print("load pretrained model: ", pretrain_path)
109
+
110
+ rollout = int(model.num_timesteps / (horizon * args.n))
111
+
112
+ # after loading or init the model, then freeze it if needed
113
+ if args.freeze:
114
+ model.policy.features_extractor.extractor.eval()
115
+ for param in model.policy.features_extractor.extractor.parameters():
116
+ param.requires_grad = False
117
+ print("freeze model!")
118
+
119
+ model.learn(
120
+ total_timesteps=int(env_iter),
121
+ reset_num_timesteps=False,
122
+ iter_start=rollout,
123
+ callback=None
124
+ )
dexart-release/examples/utils.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # -*- coding: UTF-8 -*-
3
+ import numpy as np
4
+ from dexart.env.task_setting import ROBUSTNESS_INIT_CAMERA_CONFIG
5
+ import open3d as o3d
6
+
7
+ def visualize_observation(obs, use_seg=False, img_type=None):
8
+ def visualize_pc_with_seg_label(cloud):
9
+ pc = o3d.geometry.PointCloud(o3d.utility.Vector3dVector(cloud[:, :3]))
10
+
11
+ def map(feature):
12
+ color = np.zeros((feature.shape[0], 3))
13
+ COLOR20 = np.array(
14
+ [[230, 25, 75], [60, 180, 75], [255, 225, 25], [0, 130, 200], [245, 130, 48],
15
+ [145, 30, 180], [70, 240, 240], [240, 50, 230], [210, 245, 60], [250, 190, 190],
16
+ [0, 128, 128], [230, 190, 255], [170, 110, 40], [255, 250, 200], [128, 0, 0],
17
+ [170, 255, 195], [128, 128, 0], [255, 215, 180], [0, 0, 128], [128, 128, 128]]) / 255
18
+ for i in range(feature.shape[0]):
19
+ for j in range(feature.shape[1]):
20
+ if feature[i, j] == 1:
21
+ color[i, :] = COLOR20[j, :]
22
+ return color
23
+
24
+ color = map(cloud[:, 3:])
25
+ pc.colors = o3d.utility.Vector3dVector(color)
26
+ return pc
27
+
28
+ pc = obs["instance_1-point_cloud"]
29
+ if use_seg:
30
+ gt_seg = obs["instance_1-seg_gt"]
31
+ pc = np.concatenate([pc, gt_seg], axis=1)
32
+ pc = visualize_pc_with_seg_label(pc)
33
+ if img_type == "robot":
34
+ robot_pc = obs["imagination_robot"]
35
+ pc += visualize_pc_with_seg_label(robot_pc)
36
+ else:
37
+ raise NotImplementedError
38
+ return pc
39
+
40
+
41
+ def get_viewpoint_camera_parameter():
42
+ robustness_init_camera_config = ROBUSTNESS_INIT_CAMERA_CONFIG['laptop']
43
+ r = robustness_init_camera_config['r']
44
+ phi = robustness_init_camera_config['phi']
45
+ theta = robustness_init_camera_config['theta']
46
+ center = robustness_init_camera_config['center']
47
+
48
+ x0, y0, z0 = center
49
+ # phi in [0, pi/2]
50
+ # theta in [0, 2 * pi]
51
+ x = x0 + r * np.sin(phi) * np.cos(theta)
52
+ y = y0 + r * np.sin(phi) * np.sin(theta)
53
+ z = z0 + r * np.cos(phi)
54
+
55
+ cam_pos = np.array([x, y, z])
56
+ forward = np.array([x0 - x, y0 - y, z0 - z])
57
+ forward /= np.linalg.norm(forward)
58
+
59
+ left = np.cross([0, 0, 1], forward)
60
+ left = left / np.linalg.norm(left)
61
+
62
+ up = np.cross(forward, left)
63
+ mat44 = np.eye(4)
64
+ mat44[:3, :3] = np.stack([forward, left, up], axis=1)
65
+ mat44[:3, 3] = cam_pos
66
+ return cam_pos, center, up, mat44
dexart-release/stable_baselines3/a2c/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ from stable_baselines3.a2c.a2c import A2C
2
+ from stable_baselines3.a2c.policies import CnnPolicy, MlpPolicy, MultiInputPolicy
dexart-release/stable_baselines3/a2c/a2c.py ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Dict, Optional, Type, Union
2
+
3
+ import torch as th
4
+ from gym import spaces
5
+ from torch.nn import functional as F
6
+
7
+ from stable_baselines3.common.on_policy_algorithm import OnPolicyAlgorithm
8
+ from stable_baselines3.common.policies import ActorCriticCnnPolicy, ActorCriticPolicy, BasePolicy, MultiInputActorCriticPolicy
9
+ from stable_baselines3.common.type_aliases import GymEnv, MaybeCallback, Schedule
10
+ from stable_baselines3.common.utils import explained_variance
11
+
12
+
13
+ class A2C(OnPolicyAlgorithm):
14
+ """
15
+ Advantage Actor Critic (A2C)
16
+
17
+ Paper: https://arxiv.org/abs/1602.01783
18
+ Code: This implementation borrows code from https://github.com/ikostrikov/pytorch-a2c-ppo-acktr-gail and
19
+ and Stable Baselines (https://github.com/hill-a/stable-baselines)
20
+
21
+ Introduction to A2C: https://hackernoon.com/intuitive-rl-intro-to-advantage-actor-critic-a2c-4ff545978752
22
+
23
+ :param policy: The policy model to use (MlpPolicy, CnnPolicy, ...)
24
+ :param env: The environment to learn from (if registered in Gym, can be str)
25
+ :param learning_rate: The learning rate, it can be a function
26
+ of the current progress remaining (from 1 to 0)
27
+ :param n_steps: The number of steps to run for each environment per update
28
+ (i.e. batch size is n_steps * n_env where n_env is number of environment copies running in parallel)
29
+ :param gamma: Discount factor
30
+ :param gae_lambda: Factor for trade-off of bias vs variance for Generalized Advantage Estimator
31
+ Equivalent to classic advantage when set to 1.
32
+ :param ent_coef: Entropy coefficient for the loss calculation
33
+ :param vf_coef: Value function coefficient for the loss calculation
34
+ :param max_grad_norm: The maximum value for the gradient clipping
35
+ :param rms_prop_eps: RMSProp epsilon. It stabilizes square root computation in denominator
36
+ of RMSProp update
37
+ :param use_rms_prop: Whether to use RMSprop (default) or Adam as optimizer
38
+ :param use_sde: Whether to use generalized State Dependent Exploration (gSDE)
39
+ instead of action noise exploration (default: False)
40
+ :param sde_sample_freq: Sample a new noise matrix every n steps when using gSDE
41
+ Default: -1 (only sample at the beginning of the rollout)
42
+ :param normalize_advantage: Whether to normalize or not the advantage
43
+ :param tensorboard_log: the log location for tensorboard (if None, no logging)
44
+ :param create_eval_env: Whether to create a second environment that will be
45
+ used for evaluating the agent periodically. (Only available when passing string for the environment)
46
+ :param policy_kwargs: additional arguments to be passed to the policy on creation
47
+ :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
48
+ :param seed: Seed for the pseudo random generators
49
+ :param device: Device (cpu, cuda, ...) on which the code should be run.
50
+ Setting it to auto, the code will be run on the GPU if possible.
51
+ :param _init_setup_model: Whether or not to build the network at the creation of the instance
52
+ """
53
+
54
+ policy_aliases: Dict[str, Type[BasePolicy]] = {
55
+ "MlpPolicy": ActorCriticPolicy,
56
+ "CnnPolicy": ActorCriticCnnPolicy,
57
+ "MultiInputPolicy": MultiInputActorCriticPolicy,
58
+ }
59
+
60
+ def __init__(
61
+ self,
62
+ policy: Union[str, Type[ActorCriticPolicy]],
63
+ env: Union[GymEnv, str],
64
+ learning_rate: Union[float, Schedule] = 7e-4,
65
+ n_steps: int = 5,
66
+ gamma: float = 0.99,
67
+ gae_lambda: float = 1.0,
68
+ ent_coef: float = 0.0,
69
+ vf_coef: float = 0.5,
70
+ max_grad_norm: float = 0.5,
71
+ rms_prop_eps: float = 1e-5,
72
+ use_rms_prop: bool = True,
73
+ use_sde: bool = False,
74
+ sde_sample_freq: int = -1,
75
+ normalize_advantage: bool = False,
76
+ tensorboard_log: Optional[str] = None,
77
+ create_eval_env: bool = False,
78
+ policy_kwargs: Optional[Dict[str, Any]] = None,
79
+ verbose: int = 0,
80
+ seed: Optional[int] = None,
81
+ device: Union[th.device, str] = "auto",
82
+ _init_setup_model: bool = True,
83
+ ):
84
+
85
+ super().__init__(
86
+ policy,
87
+ env,
88
+ learning_rate=learning_rate,
89
+ n_steps=n_steps,
90
+ gamma=gamma,
91
+ gae_lambda=gae_lambda,
92
+ ent_coef=ent_coef,
93
+ vf_coef=vf_coef,
94
+ max_grad_norm=max_grad_norm,
95
+ use_sde=use_sde,
96
+ sde_sample_freq=sde_sample_freq,
97
+ tensorboard_log=tensorboard_log,
98
+ policy_kwargs=policy_kwargs,
99
+ verbose=verbose,
100
+ device=device,
101
+ create_eval_env=create_eval_env,
102
+ seed=seed,
103
+ _init_setup_model=False,
104
+ supported_action_spaces=(
105
+ spaces.Box,
106
+ spaces.Discrete,
107
+ spaces.MultiDiscrete,
108
+ spaces.MultiBinary,
109
+ ),
110
+ )
111
+
112
+ self.normalize_advantage = normalize_advantage
113
+
114
+ # Update optimizer inside the policy if we want to use RMSProp
115
+ # (original implementation) rather than Adam
116
+ if use_rms_prop and "optimizer_class" not in self.policy_kwargs:
117
+ self.policy_kwargs["optimizer_class"] = th.optim.RMSprop
118
+ self.policy_kwargs["optimizer_kwargs"] = dict(alpha=0.99, eps=rms_prop_eps, weight_decay=0)
119
+
120
+ if _init_setup_model:
121
+ self._setup_model()
122
+
123
+ def train(self) -> None:
124
+ """
125
+ Update policy using the currently gathered
126
+ rollout buffer (one gradient step over whole data).
127
+ """
128
+ # Switch to train mode (this affects batch norm / dropout)
129
+ self.policy.set_training_mode(True)
130
+
131
+ # Update optimizer learning rate
132
+ self._update_learning_rate(self.policy.optimizer)
133
+
134
+ # This will only loop once (get all data in one go)
135
+ for rollout_data in self.rollout_buffer.get(batch_size=None):
136
+
137
+ actions = rollout_data.actions
138
+ if isinstance(self.action_space, spaces.Discrete):
139
+ # Convert discrete action from float to long
140
+ actions = actions.long().flatten()
141
+
142
+ values, log_prob, entropy = self.policy.evaluate_actions(rollout_data.observations, actions)
143
+ values = values.flatten()
144
+
145
+ # Normalize advantage (not present in the original implementation)
146
+ advantages = rollout_data.advantages
147
+ if self.normalize_advantage:
148
+ advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-8)
149
+
150
+ # Policy gradient loss
151
+ policy_loss = -(advantages * log_prob).mean()
152
+
153
+ # Value loss using the TD(gae_lambda) target
154
+ value_loss = F.mse_loss(rollout_data.returns, values)
155
+
156
+ # Entropy loss favor exploration
157
+ if entropy is None:
158
+ # Approximate entropy when no analytical form
159
+ entropy_loss = -th.mean(-log_prob)
160
+ else:
161
+ entropy_loss = -th.mean(entropy)
162
+
163
+ loss = policy_loss + self.ent_coef * entropy_loss + self.vf_coef * value_loss
164
+
165
+ # Optimization step
166
+ self.policy.optimizer.zero_grad()
167
+ loss.backward()
168
+
169
+ # Clip grad norm
170
+ th.nn.utils.clip_grad_norm_(self.policy.parameters(), self.max_grad_norm)
171
+ self.policy.optimizer.step()
172
+
173
+ explained_var = explained_variance(self.rollout_buffer.values.flatten(), self.rollout_buffer.returns.flatten())
174
+
175
+ self._n_updates += 1
176
+ self.logger.record("train/n_updates", self._n_updates, exclude="tensorboard")
177
+ self.logger.record("train/explained_variance", explained_var)
178
+ self.logger.record("train/entropy_loss", entropy_loss.item())
179
+ self.logger.record("train/policy_loss", policy_loss.item())
180
+ self.logger.record("train/value_loss", value_loss.item())
181
+ if hasattr(self.policy, "log_std"):
182
+ self.logger.record("train/std", th.exp(self.policy.log_std).mean().item())
183
+
184
+ def learn(
185
+ self,
186
+ total_timesteps: int,
187
+ callback: MaybeCallback = None,
188
+ log_interval: int = 100,
189
+ eval_env: Optional[GymEnv] = None,
190
+ eval_freq: int = -1,
191
+ n_eval_episodes: int = 5,
192
+ tb_log_name: str = "A2C",
193
+ eval_log_path: Optional[str] = None,
194
+ reset_num_timesteps: bool = True,
195
+ ) -> "A2C":
196
+
197
+ return super().learn(
198
+ total_timesteps=total_timesteps,
199
+ callback=callback,
200
+ log_interval=log_interval,
201
+ eval_env=eval_env,
202
+ eval_freq=eval_freq,
203
+ n_eval_episodes=n_eval_episodes,
204
+ tb_log_name=tb_log_name,
205
+ eval_log_path=eval_log_path,
206
+ reset_num_timesteps=reset_num_timesteps,
207
+ )
dexart-release/stable_baselines3/a2c/policies.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # This file is here just to define MlpPolicy/CnnPolicy
2
+ # that work for A2C
3
+ from stable_baselines3.common.policies import ActorCriticCnnPolicy, ActorCriticPolicy, MultiInputActorCriticPolicy
4
+
5
+ MlpPolicy = ActorCriticPolicy
6
+ CnnPolicy = ActorCriticCnnPolicy
7
+ MultiInputPolicy = MultiInputActorCriticPolicy
dexart-release/stable_baselines3/common/__init__.py ADDED
File without changes
dexart-release/stable_baselines3/common/base_class.py ADDED
@@ -0,0 +1,835 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Abstract base classes for RL algorithms."""
2
+
3
+ import io
4
+ import pathlib
5
+ import time
6
+ from abc import ABC, abstractmethod
7
+ from collections import deque
8
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union
9
+
10
+ import gym
11
+ import numpy as np
12
+ import torch as th
13
+
14
+ from stable_baselines3.common import utils
15
+ from stable_baselines3.common.callbacks import BaseCallback, CallbackList, ConvertCallback, EvalCallback
16
+ from stable_baselines3.common.env_util import is_wrapped
17
+ from stable_baselines3.common.logger import Logger
18
+ from stable_baselines3.common.monitor import Monitor
19
+ from stable_baselines3.common.noise import ActionNoise
20
+ from stable_baselines3.common.policies import BasePolicy
21
+ from stable_baselines3.common.preprocessing import check_for_nested_spaces, is_image_space, is_image_space_channels_first
22
+ from stable_baselines3.common.save_util import load_from_zip_file, recursive_getattr, recursive_setattr, save_to_zip_file
23
+ from stable_baselines3.common.type_aliases import GymEnv, MaybeCallback, Schedule
24
+ from stable_baselines3.common.utils import (
25
+ check_for_correct_spaces,
26
+ get_device,
27
+ get_schedule_fn,
28
+ get_system_info,
29
+ set_random_seed,
30
+ update_learning_rate,
31
+ )
32
+ from stable_baselines3.common.vec_env import (
33
+ DummyVecEnv,
34
+ VecEnv,
35
+ VecNormalize,
36
+ VecTransposeImage,
37
+ is_vecenv_wrapped,
38
+ unwrap_vec_normalize,
39
+ )
40
+
41
+
42
def maybe_make_env(env: Union[GymEnv, str, None], verbose: int) -> Optional[GymEnv]:
    """Instantiate an environment from its Gym id when a string is given.

    :param env: An environment instance, a registered Gym id, or None.
    :param verbose: Logging verbosity (>= 1 prints the creation message).
    :return: The (possibly newly created) Gym environment, or ``env`` unchanged.
    """
    # Anything that is not a string is passed through untouched (including None).
    if not isinstance(env, str):
        return env
    if verbose >= 1:
        print(f"Creating environment from the given name '{env}'")
    return gym.make(env)
54
+
55
+
56
class BaseAlgorithm(ABC):
    """
    Common skeleton shared by all RL algorithm implementations.

    :param policy: Policy object
    :param env: The environment to learn from
        (if registered in Gym, can be str. Can be None for loading trained models)
    :param learning_rate: learning rate for the optimizer,
        it can be a function of the current progress remaining (from 1 to 0)
    :param policy_kwargs: Additional arguments to be passed to the policy on creation
    :param tensorboard_log: the log location for tensorboard (if None, no logging)
    :param verbose: The verbosity level: 0 none, 1 training information, 2 debug
    :param device: Device on which the code should run.
        By default, it will try to use a Cuda compatible device and fallback to cpu
        if it is not possible.
    :param support_multi_env: Whether the algorithm supports training
        with multiple environments (as in A2C)
    :param create_eval_env: Whether to create a second environment that will be
        used for evaluating the agent periodically (only available when passing a
        string for the environment).
    :param monitor_wrapper: When creating an environment, whether to wrap it
        or not in a Monitor wrapper.
    :param seed: Seed for the pseudo random generators
    :param use_sde: Whether to use generalized State Dependent Exploration (gSDE)
        instead of action noise exploration (default: False)
    :param sde_sample_freq: Sample a new noise matrix every n steps when using gSDE
        Default: -1 (only sample at the beginning of the rollout)
    :param supported_action_spaces: The action spaces supported by the algorithm.
    """

    # Maps a policy alias such as "MlpPolicy" to a concrete policy class;
    # concrete algorithms populate this (see _get_policy_from_name()).
    policy_aliases: Dict[str, Type[BasePolicy]] = {}

    def __init__(
        self,
        policy: Type[BasePolicy],
        env: Union[GymEnv, str, None],
        learning_rate: Union[float, Schedule],
        policy_kwargs: Optional[Dict[str, Any]] = None,
        tensorboard_log: Optional[str] = None,
        verbose: int = 0,
        device: Union[th.device, str] = "auto",
        support_multi_env: bool = False,
        create_eval_env: bool = False,
        monitor_wrapper: bool = True,
        seed: Optional[int] = None,
        use_sde: bool = False,
        sde_sample_freq: int = -1,
        supported_action_spaces: Optional[Tuple[gym.spaces.Space, ...]] = None,
    ):
        # Resolve a string alias into an actual policy class when needed.
        self.policy_class = self._get_policy_from_name(policy) if isinstance(policy, str) else policy

        self.device = get_device(device)
        if verbose > 0:
            print(f"Using {self.device} device")

        self.env = None  # type: Optional[GymEnv]
        # Keep a handle on the VecNormalize wrapper (if any) for (un)normalization.
        self._vec_normalize_env = unwrap_vec_normalize(env)
        self.verbose = verbose
        self.policy_kwargs = {} if policy_kwargs is None else policy_kwargs
        self.observation_space = None  # type: Optional[gym.spaces.Space]
        self.action_space = None  # type: Optional[gym.spaces.Space]
        self.n_envs = None
        self.num_timesteps = 0
        # Total planned timesteps; used when updating schedules.
        self._total_timesteps = 0
        # Timestep counter value at the start of learn(), used to compute fps.
        self._num_timesteps_at_start = 0
        self.eval_env = None
        self.seed = seed
        self.action_noise = None  # type: Optional[ActionNoise]
        self.start_time = None
        self.policy = None
        self.learning_rate = learning_rate
        self.tensorboard_log = tensorboard_log
        self.lr_schedule = None  # type: Optional[Schedule]
        self._last_obs = None  # type: Optional[Union[np.ndarray, Dict[str, np.ndarray]]]
        self._last_episode_starts = None  # type: Optional[np.ndarray]
        # Unnormalized version of the last observation (only used with VecNormalize).
        self._last_original_obs = None  # type: Optional[Union[np.ndarray, Dict[str, np.ndarray]]]
        self._episode_num = 0
        # gSDE settings.
        self.use_sde = use_sde
        self.sde_sample_freq = sde_sample_freq
        # Remaining fraction of training (goes from 1 down to 0);
        # it drives the learning-rate schedule.
        self._current_progress_remaining = 1
        # Rolling buffers for episode statistics (logging).
        self.ep_info_buffer = None  # type: Optional[deque]
        self.ep_success_buffer = None  # type: Optional[deque]
        # Number of gradient updates so far (used e.g. for TD3 delayed updates).
        self._n_updates = 0  # type: int
        # The logger object.
        self._logger = None  # type: Logger
        # True once the user supplies a custom logger via set_logger().
        self._custom_logger = False

        # Create and wrap the env if one was given.
        if env is not None:
            if isinstance(env, str):
                if create_eval_env:
                    self.eval_env = maybe_make_env(env, self.verbose)

                env = maybe_make_env(env, self.verbose)
            env = self._wrap_env(env, self.verbose, monitor_wrapper)

            self.observation_space = env.observation_space
            self.action_space = env.action_space
            self.n_envs = env.num_envs
            self.env = env

            if supported_action_spaces is not None:
                assert isinstance(self.action_space, supported_action_spaces), (
                    f"The algorithm only supports {supported_action_spaces} as action spaces "
                    f"but {self.action_space} was provided"
                )

            if not support_multi_env and self.n_envs > 1:
                raise ValueError(
                    "Error: the model does not support multiple envs; it requires " "a single vectorized environment."
                )

            # Catch the common mistake of using MlpPolicy/CnnPolicy with a Dict space.
            if policy in ["MlpPolicy", "CnnPolicy"] and isinstance(self.observation_space, gym.spaces.Dict):
                raise ValueError(f"You must use `MultiInputPolicy` when working with dict observation space, not {policy}")

            if self.use_sde and not isinstance(self.action_space, gym.spaces.Box):
                raise ValueError("generalized State-Dependent Exploration (gSDE) can only be used with continuous actions.")
187
+
188
+ @staticmethod
189
+ def _wrap_env(env: GymEnv, verbose: int = 0, monitor_wrapper: bool = True) -> VecEnv:
190
+ """ "
191
+ Wrap environment with the appropriate wrappers if needed.
192
+ For instance, to have a vectorized environment
193
+ or to re-order the image channels.
194
+
195
+ :param env:
196
+ :param verbose:
197
+ :param monitor_wrapper: Whether to wrap the env in a ``Monitor`` when possible.
198
+ :return: The wrapped environment.
199
+ """
200
+ if not isinstance(env, VecEnv):
201
+ if not is_wrapped(env, Monitor) and monitor_wrapper:
202
+ if verbose >= 1:
203
+ print("Wrapping the env with a `Monitor` wrapper")
204
+ env = Monitor(env)
205
+ if verbose >= 1:
206
+ print("Wrapping the env in a DummyVecEnv.")
207
+ env = DummyVecEnv([lambda: env])
208
+
209
+ # Make sure that dict-spaces are not nested (not supported)
210
+ check_for_nested_spaces(env.observation_space)
211
+
212
+ if not is_vecenv_wrapped(env, VecTransposeImage):
213
+ wrap_with_vectranspose = False
214
+ if isinstance(env.observation_space, gym.spaces.Dict):
215
+ # If even one of the keys is a image-space in need of transpose, apply transpose
216
+ # If the image spaces are not consistent (for instance one is channel first,
217
+ # the other channel last), VecTransposeImage will throw an error
218
+ for space in env.observation_space.spaces.values():
219
+ wrap_with_vectranspose = wrap_with_vectranspose or (
220
+ is_image_space(space) and not is_image_space_channels_first(space)
221
+ )
222
+ else:
223
+ wrap_with_vectranspose = is_image_space(env.observation_space) and not is_image_space_channels_first(
224
+ env.observation_space
225
+ )
226
+
227
+ if wrap_with_vectranspose:
228
+ if verbose >= 1:
229
+ print("Wrapping the env in a VecTransposeImage.")
230
+ env = VecTransposeImage(env)
231
+
232
+ return env
233
+
234
+ @abstractmethod
235
+ def _setup_model(self) -> None:
236
+ """Create networks, buffer and optimizers."""
237
+
238
+ def set_logger(self, logger: Logger) -> None:
239
+ """
240
+ Setter for for logger object.
241
+
242
+ .. warning::
243
+
244
+ When passing a custom logger object,
245
+ this will overwrite ``tensorboard_log`` and ``verbose`` settings
246
+ passed to the constructor.
247
+ """
248
+ self._logger = logger
249
+ # User defined logger
250
+ self._custom_logger = True
251
+
252
+ @property
253
+ def logger(self) -> Logger:
254
+ """Getter for the logger object."""
255
+ return self._logger
256
+
257
+ def _get_eval_env(self, eval_env: Optional[GymEnv]) -> Optional[GymEnv]:
258
+ """
259
+ Return the environment that will be used for evaluation.
260
+
261
+ :param eval_env:)
262
+ :return:
263
+ """
264
+ if eval_env is None:
265
+ eval_env = self.eval_env
266
+
267
+ if eval_env is not None:
268
+ eval_env = self._wrap_env(eval_env, self.verbose)
269
+ assert eval_env.num_envs == 1
270
+ return eval_env
271
+
272
+ def _setup_lr_schedule(self) -> None:
273
+ """Transform to callable if needed."""
274
+ self.lr_schedule = get_schedule_fn(self.learning_rate)
275
+
276
+ def _update_current_progress_remaining(self, num_timesteps: int, total_timesteps: int) -> None:
277
+ """
278
+ Compute current progress remaining (starts from 1 and ends to 0)
279
+
280
+ :param num_timesteps: current number of timesteps
281
+ :param total_timesteps:
282
+ """
283
+ self._current_progress_remaining = 1.0 - float(num_timesteps) / float(total_timesteps)
284
+
285
+ def _update_learning_rate(self, optimizers: Union[List[th.optim.Optimizer], th.optim.Optimizer]) -> None:
286
+ """
287
+ Update the optimizers learning rate using the current learning rate schedule
288
+ and the current progress remaining (from 1 to 0).
289
+
290
+ :param optimizers:
291
+ An optimizer or a list of optimizers.
292
+ """
293
+ # Log the current learning rate
294
+ self.logger.record("train/learning_rate", self.lr_schedule(self._current_progress_remaining))
295
+
296
+ if not isinstance(optimizers, list):
297
+ optimizers = [optimizers]
298
+ for optimizer in optimizers:
299
+ update_learning_rate(optimizer, self.lr_schedule(self._current_progress_remaining))
300
+
301
+ def _excluded_save_params(self) -> List[str]:
302
+ """
303
+ Returns the names of the parameters that should be excluded from being
304
+ saved by pickling. E.g. replay buffers are skipped by default
305
+ as they take up a lot of space. PyTorch variables should be excluded
306
+ with this so they can be stored with ``th.save``.
307
+
308
+ :return: List of parameters that should be excluded from being saved with pickle.
309
+ """
310
+ return [
311
+ "policy",
312
+ "device",
313
+ "env",
314
+ "eval_env",
315
+ "replay_buffer",
316
+ "rollout_buffer",
317
+ "_vec_normalize_env",
318
+ "_episode_storage",
319
+ "_logger",
320
+ "_custom_logger",
321
+ ]
322
+
323
+ def _get_policy_from_name(self, policy_name: str) -> Type[BasePolicy]:
324
+ """
325
+ Get a policy class from its name representation.
326
+
327
+ The goal here is to standardize policy naming, e.g.
328
+ all algorithms can call upon "MlpPolicy" or "CnnPolicy",
329
+ and they receive respective policies that work for them.
330
+
331
+ :param policy_name: Alias of the policy
332
+ :return: A policy class (type)
333
+ """
334
+
335
+ if policy_name in self.policy_aliases:
336
+ return self.policy_aliases[policy_name]
337
+ else:
338
+ raise ValueError(f"Policy {policy_name} unknown")
339
+
340
+ def _get_torch_save_params(self) -> Tuple[List[str], List[str]]:
341
+ """
342
+ Get the name of the torch variables that will be saved with
343
+ PyTorch ``th.save``, ``th.load`` and ``state_dicts`` instead of the default
344
+ pickling strategy. This is to handle device placement correctly.
345
+
346
+ Names can point to specific variables under classes, e.g.
347
+ "policy.optimizer" would point to ``optimizer`` object of ``self.policy``
348
+ if this object.
349
+
350
+ :return:
351
+ List of Torch variables whose state dicts to save (e.g. th.nn.Modules),
352
+ and list of other Torch variables to store with ``th.save``.
353
+ """
354
+ state_dicts = ["policy"]
355
+
356
+ return state_dicts, []
357
+
358
+ def _init_callback(
359
+ self,
360
+ callback: MaybeCallback,
361
+ eval_env: Optional[VecEnv] = None,
362
+ eval_freq: int = 10000,
363
+ n_eval_episodes: int = 5,
364
+ log_path: Optional[str] = None,
365
+ ) -> BaseCallback:
366
+ """
367
+ :param callback: Callback(s) called at every step with state of the algorithm.
368
+ :param eval_freq: How many steps between evaluations; if None, do not evaluate.
369
+ :param n_eval_episodes: How many episodes to play per evaluation
370
+ :param n_eval_episodes: Number of episodes to rollout during evaluation.
371
+ :param log_path: Path to a folder where the evaluations will be saved
372
+ :return: A hybrid callback calling `callback` and performing evaluation.
373
+ """
374
+ # Convert a list of callbacks into a callback
375
+ if isinstance(callback, list):
376
+ callback = CallbackList(callback)
377
+
378
+ # Convert functional callback to object
379
+ if not isinstance(callback, BaseCallback):
380
+ callback = ConvertCallback(callback)
381
+
382
+ # Create eval callback in charge of the evaluation
383
+ if eval_env is not None:
384
+ eval_callback = EvalCallback(
385
+ eval_env,
386
+ best_model_save_path=log_path,
387
+ log_path=log_path,
388
+ eval_freq=eval_freq,
389
+ n_eval_episodes=n_eval_episodes,
390
+ )
391
+ callback = CallbackList([callback, eval_callback])
392
+
393
+ callback.init_callback(self)
394
+ return callback
395
+
396
+ def _setup_learn(
397
+ self,
398
+ total_timesteps: int,
399
+ eval_env: Optional[GymEnv],
400
+ callback: MaybeCallback = None,
401
+ eval_freq: int = 10000,
402
+ n_eval_episodes: int = 5,
403
+ log_path: Optional[str] = None,
404
+ reset_num_timesteps: bool = True,
405
+ tb_log_name: str = "run",
406
+ ) -> Tuple[int, BaseCallback]:
407
+ """
408
+ Initialize different variables needed for training.
409
+
410
+ :param total_timesteps: The total number of samples (env steps) to train on
411
+ :param eval_env: Environment to use for evaluation.
412
+ :param callback: Callback(s) called at every step with state of the algorithm.
413
+ :param eval_freq: How many steps between evaluations
414
+ :param n_eval_episodes: How many episodes to play per evaluation
415
+ :param log_path: Path to a folder where the evaluations will be saved
416
+ :param reset_num_timesteps: Whether to reset or not the ``num_timesteps`` attribute
417
+ :param tb_log_name: the name of the run for tensorboard log
418
+ :return:
419
+ """
420
+ self.start_time = time.time()
421
+
422
+ if self.ep_info_buffer is None or reset_num_timesteps:
423
+ # Initialize buffers if they don't exist, or reinitialize if resetting counters
424
+ self.ep_info_buffer = deque(maxlen=100)
425
+ self.ep_success_buffer = deque(maxlen=100)
426
+
427
+ if self.action_noise is not None:
428
+ self.action_noise.reset()
429
+
430
+ if reset_num_timesteps:
431
+ self.num_timesteps = 0
432
+ self._episode_num = 0
433
+ else:
434
+ # Make sure training timesteps are ahead of the internal counter
435
+ total_timesteps += self.num_timesteps
436
+ self._total_timesteps = total_timesteps
437
+ self._num_timesteps_at_start = self.num_timesteps
438
+
439
+ # Avoid resetting the environment when calling ``.learn()`` consecutive times
440
+ if reset_num_timesteps or self._last_obs is None:
441
+ self._last_obs = self.env.reset() # pytype: disable=annotation-type-mismatch
442
+ self._last_episode_starts = np.ones((self.env.num_envs,), dtype=bool)
443
+ # Retrieve unnormalized observation for saving into the buffer
444
+ if self._vec_normalize_env is not None:
445
+ self._last_original_obs = self._vec_normalize_env.get_original_obs()
446
+
447
+ if eval_env is not None and self.seed is not None:
448
+ eval_env.seed(self.seed)
449
+
450
+ eval_env = self._get_eval_env(eval_env)
451
+
452
+ # Configure logger's outputs if no logger was passed
453
+ if not self._custom_logger:
454
+ self._logger = utils.configure_logger(self.verbose, self.tensorboard_log, tb_log_name, reset_num_timesteps)
455
+
456
+ # Create eval callback if needed
457
+ callback = self._init_callback(callback, eval_env, eval_freq, n_eval_episodes, log_path)
458
+
459
+ return total_timesteps, callback
460
+
461
+ def _update_info_buffer(self, infos: List[Dict[str, Any]], dones: Optional[np.ndarray] = None) -> None:
462
+ """
463
+ Retrieve reward, episode length, episode success and update the buffer
464
+ if using Monitor wrapper or a GoalEnv.
465
+
466
+ :param infos: List of additional information about the transition.
467
+ :param dones: Termination signals
468
+ """
469
+ if dones is None:
470
+ dones = np.array([False] * len(infos))
471
+ for idx, info in enumerate(infos):
472
+ maybe_ep_info = info.get("episode")
473
+ maybe_is_success = info.get("is_success")
474
+ if maybe_ep_info is not None:
475
+ self.ep_info_buffer.extend([maybe_ep_info])
476
+ if maybe_is_success is not None and dones[idx]:
477
+ self.ep_success_buffer.append(maybe_is_success)
478
+
479
+ def get_env(self) -> Optional[VecEnv]:
480
+ """
481
+ Returns the current environment (can be None if not defined).
482
+
483
+ :return: The current environment
484
+ """
485
+ return self.env
486
+
487
+ def get_vec_normalize_env(self) -> Optional[VecNormalize]:
488
+ """
489
+ Return the ``VecNormalize`` wrapper of the training env
490
+ if it exists.
491
+
492
+ :return: The ``VecNormalize`` env.
493
+ """
494
+ return self._vec_normalize_env
495
+
496
+ def set_env(self, env: GymEnv, force_reset: bool = True) -> None:
497
+ """
498
+ Checks the validity of the environment, and if it is coherent, set it as the current environment.
499
+ Furthermore wrap any non vectorized env into a vectorized
500
+ checked parameters:
501
+ - observation_space
502
+ - action_space
503
+
504
+ :param env: The environment for learning a policy
505
+ :param force_reset: Force call to ``reset()`` before training
506
+ to avoid unexpected behavior.
507
+ See issue https://github.com/DLR-RM/stable-baselines3/issues/597
508
+ """
509
+ # if it is not a VecEnv, make it a VecEnv
510
+ # and do other transformations (dict obs, image transpose) if needed
511
+ env = self._wrap_env(env, self.verbose)
512
+ # Check that the observation spaces match
513
+ check_for_correct_spaces(env, self.observation_space, self.action_space)
514
+ # Update VecNormalize object
515
+ # otherwise the wrong env may be used, see https://github.com/DLR-RM/stable-baselines3/issues/637
516
+ self._vec_normalize_env = unwrap_vec_normalize(env)
517
+
518
+ # Discard `_last_obs`, this will force the env to reset before training
519
+ # See issue https://github.com/DLR-RM/stable-baselines3/issues/597
520
+ if force_reset:
521
+ self._last_obs = None
522
+
523
+ self.n_envs = env.num_envs
524
+ self.env = env
525
+
526
+ @abstractmethod
527
+ def learn(
528
+ self,
529
+ total_timesteps: int,
530
+ callback: MaybeCallback = None,
531
+ log_interval: int = 100,
532
+ tb_log_name: str = "run",
533
+ eval_env: Optional[GymEnv] = None,
534
+ eval_freq: int = -1,
535
+ n_eval_episodes: int = 5,
536
+ eval_log_path: Optional[str] = None,
537
+ reset_num_timesteps: bool = True,
538
+ ) -> "BaseAlgorithm":
539
+ """
540
+ Return a trained model.
541
+
542
+ :param total_timesteps: The total number of samples (env steps) to train on
543
+ :param callback: callback(s) called at every step with state of the algorithm.
544
+ :param log_interval: The number of timesteps before logging.
545
+ :param tb_log_name: the name of the run for TensorBoard logging
546
+ :param eval_env: Environment that will be used to evaluate the agent
547
+ :param eval_freq: Evaluate the agent every ``eval_freq`` timesteps (this may vary a little)
548
+ :param n_eval_episodes: Number of episode to evaluate the agent
549
+ :param eval_log_path: Path to a folder where the evaluations will be saved
550
+ :param reset_num_timesteps: whether or not to reset the current timestep number (used in logging)
551
+ :return: the trained model
552
+ """
553
+
554
+ def predict(
555
+ self,
556
+ observation: np.ndarray,
557
+ state: Optional[Tuple[np.ndarray, ...]] = None,
558
+ episode_start: Optional[np.ndarray] = None,
559
+ deterministic: bool = False,
560
+ ) -> Tuple[np.ndarray, Optional[Tuple[np.ndarray, ...]]]:
561
+ """
562
+ Get the policy action from an observation (and optional hidden state).
563
+ Includes sugar-coating to handle different observations (e.g. normalizing images).
564
+
565
+ :param observation: the input observation
566
+ :param state: The last hidden states (can be None, used in recurrent policies)
567
+ :param episode_start: The last masks (can be None, used in recurrent policies)
568
+ this correspond to beginning of episodes,
569
+ where the hidden states of the RNN must be reset.
570
+ :param deterministic: Whether or not to return deterministic actions.
571
+ :return: the model's action and the next hidden state
572
+ (used in recurrent policies)
573
+ """
574
+ return self.policy.predict(observation, state, episode_start, deterministic)
575
+
576
+ def set_random_seed(self, seed: Optional[int] = None) -> None:
577
+ """
578
+ Set the seed of the pseudo-random generators
579
+ (python, numpy, pytorch, gym, action_space)
580
+
581
+ :param seed:
582
+ """
583
+ if seed is None:
584
+ return
585
+ set_random_seed(seed, using_cuda=self.device.type == th.device("cuda").type)
586
+ self.action_space.seed(seed)
587
+ if self.env is not None:
588
+ self.env.seed(seed)
589
+ if self.eval_env is not None:
590
+ self.eval_env.seed(seed)
591
+
592
+ def set_parameters(
593
+ self,
594
+ load_path_or_dict: Union[str, Dict[str, Dict]],
595
+ exact_match: bool = True,
596
+ device: Union[th.device, str] = "auto",
597
+ ) -> None:
598
+ """
599
+ Load parameters from a given zip-file or a nested dictionary containing parameters for
600
+ different modules (see ``get_parameters``).
601
+
602
+ :param load_path_or_iter: Location of the saved data (path or file-like, see ``save``), or a nested
603
+ dictionary containing nn.Module parameters used by the policy. The dictionary maps
604
+ object names to a state-dictionary returned by ``torch.nn.Module.state_dict()``.
605
+ :param exact_match: If True, the given parameters should include parameters for each
606
+ module and each of their parameters, otherwise raises an Exception. If set to False, this
607
+ can be used to update only specific parameters.
608
+ :param device: Device on which the code should run.
609
+ """
610
+ params = None
611
+ if isinstance(load_path_or_dict, dict):
612
+ params = load_path_or_dict
613
+ else:
614
+ _, params, _ = load_from_zip_file(load_path_or_dict, device=device)
615
+
616
+ # Keep track which objects were updated.
617
+ # `_get_torch_save_params` returns [params, other_pytorch_variables].
618
+ # We are only interested in former here.
619
+ objects_needing_update = set(self._get_torch_save_params()[0])
620
+ updated_objects = set()
621
+
622
+ for name in params:
623
+ attr = None
624
+ try:
625
+ attr = recursive_getattr(self, name)
626
+ except Exception:
627
+ # What errors recursive_getattr could throw? KeyError, but
628
+ # possible something else too (e.g. if key is an int?).
629
+ # Catch anything for now.
630
+ raise ValueError(f"Key {name} is an invalid object name.")
631
+
632
+ if isinstance(attr, th.optim.Optimizer):
633
+ # Optimizers do not support "strict" keyword...
634
+ # Seems like they will just replace the whole
635
+ # optimizer state with the given one.
636
+ # On top of this, optimizer state-dict
637
+ # seems to change (e.g. first ``optim.step()``),
638
+ # which makes comparing state dictionary keys
639
+ # invalid (there is also a nesting of dictionaries
640
+ # with lists with dictionaries with ...), adding to the
641
+ # mess.
642
+ #
643
+ # TL;DR: We might not be able to reliably say
644
+ # if given state-dict is missing keys.
645
+ #
646
+ # Solution: Just load the state-dict as is, and trust
647
+ # the user has provided a sensible state dictionary.
648
+ attr.load_state_dict(params[name])
649
+ else:
650
+ # Assume attr is th.nn.Module
651
+ attr.load_state_dict(params[name], strict=exact_match)
652
+ updated_objects.add(name)
653
+
654
+ if exact_match and updated_objects != objects_needing_update:
655
+ raise ValueError(
656
+ "Names of parameters do not match agents' parameters: "
657
+ f"expected {objects_needing_update}, got {updated_objects}"
658
+ )
659
+
660
+ @classmethod
661
+ def load(
662
+ cls,
663
+ path: Union[str, pathlib.Path, io.BufferedIOBase],
664
+ env: Optional[GymEnv] = None,
665
+ device: Union[th.device, str] = "auto",
666
+ custom_objects: Optional[Dict[str, Any]] = None,
667
+ print_system_info: bool = False,
668
+ force_reset: bool = True,
669
+ check_obs_space: bool = True,
670
+ force_load: bool = False,
671
+ **kwargs,
672
+ ) -> "BaseAlgorithm":
673
+ """
674
+ Load the model from a zip-file.
675
+ Warning: ``load`` re-creates the model from scratch, it does not update it in-place!
676
+ For an in-place load use ``set_parameters`` instead.
677
+
678
+ :param path: path to the file (or a file-like) where to
679
+ load the agent from
680
+ :param env: the new environment to run the loaded model on
681
+ (can be None if you only need prediction from a trained model) has priority over any saved environment
682
+ :param device: Device on which the code should run.
683
+ :param custom_objects: Dictionary of objects to replace
684
+ upon loading. If a variable is present in this dictionary as a
685
+ key, it will not be deserialized and the corresponding item
686
+ will be used instead. Similar to custom_objects in
687
+ ``keras.models.load_model``. Useful when you have an object in
688
+ file that can not be deserialized.
689
+ :param print_system_info: Whether to print system info from the saved model
690
+ and the current system info (useful to debug loading issues)
691
+ :param force_reset: Force call to ``reset()`` before training
692
+ to avoid unexpected behavior.
693
+ See https://github.com/DLR-RM/stable-baselines3/issues/597
694
+ :param kwargs: extra arguments to change the model when loading
695
+ :return: new model instance with loaded parameters
696
+ """
697
+ if print_system_info:
698
+ print("== CURRENT SYSTEM INFO ==")
699
+ get_system_info()
700
+
701
+ data, params, pytorch_variables = load_from_zip_file(
702
+ path, device=device, custom_objects=custom_objects, print_system_info=print_system_info
703
+ )
704
+
705
+ # Remove stored device information and replace with ours
706
+ if "policy_kwargs" in data:
707
+ if "device" in data["policy_kwargs"]:
708
+ del data["policy_kwargs"]["device"]
709
+
710
+ if not force_load:
711
+ if "policy_kwargs" in kwargs and kwargs["policy_kwargs"] != data["policy_kwargs"]:
712
+ raise ValueError(
713
+ f"The specified policy kwargs do not equal the stored policy kwargs."
714
+ f"Stored kwargs: {data['policy_kwargs']}, specified kwargs: {kwargs['policy_kwargs']}"
715
+ )
716
+
717
+ if "observation_space" not in data or "action_space" not in data:
718
+ raise KeyError("The observation_space and action_space were not given, can't verify new environments")
719
+
720
+ if env is not None:
721
+ # Wrap first if needed
722
+ env = cls._wrap_env(env, data["verbose"])
723
+ # Check if given env is valid
724
+ if check_obs_space:
725
+ check_for_correct_spaces(env, data["observation_space"], data["action_space"])
726
+ # Discard `_last_obs`, this will force the env to reset before training
727
+ # See issue https://github.com/DLR-RM/stable-baselines3/issues/597
728
+ if force_reset and data is not None:
729
+ data["_last_obs"] = None
730
+ else:
731
+ # Use stored env, if one exists. If not, continue as is (can be used for predict)
732
+ if "env" in data:
733
+ env = data["env"]
734
+
735
+ # noinspection PyArgumentList
736
+ model = cls( # pytype: disable=not-instantiable,wrong-keyword-args
737
+ policy=data["policy_class"],
738
+ env=env,
739
+ device=device,
740
+ _init_setup_model=False, # pytype: disable=not-instantiable,wrong-keyword-args
741
+ )
742
+
743
+ # load parameters
744
+ if not force_load:
745
+ model.__dict__.update(data) # TODO: Helin cancelled this
746
+ model.__dict__.update(kwargs)
747
+ model._setup_model()
748
+
749
+ # put state_dicts back in place
750
+ model.set_parameters(params, exact_match=True, device=device)
751
+
752
+ # put other pytorch variables back in place
753
+ if pytorch_variables is not None:
754
+ for name in pytorch_variables:
755
+ # Skip if PyTorch variable was not defined (to ensure backward compatibility).
756
+ # This happens when using SAC/TQC.
757
+ # SAC has an entropy coefficient which can be fixed or optimized.
758
+ # If it is optimized, an additional PyTorch variable `log_ent_coef` is defined,
759
+ # otherwise it is initialized to `None`.
760
+ if pytorch_variables[name] is None:
761
+ continue
762
+ # Set the data attribute directly to avoid issue when using optimizers
763
+ # See https://github.com/DLR-RM/stable-baselines3/issues/391
764
+ recursive_setattr(model, name + ".data", pytorch_variables[name].data)
765
+
766
+ # Sample gSDE exploration matrix, so it uses the right device
767
+ # see issue #44
768
+ if model.use_sde:
769
+ model.policy.reset_noise() # pytype: disable=attribute-error
770
+ return model
771
+
772
+ def get_parameters(self) -> Dict[str, Dict]:
773
+ """
774
+ Return the parameters of the agent. This includes parameters from different networks, e.g.
775
+ critics (value functions) and policies (pi functions).
776
+
777
+ :return: Mapping of from names of the objects to PyTorch state-dicts.
778
+ """
779
+ state_dicts_names, _ = self._get_torch_save_params()
780
+ params = {}
781
+ for name in state_dicts_names:
782
+ attr = recursive_getattr(self, name)
783
+ # Retrieve state dict
784
+ params[name] = attr.state_dict()
785
+ return params
786
+
787
+ def save(
788
+ self,
789
+ path: Union[str, pathlib.Path, io.BufferedIOBase],
790
+ exclude: Optional[Iterable[str]] = None,
791
+ include: Optional[Iterable[str]] = None,
792
+ ) -> None:
793
+ """
794
+ Save all the attributes of the object and the model parameters in a zip-file.
795
+
796
+ :param path: path to the file where the rl agent should be saved
797
+ :param exclude: name of parameters that should be excluded in addition to the default ones
798
+ :param include: name of parameters that might be excluded but should be included anyway
799
+ """
800
+ # Copy parameter list so we don't mutate the original dict
801
+ data = self.__dict__.copy()
802
+
803
+ # Exclude is union of specified parameters (if any) and standard exclusions
804
+ if exclude is None:
805
+ exclude = []
806
+ exclude = set(exclude).union(self._excluded_save_params())
807
+
808
+ # Do not exclude params if they are specifically included
809
+ if include is not None:
810
+ exclude = exclude.difference(include)
811
+
812
+ state_dicts_names, torch_variable_names = self._get_torch_save_params()
813
+ all_pytorch_variables = state_dicts_names + torch_variable_names
814
+ for torch_var in all_pytorch_variables:
815
+ # We need to get only the name of the top most module as we'll remove that
816
+ var_name = torch_var.split(".")[0]
817
+ # Any params that are in the save vars must not be saved by data
818
+ exclude.add(var_name)
819
+
820
+ # Remove parameter entries of parameters which are to be excluded
821
+ for param_name in exclude:
822
+ data.pop(param_name, None)
823
+
824
+ # Build dict of torch variables
825
+ pytorch_variables = None
826
+ if torch_variable_names is not None:
827
+ pytorch_variables = {}
828
+ for name in torch_variable_names:
829
+ attr = recursive_getattr(self, name)
830
+ pytorch_variables[name] = attr
831
+
832
+ # Build dict of state_dicts
833
+ params_to_save = self.get_parameters()
834
+
835
+ save_to_zip_file(path, data=data, params=params_to_save, pytorch_variables=pytorch_variables)
dexart-release/stable_baselines3/common/buffers.py ADDED
@@ -0,0 +1,1010 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ from abc import ABC, abstractmethod
3
+ from typing import Any, Dict, Generator, List, Optional, Union
4
+ import stable_baselines3.pickle_utils as pickle_utils
5
+ import numpy as np
6
+ import torch as th
7
+ from gym import spaces
8
+
9
+ from stable_baselines3.common.preprocessing import get_action_dim, get_obs_shape
10
+ from stable_baselines3.common.type_aliases import (
11
+ DictReplayBufferSamples,
12
+ DictRolloutBufferSamples,
13
+ DictSSLRolloutBufferSamples,
14
+ ReplayBufferSamples,
15
+ RolloutBufferSamples,
16
+ )
17
+
18
+ from stable_baselines3.common.vec_env import VecNormalize
19
+
20
+ try:
21
+ # Check memory used by replay buffer when possible
22
+ import psutil
23
+ except ImportError:
24
+ psutil = None
25
+
26
+
27
class BaseBuffer(ABC):
    """
    Base class that represents a buffer (rollout or replay).

    Stores up to ``buffer_size`` steps for ``n_envs`` parallel environments and
    tracks a write cursor ``pos`` plus a ``full`` flag for circular-buffer use.

    :param buffer_size: Max number of elements in the buffer
    :param observation_space: Observation space
    :param action_space: Action space
    :param device: PyTorch device to which sampled values will be converted
    :param n_envs: Number of parallel environments
    """

    def __init__(
        self,
        buffer_size: int,
        observation_space: spaces.Space,
        action_space: spaces.Space,
        device: Union[th.device, str] = "cpu",
        n_envs: int = 1,
    ):
        super().__init__()
        self.buffer_size = buffer_size
        self.observation_space = observation_space
        self.action_space = action_space
        # Per-key shape dict for Dict spaces, plain tuple otherwise
        self.obs_shape = get_obs_shape(observation_space)

        self.action_dim = get_action_dim(action_space)
        # Write cursor: index of the next slot to be overwritten
        self.pos = 0
        # True once the buffer has wrapped around at least once
        self.full = False
        self.device = device
        self.n_envs = n_envs

    @staticmethod
    def swap_and_flatten(arr: np.ndarray) -> np.ndarray:
        """
        Swap and then flatten axes 0 (buffer_size) and 1 (n_envs)
        to convert shape from [n_steps, n_envs, ...] (where ... is the shape of the features)
        to [n_steps * n_envs, ...] (which maintains the order).

        :param arr: array of shape [n_steps, n_envs, ...]
        :return: array of shape [n_steps * n_envs, ...]
        """
        shape = arr.shape
        if len(shape) < 3:
            # Promote 2-D input to 3-D so the reshape below is uniform
            shape = shape + (1,)
        return arr.swapaxes(0, 1).reshape(shape[0] * shape[1], *shape[2:])

    def size(self) -> int:
        """
        :return: The current number of stored steps (capped at ``buffer_size``)
        """
        if self.full:
            return self.buffer_size
        return self.pos

    def add(self, *args, **kwargs) -> None:
        """
        Add elements to the buffer. Must be implemented by subclasses.
        """
        raise NotImplementedError()

    def extend(self, *args, **kwargs) -> None:
        """
        Add a new batch of transitions to the buffer, one element at a time.
        """
        # Do a for loop along the batch axis
        for data in zip(*args):
            self.add(*data)

    def reset(self) -> None:
        """
        Reset the buffer cursor and the ``full`` flag (storage is not cleared).
        """
        self.pos = 0
        self.full = False

    def sample(self, batch_size: int, env: Optional[VecNormalize] = None):
        """
        Sample uniformly among the currently valid indices.

        :param batch_size: Number of elements to sample
        :param env: associated gym VecEnv to normalize the
            observations/rewards when sampling
        :return: samples as produced by the subclass's ``_get_samples``
        """
        upper_bound = self.buffer_size if self.full else self.pos
        batch_inds = np.random.randint(0, upper_bound, size=batch_size)
        return self._get_samples(batch_inds, env=env)

    @abstractmethod
    def _get_samples(
        self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None
    ) -> Union[ReplayBufferSamples, RolloutBufferSamples]:
        """
        Gather the stored data at ``batch_inds`` and convert it to torch tensors.

        :param batch_inds: indices into the buffer
        :param env: optional VecNormalize wrapper for normalization
        :return: a named-tuple of torch tensors
        """
        raise NotImplementedError()

    def to_torch(self, array: np.ndarray, copy: bool = True) -> th.Tensor:
        """
        Convert a numpy array to a PyTorch tensor on ``self.device``.
        Note: it copies the data by default.

        :param array: numpy array to convert
        :param copy: Whether to copy the data
            (may be useful to avoid changing things by reference)
        :return: torch tensor on the buffer's device
        """
        if copy:
            return th.tensor(array).to(self.device)
        return th.as_tensor(array).to(self.device)

    @staticmethod
    def _normalize_obs(
        obs: Union[np.ndarray, Dict[str, np.ndarray]],
        env: Optional[VecNormalize] = None,
    ) -> Union[np.ndarray, Dict[str, np.ndarray]]:
        # Delegate normalization to the VecNormalize wrapper when provided
        if env is not None:
            return env.normalize_obs(obs)
        return obs

    @staticmethod
    def _normalize_reward(reward: np.ndarray, env: Optional[VecNormalize] = None) -> np.ndarray:
        # Delegate normalization to the VecNormalize wrapper when provided
        if env is not None:
            return env.normalize_reward(reward).astype(np.float32)
        return reward
153
+
154
+
155
class ExpertBuffer(BaseBuffer):
    """
    Read-only buffer of expert demonstrations loaded from disk.

    Trajectories are loaded via ``pickle_utils.load_data``; their
    ``'observations'`` and ``'actions'`` entries are concatenated into two flat
    arrays. The buffer is immediately marked ``full`` and does not support
    ``add``.

    :param buffer_size: nominal max size (overwritten below by the dataset length)
    :param observation_space: Observation space
    :param action_space: Action space
    :param device: PyTorch device for sampled tensors
    :param n_envs: Number of parallel environments
    :param dataset_path: path passed to ``pickle_utils.load_data``
    """
    def __init__(self,
                 buffer_size: int,
                 observation_space: spaces.Space,
                 action_space: spaces.Space,
                 device: Union[th.device, str] = "cpu",
                 n_envs: int = 1,
                 dataset_path=''
                 ):
        super(ExpertBuffer, self).__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)
        # NOTE(review): pickle on external files — only load trusted demo datasets.
        data = pickle_utils.load_data(dataset_path)

        data_obs = []
        data_action = []

        # Hard-coded False: the memory-efficient branch in sample() below is
        # therefore dead code for this class.
        self.optimize_memory_usage = False

        for trajectory in data:
            # Debug leftover: prints the keys of every trajectory dict
            print(trajectory.keys())
            # assumes each trajectory is a dict with 'observations'/'actions'
            # arrays of equal length — checked by the assert below
            data_obs.append(trajectory['observations'])
            data_action.append(trajectory['actions'])
        self.observations = np.concatenate(data_obs, axis=0)
        self.actions = np.concatenate(data_action, axis=0)

        assert len(self.observations) == len(self.actions), "Demo Dataset Error: Obs num does not match Action num."
        print('Expert buffer info:', self.observations.shape, self.actions.shape)
        # Shrink/grow the buffer size to the actual dataset length and mark
        # the buffer full so BaseBuffer.sample covers the whole dataset.
        self.buffer_size = len(self.observations)
        self.full = True

    def add(
        self,
        obs: np.ndarray,
        next_obs: np.ndarray,
        action: np.ndarray,
        reward: np.ndarray,
        done: np.ndarray,
        infos: List[Dict[str, Any]],
    ) -> None:
        # The demonstration dataset is immutable.
        assert False, "We do not expect user to use this method."

    def _get_samples(self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None) -> ReplayBufferSamples:
        # Sample randomly the env idx
        # NOTE(review): only (observations, actions) are passed here, while
        # get_all_samples() below pads with three Nones — presumably
        # ReplayBufferSamples in this fork has optional trailing fields; verify
        # against stable_baselines3.common.type_aliases.
        data = (
            self._normalize_obs(self.observations[batch_inds, :], env),
            self.actions[batch_inds, :],
        )
        return ReplayBufferSamples(*tuple(map(self.to_torch, data)))

    def sample(self, batch_size: int, env: Optional[VecNormalize] = None) -> ReplayBufferSamples:
        """
        Sample elements from the demonstration buffer.

        Since ``optimize_memory_usage`` is always False for this class, this
        always delegates to ``BaseBuffer.sample``; the branch below is kept for
        symmetry with ReplayBuffer.sample.

        :param batch_size: Number of elements to sample
        :param env: associated gym VecEnv
            to normalize the observations/rewards when sampling
        :return: batch of (observation, action) pairs
        """
        if not self.optimize_memory_usage:
            return super().sample(batch_size=batch_size, env=env)
        # Do not sample the element with index `self.pos` as the transition is
        # invalid (we use only one array to store `obs` and `next_obs`)
        if self.full:
            batch_inds = (np.random.randint(1, self.buffer_size, size=batch_size) + self.pos) % self.buffer_size
        else:
            batch_inds = np.random.randint(0, self.pos, size=batch_size)
        return self._get_samples(batch_inds, env=env)

    def get_all_samples(self, env=None):
        """Return the entire dataset as a single (obs, actions) batch."""
        data = (
            self._normalize_obs(self.observations, env),
            self.actions,
        )
        # Trailing Nones fill next_observations/dones/rewards, which have no
        # meaning for flat expert data.
        return ReplayBufferSamples(*tuple(map(self.to_torch, data)), None, None, None)
234
+
235
+
236
class ReplayBuffer(BaseBuffer):
    """
    Replay buffer used in off-policy algorithms like SAC/TD3.

    :param buffer_size: Max number of elements in the buffer
    :param observation_space: Observation space
    :param action_space: Action space
    :param device: PyTorch device for sampled tensors
    :param n_envs: Number of parallel environments
    :param optimize_memory_usage: Enable a memory efficient variant
        of the replay buffer which reduces by almost a factor two the memory used,
        at a cost of more complexity.
        See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195
        and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274
    :param handle_timeout_termination: Handle timeout termination (due to timelimit)
        separately and treat the task as infinite horizon task.
        https://github.com/DLR-RM/stable-baselines3/issues/284
    """

    def __init__(
        self,
        buffer_size: int,
        observation_space: spaces.Space,
        action_space: spaces.Space,
        device: Union[th.device, str] = "cpu",
        n_envs: int = 1,
        optimize_memory_usage: bool = False,
        handle_timeout_termination: bool = True,
    ):
        super().__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)

        # Adjust buffer size: buffer_size counts total transitions, storage is
        # indexed per step across n_envs
        self.buffer_size = max(buffer_size // n_envs, 1)

        # Check that the replay buffer can fit into the memory
        if psutil is not None:
            mem_available = psutil.virtual_memory().available

        self.optimize_memory_usage = optimize_memory_usage

        self.observations = np.zeros((self.buffer_size, self.n_envs) + self.obs_shape, dtype=observation_space.dtype)

        if optimize_memory_usage:
            # `observations` contains also the next observation
            # (stored at index (pos + 1) % buffer_size, see add())
            self.next_observations = None
        else:
            self.next_observations = np.zeros((self.buffer_size, self.n_envs) + self.obs_shape, dtype=observation_space.dtype)

        self.actions = np.zeros((self.buffer_size, self.n_envs, self.action_dim), dtype=action_space.dtype)

        self.rewards = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.dones = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        # Handle timeouts termination properly if needed
        # see https://github.com/DLR-RM/stable-baselines3/issues/284
        self.handle_timeout_termination = handle_timeout_termination
        self.timeouts = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)

        if psutil is not None:
            total_memory_usage = self.observations.nbytes + self.actions.nbytes + self.rewards.nbytes + self.dones.nbytes

            if self.next_observations is not None:
                total_memory_usage += self.next_observations.nbytes

            if total_memory_usage > mem_available:
                # Convert to GB for a readable warning
                total_memory_usage /= 1e9
                mem_available /= 1e9
                warnings.warn(
                    "This system does not have apparently enough memory to store the complete "
                    f"replay buffer {total_memory_usage:.2f}GB > {mem_available:.2f}GB"
                )

    def add(
        self,
        obs: np.ndarray,
        next_obs: np.ndarray,
        action: np.ndarray,
        reward: np.ndarray,
        done: np.ndarray,
        infos: List[Dict[str, Any]],
    ) -> None:
        """
        Store one step of experience for every parallel env, advancing the
        circular write cursor.

        :param obs: observation at the current step
        :param next_obs: observation after taking ``action``
        :param action: action taken
        :param reward: reward received
        :param done: episode-termination flag
        :param infos: per-env info dicts (read for "TimeLimit.truncated")
        """
        # Reshape needed when using multiple envs with discrete observations
        # as numpy cannot broadcast (n_discrete,) to (n_discrete, 1)
        if isinstance(self.observation_space, spaces.Discrete):
            obs = obs.reshape((self.n_envs,) + self.obs_shape)
            next_obs = next_obs.reshape((self.n_envs,) + self.obs_shape)

        # Same, for actions
        if isinstance(self.action_space, spaces.Discrete):
            action = action.reshape((self.n_envs, self.action_dim))

        # Copy to avoid modification by reference
        self.observations[self.pos] = np.array(obs).copy()

        if self.optimize_memory_usage:
            # Memory-efficient layout: next_obs lives in the following slot
            self.observations[(self.pos + 1) % self.buffer_size] = np.array(next_obs).copy()
        else:
            self.next_observations[self.pos] = np.array(next_obs).copy()

        self.actions[self.pos] = np.array(action).copy()
        self.rewards[self.pos] = np.array(reward).copy()
        self.dones[self.pos] = np.array(done).copy()

        if self.handle_timeout_termination:
            self.timeouts[self.pos] = np.array([info.get("TimeLimit.truncated", False) for info in infos])

        self.pos += 1
        if self.pos == self.buffer_size:
            # Wrap around: subsequent writes overwrite the oldest transitions
            self.full = True
            self.pos = 0

    def sample(self, batch_size: int, env: Optional[VecNormalize] = None) -> ReplayBufferSamples:
        """
        Sample elements from the replay buffer.
        Custom sampling when using the memory efficient variant,
        as we should not sample the element with index `self.pos`
        See https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274

        :param batch_size: Number of elements to sample
        :param env: associated gym VecEnv
            to normalize the observations/rewards when sampling
        :return: batch of transitions
        """
        if not self.optimize_memory_usage:
            return super().sample(batch_size=batch_size, env=env)
        # Do not sample the element with index `self.pos` as the transition is
        # invalid (we use only one array to store `obs` and `next_obs`)
        if self.full:
            batch_inds = (np.random.randint(1, self.buffer_size, size=batch_size) + self.pos) % self.buffer_size
        else:
            batch_inds = np.random.randint(0, self.pos, size=batch_size)
        return self._get_samples(batch_inds, env=env)

    def _get_samples(self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None) -> ReplayBufferSamples:
        # Sample randomly the env idx (one env per sampled step)
        env_indices = np.random.randint(0, high=self.n_envs, size=(len(batch_inds),))

        if self.optimize_memory_usage:
            # next_obs is stored in the slot after the transition's slot
            next_obs = self._normalize_obs(self.observations[(batch_inds + 1) % self.buffer_size, env_indices, :], env)
        else:
            next_obs = self._normalize_obs(self.next_observations[batch_inds, env_indices, :], env)

        data = (
            self._normalize_obs(self.observations[batch_inds, env_indices, :], env),
            self.actions[batch_inds, env_indices, :],
            next_obs,
            # Only use dones that are not due to timeouts
            # deactivated by default (timeouts is initialized as an array of False)
            (self.dones[batch_inds, env_indices] * (1 - self.timeouts[batch_inds, env_indices])).reshape(-1, 1),
            self._normalize_reward(self.rewards[batch_inds, env_indices].reshape(-1, 1), env),
        )
        return ReplayBufferSamples(*tuple(map(self.to_torch, data)))
389
+
390
+
391
class RolloutBuffer(BaseBuffer):
    """
    Rollout buffer used in on-policy algorithms like A2C/PPO.
    It corresponds to ``buffer_size`` transitions collected
    using the current policy.
    This experience will be discarded after the policy update.
    In order to use PPO objective, we also store the current value of each state
    and the log probability of each taken action.

    The term rollout here refers to the model-free notion and should not
    be used with the concept of rollout used in model-based RL or planning.
    Hence, it is only involved in policy and value function training but not action selection.

    :param buffer_size: Max number of elements in the buffer
    :param observation_space: Observation space
    :param action_space: Action space
    :param device: PyTorch device for sampled tensors
    :param gae_lambda: Factor for trade-off of bias vs variance for Generalized Advantage Estimator
        Equivalent to classic advantage when set to 1.
    :param gamma: Discount factor
    :param n_envs: Number of parallel environments
    """

    def __init__(
        self,
        buffer_size: int,
        observation_space: spaces.Space,
        action_space: spaces.Space,
        device: Union[th.device, str] = "cpu",
        gae_lambda: float = 1,
        gamma: float = 0.99,
        n_envs: int = 1,
    ):

        super().__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)
        self.gae_lambda = gae_lambda
        self.gamma = gamma
        # Storage arrays are allocated in reset()
        self.observations, self.actions, self.rewards, self.advantages = None, None, None, None
        self.returns, self.episode_starts, self.values, self.log_probs = None, None, None, None
        self.generator_ready = False
        self.reset()

    def reset(self) -> None:
        # Re-allocate all storage as [buffer_size, n_envs, ...] float32 arrays
        self.observations = np.zeros((self.buffer_size, self.n_envs) + self.obs_shape, dtype=np.float32)
        self.actions = np.zeros((self.buffer_size, self.n_envs, self.action_dim), dtype=np.float32)
        self.rewards = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.returns = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.episode_starts = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.values = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.log_probs = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.advantages = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        # Arrays are back in [n_steps, n_envs, ...] layout until get() flattens them
        self.generator_ready = False
        super().reset()

    def compute_returns_and_advantage(self, last_values: th.Tensor, dones: np.ndarray) -> None:
        """
        Post-processing step: compute the lambda-return (TD(lambda) estimate)
        and GAE(lambda) advantage.

        Uses Generalized Advantage Estimation (https://arxiv.org/abs/1506.02438)
        to compute the advantage. To obtain Monte-Carlo advantage estimate (A(s) = R - V(S))
        where R is the sum of discounted reward with value bootstrap
        (because we don't always have full episode), set ``gae_lambda=1.0`` during initialization.

        The TD(lambda) estimator has also two special cases:
        - TD(1) is Monte-Carlo estimate (sum of discounted rewards)
        - TD(0) is one-step estimate with bootstrapping (r_t + gamma * v(s_{t+1}))

        For more information, see discussion in https://github.com/DLR-RM/stable-baselines3/pull/375.

        :param last_values: state value estimation for the last step (one for each env)
        :param dones: if the last step was a terminal step (one bool for each env).
        """
        # Convert to numpy
        last_values = last_values.clone().cpu().numpy().flatten()

        # Backward recursion over the rollout (GAE telescoping)
        last_gae_lam = 0
        for step in reversed(range(self.buffer_size)):
            if step == self.buffer_size - 1:
                # Bootstrap with the externally provided last value estimate
                next_non_terminal = 1.0 - dones
                next_values = last_values
            else:
                next_non_terminal = 1.0 - self.episode_starts[step + 1]
                next_values = self.values[step + 1]
            # TD residual: r_t + gamma * V(s_{t+1}) * (1 - done) - V(s_t)
            delta = self.rewards[step] + self.gamma * next_values * next_non_terminal - self.values[step]
            last_gae_lam = delta + self.gamma * self.gae_lambda * next_non_terminal * last_gae_lam
            self.advantages[step] = last_gae_lam
        # TD(lambda) estimator, see Github PR #375 or "Telescoping in TD(lambda)"
        # in David Silver Lecture 4: https://www.youtube.com/watch?v=PnHCvfgC_ZA
        self.returns = self.advantages + self.values

    def add(
        self,
        obs: np.ndarray,
        action: np.ndarray,
        reward: np.ndarray,
        episode_start: np.ndarray,
        value: th.Tensor,
        log_prob: th.Tensor,
    ) -> None:
        """
        :param obs: Observation
        :param action: Action
        :param reward: reward received
        :param episode_start: Start of episode signal.
        :param value: estimated value of the current state
            following the current policy.
        :param log_prob: log probability of the action
            following the current policy.
        """
        if len(log_prob.shape) == 0:
            # Reshape 0-d tensor to avoid error
            log_prob = log_prob.reshape(-1, 1)

        # Reshape needed when using multiple envs with discrete observations
        # as numpy cannot broadcast (n_discrete,) to (n_discrete, 1)
        if isinstance(self.observation_space, spaces.Discrete):
            obs = obs.reshape((self.n_envs,) + self.obs_shape)

        self.observations[self.pos] = np.array(obs).copy()
        self.actions[self.pos] = np.array(action).copy()
        self.rewards[self.pos] = np.array(reward).copy()
        self.episode_starts[self.pos] = np.array(episode_start).copy()
        self.values[self.pos] = value.clone().cpu().numpy().flatten()
        self.log_probs[self.pos] = log_prob.clone().cpu().numpy()
        self.pos += 1
        if self.pos == self.buffer_size:
            # Rollout buffers never wrap: full means ready for get()
            self.full = True

    def get(self, batch_size: Optional[int] = None) -> Generator[RolloutBufferSamples, None, None]:
        """
        Yield minibatches over a random permutation of the whole rollout.
        Requires the buffer to be full; flattens storage on first call.
        """
        assert self.full, ""
        indices = np.random.permutation(self.buffer_size * self.n_envs)
        # Prepare the data: flatten [n_steps, n_envs, ...] -> [n_steps * n_envs, ...]
        if not self.generator_ready:

            _tensor_names = [
                "observations",
                "actions",
                "values",
                "log_probs",
                "advantages",
                "returns",
            ]

            for tensor in _tensor_names:
                self.__dict__[tensor] = self.swap_and_flatten(self.__dict__[tensor])
            self.generator_ready = True

        # Return everything, don't create minibatches
        if batch_size is None:
            batch_size = self.buffer_size * self.n_envs

        start_idx = 0
        while start_idx < self.buffer_size * self.n_envs:
            yield self._get_samples(indices[start_idx : start_idx + batch_size])
            start_idx += batch_size

    def _get_samples(self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None) -> RolloutBufferSamples:
        # Assumes get() already flattened the storage to [n_steps * n_envs, ...]
        data = (
            self.observations[batch_inds],
            self.actions[batch_inds],
            self.values[batch_inds].flatten(),
            self.log_probs[batch_inds].flatten(),
            self.advantages[batch_inds].flatten(),
            self.returns[batch_inds].flatten(),
        )
        return RolloutBufferSamples(*tuple(map(self.to_torch, data)))
559
+
560
+
561
class DictReplayBuffer(ReplayBuffer):
    """
    Dict Replay buffer used in off-policy algorithms like SAC/TD3.
    Extends the ReplayBuffer to use dictionary observations.

    :param buffer_size: Max number of elements in the buffer
    :param observation_space: Observation space (must be a Dict space)
    :param action_space: Action space
    :param device: PyTorch device for sampled tensors
    :param n_envs: Number of parallel environments
    :param optimize_memory_usage: Enable a memory efficient variant
        Disabled for now (see https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702)
    :param handle_timeout_termination: Handle timeout termination (due to timelimit)
        separately and treat the task as infinite horizon task.
        https://github.com/DLR-RM/stable-baselines3/issues/284
    """

    def __init__(
        self,
        buffer_size: int,
        observation_space: spaces.Space,
        action_space: spaces.Space,
        device: Union[th.device, str] = "cpu",
        n_envs: int = 1,
        optimize_memory_usage: bool = False,
        handle_timeout_termination: bool = True,
    ):
        # Skip ReplayBuffer.__init__ (it allocates flat arrays); go straight
        # to BaseBuffer.__init__ and allocate per-key storage here instead.
        super(ReplayBuffer, self).__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)

        assert isinstance(self.obs_shape, dict), "DictReplayBuffer must be used with Dict obs space only"
        # buffer_size counts total transitions; storage is indexed per step
        self.buffer_size = max(buffer_size // n_envs, 1)

        # Check that the replay buffer can fit into the memory
        if psutil is not None:
            mem_available = psutil.virtual_memory().available

        assert optimize_memory_usage is False, "DictReplayBuffer does not support optimize_memory_usage"
        # disabling as this adds quite a bit of complexity
        # https://github.com/DLR-RM/stable-baselines3/pull/243#discussion_r531535702
        self.optimize_memory_usage = optimize_memory_usage

        # One array per observation key, each shaped [buffer_size, n_envs, *key_shape]
        self.observations = {
            key: np.zeros((self.buffer_size, self.n_envs) + _obs_shape, dtype=observation_space[key].dtype)
            for key, _obs_shape in self.obs_shape.items()
        }
        self.next_observations = {
            key: np.zeros((self.buffer_size, self.n_envs) + _obs_shape, dtype=observation_space[key].dtype)
            for key, _obs_shape in self.obs_shape.items()
        }

        self.actions = np.zeros((self.buffer_size, self.n_envs, self.action_dim), dtype=action_space.dtype)
        self.rewards = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.dones = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)

        # Handle timeouts termination properly if needed
        # see https://github.com/DLR-RM/stable-baselines3/issues/284
        self.handle_timeout_termination = handle_timeout_termination
        self.timeouts = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)

        if psutil is not None:
            obs_nbytes = 0
            for _, obs in self.observations.items():
                obs_nbytes += obs.nbytes

            total_memory_usage = obs_nbytes + self.actions.nbytes + self.rewards.nbytes + self.dones.nbytes
            if self.next_observations is not None:
                next_obs_nbytes = 0
                # Fix: account the next_observations arrays, not observations
                # again (the two happen to be the same size today, but the
                # original loop double-counted the wrong dict).
                for _, obs in self.next_observations.items():
                    next_obs_nbytes += obs.nbytes
                total_memory_usage += next_obs_nbytes

            if total_memory_usage > mem_available:
                # Convert to GB for a readable warning
                total_memory_usage /= 1e9
                mem_available /= 1e9
                warnings.warn(
                    "This system does not have apparently enough memory to store the complete "
                    f"replay buffer {total_memory_usage:.2f}GB > {mem_available:.2f}GB"
                )

    def add(
        self,
        obs: Dict[str, np.ndarray],
        next_obs: Dict[str, np.ndarray],
        action: np.ndarray,
        reward: np.ndarray,
        done: np.ndarray,
        infos: List[Dict[str, Any]],
    ) -> None:
        """
        Store one step of experience (per-key dict observations), advancing the
        circular write cursor.
        """
        # Copy to avoid modification by reference
        for key in self.observations.keys():
            # Reshape needed when using multiple envs with discrete observations
            # as numpy cannot broadcast (n_discrete,) to (n_discrete, 1)
            if isinstance(self.observation_space.spaces[key], spaces.Discrete):
                obs[key] = obs[key].reshape((self.n_envs,) + self.obs_shape[key])
            self.observations[key][self.pos] = np.array(obs[key])

        for key in self.next_observations.keys():
            if isinstance(self.observation_space.spaces[key], spaces.Discrete):
                next_obs[key] = next_obs[key].reshape((self.n_envs,) + self.obs_shape[key])
            self.next_observations[key][self.pos] = np.array(next_obs[key]).copy()

        # Same reshape, for actions
        if isinstance(self.action_space, spaces.Discrete):
            action = action.reshape((self.n_envs, self.action_dim))

        self.actions[self.pos] = np.array(action).copy()
        self.rewards[self.pos] = np.array(reward).copy()
        self.dones[self.pos] = np.array(done).copy()

        if self.handle_timeout_termination:
            self.timeouts[self.pos] = np.array([info.get("TimeLimit.truncated", False) for info in infos])

        self.pos += 1
        if self.pos == self.buffer_size:
            # Wrap around: subsequent writes overwrite the oldest transitions
            self.full = True
            self.pos = 0

    def sample(self, batch_size: int, env: Optional[VecNormalize] = None) -> DictReplayBufferSamples:
        """
        Sample elements from the replay buffer.

        :param batch_size: Number of elements to sample
        :param env: associated gym VecEnv
            to normalize the observations/rewards when sampling
        :return: batch of transitions with dict observations
        """
        # Bypass ReplayBuffer.sample (memory-efficient variant unsupported here)
        return super(ReplayBuffer, self).sample(batch_size=batch_size, env=env)

    def _get_samples(self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None) -> DictReplayBufferSamples:
        # Sample randomly the env idx (one env per sampled step)
        env_indices = np.random.randint(0, high=self.n_envs, size=(len(batch_inds),))

        # Normalize if needed and remove extra dimension (we are using only one env for now)
        obs_ = self._normalize_obs({key: obs[batch_inds, env_indices, :] for key, obs in self.observations.items()}, env)
        next_obs_ = self._normalize_obs(
            {key: obs[batch_inds, env_indices, :] for key, obs in self.next_observations.items()}, env
        )

        # Convert to torch tensor
        observations = {key: self.to_torch(obs) for key, obs in obs_.items()}
        next_observations = {key: self.to_torch(obs) for key, obs in next_obs_.items()}

        return DictReplayBufferSamples(
            observations=observations,
            actions=self.to_torch(self.actions[batch_inds, env_indices]),
            next_observations=next_observations,
            # Only use dones that are not due to timeouts
            # deactivated by default (timeouts is initialized as an array of False)
            dones=self.to_torch(self.dones[batch_inds, env_indices] * (1 - self.timeouts[batch_inds, env_indices])).reshape(
                -1, 1
            ),
            rewards=self.to_torch(self._normalize_reward(self.rewards[batch_inds, env_indices].reshape(-1, 1), env)),
        )
715
+
716
+
717
+ class DictRolloutBuffer(RolloutBuffer):
718
+ """
719
+ Dict Rollout buffer used in on-policy algorithms like A2C/PPO.
720
+ Extends the RolloutBuffer to use dictionary observations
721
+
722
+ It corresponds to ``buffer_size`` transitions collected
723
+ using the current policy.
724
+ This experience will be discarded after the policy update.
725
+ In order to use PPO objective, we also store the current value of each state
726
+ and the log probability of each taken action.
727
+
728
+ The term rollout here refers to the model-free notion and should not
729
+ be used with the concept of rollout used in model-based RL or planning.
730
+ Hence, it is only involved in policy and value function training but not action selection.
731
+
732
+ :param buffer_size: Max number of element in the buffer
733
+ :param observation_space: Observation space
734
+ :param action_space: Action space
735
+ :param device:
736
+ :param gae_lambda: Factor for trade-off of bias vs variance for Generalized Advantage Estimator
737
+ Equivalent to Monte-Carlo advantage estimate when set to 1.
738
+ :param gamma: Discount factor
739
+ :param n_envs: Number of parallel environments
740
+ """
741
+
742
    def __init__(
        self,
        buffer_size: int,
        observation_space: spaces.Space,
        action_space: spaces.Space,
        device: Union[th.device, str] = "cpu",
        gae_lambda: float = 1,
        gamma: float = 0.99,
        n_envs: int = 1,
    ):
        """
        :param buffer_size: Max number of elements in the buffer
        :param observation_space: Observation space (must be a Dict space)
        :param action_space: Action space
        :param device: PyTorch device for sampled tensors
        :param gae_lambda: GAE bias/variance trade-off factor
        :param gamma: Discount factor
        :param n_envs: Number of parallel environments
        """
        # Skip RolloutBuffer.__init__ (it would call reset() before gae_lambda
        # is set); go straight to BaseBuffer.__init__ and initialize here.
        super(RolloutBuffer, self).__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)

        assert isinstance(self.obs_shape, dict), "DictRolloutBuffer must be used with Dict obs space only"

        self.gae_lambda = gae_lambda
        self.gamma = gamma
        # Storage is allocated by reset() below
        self.observations, self.actions, self.rewards, self.advantages = None, None, None, None
        self.returns, self.episode_starts, self.values, self.log_probs = None, None, None, None
        self.generator_ready = False
        self.reset()
763
+
764
+ def reset(self) -> None:
765
+ assert isinstance(self.obs_shape, dict), "DictRolloutBuffer must be used with Dict obs space only"
766
+ self.observations = {}
767
+ for key, obs_input_shape in self.obs_shape.items():
768
+ self.observations[key] = np.zeros((self.buffer_size, self.n_envs) + obs_input_shape, dtype=np.float32)
769
+ self.actions = np.zeros((self.buffer_size, self.n_envs, self.action_dim), dtype=np.float32)
770
+ self.rewards = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
771
+ self.returns = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
772
+ self.episode_starts = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
773
+ self.values = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
774
+ self.log_probs = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
775
+ self.advantages = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
776
+ self.generator_ready = False
777
+ super(RolloutBuffer, self).reset()
778
+
779
+ def add(
780
+ self,
781
+ obs: Dict[str, np.ndarray],
782
+ action: np.ndarray,
783
+ reward: np.ndarray,
784
+ episode_start: np.ndarray,
785
+ value: th.Tensor,
786
+ log_prob: th.Tensor,
787
+ ) -> None:
788
+ """
789
+ :param obs: Observation
790
+ :param action: Action
791
+ :param reward:
792
+ :param episode_start: Start of episode signal.
793
+ :param value: estimated value of the current state
794
+ following the current policy.
795
+ :param log_prob: log probability of the action
796
+ following the current policy.
797
+ """
798
+ if len(log_prob.shape) == 0:
799
+ # Reshape 0-d tensor to avoid error
800
+ log_prob = log_prob.reshape(-1, 1)
801
+
802
+ for key in self.observations.keys():
803
+ obs_ = np.array(obs[key]).copy()
804
+ # Reshape needed when using multiple envs with discrete observations
805
+ # as numpy cannot broadcast (n_discrete,) to (n_discrete, 1)
806
+ if isinstance(self.observation_space.spaces[key], spaces.Discrete):
807
+ obs_ = obs_.reshape((self.n_envs,) + self.obs_shape[key])
808
+ self.observations[key][self.pos] = obs_
809
+
810
+ self.actions[self.pos] = np.array(action).copy()
811
+ self.rewards[self.pos] = np.array(reward).copy()
812
+ self.episode_starts[self.pos] = np.array(episode_start).copy()
813
+ self.values[self.pos] = value.clone().cpu().numpy().flatten()
814
+ self.log_probs[self.pos] = log_prob.clone().cpu().numpy()
815
+ self.pos += 1
816
+ if self.pos == self.buffer_size:
817
+ self.full = True
818
+
819
+ def get(self, batch_size: Optional[int] = None) -> Generator[DictRolloutBufferSamples, None, None]:
820
+ assert self.full, ""
821
+ indices = np.random.permutation(self.buffer_size * self.n_envs)
822
+ # Prepare the data
823
+ if not self.generator_ready:
824
+
825
+ for key, obs in self.observations.items():
826
+ self.observations[key] = self.swap_and_flatten(obs)
827
+
828
+ _tensor_names = ["actions", "values", "log_probs", "advantages", "returns"]
829
+
830
+ for tensor in _tensor_names:
831
+ self.__dict__[tensor] = self.swap_and_flatten(self.__dict__[tensor])
832
+ self.generator_ready = True
833
+
834
+ # Return everything, don't create minibatches
835
+ if batch_size is None:
836
+ batch_size = self.buffer_size * self.n_envs
837
+
838
+ start_idx = 0
839
+ while start_idx < self.buffer_size * self.n_envs:
840
+ yield self._get_samples(indices[start_idx : start_idx + batch_size])
841
+ start_idx += batch_size
842
+
843
+ def _get_samples(self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None) -> DictRolloutBufferSamples:
844
+
845
+ return DictRolloutBufferSamples(
846
+ observations={key: self.to_torch(obs[batch_inds]) for (key, obs) in self.observations.items()},
847
+ actions=self.to_torch(self.actions[batch_inds]),
848
+ old_values=self.to_torch(self.values[batch_inds].flatten()),
849
+ old_log_prob=self.to_torch(self.log_probs[batch_inds].flatten()),
850
+ advantages=self.to_torch(self.advantages[batch_inds].flatten()),
851
+ returns=self.to_torch(self.returns[batch_inds].flatten()),
852
+ )
853
+
854
+
855
class DictSSLRolloutBuffer(RolloutBuffer):
    """
    Dict Rollout buffer used in on-policy algorithms like A2C/PPO,
    extended with future (next-step) observations/actions for an SSL objective.
    Extends the RolloutBuffer to use dictionary observations

    It corresponds to ``buffer_size`` transitions collected
    using the current policy.
    This experience will be discarded after the policy update.
    In order to use PPO objective, we also store the current value of each state
    and the log probability of each taken action.

    The term rollout here refers to the model-free notion and should not
    be used with the concept of rollout used in model-based RL or planning.
    Hence, it is only involved in policy and value function training but not action selection.

    :param buffer_size: Max number of element in the buffer
    :param observation_space: Observation space
    :param action_space: Action space
    :param device: PyTorch device on which sampled tensors are created
    :param gae_lambda: Factor for trade-off of bias vs variance for Generalized Advantage Estimator
        Equivalent to Monte-Carlo advantage estimate when set to 1.
    :param gamma: Discount factor
    :param n_envs: Number of parallel environments
    """

    def __init__(
        self,
        buffer_size: int,
        observation_space: spaces.Space,
        action_space: spaces.Space,
        device: Union[th.device, str] = "cpu",
        gae_lambda: float = 1,
        gamma: float = 0.99,
        n_envs: int = 1,
    ):

        # Skip RolloutBuffer.__init__ (its reset() allocates a flat obs array
        # incompatible with dict observation spaces) and call BaseBuffer directly.
        super(RolloutBuffer, self).__init__(buffer_size, observation_space, action_space, device, n_envs=n_envs)

        assert isinstance(self.obs_shape, dict), "DictRolloutBuffer must be used with Dict obs space only"

        self.gae_lambda = gae_lambda
        self.gamma = gamma
        # Storage is allocated lazily by reset() below
        self.observations, self.actions, self.rewards, self.advantages = None, None, None, None
        self.returns, self.episode_starts, self.values, self.log_probs = None, None, None, None
        # Set to True once arrays have been flattened for sampling (see get())
        self.generator_ready = False
        self.reset()

    def reset(self) -> None:
        """Allocate (or re-allocate) all storage arrays and rewind the buffer position."""
        assert isinstance(self.obs_shape, dict), "DictRolloutBuffer must be used with Dict obs space only"
        # One float32 array of shape (buffer_size, n_envs, *obs_shape[key]) per observation key
        self.observations = {}
        for key, obs_input_shape in self.obs_shape.items():
            self.observations[key] = np.zeros((self.buffer_size, self.n_envs) + obs_input_shape, dtype=np.float32)
        self.actions = np.zeros((self.buffer_size, self.n_envs, self.action_dim), dtype=np.float32)
        self.rewards = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.returns = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.episode_starts = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.values = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.log_probs = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.advantages = np.zeros((self.buffer_size, self.n_envs), dtype=np.float32)
        self.generator_ready = False
        # BaseBuffer.reset() just resets self.pos and self.full
        super(RolloutBuffer, self).reset()

    def add(
        self,
        obs: Dict[str, np.ndarray],
        action: np.ndarray,
        reward: np.ndarray,
        episode_start: np.ndarray,
        value: th.Tensor,
        log_prob: th.Tensor,
    ) -> None:
        """
        Store one transition (for all parallel envs) at the current position.

        :param obs: Observation
        :param action: Action
        :param reward: Reward received after taking the action
        :param episode_start: Start of episode signal.
        :param value: estimated value of the current state
            following the current policy.
        :param log_prob: log probability of the action
            following the current policy.
        """
        if len(log_prob.shape) == 0:
            # Reshape 0-d tensor to avoid error
            log_prob = log_prob.reshape(-1, 1)

        for key in self.observations.keys():
            obs_ = np.array(obs[key]).copy()
            # Reshape needed when using multiple envs with discrete observations
            # as numpy cannot broadcast (n_discrete,) to (n_discrete, 1)
            if isinstance(self.observation_space.spaces[key], spaces.Discrete):
                obs_ = obs_.reshape((self.n_envs,) + self.obs_shape[key])
            self.observations[key][self.pos] = obs_

        self.actions[self.pos] = np.array(action).copy()
        self.rewards[self.pos] = np.array(reward).copy()
        self.episode_starts[self.pos] = np.array(episode_start).copy()
        # Move value/log_prob from the torch device to the numpy storage
        self.values[self.pos] = value.clone().cpu().numpy().flatten()
        self.log_probs[self.pos] = log_prob.clone().cpu().numpy()
        self.pos += 1
        if self.pos == self.buffer_size:
            self.full = True

    def get(self, batch_size: Optional[int] = None) -> Generator[DictRolloutBufferSamples, None, None]:
        """
        Yield minibatches of transitions in a random order.

        The first call flattens the (buffer_size, n_envs, ...) storage to
        (buffer_size * n_envs, ...) in place; this is irreversible until reset().

        :param batch_size: Minibatch size; ``None`` yields the whole buffer at once.
        """
        assert self.full, ""
        indices = np.random.permutation(self.buffer_size * self.n_envs)
        # Prepare the data
        if not self.generator_ready:

            for key, obs in self.observations.items():
                self.observations[key] = self.swap_and_flatten(obs)

            _tensor_names = ["actions", "values", "log_probs", "advantages", "returns"]

            for tensor in _tensor_names:
                self.__dict__[tensor] = self.swap_and_flatten(self.__dict__[tensor])
            self.generator_ready = True

        # Return everything, don't create minibatches
        if batch_size is None:
            batch_size = self.buffer_size * self.n_envs

        start_idx = 0
        while start_idx < self.buffer_size * self.n_envs:
            yield self._get_samples(indices[start_idx : start_idx + batch_size])
            start_idx += batch_size

    def _get_samples(self, batch_inds: np.ndarray, env: Optional[VecNormalize] = None) -> DictSSLRolloutBufferSamples:
        # Build the standard PPO sample plus lists of the observations/actions
        # 0..3 steps ahead of each sampled index, for the SSL auxiliary loss.

        def get_next_obs(obs, ind, i):
            # For every observation key, return the observation ``i`` steps ahead
            # of each sampled index, clipped at the end of the flattened buffer.
            # NOTE(review): the loop target rebinds the ``obs`` parameter
            # (shadowing). Harmless because ``obs.items()`` is evaluated once
            # before the loop, but the inner name should be renamed.
            # NOTE(review): after swap_and_flatten, the buffer is flattened across
            # envs, so ``ind + i`` can cross env/episode boundaries — presumably
            # acceptable for the SSL objective; confirm with the consumer.
            result = {}
            for key, obs in obs.items():
                future_batch_inds = np.clip(ind + i, 0, len(obs)-1)
                next_obs = self.to_torch(obs[future_batch_inds])
                result[key] = next_obs

            return result

        def get_next_action(actions, ind, i):
            # Action taken ``i`` steps ahead of each sampled index (same clipping
            # and boundary caveat as get_next_obs above).
            future_batch_inds = np.clip(ind + i, 0, len(actions) - 1)
            return self.to_torch(actions[future_batch_inds])


        # i = 0 duplicates the current observation/action; horizon is fixed to 4 steps
        next_observations = [get_next_obs(self.observations, batch_inds, i) for i in range(4)]
        next_actions = [get_next_action(self.actions, batch_inds, i) for i in range(4)]

        return DictSSLRolloutBufferSamples(
            observations={key: self.to_torch(obs[batch_inds]) for (key, obs) in self.observations.items()},
            next_observations=next_observations,
            actions=self.to_torch(self.actions[batch_inds]),
            next_actions=next_actions,
            old_values=self.to_torch(self.values[batch_inds].flatten()),
            old_log_prob=self.to_torch(self.log_probs[batch_inds].flatten()),
            advantages=self.to_torch(self.advantages[batch_inds].flatten()),
            returns=self.to_torch(self.returns[batch_inds].flatten()),
        )
1010
+
dexart-release/stable_baselines3/common/callbacks.py ADDED
@@ -0,0 +1,602 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import warnings
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Callable, Dict, List, Optional, Union
5
+
6
+ import gym
7
+ import numpy as np
8
+
9
+ from stable_baselines3.common import base_class # pytype: disable=pyi-error
10
+ from stable_baselines3.common.evaluation import evaluate_policy
11
+ from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv, sync_envs_normalization
12
+
13
+
14
class BaseCallback(ABC):
    """
    Base class for callback.

    Concrete callbacks implement ``_on_step`` (and optionally the other
    ``_on_*`` hooks); the public ``on_*`` methods are called by the RL
    algorithm during training and bookkeep shared state before delegating.

    :param verbose: Verbosity level (0: no output)
    """

    def __init__(self, verbose: int = 0):
        super().__init__()
        # The RL model
        self.model = None  # type: Optional[base_class.BaseAlgorithm]
        # An alias for self.model.get_env(), the environment used for training
        self.training_env = None  # type: Union[gym.Env, VecEnv, None]
        # Number of time the callback was called
        self.n_calls = 0  # type: int
        # n_envs * n times env.step() was called
        self.num_timesteps = 0  # type: int
        self.verbose = verbose
        # References to the caller's local/global variables (see on_training_start)
        self.locals: Dict[str, Any] = {}
        self.globals: Dict[str, Any] = {}
        # Set from the model in init_callback()
        self.logger = None
        # Sometimes, for event callback, it is useful
        # to have access to the parent object
        self.parent = None  # type: Optional[BaseCallback]

    # Type hint as string to avoid circular import
    def init_callback(self, model: "base_class.BaseAlgorithm") -> None:
        """
        Initialize the callback by saving references to the
        RL model and the training environment for convenience.
        """
        self.model = model
        self.training_env = model.get_env()
        self.logger = model.logger
        self._init_callback()

    def _init_callback(self) -> None:
        """Hook for subclasses; called once after the model reference is set."""
        pass

    def on_training_start(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None:
        """Called by the algorithm once before the training loop starts."""
        # Those are reference and will be updated automatically
        self.locals = locals_
        self.globals = globals_
        self._on_training_start()

    def _on_training_start(self) -> None:
        """Hook for subclasses; called when training starts."""
        pass

    def on_rollout_start(self) -> None:
        """Called by the algorithm before each rollout collection."""
        self._on_rollout_start()

    def _on_rollout_start(self) -> None:
        """Hook for subclasses; called before each rollout."""
        pass

    @abstractmethod
    def _on_step(self) -> bool:
        """
        :return: If the callback returns False, training is aborted early.
        """
        return True

    def on_step(self) -> bool:
        """
        This method will be called by the model after each call to ``env.step()``.

        For child callback (of an ``EventCallback``), this will be called
        when the event is triggered.

        :return: If the callback returns False, training is aborted early.
        """
        self.n_calls += 1
        # timesteps start at zero
        self.num_timesteps = self.model.num_timesteps

        return self._on_step()

    def on_training_end(self) -> None:
        """Called by the algorithm once after the training loop ends."""
        self._on_training_end()

    def _on_training_end(self) -> None:
        """Hook for subclasses; called when training ends."""
        pass

    def on_rollout_end(self) -> None:
        """Called by the algorithm after each rollout collection."""
        self._on_rollout_end()

    def _on_rollout_end(self) -> None:
        """Hook for subclasses; called after each rollout."""
        pass

    def update_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Update the references to the local variables.

        :param locals_: the local variables during rollout collection
        """
        self.locals.update(locals_)
        self.update_child_locals(locals_)

    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Update the references to the local variables on sub callbacks.

        :param locals_: the local variables during rollout collection
        """
        pass
118
+
119
+
120
class EventCallback(BaseCallback):
    """
    Base class for triggering callback on event.

    :param callback: Callback that will be called
        when an event is triggered.
    :param verbose: Verbosity level (0: no output)
    """

    def __init__(self, callback: Optional[BaseCallback] = None, verbose: int = 0):
        super().__init__(verbose=verbose)
        self.callback = callback
        if callback is not None:
            # Let the child callback reach back to this one
            self.callback.parent = self

    def init_callback(self, model: "base_class.BaseAlgorithm") -> None:
        super().init_callback(model)
        if self.callback is None:
            return
        self.callback.init_callback(self.model)

    def _on_training_start(self) -> None:
        if self.callback is None:
            return
        self.callback.on_training_start(self.locals, self.globals)

    def _on_event(self) -> bool:
        """Fire the child callback; without one, the event keeps training going."""
        if self.callback is None:
            return True
        return self.callback.on_step()

    def _on_step(self) -> bool:
        return True

    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Propagate the rollout locals to the child callback, if any.

        :param locals_: the local variables during rollout collection
        """
        if self.callback is not None:
            self.callback.update_locals(locals_)
161
+
162
+
163
class CallbackList(BaseCallback):
    """
    Class for chaining callbacks.

    :param callbacks: A list of callbacks that will be called
        sequentially.
    """

    def __init__(self, callbacks: List[BaseCallback]):
        super().__init__()
        assert isinstance(callbacks, list)
        self.callbacks = callbacks

    def _init_callback(self) -> None:
        for cb in self.callbacks:
            cb.init_callback(self.model)

    def _on_training_start(self) -> None:
        for cb in self.callbacks:
            cb.on_training_start(self.locals, self.globals)

    def _on_rollout_start(self) -> None:
        for cb in self.callbacks:
            cb.on_rollout_start()

    def _on_step(self) -> bool:
        # Every callback gets stepped (no short-circuit); training stops
        # as soon as any one of them returns False.
        step_results = [cb.on_step() for cb in self.callbacks]
        return all(step_results)

    def _on_rollout_end(self) -> None:
        for cb in self.callbacks:
            cb.on_rollout_end()

    def _on_training_end(self) -> None:
        for cb in self.callbacks:
            cb.on_training_end()

    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Forward the rollout locals to every chained callback.

        :param locals_: the local variables during rollout collection
        """
        for cb in self.callbacks:
            cb.update_locals(locals_)
211
+
212
+
213
class CheckpointCallback(BaseCallback):
    """
    Callback for saving a model every ``save_freq`` calls
    to ``env.step()``.

    .. warning::

        When using multiple environments, each call to ``env.step()``
        will effectively correspond to ``n_envs`` steps.
        To account for that, you can use ``save_freq = max(save_freq // n_envs, 1)``

    :param save_freq: Save the model every ``save_freq`` callback calls
    :param save_path: Path to the folder where the model will be saved.
    :param name_prefix: Common prefix to the saved models
    :param verbose: Verbosity level (>1 prints each checkpoint path)
    """

    def __init__(self, save_freq: int, save_path: str, name_prefix: str = "rl_model", verbose: int = 0):
        super().__init__(verbose)
        self.save_freq = save_freq
        self.save_path = save_path
        self.name_prefix = name_prefix

    def _init_callback(self) -> None:
        # Make sure the target folder exists before the first save
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self) -> bool:
        if self.n_calls % self.save_freq != 0:
            return True
        checkpoint_name = f"{self.name_prefix}_{self.num_timesteps}_steps"
        path = os.path.join(self.save_path, checkpoint_name)
        self.model.save(path)
        if self.verbose > 1:
            print(f"Saving model checkpoint to {path}")
        return True
248
+
249
+
250
class ConvertCallback(BaseCallback):
    """
    Convert functional callback (old-style) to object.

    :param callback: Function of ``(locals, globals) -> bool``; ``None`` is a no-op.
    :param verbose: Verbosity level
    """

    def __init__(self, callback: Callable[[Dict[str, Any], Dict[str, Any]], bool], verbose: int = 0):
        super().__init__(verbose)
        self.callback = callback

    def _on_step(self) -> bool:
        # A missing function behaves like an always-continue callback
        if self.callback is None:
            return True
        return self.callback(self.locals, self.globals)
266
+
267
+
268
class EvalCallback(EventCallback):
    """
    Callback for evaluating an agent.

    .. warning::

        When using multiple environments, each call to ``env.step()``
        will effectively correspond to ``n_envs`` steps.
        To account for that, you can use ``eval_freq = max(eval_freq // n_envs, 1)``

    :param eval_env: The environment used for initialization
    :param callback_on_new_best: Callback to trigger
        when there is a new best model according to the ``mean_reward``
    :param callback_after_eval: Callback to trigger after every evaluation
    :param n_eval_episodes: The number of episodes to test the agent
    :param eval_freq: Evaluate the agent every ``eval_freq`` call of the callback.
    :param log_path: Path to a folder where the evaluations (``evaluations.npz``)
        will be saved. It will be updated at each evaluation.
    :param best_model_save_path: Path to a folder where the best model
        according to performance on the eval env will be saved.
    :param deterministic: Whether the evaluation should
        use a stochastic or deterministic actions.
    :param render: Whether to render or not the environment during evaluation
    :param verbose: Verbosity level
    :param warn: Passed to ``evaluate_policy`` (warns if ``eval_env`` has not been
        wrapped with a Monitor wrapper)
    """

    def __init__(
        self,
        eval_env: Union[gym.Env, VecEnv],
        callback_on_new_best: Optional[BaseCallback] = None,
        callback_after_eval: Optional[BaseCallback] = None,
        n_eval_episodes: int = 5,
        eval_freq: int = 10000,
        log_path: Optional[str] = None,
        best_model_save_path: Optional[str] = None,
        deterministic: bool = True,
        render: bool = False,
        verbose: int = 1,
        warn: bool = True,
    ):
        # callback_after_eval is stored as self.callback by EventCallback
        super().__init__(callback_after_eval, verbose=verbose)

        self.callback_on_new_best = callback_on_new_best
        if self.callback_on_new_best is not None:
            # Give access to the parent
            self.callback_on_new_best.parent = self

        self.n_eval_episodes = n_eval_episodes
        self.eval_freq = eval_freq
        # Best/last mean rewards start at -inf so the first evaluation always improves
        self.best_mean_reward = -np.inf
        self.last_mean_reward = -np.inf
        self.deterministic = deterministic
        self.render = render
        self.warn = warn

        # Convert to VecEnv for consistency
        if not isinstance(eval_env, VecEnv):
            eval_env = DummyVecEnv([lambda: eval_env])

        self.eval_env = eval_env
        self.best_model_save_path = best_model_save_path
        # Logs will be written in ``evaluations.npz``
        if log_path is not None:
            log_path = os.path.join(log_path, "evaluations")
        self.log_path = log_path
        # Per-evaluation histories, appended to and re-saved on each evaluation
        self.evaluations_results = []
        self.evaluations_timesteps = []
        self.evaluations_length = []
        # For computing success rate
        self._is_success_buffer = []
        self.evaluations_successes = []

    def _init_callback(self) -> None:
        # Does not work in some corner cases, where the wrapper is not the same
        # NOTE(review): the two literals below concatenate without a separating
        # space, so the message reads "...same type<env repr>"; consider adding
        # a space or comma in a behavior-changing fix.
        if not isinstance(self.training_env, type(self.eval_env)):
            warnings.warn("Training and eval env are not of the same type" f"{self.training_env} != {self.eval_env}")

        # Create folders if needed
        if self.best_model_save_path is not None:
            os.makedirs(self.best_model_save_path, exist_ok=True)
        if self.log_path is not None:
            os.makedirs(os.path.dirname(self.log_path), exist_ok=True)

        # Init callback called on new best model
        if self.callback_on_new_best is not None:
            self.callback_on_new_best.init_callback(self.model)

    def _log_success_callback(self, locals_: Dict[str, Any], globals_: Dict[str, Any]) -> None:
        """
        Callback passed to the ``evaluate_policy`` function
        in order to log the success rate (when applicable),
        for instance when using HER.

        :param locals_: locals of ``evaluate_policy`` (must contain ``info`` and ``done``)
        :param globals_: globals of ``evaluate_policy`` (unused)
        """
        info = locals_["info"]

        # Only record success at episode end
        if locals_["done"]:
            maybe_is_success = info.get("is_success")
            if maybe_is_success is not None:
                self._is_success_buffer.append(maybe_is_success)

    def _on_step(self) -> bool:
        # Runs a full evaluation every ``eval_freq`` calls; otherwise a no-op.

        continue_training = True

        if self.eval_freq > 0 and self.n_calls % self.eval_freq == 0:

            # Sync training and eval env if there is VecNormalize
            if self.model.get_vec_normalize_env() is not None:
                try:
                    sync_envs_normalization(self.training_env, self.eval_env)
                except AttributeError:
                    raise AssertionError(
                        "Training and eval env are not wrapped the same way, "
                        "see https://stable-baselines3.readthedocs.io/en/master/guide/callbacks.html#evalcallback "
                        "and warning above."
                    )

            # Reset success rate buffer
            self._is_success_buffer = []

            episode_rewards, episode_lengths = evaluate_policy(
                self.model,
                self.eval_env,
                n_eval_episodes=self.n_eval_episodes,
                render=self.render,
                deterministic=self.deterministic,
                return_episode_rewards=True,
                warn=self.warn,
                callback=self._log_success_callback,
            )

            if self.log_path is not None:
                self.evaluations_timesteps.append(self.num_timesteps)
                self.evaluations_results.append(episode_rewards)
                self.evaluations_length.append(episode_lengths)

                kwargs = {}
                # Save success log if present
                if len(self._is_success_buffer) > 0:
                    self.evaluations_successes.append(self._is_success_buffer)
                    kwargs = dict(successes=self.evaluations_successes)

                # Overwrite the npz file with the full history each time
                np.savez(
                    self.log_path,
                    timesteps=self.evaluations_timesteps,
                    results=self.evaluations_results,
                    ep_lengths=self.evaluations_length,
                    **kwargs,
                )

            mean_reward, std_reward = np.mean(episode_rewards), np.std(episode_rewards)
            mean_ep_length, std_ep_length = np.mean(episode_lengths), np.std(episode_lengths)
            self.last_mean_reward = mean_reward

            if self.verbose > 0:
                print(f"Eval num_timesteps={self.num_timesteps}, " f"episode_reward={mean_reward:.2f} +/- {std_reward:.2f}")
                print(f"Episode length: {mean_ep_length:.2f} +/- {std_ep_length:.2f}")
            # Add to current Logger
            self.logger.record("eval/mean_reward", float(mean_reward))
            self.logger.record("eval/mean_ep_length", mean_ep_length)

            if len(self._is_success_buffer) > 0:
                success_rate = np.mean(self._is_success_buffer)
                if self.verbose > 0:
                    print(f"Success rate: {100 * success_rate:.2f}%")
                self.logger.record("eval/success_rate", success_rate)

            # Dump log so the evaluation results are printed with the correct timestep
            self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
            self.logger.dump(self.num_timesteps)

            if mean_reward > self.best_mean_reward:
                if self.verbose > 0:
                    print("New best mean reward!")
                if self.best_model_save_path is not None:
                    self.model.save(os.path.join(self.best_model_save_path, "best_model"))
                self.best_mean_reward = mean_reward
                # Trigger callback on new best model, if needed
                if self.callback_on_new_best is not None:
                    continue_training = self.callback_on_new_best.on_step()

            # Trigger callback after every evaluation, if needed
            if self.callback is not None:
                continue_training = continue_training and self._on_event()

        return continue_training

    def update_child_locals(self, locals_: Dict[str, Any]) -> None:
        """
        Update the references to the local variables.

        :param locals_: the local variables during rollout collection
        """
        if self.callback:
            self.callback.update_locals(locals_)
468
+
469
+
470
class StopTrainingOnRewardThreshold(BaseCallback):
    """
    Stop the training once a threshold in episodic reward
    has been reached (i.e. when the model is good enough).

    It must be used with the ``EvalCallback``.

    :param reward_threshold: Minimum expected reward per episode
        to stop training.
    :param verbose: Verbosity level
    """

    def __init__(self, reward_threshold: float, verbose: int = 0):
        super().__init__(verbose=verbose)
        self.reward_threshold = reward_threshold

    def _on_step(self) -> bool:
        assert self.parent is not None, "``StopTrainingOnMinimumReward`` callback must be used with an ``EvalCallback``"
        # Convert np.bool_ to bool, otherwise callback() is False won't work
        continue_training = bool(self.parent.best_mean_reward < self.reward_threshold)
        if not continue_training and self.verbose > 0:
            print(
                f"Stopping training because the mean reward {self.parent.best_mean_reward:.2f} "
                f" is above the threshold {self.reward_threshold}"
            )
        return continue_training
496
+
497
+
498
class EveryNTimesteps(EventCallback):
    """
    Trigger a callback every ``n_steps`` timesteps

    :param n_steps: Number of timesteps between two trigger.
    :param callback: Callback that will be called
        when the event is triggered.
    """

    def __init__(self, n_steps: int, callback: BaseCallback):
        super().__init__(callback)
        self.n_steps = n_steps
        self.last_time_trigger = 0

    def _on_step(self) -> bool:
        elapsed = self.num_timesteps - self.last_time_trigger
        if elapsed < self.n_steps:
            return True
        # Enough timesteps have passed: remember this point and fire the event
        self.last_time_trigger = self.num_timesteps
        return self._on_event()
517
+
518
+
519
class StopTrainingOnMaxEpisodes(BaseCallback):
    """
    Stop the training once a maximum number of episodes are played.

    For multiple environments presumes that, the desired behavior is that the agent trains on each env for ``max_episodes``
    and in total for ``max_episodes * n_envs`` episodes.

    :param max_episodes: Maximum number of episodes to stop training.
    :param verbose: Select whether to print information about when training ended by reaching ``max_episodes``
    """

    def __init__(self, max_episodes: int, verbose: int = 0):
        super().__init__(verbose=verbose)
        self.max_episodes = max_episodes
        # Scaled by n_envs in _init_callback() once the training env is known
        self._total_max_episodes = max_episodes
        # Running count of finished episodes across all envs
        self.n_episodes = 0

    def _init_callback(self) -> None:
        # At start set total max according to number of environments
        self._total_max_episodes = self.max_episodes * self.training_env.num_envs

    def _on_step(self) -> bool:
        # Check that the `dones` local variable is defined
        assert "dones" in self.locals, "`dones` variable is not defined, please check your code next to `callback.on_step()`"
        # Each True in `dones` marks one finished episode this step
        self.n_episodes += np.sum(self.locals["dones"]).item()

        continue_training = self.n_episodes < self._total_max_episodes

        if self.verbose > 0 and not continue_training:
            mean_episodes_per_env = self.n_episodes / self.training_env.num_envs
            mean_ep_str = (
                f"with an average of {mean_episodes_per_env:.2f} episodes per env" if self.training_env.num_envs > 1 else ""
            )

            print(
                f"Stopping training with a total of {self.num_timesteps} steps because the "
                f"{self.locals.get('tb_log_name')} model reached max_episodes={self.max_episodes}, "
                f"by playing for {self.n_episodes} episodes "
                f"{mean_ep_str}"
            )
        return continue_training
560
+
561
+
562
class StopTrainingOnNoModelImprovement(BaseCallback):
    """
    Stop the training early if there is no new best model (new best mean reward) after more than N consecutive evaluations.

    It is possible to define a minimum number of evaluations before start to count evaluations without improvement.

    It must be used with the ``EvalCallback``.

    :param max_no_improvement_evals: Maximum number of consecutive evaluations without a new best model.
    :param min_evals: Number of evaluations before start to count evaluations without improvements.
    :param verbose: Verbosity of the output (set to 1 for info messages)
    """

    def __init__(self, max_no_improvement_evals: int, min_evals: int = 0, verbose: int = 0):
        super().__init__(verbose=verbose)
        self.max_no_improvement_evals = max_no_improvement_evals
        self.min_evals = min_evals
        self.last_best_mean_reward = -np.inf
        self.no_improvement_evals = 0

    def _on_step(self) -> bool:
        assert self.parent is not None, "``StopTrainingOnNoModelImprovement`` callback must be used with an ``EvalCallback``"

        continue_training = True

        # Only start counting once the warm-up evaluations are done
        if self.n_calls > self.min_evals:
            improved = self.parent.best_mean_reward > self.last_best_mean_reward
            if improved:
                self.no_improvement_evals = 0
            else:
                self.no_improvement_evals += 1
                continue_training = self.no_improvement_evals <= self.max_no_improvement_evals

        # Remember the best reward seen so far to detect future improvements
        self.last_best_mean_reward = self.parent.best_mean_reward

        if self.verbose > 0 and not continue_training:
            print(
                f"Stopping training because there was no new best model in the last {self.no_improvement_evals:d} evaluations"
            )

        return continue_training
dexart-release/stable_baselines3/common/distributions.py ADDED
@@ -0,0 +1,699 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Probability distributions."""
2
+
3
+ from abc import ABC, abstractmethod
4
+ from typing import Any, Dict, List, Optional, Tuple, Union
5
+
6
+ import gym
7
+ import torch as th
8
+ from gym import spaces
9
+ from torch import nn
10
+ from torch.distributions import Bernoulli, Categorical, Normal
11
+
12
+ from stable_baselines3.common.preprocessing import get_action_dim
13
+
14
+
15
class Distribution(ABC):
    """Abstract base class for action probability distributions."""

    def __init__(self):
        super().__init__()
        # Set by proba_distribution() in concrete subclasses.
        self.distribution = None

    @abstractmethod
    def proba_distribution_net(self, *args, **kwargs) -> Union[nn.Module, Tuple[nn.Module, nn.Parameter]]:
        """Create the layers and parameters that represent the distribution.

        Subclasses must define this, but the arguments and return type vary between
        concrete classes."""

    @abstractmethod
    def proba_distribution(self, *args, **kwargs) -> "Distribution":
        """Set parameters of the distribution.

        :return: self
        """

    @abstractmethod
    def log_prob(self, x: th.Tensor) -> th.Tensor:
        """
        Returns the log likelihood

        :param x: the taken action
        :return: The log likelihood of the distribution
        """

    @abstractmethod
    def entropy(self) -> Optional[th.Tensor]:
        """
        Returns Shannon's entropy of the probability

        :return: the entropy, or None if no analytical form is known
        """

    @abstractmethod
    def sample(self) -> th.Tensor:
        """
        Returns a sample from the probability distribution

        :return: the stochastic action
        """

    @abstractmethod
    def mode(self) -> th.Tensor:
        """
        Returns the most likely action (deterministic output)
        from the probability distribution

        :return: the stochastic action
        """

    def get_actions(self, deterministic: bool = False) -> th.Tensor:
        """
        Return actions according to the probability distribution.

        :param deterministic: if True, return the mode instead of a sample
        :return: the chosen actions
        """
        return self.mode() if deterministic else self.sample()

    @abstractmethod
    def actions_from_params(self, *args, **kwargs) -> th.Tensor:
        """
        Returns samples from the probability distribution
        given its parameters.

        :return: actions
        """

    @abstractmethod
    def log_prob_from_params(self, *args, **kwargs) -> Tuple[th.Tensor, th.Tensor]:
        """
        Returns samples and the associated log probabilities
        from the probability distribution given its parameters.

        :return: actions and log prob
        """
98
+
99
+
100
def sum_independent_dims(tensor: th.Tensor) -> th.Tensor:
    """
    Continuous actions are usually considered to be independent,
    so we can sum components of the ``log_prob`` or the entropy.

    :param tensor: shape: (n_batch, n_actions) or (n_batch,)
    :return: shape: (n_batch,)
    """
    # Batched input: reduce over the action dimension only;
    # otherwise collapse everything to a scalar.
    if tensor.dim() > 1:
        return tensor.sum(dim=1)
    return tensor.sum()
113
+
114
+
115
class DiagGaussianDistribution(Distribution):
    """
    Gaussian distribution with a diagonal covariance matrix, for continuous actions.

    :param action_dim: Dimension of the action space.
    """

    def __init__(self, action_dim: int):
        super().__init__()
        self.action_dim = action_dim
        self.mean_actions = None
        self.log_std = None

    def proba_distribution_net(self, latent_dim: int, log_std_init: float = 0.0) -> Tuple[nn.Module, nn.Parameter]:
        """
        Build the parts that parameterize the distribution: a linear head for the
        mean and a free parameter for the log standard deviation (log so that
        negative values are allowed).

        :param latent_dim: Dimension of the last layer of the policy (before the action layer)
        :param log_std_init: Initial value for the log standard deviation
        :return: the mean head and the log-std parameter
        """
        mean_net = nn.Linear(latent_dim, self.action_dim)
        # TODO: allow action dependent std
        log_std_param = nn.Parameter(th.ones(self.action_dim) * log_std_init, requires_grad=True)
        return mean_net, log_std_param

    def proba_distribution(self, mean_actions: th.Tensor, log_std: th.Tensor) -> "DiagGaussianDistribution":
        """
        Create the distribution given its parameters (mean, std).

        :param mean_actions: mean of the Gaussian
        :param log_std: log standard deviation
        :return: self
        """
        # Broadcast the (action_dim,) std to the batch shape of the means.
        std = th.ones_like(mean_actions) * log_std.exp()
        self.distribution = Normal(mean_actions, std)
        return self

    def log_prob(self, actions: th.Tensor) -> th.Tensor:
        """
        Log-likelihood of ``actions`` under the current distribution.
        ``proba_distribution()`` must have been called first.

        :param actions: actions to evaluate
        :return: summed per-dimension log probability, shape (n_batch,)
        """
        return sum_independent_dims(self.distribution.log_prob(actions))

    def entropy(self) -> th.Tensor:
        return sum_independent_dims(self.distribution.entropy())

    def sample(self) -> th.Tensor:
        # rsample(): reparametrization trick so gradients flow through the sample
        return self.distribution.rsample()

    def mode(self) -> th.Tensor:
        return self.distribution.mean

    def actions_from_params(self, mean_actions: th.Tensor, log_std: th.Tensor, deterministic: bool = False) -> th.Tensor:
        # Refresh the distribution, then draw (or take the mode).
        self.proba_distribution(mean_actions, log_std)
        return self.get_actions(deterministic=deterministic)

    def log_prob_from_params(self, mean_actions: th.Tensor, log_std: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        """
        Sample actions and compute their log probability in one call.

        :param mean_actions: mean of the Gaussian
        :param log_std: log standard deviation
        :return: sampled actions and their log probability
        """
        actions = self.actions_from_params(mean_actions, log_std)
        return actions, self.log_prob(actions)
193
+
194
+
195
class SquashedDiagGaussianDistribution(DiagGaussianDistribution):
    """
    Diagonal Gaussian followed by a tanh squashing, so actions stay in bounds.

    :param action_dim: Dimension of the action space.
    :param epsilon: small value to avoid NaN due to numerical imprecision.
    """

    def __init__(self, action_dim: int, epsilon: float = 1e-6):
        super().__init__(action_dim)
        # Avoid NaN (prevents division by zero or log of zero)
        self.epsilon = epsilon
        # Last pre-squash sample, reused by log_prob_from_params().
        self.gaussian_actions = None

    def proba_distribution(self, mean_actions: th.Tensor, log_std: th.Tensor) -> "SquashedDiagGaussianDistribution":
        super().proba_distribution(mean_actions, log_std)
        return self

    def log_prob(self, actions: th.Tensor, gaussian_actions: Optional[th.Tensor] = None) -> th.Tensor:
        if gaussian_actions is None:
            # Invert the squashing (atanh); clipped internally to avoid NaN.
            # Naive 0.5 * log((1 + x) / (1 - x)) would be numerically unstable.
            gaussian_actions = TanhBijector.inverse(actions)

        # Gaussian log-likelihood of the pre-squash actions
        log_prob = super().log_prob(gaussian_actions)
        # Squash correction (from original SAC implementation):
        # change-of-variables term, valid because tanh is bijective and differentiable
        log_prob -= th.sum(th.log(1 - actions**2 + self.epsilon), dim=1)
        return log_prob

    def entropy(self) -> Optional[th.Tensor]:
        # No analytical form; estimate with -log_prob.mean() instead.
        return None

    def sample(self) -> th.Tensor:
        # Reparametrized sample; keep the pre-squash value for log_prob reuse.
        self.gaussian_actions = super().sample()
        return th.tanh(self.gaussian_actions)

    def mode(self) -> th.Tensor:
        self.gaussian_actions = super().mode()
        # Squash the deterministic output as well.
        return th.tanh(self.gaussian_actions)

    def log_prob_from_params(self, mean_actions: th.Tensor, log_std: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        action = self.actions_from_params(mean_actions, log_std)
        return action, self.log_prob(action, self.gaussian_actions)
247
+
248
+
249
class CategoricalDistribution(Distribution):
    """
    Categorical distribution for discrete actions.

    :param action_dim: Number of discrete actions
    """

    def __init__(self, action_dim: int):
        super().__init__()
        self.action_dim = action_dim

    def proba_distribution_net(self, latent_dim: int) -> nn.Module:
        """
        Linear layer producing the logits of the Categorical distribution
        (apply a softmax to recover probabilities).

        :param latent_dim: Dimension of the last layer
            of the policy network (before the action layer)
        :return: the logits layer
        """
        return nn.Linear(latent_dim, self.action_dim)

    def proba_distribution(self, action_logits: th.Tensor) -> "CategoricalDistribution":
        self.distribution = Categorical(logits=action_logits)
        return self

    def log_prob(self, actions: th.Tensor) -> th.Tensor:
        return self.distribution.log_prob(actions)

    def entropy(self) -> th.Tensor:
        return self.distribution.entropy()

    def sample(self) -> th.Tensor:
        return self.distribution.sample()

    def mode(self) -> th.Tensor:
        # Most probable action index per batch element.
        return th.argmax(self.distribution.probs, dim=1)

    def actions_from_params(self, action_logits: th.Tensor, deterministic: bool = False) -> th.Tensor:
        # Refresh the distribution, then draw (or take the mode).
        self.proba_distribution(action_logits)
        return self.get_actions(deterministic=deterministic)

    def log_prob_from_params(self, action_logits: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        actions = self.actions_from_params(action_logits)
        return actions, self.log_prob(actions)
298
+
299
+
300
class MultiCategoricalDistribution(Distribution):
    """
    MultiCategorical distribution for multi discrete actions.

    :param action_dims: List of sizes of discrete action spaces
    """

    def __init__(self, action_dims: List[int]):
        super().__init__()
        self.action_dims = action_dims

    def proba_distribution_net(self, latent_dim: int) -> nn.Module:
        """
        Linear layer producing the concatenated (flattened) logits of every
        sub-space; apply a softmax per sub-space to recover probabilities.

        :param latent_dim: Dimension of the last layer
            of the policy network (before the action layer)
        :return: the logits layer
        """
        return nn.Linear(latent_dim, sum(self.action_dims))

    def proba_distribution(self, action_logits: th.Tensor) -> "MultiCategoricalDistribution":
        # One Categorical per sub-space, each fed its own slice of the logits.
        chunks = th.split(action_logits, tuple(self.action_dims), dim=1)
        self.distribution = [Categorical(logits=chunk) for chunk in chunks]
        return self

    def log_prob(self, actions: th.Tensor) -> th.Tensor:
        # Pair each action column with its sub-distribution, then sum the log probs.
        per_dim = [dist.log_prob(act) for dist, act in zip(self.distribution, th.unbind(actions, dim=1))]
        return th.stack(per_dim, dim=1).sum(dim=1)

    def entropy(self) -> th.Tensor:
        entropies = [dist.entropy() for dist in self.distribution]
        return th.stack(entropies, dim=1).sum(dim=1)

    def sample(self) -> th.Tensor:
        return th.stack([dist.sample() for dist in self.distribution], dim=1)

    def mode(self) -> th.Tensor:
        modes = [th.argmax(dist.probs, dim=1) for dist in self.distribution]
        return th.stack(modes, dim=1)

    def actions_from_params(self, action_logits: th.Tensor, deterministic: bool = False) -> th.Tensor:
        # Refresh the distribution, then draw (or take the mode).
        self.proba_distribution(action_logits)
        return self.get_actions(deterministic=deterministic)

    def log_prob_from_params(self, action_logits: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        actions = self.actions_from_params(action_logits)
        return actions, self.log_prob(actions)
353
+
354
+
355
class BernoulliDistribution(Distribution):
    """
    Bernoulli distribution for MultiBinary action spaces.

    :param action_dims: Number of binary actions
    """

    def __init__(self, action_dims: int):
        super().__init__()
        self.action_dims = action_dims

    def proba_distribution_net(self, latent_dim: int) -> nn.Module:
        """
        Create the layer that represents the distribution:
        it will be the logits of the Bernoulli distribution.

        :param latent_dim: Dimension of the last layer
            of the policy network (before the action layer)
        :return: the logits layer
        """
        action_logits = nn.Linear(latent_dim, self.action_dims)
        return action_logits

    def proba_distribution(self, action_logits: th.Tensor) -> "BernoulliDistribution":
        self.distribution = Bernoulli(logits=action_logits)
        return self

    def log_prob(self, actions: th.Tensor) -> th.Tensor:
        # Binary dimensions are independent: sum the per-dimension log probs.
        return self.distribution.log_prob(actions).sum(dim=1)

    def entropy(self) -> th.Tensor:
        return self.distribution.entropy().sum(dim=1)

    def sample(self) -> th.Tensor:
        return self.distribution.sample()

    def mode(self) -> th.Tensor:
        # Most likely outcome per dimension: probability rounded to 0 or 1.
        return th.round(self.distribution.probs)

    def actions_from_params(self, action_logits: th.Tensor, deterministic: bool = False) -> th.Tensor:
        # Update the proba distribution
        self.proba_distribution(action_logits)
        return self.get_actions(deterministic=deterministic)

    def log_prob_from_params(self, action_logits: th.Tensor) -> Tuple[th.Tensor, th.Tensor]:
        actions = self.actions_from_params(action_logits)
        log_prob = self.log_prob(actions)
        return actions, log_prob
403
+
404
+
405
class StateDependentNoiseDistribution(Distribution):
    """
    Distribution class for using generalized State Dependent Exploration (gSDE).
    Paper: https://arxiv.org/abs/2005.05719

    It is used to create the noise exploration matrix and
    compute the log probability of an action with that noise.

    :param action_dim: Dimension of the action space.
    :param full_std: Whether to use (n_features x n_actions) parameters
        for the std instead of only (n_features,)
    :param use_expln: Use ``expln()`` function instead of ``exp()`` to ensure
        a positive standard deviation (cf paper). It allows to keep variance
        above zero and prevent it from growing too fast. In practice, ``exp()`` is usually enough.
    :param squash_output: Whether to squash the output using a tanh function,
        this ensures bounds are satisfied.
    :param learn_features: Whether to learn features for gSDE or not.
        This will enable gradients to be backpropagated through the features
        ``latent_sde`` in the code.
    :param epsilon: small value to avoid NaN due to numerical imprecision.
    """

    def __init__(
        self,
        action_dim: int,
        full_std: bool = True,
        use_expln: bool = False,
        squash_output: bool = False,
        learn_features: bool = False,
        epsilon: float = 1e-6,
    ):
        super().__init__()
        self.action_dim = action_dim
        # The attributes below are populated later by proba_distribution_net()
        # and proba_distribution(); they start as None placeholders.
        self.latent_sde_dim = None
        self.mean_actions = None
        self.log_std = None
        self.weights_dist = None
        self.exploration_mat = None
        self.exploration_matrices = None
        self._latent_sde = None
        self.use_expln = use_expln
        self.full_std = full_std
        self.epsilon = epsilon
        self.learn_features = learn_features
        if squash_output:
            # Optional tanh squashing of the final actions.
            self.bijector = TanhBijector(epsilon)
        else:
            self.bijector = None

    def get_std(self, log_std: th.Tensor) -> th.Tensor:
        """
        Get the standard deviation from the learned parameter
        (log of it by default). This ensures that the std is positive.

        :param log_std: learned log-std parameter
        :return: positive standard deviation tensor
        """
        if self.use_expln:
            # From gSDE paper, it allows to keep variance
            # above zero and prevent it from growing too fast
            below_threshold = th.exp(log_std) * (log_std <= 0)
            # Avoid NaN: zeros values that are below zero
            safe_log_std = log_std * (log_std > 0) + self.epsilon
            above_threshold = (th.log1p(safe_log_std) + 1.0) * (log_std > 0)
            std = below_threshold + above_threshold
        else:
            # Use normal exponential
            std = th.exp(log_std)

        if self.full_std:
            return std
        # Reduce the number of parameters:
        # broadcast the (latent_sde_dim, 1) std to the full matrix shape.
        return th.ones(self.latent_sde_dim, self.action_dim).to(log_std.device) * std

    def sample_weights(self, log_std: th.Tensor, batch_size: int = 1) -> None:
        """
        Sample weights for the noise exploration matrix,
        using a centered Gaussian distribution.

        :param log_std: learned log-std parameter
        :param batch_size: number of matrices for parallel exploration
        """
        std = self.get_std(log_std)
        self.weights_dist = Normal(th.zeros_like(std), std)
        # Reparametrization trick to pass gradients
        self.exploration_mat = self.weights_dist.rsample()
        # Pre-compute matrices in case of parallel exploration
        self.exploration_matrices = self.weights_dist.rsample((batch_size,))

    def proba_distribution_net(
        self, latent_dim: int, log_std_init: float = -2.0, latent_sde_dim: Optional[int] = None
    ) -> Tuple[nn.Module, nn.Parameter]:
        """
        Create the layers and parameter that represent the distribution:
        one output will be the deterministic action, the other parameter will be the
        standard deviation of the distribution that control the weights of the noise matrix.

        :param latent_dim: Dimension of the last layer of the policy (before the action layer)
        :param log_std_init: Initial value for the log standard deviation
        :param latent_sde_dim: Dimension of the last layer of the features extractor
            for gSDE. By default, it is shared with the policy network.
        :return: the mean-action head and the log-std parameter
        """
        # Network for the deterministic action, it represents the mean of the distribution
        mean_actions_net = nn.Linear(latent_dim, self.action_dim)
        # When we learn features for the noise, the feature dimension
        # can be different between the policy and the noise network
        self.latent_sde_dim = latent_dim if latent_sde_dim is None else latent_sde_dim
        # Reduce the number of parameters if needed
        log_std = th.ones(self.latent_sde_dim, self.action_dim) if self.full_std else th.ones(self.latent_sde_dim, 1)
        # Transform it to a parameter so it can be optimized
        log_std = nn.Parameter(log_std * log_std_init, requires_grad=True)
        # Sample an exploration matrix
        self.sample_weights(log_std)
        return mean_actions_net, log_std

    def proba_distribution(
        self, mean_actions: th.Tensor, log_std: th.Tensor, latent_sde: th.Tensor
    ) -> "StateDependentNoiseDistribution":
        """
        Create the distribution given its parameters (mean, std)

        :param mean_actions: mean of the action distribution
        :param log_std: learned log-std parameter
        :param latent_sde: features used to generate the state-dependent noise
        :return: self
        """
        # Stop gradient if we don't want to influence the features
        self._latent_sde = latent_sde if self.learn_features else latent_sde.detach()
        # Per-action variance induced by the noise matrix: (features^2) @ (std^2)
        variance = th.mm(self._latent_sde**2, self.get_std(log_std) ** 2)
        self.distribution = Normal(mean_actions, th.sqrt(variance + self.epsilon))
        return self

    def log_prob(self, actions: th.Tensor) -> th.Tensor:
        # If squashing is enabled, work with the pre-squash (Gaussian) actions.
        if self.bijector is not None:
            gaussian_actions = self.bijector.inverse(actions)
        else:
            gaussian_actions = actions
        # log likelihood for a gaussian
        log_prob = self.distribution.log_prob(gaussian_actions)
        # Sum along action dim
        log_prob = sum_independent_dims(log_prob)

        if self.bijector is not None:
            # Squash correction (from original SAC implementation)
            log_prob -= th.sum(self.bijector.log_prob_correction(gaussian_actions), dim=1)
        return log_prob

    def entropy(self) -> Optional[th.Tensor]:
        if self.bijector is not None:
            # No analytical form,
            # entropy needs to be estimated using -log_prob.mean()
            return None
        return sum_independent_dims(self.distribution.entropy())

    def sample(self) -> th.Tensor:
        # Deterministic mean plus state-dependent noise.
        noise = self.get_noise(self._latent_sde)
        actions = self.distribution.mean + noise
        if self.bijector is not None:
            return self.bijector.forward(actions)
        return actions

    def mode(self) -> th.Tensor:
        actions = self.distribution.mean
        if self.bijector is not None:
            return self.bijector.forward(actions)
        return actions

    def get_noise(self, latent_sde: th.Tensor) -> th.Tensor:
        latent_sde = latent_sde if self.learn_features else latent_sde.detach()
        # Default case: only one exploration matrix
        # (also the fallback when batch size does not match the
        # pre-computed matrices, e.g. during gradient updates)
        if len(latent_sde) == 1 or len(latent_sde) != len(self.exploration_matrices):
            return th.mm(latent_sde, self.exploration_mat)
        # Use batch matrix multiplication for efficient computation
        # (batch_size, n_features) -> (batch_size, 1, n_features)
        latent_sde = latent_sde.unsqueeze(1)
        # (batch_size, 1, n_actions)
        noise = th.bmm(latent_sde, self.exploration_matrices)
        return noise.squeeze(1)

    def actions_from_params(
        self, mean_actions: th.Tensor, log_std: th.Tensor, latent_sde: th.Tensor, deterministic: bool = False
    ) -> th.Tensor:
        # Update the proba distribution
        self.proba_distribution(mean_actions, log_std, latent_sde)
        return self.get_actions(deterministic=deterministic)

    def log_prob_from_params(
        self, mean_actions: th.Tensor, log_std: th.Tensor, latent_sde: th.Tensor
    ) -> Tuple[th.Tensor, th.Tensor]:
        actions = self.actions_from_params(mean_actions, log_std, latent_sde)
        log_prob = self.log_prob(actions)
        return actions, log_prob
598
+
599
+
600
class TanhBijector:
    """
    Bijective transformation of a probability distribution
    using a squashing function (tanh)
    TODO: use Pyro instead (https://pyro.ai/)

    :param epsilon: small value to avoid NaN due to numerical imprecision.
    """

    def __init__(self, epsilon: float = 1e-6):
        super().__init__()
        self.epsilon = epsilon

    @staticmethod
    def forward(x: th.Tensor) -> th.Tensor:
        """Apply the squashing: tanh(x)."""
        return th.tanh(x)

    @staticmethod
    def atanh(x: th.Tensor) -> th.Tensor:
        """
        Inverse of Tanh

        Taken from Pyro: https://github.com/pyro-ppl/pyro
        0.5 * torch.log((1 + x ) / (1 - x))
        """
        # log1p-based form is numerically stabler than the naive expression.
        return 0.5 * (x.log1p() - (-x).log1p())

    @staticmethod
    def inverse(y: th.Tensor) -> th.Tensor:
        """
        Inverse tanh, with the input clipped to the open interval (-1, 1).

        :param y: squashed value
        :return: pre-squash value
        """
        eps = th.finfo(y.dtype).eps
        # Clip the action to avoid NaN
        clipped = y.clamp(min=-1.0 + eps, max=1.0 - eps)
        return TanhBijector.atanh(clipped)

    def log_prob_correction(self, x: th.Tensor) -> th.Tensor:
        # Squash correction (from original SAC implementation)
        return th.log(1.0 - th.tanh(x) ** 2 + self.epsilon)
642
+
643
+
644
def make_proba_distribution(
    action_space: gym.spaces.Space, use_sde: bool = False, dist_kwargs: Optional[Dict[str, Any]] = None
) -> Distribution:
    """
    Return an instance of Distribution for the correct type of action space

    :param action_space: the input action space
    :param use_sde: Force the use of StateDependentNoiseDistribution
        instead of DiagGaussianDistribution
    :param dist_kwargs: Keyword arguments to pass to the probability distribution
    :return: the appropriate Distribution object
    :raises NotImplementedError: for unsupported action space types
    """
    if dist_kwargs is None:
        dist_kwargs = {}

    if isinstance(action_space, spaces.Box):
        assert len(action_space.shape) == 1, "Error: the action space must be a vector"
        cls = StateDependentNoiseDistribution if use_sde else DiagGaussianDistribution
        return cls(get_action_dim(action_space), **dist_kwargs)
    elif isinstance(action_space, spaces.Discrete):
        return CategoricalDistribution(action_space.n, **dist_kwargs)
    elif isinstance(action_space, spaces.MultiDiscrete):
        # ``nvec`` is a numpy array; convert it to a plain list of ints so that
        # ``MultiCategoricalDistribution.action_dims`` matches its declared
        # List[int] type and supports unambiguous equality checks
        # (e.g. the assert inside ``kl_divergence``).
        return MultiCategoricalDistribution(list(action_space.nvec), **dist_kwargs)
    elif isinstance(action_space, spaces.MultiBinary):
        return BernoulliDistribution(action_space.n, **dist_kwargs)
    else:
        raise NotImplementedError(
            "Error: probability distribution, not implemented for action space "
            f"of type {type(action_space)}."
            " Must be of type Gym Spaces: Box, Discrete, MultiDiscrete or MultiBinary."
        )
675
+
676
+
677
def kl_divergence(dist_true: Distribution, dist_pred: Distribution) -> th.Tensor:
    """
    Wrapper for the PyTorch implementation of the full form KL Divergence

    :param dist_true: the p distribution
    :param dist_pred: the q distribution
    :return: KL(dist_true||dist_pred)
    """
    # KL Divergence for different distribution types is out of scope
    assert dist_true.__class__ == dist_pred.__class__, "Error: input distributions should be the same type"

    # MultiCategoricalDistribution is not a PyTorch Distribution subclass
    # so we need to implement it ourselves!
    if isinstance(dist_pred, MultiCategoricalDistribution):
        assert dist_pred.action_dims == dist_true.action_dims, "Error: distributions must have the same input space"
        per_dim_kl = [
            th.distributions.kl_divergence(p, q)
            for p, q in zip(dist_true.distribution, dist_pred.distribution)
        ]
        return th.stack(per_dim_kl, dim=1).sum(dim=1)

    # Use the PyTorch kl_divergence implementation
    return th.distributions.kl_divergence(dist_true.distribution, dist_pred.distribution)
dexart-release/stable_baselines3/common/env_util.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import Any, Callable, Dict, Optional, Type, Union
3
+
4
+ import gym
5
+
6
+ from stable_baselines3.common.monitor import Monitor
7
+ from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv, VecEnv
8
+
9
+
10
def unwrap_wrapper(env: gym.Env, wrapper_class: Type[gym.Wrapper]) -> Optional[gym.Wrapper]:
    """
    Walk the chain of gym wrappers around ``env`` and return the first one
    that is an instance of ``wrapper_class``.

    :param env: Environment to unwrap
    :param wrapper_class: Wrapper to look for
    :return: the matching wrapper, or None if ``env`` is not wrapped with it
    """
    current = env
    while isinstance(current, gym.Wrapper):
        if isinstance(current, wrapper_class):
            return current
        current = current.env
    return None
24
+
25
+
26
def is_wrapped(env: Type[gym.Env], wrapper_class: Type[gym.Wrapper]) -> bool:
    """
    Check if a given environment has been wrapped with a given wrapper.

    :param env: Environment to check
    :param wrapper_class: Wrapper class to look for
    :return: True if environment has been wrapped with ``wrapper_class``.
    """
    found = unwrap_wrapper(env, wrapper_class)
    return found is not None
35
+
36
+
37
def make_vec_env(
    env_id: Union[str, Type[gym.Env]],
    n_envs: int = 1,
    seed: Optional[int] = None,
    start_index: int = 0,
    monitor_dir: Optional[str] = None,
    wrapper_class: Optional[Callable[[gym.Env], gym.Env]] = None,
    env_kwargs: Optional[Dict[str, Any]] = None,
    vec_env_cls: Optional[Type[Union[DummyVecEnv, SubprocVecEnv]]] = None,
    vec_env_kwargs: Optional[Dict[str, Any]] = None,
    monitor_kwargs: Optional[Dict[str, Any]] = None,
    wrapper_kwargs: Optional[Dict[str, Any]] = None,
) -> VecEnv:
    """
    Create a wrapped, monitored ``VecEnv``.
    By default it uses a ``DummyVecEnv`` which is usually faster
    than a ``SubprocVecEnv``.

    :param env_id: the environment ID or the environment class
    :param n_envs: the number of environments you wish to have in parallel
    :param seed: the initial seed for the random number generator
    :param start_index: start rank index
    :param monitor_dir: Path to a folder where the monitor files will be saved.
        If None, no file will be written, however, the env will still be wrapped
        in a Monitor wrapper to provide additional information about training.
    :param wrapper_class: Additional wrapper to use on the environment.
        This can also be a function with single argument that wraps the environment in many things.
    :param env_kwargs: Optional keyword argument to pass to the env constructor
    :param vec_env_cls: A custom ``VecEnv`` class constructor. Default: None.
    :param vec_env_kwargs: Keyword arguments to pass to the ``VecEnv`` class constructor.
    :param monitor_kwargs: Keyword arguments to pass to the ``Monitor`` class constructor.
    :param wrapper_kwargs: Keyword arguments to pass to the ``Wrapper`` class constructor.
    :return: The wrapped environment
    """
    # Normalize all optional kwargs dicts to empty dicts.
    if env_kwargs is None:
        env_kwargs = {}
    if vec_env_kwargs is None:
        vec_env_kwargs = {}
    if monitor_kwargs is None:
        monitor_kwargs = {}
    if wrapper_kwargs is None:
        wrapper_kwargs = {}

    def _make_env_fn(rank: int):
        # Return a thunk that builds one monitored (and optionally wrapped) env.
        def _init():
            if isinstance(env_id, str):
                env = gym.make(env_id, **env_kwargs)
            else:
                env = env_id(**env_kwargs)
            if seed is not None:
                # Offset the seed per rank so parallel envs differ.
                env.seed(seed + rank)
                env.action_space.seed(seed + rank)
            # Always wrap in a Monitor for training statistics; a file is only
            # written when monitor_dir is given.
            if monitor_dir is not None:
                monitor_path = os.path.join(monitor_dir, str(rank))
                os.makedirs(monitor_dir, exist_ok=True)
            else:
                monitor_path = None
            env = Monitor(env, filename=monitor_path, **monitor_kwargs)
            # Optionally apply the user-provided wrapper last.
            if wrapper_class is not None:
                env = wrapper_class(env, **wrapper_kwargs)
            return env

        return _init

    if vec_env_cls is None:
        # Default: use a DummyVecEnv
        vec_env_cls = DummyVecEnv

    env_fns = [_make_env_fn(rank) for rank in range(start_index, start_index + n_envs)]
    return vec_env_cls(env_fns, **vec_env_kwargs)
dexart-release/stable_baselines3/common/evaluation.py ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import warnings
2
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
3
+
4
+ import gym
5
+ import numpy as np
6
+
7
+ from stable_baselines3.common import base_class
8
+ from stable_baselines3.common.vec_env import DummyVecEnv, VecEnv, VecMonitor, is_vecenv_wrapped
9
+
10
+
11
def evaluate_policy(
    model: "base_class.BaseAlgorithm",
    env: Union[gym.Env, VecEnv],
    n_eval_episodes: int = 10,
    deterministic: bool = True,
    render: bool = False,
    callback: Optional[Callable[[Dict[str, Any], Dict[str, Any]], None]] = None,
    reward_threshold: Optional[float] = None,
    return_episode_rewards: bool = False,
    warn: bool = True,
) -> Union[Tuple[float, float], Tuple[List[float], List[int]]]:
    """
    Runs policy for ``n_eval_episodes`` episodes and returns average reward.
    If a vector env is passed in, this divides the episodes to evaluate onto the
    different elements of the vector env. This static division of work is done to
    remove bias. See https://github.com/DLR-RM/stable-baselines3/issues/402 for more
    details and discussion.

    .. note::
        If environment has not been wrapped with ``Monitor`` wrapper, reward and
        episode lengths are counted as it appears with ``env.step`` calls. If
        the environment contains wrappers that modify rewards or episode lengths
        (e.g. reward scaling, early episode reset), these will affect the evaluation
        results as well. You can avoid this by wrapping environment with ``Monitor``
        wrapper before anything else.

    :param model: The RL agent you want to evaluate.
    :param env: The gym environment or ``VecEnv`` environment.
    :param n_eval_episodes: Number of episode to evaluate the agent
    :param deterministic: Whether to use deterministic or stochastic actions
    :param render: Whether to render the environment or not
    :param callback: callback function to do additional checks,
        called after each step. Gets locals() and globals() passed as parameters.
    :param reward_threshold: Minimum expected reward per episode,
        this will raise an error if the performance is not met
    :param return_episode_rewards: If True, a list of rewards and episode lengths
        per episode will be returned instead of the mean.
    :param warn: If True (default), warns user about lack of a Monitor wrapper in the
        evaluation environment.
    :return: Mean reward per episode, std of reward per episode.
        Returns ([float], [int]) when ``return_episode_rewards`` is True, first
        list containing per-episode rewards and second containing per-episode lengths
        (in number of steps).
    """
    is_monitor_wrapped = False
    # Avoid circular import
    from stable_baselines3.common.monitor import Monitor

    # Normalize a plain gym env to a 1-element VecEnv so the loop below
    # can treat everything as vectorized.
    if not isinstance(env, VecEnv):
        env = DummyVecEnv([lambda: env])

    is_monitor_wrapped = is_vecenv_wrapped(env, VecMonitor) or env.env_is_wrapped(Monitor)[0]

    if not is_monitor_wrapped and warn:
        warnings.warn(
            "Evaluation environment is not wrapped with a ``Monitor`` wrapper. "
            "This may result in reporting modified episode lengths and rewards, if other wrappers happen to modify these. "
            "Consider wrapping environment first with ``Monitor`` wrapper.",
            UserWarning,
        )

    n_envs = env.num_envs
    episode_rewards = []
    episode_lengths = []

    episode_counts = np.zeros(n_envs, dtype="int")
    # Divides episodes among different sub environments in the vector as evenly as possible
    episode_count_targets = np.array([(n_eval_episodes + i) // n_envs for i in range(n_envs)], dtype="int")

    current_rewards = np.zeros(n_envs)
    current_lengths = np.zeros(n_envs, dtype="int")
    observations = env.reset()
    states = None
    episode_starts = np.ones((env.num_envs,), dtype=bool)
    # Run until every sub-env has completed its assigned number of episodes.
    while (episode_counts < episode_count_targets).any():
        actions, states = model.predict(observations, state=states, episode_start=episode_starts, deterministic=deterministic)
        observations, rewards, dones, infos = env.step(actions)
        current_rewards += rewards
        current_lengths += 1
        for i in range(n_envs):
            if episode_counts[i] < episode_count_targets[i]:

                # unpack values so that the callback can access the local variables
                # NOTE: the callback receives this frame's locals(); keep these
                # local names stable — they are effectively part of the API.
                reward = rewards[i]
                done = dones[i]
                info = infos[i]
                episode_starts[i] = done

                if callback is not None:
                    callback(locals(), globals())

                if dones[i]:
                    if is_monitor_wrapped:
                        # Atari wrapper can send a "done" signal when
                        # the agent loses a life, but it does not correspond
                        # to the true end of episode
                        if "episode" in info.keys():
                            # Do not trust "done" with episode endings.
                            # Monitor wrapper includes "episode" key in info if environment
                            # has been wrapped with it. Use those rewards instead.
                            episode_rewards.append(info["episode"]["r"])
                            episode_lengths.append(info["episode"]["l"])
                            # Only increment at the real end of an episode
                            episode_counts[i] += 1
                    else:
                        episode_rewards.append(current_rewards[i])
                        episode_lengths.append(current_lengths[i])
                        episode_counts[i] += 1
                    # Reset the per-env accumulators even when the Monitor
                    # "done" was not a true episode end.
                    current_rewards[i] = 0
                    current_lengths[i] = 0

        if render:
            env.render()

    mean_reward = np.mean(episode_rewards)
    std_reward = np.std(episode_rewards)
    if reward_threshold is not None:
        assert mean_reward > reward_threshold, "Mean reward below threshold: " f"{mean_reward:.2f} < {reward_threshold:.2f}"
    if return_episode_rewards:
        return episode_rewards, episode_lengths
    return mean_reward, std_reward
dexart-release/stable_baselines3/common/logger.py ADDED
@@ -0,0 +1,644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import datetime
2
+ import json
3
+ import os
4
+ import sys
5
+ import tempfile
6
+ import warnings
7
+ from collections import defaultdict
8
+ from typing import Any, Dict, List, Optional, Sequence, TextIO, Tuple, Union
9
+
10
+ import numpy as np
11
+ import pandas
12
+ import torch as th
13
+ from matplotlib import pyplot as plt
14
+
15
+ try:
16
+ from torch.utils.tensorboard import SummaryWriter
17
+ except ImportError:
18
+ SummaryWriter = None
19
+
20
# Logging verbosity levels (numeric values mirror the stdlib ``logging`` module).
# ``Logger.set_level(DISABLED)`` silences all output.
DEBUG = 10
INFO = 20
WARN = 30
ERROR = 40
DISABLED = 50
25
+
26
+
27
class Video:
    """
    Container for a video to be logged: raw frames plus the playback rate.

    :param frames: frames to create the video from
    :param fps: frames per second
    """

    def __init__(self, frames: th.Tensor, fps: Union[float, int]):
        self.frames, self.fps = frames, fps
38
+
39
+
40
class Figure:
    """
    Container for a matplotlib figure to be logged, plus a flag telling the
    writer whether to close the figure once it has been logged.

    :param figure: figure to log
    :param close: if true, close the figure after logging it
    """

    def __init__(self, figure: plt.figure, close: bool):
        self.figure, self.close = figure, close
51
+
52
+
53
class Image:
    """
    Container for an image to be logged together with its data-format string.

    :param image: image to log
    :param dataformats: Image data format specification of the form NCHW, NHWC, CHW, HWC, HW, WH, etc.
        More info in add_image method doc at https://pytorch.org/docs/stable/tensorboard.html
        Gym envs normally use 'HWC' (channel last)
    """

    def __init__(self, image: Union[th.Tensor, np.ndarray, str], dataformats: str):
        self.image, self.dataformats = image, dataformats
66
+
67
+
68
class FormatUnsupportedError(NotImplementedError):
    """
    Raised when a logged value cannot be rendered by one of the active
    output formats; the message names the offending format(s).

    :param unsupported_formats: A sequence of unsupported formats,
        for instance ``["stdout"]``.
    :param value_description: Description of the value that cannot be logged by this format.
    """

    def __init__(self, unsupported_formats: Sequence[str], value_description: str):
        # Singular/plural phrasing depending on how many formats are affected
        format_str = (
            f"formats {', '.join(unsupported_formats)} are"
            if len(unsupported_formats) > 1
            else f"format {unsupported_formats[0]} is"
        )
        super().__init__(
            f"The {format_str} not supported for the {value_description} value logged.\n"
            f"You can exclude formats via the `exclude` parameter of the logger's `record` function."
        )
87
+
88
+
89
class KVWriter:
    """
    Interface for key/value writers; concrete formats must implement
    both ``write`` and ``close``.
    """

    def write(self, key_values: Dict[str, Any], key_excluded: Dict[str, Union[str, Tuple[str, ...]]],
              step: int = 0) -> None:
        """
        Write a dictionary to file

        :param key_values:
        :param key_excluded:
        :param step:
        """
        raise NotImplementedError

    def close(self) -> None:
        """
        Close owned resources
        """
        raise NotImplementedError
110
+
111
+
112
class SeqWriter:
    """
    Interface for writers that emit a plain sequence of values
    (e.g. free-form log messages) rather than key/value pairs.
    """

    def write_sequence(self, sequence: List) -> None:
        """
        write_sequence an array to file

        :param sequence:
        """
        raise NotImplementedError
124
+
125
+
126
class HumanOutputFormat(KVWriter, SeqWriter):
    """A human-readable output format producing ASCII tables of key-value pairs.

    Set attribute ``max_length`` to change the maximum length of keys and values
    to write to output (or specify it when calling ``__init__``).

    :param filename_or_file: the file to write the log to
    :param max_length: the maximum length of keys and values to write to output.
        Outputs longer than this will be truncated. An error will be raised
        if multiple keys are truncated to the same value. The maximum output
        width will be ``2*max_length + 7``. The default of 36 produces output
        no longer than 79 characters wide.
    """

    def __init__(self, filename_or_file: Union[str, TextIO], max_length: int = 36):
        self.max_length = max_length
        if isinstance(filename_or_file, str):
            # Own the handle only when we opened it ourselves (see close()).
            self.file = open(filename_or_file, "wt")
            self.own_file = True
        else:
            assert hasattr(filename_or_file, "write"), f"Expected file or str, got {filename_or_file}"
            self.file = filename_or_file
            self.own_file = False

    def write(self, key_values: Dict, key_excluded: Dict, step: int = 0) -> None:
        """
        Render the key/value pairs as an ASCII table and write it to the file.

        :param key_values: values to display
        :param key_excluded: per-key output-format exclusions
        :param step: unused by this format
        """
        # Create strings for printing
        key2str = {}
        # `tag` persists across loop iterations: once a "tag/" prefix is seen,
        # subsequent keys sharing that prefix are indented under it.
        tag = None
        for (key, value), (_, excluded) in zip(sorted(key_values.items()), sorted(key_excluded.items())):

            if excluded is not None and ("stdout" in excluded or "log" in excluded):
                continue

            elif isinstance(value, Video):
                raise FormatUnsupportedError(["stdout", "log"], "video")

            elif isinstance(value, Figure):
                raise FormatUnsupportedError(["stdout", "log"], "figure")

            elif isinstance(value, Image):
                raise FormatUnsupportedError(["stdout", "log"], "image")

            elif isinstance(value, float):
                # Align left
                value_str = f"{value:<8.3g}"
            else:
                value_str = str(value)

            if key.find("/") > 0:  # Find tag and add it to the dict
                tag = key[: key.find("/") + 1]
                key2str[self._truncate(tag)] = ""
            # Remove tag from key
            if tag is not None and tag in key:
                key = str(" " + key[len(tag):])

            truncated_key = self._truncate(key)
            if truncated_key in key2str:
                # Two distinct keys collapsed to the same truncated name would
                # silently overwrite each other — fail loudly instead.
                raise ValueError(
                    f"Key '{key}' truncated to '{truncated_key}' that already exists. Consider increasing `max_length`."
                )
            key2str[truncated_key] = self._truncate(value_str)

        # Find max widths
        if len(key2str) == 0:
            warnings.warn("Tried to write empty key-value dict")
            return
        else:
            key_width = max(map(len, key2str.keys()))
            val_width = max(map(len, key2str.values()))

        # Write out the data
        dashes = "-" * (key_width + val_width + 7)
        lines = [dashes]
        for key, value in key2str.items():
            key_space = " " * (key_width - len(key))
            val_space = " " * (val_width - len(value))
            lines.append(f"| {key}{key_space} | {value}{val_space} |")
        lines.append(dashes)
        self.file.write("\n".join(lines) + "\n")

        # Flush the output to the file
        self.file.flush()

    def _truncate(self, string: str) -> str:
        """Shorten ``string`` to ``max_length`` chars, ending in ``...`` if cut."""
        if len(string) > self.max_length:
            string = string[: self.max_length - 3] + "..."
        return string

    def write_sequence(self, sequence: List) -> None:
        """Write the sequence elements space-separated on one line."""
        sequence = list(sequence)
        for i, elem in enumerate(sequence):
            self.file.write(elem)
            if i < len(sequence) - 1:  # add space unless this is the last one
                self.file.write(" ")
        self.file.write("\n")
        self.file.flush()

    def close(self) -> None:
        """
        closes the file

        Only closes handles this writer opened itself (never e.g. ``sys.stdout``).
        """
        if self.own_file:
            self.file.close()
229
+
230
+
231
def filter_excluded_keys(
    key_values: Dict[str, Any], key_excluded: Dict[str, Union[str, Tuple[str, ...]]], _format: str
) -> Dict[str, Any]:
    """
    Filters the keys specified by ``key_exclude`` for the specified format

    :param key_values: log dictionary to be filtered
    :param key_excluded: keys to be excluded per format
    :param _format: format for which this filter is run
    :return: dict without the excluded keys
    """
    kept = {}
    for key, value in key_values.items():
        exclusions = key_excluded.get(key)
        # A key is dropped only when it has a non-None exclusion entry
        # that mentions this format.
        if exclusions is not None and _format in exclusions:
            continue
        kept[key] = value
    return kept
247
+
248
+
249
class JSONOutputFormat(KVWriter):
    """
    Log to a file, in the JSON format

    :param filename: the file to write the log to
    """

    def __init__(self, filename: str):
        self.file = open(filename, "wt")

    def write(self, key_values: Dict[str, Any], key_excluded: Dict[str, Union[str, Tuple[str, ...]]],
              step: int = 0) -> None:
        """Serialize the non-excluded key/value pairs as one JSON line."""

        def cast_to_json_serializable(value: Any):
            # Rich media values have no JSON representation
            for unsupported_type, description in ((Video, "video"), (Figure, "figure"), (Image, "image")):
                if isinstance(value, unsupported_type):
                    raise FormatUnsupportedError(["json"], description)
            if hasattr(value, "dtype"):
                if value.shape == () or len(value) == 1:
                    # if value is a dimensionless numpy array or of length 1, serialize as a float
                    return float(value)
                # otherwise, a value is a numpy array, serialize as a list or nested lists
                return value.tolist()
            return value

        serializable = {
            key: cast_to_json_serializable(value)
            for key, value in filter_excluded_keys(key_values, key_excluded, "json").items()
        }
        self.file.write(json.dumps(serializable) + "\n")
        self.file.flush()

    def close(self) -> None:
        """
        closes the file
        """
        self.file.close()
290
+
291
+
292
class CSVOutputFormat(KVWriter):
    """
    Log to a file, in a CSV format

    :param filename: the file to write the log to
    """

    def __init__(self, filename: str):
        # "w+t" (read/write) because write() re-reads and rewrites the whole
        # file whenever a previously unseen column appears.
        self.file = open(filename, "w+t")
        self.keys = []
        self.separator = ","
        self.quotechar = '"'

    def write(self, key_values: Dict[str, Any], key_excluded: Dict[str, Union[str, Tuple[str, ...]]],
              step: int = 0) -> None:
        """
        Append one CSV row; when new columns appear, rewrite the header and
        pad all previous rows with empty cells for the new columns.

        :param key_values: values to log as one row
        :param key_excluded: per-key output-format exclusions
        :param step: unused by this format
        """
        # Add our current row to the history
        key_values = filter_excluded_keys(key_values, key_excluded, "csv")
        extra_keys = key_values.keys() - self.keys
        if extra_keys:
            self.keys.extend(extra_keys)
            # Re-read the whole file, then rewrite it in place with the
            # extended header and padded rows.
            self.file.seek(0)
            lines = self.file.readlines()
            self.file.seek(0)
            for (i, key) in enumerate(self.keys):
                if i > 0:
                    self.file.write(",")
                self.file.write(key)
            self.file.write("\n")
            for line in lines[1:]:
                # Strip the old newline, pad one empty cell per new column.
                self.file.write(line[:-1])
                self.file.write(self.separator * len(extra_keys))
                self.file.write("\n")
        for i, key in enumerate(self.keys):
            if i > 0:
                self.file.write(",")
            # Missing keys produce an empty cell (nothing is written).
            value = key_values.get(key)

            if isinstance(value, Video):
                raise FormatUnsupportedError(["csv"], "video")

            elif isinstance(value, Figure):
                raise FormatUnsupportedError(["csv"], "figure")

            elif isinstance(value, Image):
                raise FormatUnsupportedError(["csv"], "image")

            elif isinstance(value, str):
                # escape quotechars by prepending them with another quotechar
                value = value.replace(self.quotechar, self.quotechar + self.quotechar)

                # additionally wrap text with quotechars so that any delimiters in the text are ignored by csv readers
                self.file.write(self.quotechar + value + self.quotechar)

            elif value is not None:
                self.file.write(str(value))
        self.file.write("\n")
        self.file.flush()

    def close(self) -> None:
        """
        closes the file
        """
        self.file.close()
355
+
356
+
357
class TensorBoardOutputFormat(KVWriter):
    """
    Dumps key/value pairs into TensorBoard's numeric format.

    :param folder: the folder to write the log to
    """

    def __init__(self, folder: str):
        # SummaryWriter is None when the tensorboard import failed at module load.
        assert SummaryWriter is not None, "tensorboard is not installed, you can use " "pip install tensorboard to do so"
        self.writer = SummaryWriter(log_dir=folder)

    def write(self, key_values: Dict[str, Any], key_excluded: Dict[str, Union[str, Tuple[str, ...]]],
              step: int = 0) -> None:
        """
        Dispatch each value to the matching ``SummaryWriter.add_*`` call.

        :param key_values: values to log
        :param key_excluded: per-key output-format exclusions
        :param step: global step associated with the values
        """
        for (key, value), (_, excluded) in zip(sorted(key_values.items()), sorted(key_excluded.items())):

            if excluded is not None and "tensorboard" in excluded:
                continue

            # NOTE: the checks below are deliberately independent ``if``s
            # (not elif): a value matching several categories would be logged
            # under each of them.
            if isinstance(value, np.ScalarType):
                if isinstance(value, str):
                    # str is considered a np.ScalarType
                    self.writer.add_text(key, value, step)
                else:
                    self.writer.add_scalar(key, value, step)

            if isinstance(value, th.Tensor):
                self.writer.add_histogram(key, value, step)

            if isinstance(value, Video):
                self.writer.add_video(key, value.frames, step, value.fps)

            if isinstance(value, Figure):
                self.writer.add_figure(key, value.figure, step, close=value.close)

            if isinstance(value, Image):
                self.writer.add_image(key, value.image, step, dataformats=value.dataformats)

        # Flush the output to the file
        self.writer.flush()

    def close(self) -> None:
        """
        closes the file
        """
        if self.writer:
            self.writer.close()
            self.writer = None
404
+
405
+
406
def make_output_format(_format: str, log_dir: str, log_suffix: str = "") -> KVWriter:
    """
    return a logger for the requested format

    :param _format: the requested format to log to ('stdout', 'log', 'json' or 'csv' or 'tensorboard')
    :param log_dir: the logging directory
    :param log_suffix: the suffix for the log file
    :return: the logger
    """
    os.makedirs(log_dir, exist_ok=True)
    # Early returns per known format; anything else is rejected below.
    if _format == "stdout":
        return HumanOutputFormat(sys.stdout)
    if _format == "log":
        return HumanOutputFormat(os.path.join(log_dir, f"log{log_suffix}.txt"))
    if _format == "json":
        return JSONOutputFormat(os.path.join(log_dir, f"progress{log_suffix}.json"))
    if _format == "csv":
        return CSVOutputFormat(os.path.join(log_dir, f"progress{log_suffix}.csv"))
    if _format == "tensorboard":
        return TensorBoardOutputFormat(log_dir)
    raise ValueError(f"Unknown format specified: {_format}")
428
+
429
+
430
+ # ================================================================
431
+ # Backend
432
+ # ================================================================
433
+
434
+
435
class Logger:
    """
    The logger class.

    :param folder: the logging location
    :param output_formats: the list of output formats
    """

    def __init__(self, folder: Optional[str], output_formats: List[KVWriter]):
        self.name_to_value = defaultdict(float)  # values this iteration
        self.name_to_count = defaultdict(int)
        self.name_to_excluded = defaultdict(str)
        self.level = INFO
        self.dir = folder
        self.output_formats = output_formats

    def record(self, key: str, value: Any, exclude: Optional[Union[str, Tuple[str, ...]]] = None) -> None:
        """
        Log a value of some diagnostic
        Call this once for each diagnostic quantity, each iteration
        If called many times, last value will be used.

        :param key: save to log this key
        :param value: save to log this value
        :param exclude: outputs to be excluded
        """
        self.name_to_value[key] = value
        self.name_to_excluded[key] = exclude

    def record_mean(self, key: str, value: Any, exclude: Optional[Union[str, Tuple[str, ...]]] = None) -> None:
        """
        The same as record(), but if called many times, values averaged.

        :param key: save to log this key
        :param value: save to log this value
        :param exclude: outputs to be excluded
        """
        if value is None:
            self.name_to_value[key] = None
            return
        running_mean = self.name_to_value[key]
        count = self.name_to_count[key]
        # Incremental running mean over all values recorded under this key
        self.name_to_value[key] = running_mean * count / (count + 1) + value / (count + 1)
        self.name_to_count[key] = count + 1
        self.name_to_excluded[key] = exclude

    def dump(self, step: int = 0) -> None:
        """
        Write all of the diagnostics from the current iteration
        """
        if self.level == DISABLED:
            return
        for writer in self.output_formats:
            if isinstance(writer, KVWriter):
                writer.write(self.name_to_value, self.name_to_excluded, step)

        # Start the next iteration with a clean slate
        self.name_to_value.clear()
        self.name_to_count.clear()
        self.name_to_excluded.clear()

    def log(self, *args, level: int = INFO) -> None:
        """
        Write the sequence of args, with no separators,
        to the console and output files (if you've configured an output file).

        level: int. (see logger.py docs) If the global logger level is higher than
                    the level argument here, don't print to stdout.

        :param args: log the arguments
        :param level: the logging level (can be DEBUG=10, INFO=20, WARN=30, ERROR=40, DISABLED=50)
        """
        if self.level <= level:
            self._do_log(args)

    def debug(self, *args) -> None:
        """
        Log *args* at the DEBUG level.

        :param args: log the arguments
        """
        self.log(*args, level=DEBUG)

    def info(self, *args) -> None:
        """
        Log *args* at the INFO level.

        :param args: log the arguments
        """
        self.log(*args, level=INFO)

    def warn(self, *args) -> None:
        """
        Log *args* at the WARN level.

        :param args: log the arguments
        """
        self.log(*args, level=WARN)

    def error(self, *args) -> None:
        """
        Log *args* at the ERROR level.

        :param args: log the arguments
        """
        self.log(*args, level=ERROR)

    # Configuration
    # ----------------------------------------
    def set_level(self, level: int) -> None:
        """
        Set logging threshold on current logger.

        :param level: the logging level (can be DEBUG=10, INFO=20, WARN=30, ERROR=40, DISABLED=50)
        """
        self.level = level

    def get_dir(self) -> str:
        """
        Get directory that log files are being written to.
        will be None if there is no output directory (i.e., if you didn't call start)

        :return: the logging directory
        """
        return self.dir

    def close(self) -> None:
        """
        closes the file
        """
        for writer in self.output_formats:
            writer.close()

    # Misc
    # ----------------------------------------
    def _do_log(self, args) -> None:
        """
        log to the requested format outputs

        :param args: the arguments to log
        """
        for writer in self.output_formats:
            if isinstance(writer, SeqWriter):
                writer.write_sequence(map(str, args))
585
+
586
+
587
def configure(folder: Optional[str] = None, format_strings: Optional[List[str]] = None) -> Logger:
    """
    Configure the current logger.

    :param folder: the save location
        (if None, $SB3_LOGDIR, if still None, tempdir/SB3-[date & time])
    :param format_strings: the output logging format
        (if None, $SB3_LOG_FORMAT, if still None, ['stdout', 'log', 'csv'])
    :return: The logger object.
    """
    if folder is None:
        folder = os.getenv("SB3_LOGDIR")
    if folder is None:
        timestamp = datetime.datetime.now().strftime("SB3-%Y-%m-%d-%H-%M-%S-%f")
        folder = os.path.join(tempfile.gettempdir(), timestamp)
    assert isinstance(folder, str)
    os.makedirs(folder, exist_ok=True)

    log_suffix = ""
    if format_strings is None:
        format_strings = os.getenv("SB3_LOG_FORMAT", "stdout,log,csv").split(",")

    # Drop empty strings (e.g. from a trailing comma in the env var)
    format_strings = [fmt for fmt in format_strings if fmt]
    output_formats = [make_output_format(fmt, folder, log_suffix) for fmt in format_strings]

    logger = Logger(folder=folder, output_formats=output_formats)
    # Only print when some files will be saved
    if len(format_strings) > 0 and format_strings != ["stdout"]:
        logger.log(f"Logging to {folder}")
    return logger
616
+
617
+
618
+ # ================================================================
619
+ # Readers
620
+ # ================================================================
621
+
622
+
623
def read_json(filename: str) -> pandas.DataFrame:
    """
    read a json file using pandas

    The file is expected to hold one JSON object per line (JSON-lines),
    as produced by ``JSONOutputFormat``.

    :param filename: the file path to read
    :return: the data in the json
    """
    with open(filename) as file_handler:
        records = [json.loads(line) for line in file_handler]
    return pandas.DataFrame(records)
635
+
636
+
637
def read_csv(filename: str) -> pandas.DataFrame:
    """
    read a csv file using pandas

    Lines starting with ``#`` (e.g. the Monitor JSON header) are skipped.

    :param filename: the file path to read
    :return: the data in the csv
    """
    frame = pandas.read_csv(filename, index_col=None, comment="#")
    return frame
dexart-release/stable_baselines3/common/monitor.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __all__ = ["Monitor", "ResultsWriter", "get_monitor_files", "load_results"]
2
+
3
+ import csv
4
+ import json
5
+ import os
6
+ import time
7
+ from glob import glob
8
+ from typing import Dict, List, Optional, Tuple, Union
9
+
10
+ import gym
11
+ import numpy as np
12
+ import pandas
13
+
14
+ from stable_baselines3.common.type_aliases import GymObs, GymStepReturn
15
+
16
+
17
class Monitor(gym.Wrapper):
    """
    A monitor wrapper for Gym environments, it is used to know the episode reward, length, time and other data.

    :param env: The environment
    :param filename: the location to save a log file, can be None for no log
    :param allow_early_resets: allows the reset of the environment before it is done
    :param reset_keywords: extra keywords for the reset call,
        if extra parameters are needed at reset
    :param info_keywords: extra information to log, from the information return of env.step()
    """

    # File-name suffix used for monitor log files.
    EXT = "monitor.csv"

    def __init__(
        self,
        env: gym.Env,
        filename: Optional[str] = None,
        allow_early_resets: bool = True,
        reset_keywords: Tuple[str, ...] = (),
        info_keywords: Tuple[str, ...] = (),
    ):
        super().__init__(env=env)
        self.t_start = time.time()
        if filename is not None:
            self.results_writer = ResultsWriter(
                filename,
                # env.spec may be None for hand-constructed envs; then env_id is None
                header={"t_start": self.t_start, "env_id": env.spec and env.spec.id},
                extra_keys=reset_keywords + info_keywords,
            )
        else:
            self.results_writer = None
        self.reset_keywords = reset_keywords
        self.info_keywords = info_keywords
        self.allow_early_resets = allow_early_resets
        self.rewards = None  # per-step rewards of the current episode (None until reset)
        self.needs_reset = True
        self.episode_returns = []
        self.episode_lengths = []
        self.episode_times = []
        self.total_steps = 0
        self.current_reset_info = {}  # extra info about the current episode, that was passed in during reset()

    def reset(self, **kwargs) -> GymObs:
        """
        Calls the Gym environment reset. Can only be called if the environment is over, or if allow_early_resets is True

        :param kwargs: Extra keywords saved for the next episode. only if defined by reset_keywords
        :return: the first observation of the environment
        """
        if not self.allow_early_resets and not self.needs_reset:
            raise RuntimeError(
                "Tried to reset an environment before done. If you want to allow early resets, "
                "wrap your env with Monitor(env, path, allow_early_resets=True)"
            )
        self.rewards = []
        self.needs_reset = False
        # Capture the reset keywords so they can be attached to the episode record
        for key in self.reset_keywords:
            value = kwargs.get(key)
            if value is None:
                raise ValueError(f"Expected you to pass keyword argument {key} into reset")
            self.current_reset_info[key] = value
        return self.env.reset(**kwargs)

    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
        """
        Step the environment with the given action

        :param action: the action
        :return: observation, reward, done, information
        """
        if self.needs_reset:
            raise RuntimeError("Tried to step environment that needs reset")
        observation, reward, done, info = self.env.step(action)
        self.rewards.append(reward)
        if done:
            self.needs_reset = True
            ep_rew = sum(self.rewards)
            ep_len = len(self.rewards)
            # "r"/"l"/"t" keys are the convention consumers (e.g. evaluate_policy) rely on
            ep_info = {"r": round(ep_rew, 6), "l": ep_len, "t": round(time.time() - self.t_start, 6)}
            for key in self.info_keywords:
                ep_info[key] = info[key]
            self.episode_returns.append(ep_rew)
            self.episode_lengths.append(ep_len)
            self.episode_times.append(time.time() - self.t_start)
            ep_info.update(self.current_reset_info)
            if self.results_writer:
                self.results_writer.write_row(ep_info)
            info["episode"] = ep_info
        self.total_steps += 1
        # The observation/reward/done are passed through unmodified
        return observation, reward, done, info

    def close(self) -> None:
        """
        Closes the environment
        """
        super().close()
        if self.results_writer is not None:
            self.results_writer.close()

    def get_total_steps(self) -> int:
        """
        Returns the total number of timesteps

        :return:
        """
        return self.total_steps

    def get_episode_rewards(self) -> List[float]:
        """
        Returns the rewards of all the episodes

        :return:
        """
        return self.episode_returns

    def get_episode_lengths(self) -> List[int]:
        """
        Returns the number of timesteps of all the episodes

        :return:
        """
        return self.episode_lengths

    def get_episode_times(self) -> List[float]:
        """
        Returns the runtime in seconds of all the episodes

        :return:
        """
        return self.episode_times
148
+
149
+
150
class LoadMonitorResultsError(Exception):
    """Raised when loading the monitor log fails."""
156
+
157
+
158
class ResultsWriter:
    """
    A result writer that saves the data from the `Monitor` class.

    The output file starts with a single ``#``-prefixed JSON header line,
    followed by a CSV with columns ``r`` (return), ``l`` (length), ``t`` (time)
    plus any ``extra_keys``.

    :param filename: the location to save a log file; if it is a directory or
        does not already end with ``monitor.csv``, the suffix is appended
    :param header: the header dictionary object of the saved csv
    :param extra_keys: the extra information to log, typically composed of
        ``reset_keywords`` and ``info_keywords``
    """

    def __init__(
        self,
        filename: str = "",
        header: Optional[Dict[str, Union[float, str]]] = None,
        extra_keys: Tuple[str, ...] = (),
    ):
        if header is None:
            header = {}
        # Normalize the target path so it always ends with "monitor.csv".
        if not filename.endswith(Monitor.EXT):
            if os.path.isdir(filename):
                filename = os.path.join(filename, Monitor.EXT)
            else:
                filename = filename + "." + Monitor.EXT
        # Prevent newline issue on Windows, see GH issue #692
        self.file_handler = open(filename, "wt", newline="\n")
        # JSON header line first, then the CSV header row; flush so readers
        # see a valid file immediately.
        self.file_handler.write("#%s\n" % json.dumps(header))
        self.logger = csv.DictWriter(self.file_handler, fieldnames=("r", "l", "t") + extra_keys)
        self.logger.writeheader()
        self.file_handler.flush()

    def write_row(self, epinfo: Dict[str, Union[float, int]]) -> None:
        """
        Write one episode entry to the CSV log and flush it to disk.

        :param epinfo: the information on episodic return, length, and time
        """
        if self.logger:
            self.logger.writerow(epinfo)
            self.file_handler.flush()

    def close(self) -> None:
        """
        Close the file handler.
        """
        self.file_handler.close()
203
+
204
+
205
def get_monitor_files(path: str) -> List[str]:
    """
    Get all the monitor files contained in the given folder.

    :param path: the logging folder
    :return: the paths of all files matching ``*monitor.csv``
    """
    pattern = os.path.join(path, "*" + Monitor.EXT)
    return glob(pattern)
213
+
214
+
215
def load_results(path: str) -> pandas.DataFrame:
    """
    Load all Monitor logs from a given directory path matching ``*monitor.csv``.

    :param path: the directory path containing the log file(s)
    :return: the logged data, concatenated and sorted by episode end time
    """
    monitor_files = get_monitor_files(path)
    if not monitor_files:
        raise LoadMonitorResultsError(f"No monitor files of the form *{Monitor.EXT} found in {path}")
    data_frames = []
    headers = []
    for file_name in monitor_files:
        with open(file_name) as file_handler:
            # The first line is a "#"-prefixed JSON header; the rest is CSV.
            first_line = file_handler.readline()
            assert first_line[0] == "#"
            header = json.loads(first_line[1:])
            data_frame = pandas.read_csv(file_handler, index_col=None)
            headers.append(header)
            # Convert per-run relative times to absolute timestamps.
            data_frame["t"] += header["t_start"]
        data_frames.append(data_frame)
    data_frame = pandas.concat(data_frames)
    data_frame.sort_values("t", inplace=True)
    data_frame.reset_index(inplace=True)
    # Re-express times relative to the earliest run start across all files.
    data_frame["t"] -= min(header["t_start"] for header in headers)
    return data_frame
dexart-release/stable_baselines3/common/noise.py ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import copy
2
+ from abc import ABC, abstractmethod
3
+ from typing import Iterable, List, Optional
4
+
5
+ import numpy as np
6
+
7
+
8
class ActionNoise(ABC):
    """
    The action noise base class.
    """

    def __init__(self):
        super().__init__()

    def reset(self) -> None:
        """
        Call end of episode reset for the noise.
        """

    @abstractmethod
    def __call__(self) -> np.ndarray:
        raise NotImplementedError()
25
+
26
+
27
class NormalActionNoise(ActionNoise):
    """
    A Gaussian action noise.

    :param mean: the mean value of the noise
    :param sigma: the scale of the noise (std here)
    """

    def __init__(self, mean: np.ndarray, sigma: np.ndarray):
        super().__init__()
        self._mu = mean
        self._sigma = sigma

    def __call__(self) -> np.ndarray:
        # One independent Gaussian sample per action dimension.
        return np.random.normal(self._mu, self._sigma)

    def __repr__(self) -> str:
        return f"NormalActionNoise(mu={self._mu}, sigma={self._sigma})"
45
+
46
+
47
class OrnsteinUhlenbeckActionNoise(ActionNoise):
    """
    An Ornstein Uhlenbeck action noise, this is designed to approximate
    Brownian motion with friction.

    Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab

    :param mean: the mean of the noise
    :param sigma: the scale of the noise
    :param theta: the rate of mean reversion
    :param dt: the timestep for the noise
    :param initial_noise: the initial value for the noise output, (if None: 0)
    """

    def __init__(
        self,
        mean: np.ndarray,
        sigma: np.ndarray,
        theta: float = 0.15,
        dt: float = 1e-2,
        initial_noise: Optional[np.ndarray] = None,
    ):
        self._theta = theta
        self._mu = mean
        self._sigma = sigma
        self._dt = dt
        self.initial_noise = initial_noise
        self.noise_prev = np.zeros_like(self._mu)
        # Start the process from its configured initial position.
        self.reset()
        super().__init__()

    def __call__(self) -> np.ndarray:
        # Euler-Maruyama discretization of the OU process:
        #   x_{t+dt} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
        drift = self._theta * (self._mu - self.noise_prev) * self._dt
        diffusion = self._sigma * np.sqrt(self._dt) * np.random.normal(size=self._mu.shape)
        self.noise_prev = self.noise_prev + drift + diffusion
        return self.noise_prev

    def reset(self) -> None:
        """
        Reset the Ornstein Uhlenbeck noise, to the initial position.
        """
        if self.initial_noise is None:
            self.noise_prev = np.zeros_like(self._mu)
        else:
            self.noise_prev = self.initial_noise

    def __repr__(self) -> str:
        return f"OrnsteinUhlenbeckActionNoise(mu={self._mu}, sigma={self._sigma})"
94
+
95
+
96
class VectorizedActionNoise(ActionNoise):
    """
    A Vectorized action noise for parallel environments.

    Keeps one independent deep copy of ``base_noise`` per environment and
    stacks their samples along axis 0.

    :param base_noise: ActionNoise The noise generator to use
    :param n_envs: The number of parallel environments
    """

    def __init__(self, base_noise: ActionNoise, n_envs: int):
        try:
            self.n_envs = int(n_envs)
            assert self.n_envs > 0
        except (TypeError, AssertionError):
            raise ValueError(f"Expected n_envs={n_envs} to be positive integer greater than 0")

        self.base_noise = base_noise
        # One independent copy of the base noise process per environment.
        self.noises = [copy.deepcopy(self.base_noise) for _ in range(n_envs)]

    def reset(self, indices: Optional[Iterable[int]] = None) -> None:
        """
        Reset all the noise processes, or those listed in indices.

        :param indices: Optional[Iterable[int]] The indices to reset. Default: None.
            If the parameter is None, then all processes are reset to their initial position.
        """
        if indices is None:
            indices = range(len(self.noises))

        for index in indices:
            self.noises[index].reset()

    def __repr__(self) -> str:
        # Fixed: the original format string had unbalanced parentheses
        # ("...{repr(self.base_noise)}), n_envs=...)"), yielding a malformed repr.
        return f"VecNoise(BaseNoise={repr(self.base_noise)}, n_envs={len(self.noises)})"

    def __call__(self) -> np.ndarray:
        """
        Generate and stack the action noise from each noise object.
        """
        noise = np.stack([noise() for noise in self.noises])
        return noise

    @property
    def base_noise(self) -> ActionNoise:
        return self._base_noise

    @base_noise.setter
    def base_noise(self, base_noise: ActionNoise) -> None:
        # Validate eagerly so a bad template fails at assignment, not at sampling time.
        if base_noise is None:
            raise ValueError("Expected base_noise to be an instance of ActionNoise, not None", ActionNoise)
        if not isinstance(base_noise, ActionNoise):
            raise TypeError("Expected base_noise to be an instance of type ActionNoise", ActionNoise)
        self._base_noise = base_noise

    @property
    def noises(self) -> List[ActionNoise]:
        return self._noises

    @noises.setter
    def noises(self, noises: List[ActionNoise]) -> None:
        noises = list(noises)  # raises TypeError if not iterable
        assert len(noises) == self.n_envs, f"Expected a list of {self.n_envs} ActionNoises, found {len(noises)}."

        # Every entry must match the type of the base noise template.
        different_types = [i for i, noise in enumerate(noises) if not isinstance(noise, type(self.base_noise))]

        if len(different_types):
            raise ValueError(
                f"Noise instances at indices {different_types} don't match the type of base_noise", type(self.base_noise)
            )

        self._noises = noises
        # Each process starts from its initial position.
        for noise in noises:
            noise.reset()
dexart-release/stable_baselines3/common/on_policy_algorithm.py ADDED
@@ -0,0 +1,320 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import time
2
+ from typing import Any, Dict, List, Optional, Tuple, Type, Union
3
+
4
+ import gym
5
+ import numpy as np
6
+ import torch as th
7
+
8
+ from stable_baselines3.common.base_class import BaseAlgorithm
9
+ from stable_baselines3.common.buffers import DictRolloutBuffer, RolloutBuffer
10
+ from stable_baselines3.common.callbacks import BaseCallback
11
+ from stable_baselines3.common.policies import ActorCriticPolicy
12
+ from stable_baselines3.common.type_aliases import GymEnv, MaybeCallback, Schedule
13
+ from stable_baselines3.common.utils import obs_as_tensor, safe_mean
14
+ from stable_baselines3.common.vec_env import VecEnv
15
+
16
+
17
class OnPolicyAlgorithm(BaseAlgorithm):
    """
    The base for On-Policy algorithms (ex: A2C/PPO).

    :param policy: The policy model to use (MlpPolicy, CnnPolicy, ...)
    :param env: The environment to learn from (if registered in Gym, can be str)
    :param learning_rate: The learning rate, it can be a function
        of the current progress remaining (from 1 to 0)
    :param n_steps: The number of steps to run for each environment per update
        (i.e. batch size is n_steps * n_env where n_env is number of environment copies running in parallel)
    :param gamma: Discount factor
    :param gae_lambda: Factor for trade-off of bias vs variance for Generalized Advantage Estimator.
        Equivalent to classic advantage when set to 1.
    :param ent_coef: Entropy coefficient for the loss calculation
    :param vf_coef: Value function coefficient for the loss calculation
    :param max_grad_norm: The maximum value for the gradient clipping
    :param use_sde: Whether to use generalized State Dependent Exploration (gSDE)
        instead of action noise exploration (default: False)
    :param sde_sample_freq: Sample a new noise matrix every n steps when using gSDE
        Default: -1 (only sample at the beginning of the rollout)
    :param tensorboard_log: the log location for tensorboard (if None, no logging)
    :param create_eval_env: Whether to create a second environment that will be
        used for evaluating the agent periodically. (Only available when passing string for the environment)
    :param monitor_wrapper: When creating an environment, whether to wrap it
        or not in a Monitor wrapper.
    :param policy_kwargs: additional arguments to be passed to the policy on creation
    :param verbose: the verbosity level: 0 no output, 1 info, 2 debug
    :param seed: Seed for the pseudo random generators
    :param device: Device (cpu, cuda, ...) on which the code should be run.
        Setting it to auto, the code will be run on the GPU if possible.
    :param _init_setup_model: Whether or not to build the network at the creation of the instance
    :param supported_action_spaces: The action spaces supported by the algorithm.
    """

    def __init__(
        self,
        policy: Union[str, Type[ActorCriticPolicy]],
        env: Union[GymEnv, str],
        learning_rate: Union[float, Schedule],
        n_steps: int,
        gamma: float,
        gae_lambda: float,
        ent_coef: float,
        vf_coef: float,
        max_grad_norm: float,
        use_sde: bool,
        sde_sample_freq: int,
        tensorboard_log: Optional[str] = None,
        create_eval_env: bool = False,
        monitor_wrapper: bool = True,
        policy_kwargs: Optional[Dict[str, Any]] = None,
        verbose: int = 0,
        seed: Optional[int] = None,
        device: Union[th.device, str] = "auto",
        _init_setup_model: bool = True,
        supported_action_spaces: Optional[Tuple[gym.spaces.Space, ...]] = None,
    ):

        super().__init__(
            policy=policy,
            env=env,
            learning_rate=learning_rate,
            policy_kwargs=policy_kwargs,
            verbose=verbose,
            device=device,
            use_sde=use_sde,
            sde_sample_freq=sde_sample_freq,
            create_eval_env=create_eval_env,
            support_multi_env=True,
            seed=seed,
            tensorboard_log=tensorboard_log,
            supported_action_spaces=supported_action_spaces,
        )

        self.n_steps = n_steps
        self.gamma = gamma
        self.gae_lambda = gae_lambda
        self.ent_coef = ent_coef
        self.vf_coef = vf_coef
        self.max_grad_norm = max_grad_norm
        self.rollout_buffer = None

        # State for a custom "restore on performance drop" mechanism:
        # mean reward of the previous rollout, a restore-request flag, the two
        # most recent parameter snapshots, and a counter of consecutive restores.
        # NOTE(review): need_restore is never set to True anywhere in this class
        # (collect_rollouts only clears it), so the restore branch in learn()
        # appears dead unless a subclass or callback flips it — confirm.
        self.last_rollout_reward = -np.inf
        self.need_restore = False
        self.last_policy_saved: List[Dict] = [{}, {}]
        self.current_restore_step = 0

        if _init_setup_model:
            self._setup_model()

    def _setup_model(self) -> None:
        # Build the learning-rate schedule, seed RNGs, and create the rollout
        # buffer and policy network.
        self._setup_lr_schedule()
        self.set_random_seed(self.seed)

        # Dict observation spaces need the dict-aware rollout buffer.
        buffer_cls = DictRolloutBuffer if isinstance(self.observation_space, gym.spaces.Dict) else RolloutBuffer

        self.rollout_buffer = buffer_cls(
            self.n_steps,
            self.observation_space,
            self.action_space,
            device=self.device,
            gamma=self.gamma,
            gae_lambda=self.gae_lambda,
            n_envs=self.n_envs,
        )
        self.policy = self.policy_class(  # pytype:disable=not-instantiable
            self.observation_space,
            self.action_space,
            self.lr_schedule,
            use_sde=self.use_sde,
            **self.policy_kwargs  # pytype:disable=not-instantiable
        )
        self.policy = self.policy.to(self.device)

    def collect_rollouts(
        self,
        env: VecEnv,
        callback: BaseCallback,
        rollout_buffer: RolloutBuffer,
        n_rollout_steps: int,
    ) -> bool:
        """
        Collect experiences using the current policy and fill a ``RolloutBuffer``.
        The term rollout here refers to the model-free notion and should not
        be used with the concept of rollout used in model-based RL or planning.

        Also tracks the mean per-episode reward of this rollout in
        ``self.last_rollout_reward``.

        :param env: The training environment
        :param callback: Callback that will be called at each step
            (and at the beginning and end of the rollout)
        :param rollout_buffer: Buffer to fill with rollouts
        :param n_rollout_steps: Number of experiences to collect per environment
        :return: True if function returned with at least `n_rollout_steps`
            collected, False if callback terminated rollout prematurely.
        """
        assert self._last_obs is not None, "No previous observation was provided"
        # Switch to eval mode (this affects batch norm / dropout)
        self.policy.set_training_mode(False)
        last_episode_reward = self.last_rollout_reward
        self.last_rollout_reward = 0
        num_rollouts = 0  # number of episodes that finished during this rollout
        n_steps = 0
        rollout_buffer.reset()
        # Sample new weights for the state dependent exploration
        if self.use_sde:
            self.policy.reset_noise(env.num_envs)
        callback.on_rollout_start()
        while n_steps < n_rollout_steps:
            if self.use_sde and self.sde_sample_freq > 0 and n_steps % self.sde_sample_freq == 0:
                # Sample a new noise matrix
                self.policy.reset_noise(env.num_envs)

            with th.no_grad():
                # Convert to pytorch tensor or to TensorDict
                obs_tensor = obs_as_tensor(self._last_obs, self.device)
                actions, values, log_probs = self.policy(obs_tensor)
            actions = actions.cpu().numpy()

            # Rescale and perform action
            clipped_actions = actions
            # Clip the actions to avoid out of bound error
            if isinstance(self.action_space, gym.spaces.Box):
                clipped_actions = np.clip(actions, self.action_space.low, self.action_space.high)

            new_obs, rewards, dones, infos = env.step(clipped_actions)

            self.num_timesteps += env.num_envs

            # Give access to local variables
            callback.update_locals(locals())
            if callback.on_step() is False:
                return False

            self._update_info_buffer(infos)
            n_steps += 1

            if isinstance(self.action_space, gym.spaces.Discrete):
                # Reshape in case of discrete action
                actions = actions.reshape(-1, 1)

            # Handle timeout by bootstraping with value function
            # see GitHub issue #633
            for idx, done in enumerate(dones):
                if (
                    done
                    and infos[idx].get("terminal_observation") is not None
                    and infos[idx].get("TimeLimit.truncated", False)
                ):
                    terminal_obs = self.policy.obs_to_tensor(infos[idx]["terminal_observation"])[0]
                    with th.no_grad():
                        terminal_value = self.policy.predict_values(terminal_obs)[0]
                    rewards[idx] += self.gamma * terminal_value

                if done:
                    num_rollouts += 1

            # Accumulate total reward over all envs; averaged per episode below.
            self.last_rollout_reward += rewards.sum()

            rollout_buffer.add(self._last_obs, actions, rewards, self._last_episode_starts, values, log_probs)
            self._last_obs = new_obs
            self._last_episode_starts = dones
        with th.no_grad():
            # Compute value for the last timestep
            values = self.policy.predict_values(obs_as_tensor(new_obs, self.device))

        rollout_buffer.compute_returns_and_advantage(last_values=values, dones=dones)

        # NOTE(review): if no episode finished during this rollout, num_rollouts
        # is 0 and this division fails (or yields inf/nan for numpy floats) —
        # presumably n_steps always spans at least one episode here; confirm.
        self.last_rollout_reward /= num_rollouts
        # NOTE(review): reward_gap is computed but never used in this class.
        reward_gap = last_episode_reward - self.last_rollout_reward
        self.need_restore = False
        self.current_restore_step = 0

        callback.on_rollout_end()

        return True

    def train(self) -> None:
        """
        Consume current rollout data and update policy parameters.
        Implemented by individual algorithms.
        """
        raise NotImplementedError

    def learn(
        self,
        total_timesteps: int,
        callback: MaybeCallback = None,
        log_interval: int = 1,
        eval_env: Optional[GymEnv] = None,
        eval_freq: int = -1,
        n_eval_episodes: int = 5,
        tb_log_name: str = "OnPolicyAlgorithm",
        eval_log_path: Optional[str] = None,
        reset_num_timesteps: bool = True,
        iter_start=0,
    ) -> "OnPolicyAlgorithm":
        # ``iter_start`` allows resuming iteration numbering when continuing
        # a previous training run.
        iteration = iter_start

        total_timesteps, callback = self._setup_learn(
            total_timesteps, eval_env, callback, eval_freq, n_eval_episodes, eval_log_path, reset_num_timesteps,
            tb_log_name
        )

        callback.on_training_start(locals(), globals())

        while self.num_timesteps < total_timesteps:

            x = time.time()
            continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer,
                                                      n_rollout_steps=self.n_steps)
            print("Rollout time:", time.time() - x)

            if continue_training is False:
                break

            # Restore-on-drop: if a large performance drop was flagged, roll back
            # to the older of the two saved snapshots (at most 5 times in a row)
            # and skip this update; otherwise rotate the snapshot buffer.
            # NOTE(review): need_restore is cleared in collect_rollouts and never
            # set True in this class — confirm a subclass drives this flag.
            if self.need_restore and self.current_restore_step < 5:
                print(f"Large performance drop detected. Restore previous model.")
                self.set_parameters(self.last_policy_saved[0], exact_match=True, device=self.device)
                continue
            else:
                self.last_policy_saved[0] = self.last_policy_saved[1]
                self.last_policy_saved[1] = self.get_parameters()

            iteration += 1
            self._update_current_progress_remaining(self.num_timesteps, total_timesteps)

            # Display training infos

            if log_interval is not None and iteration % log_interval == 0:
                fps = int((self.num_timesteps - self._num_timesteps_at_start) / (time.time() - self.start_time))
                self.logger.record("time/iterations", iteration, exclude="wandb")
                self.logger.record("rollout/rollout_rew_mean", self.last_rollout_reward)
                if len(self.ep_info_buffer) > 0 and len(self.ep_info_buffer[0]) > 0:
                    self.logger.record("rollout/ep_rew_mean",
                                       safe_mean([ep_info["r"] for ep_info in self.ep_info_buffer]))
                    self.logger.record("rollout/ep_len_mean",
                                       safe_mean([ep_info["l"] for ep_info in self.ep_info_buffer]))
                self.logger.record("time/fps", fps)
                self.logger.record("time/time_elapsed", int(time.time() - self.start_time), exclude="tensorboard")
                self.logger.record("time/total_timesteps", self.num_timesteps, exclude="tensorboard")
                self.logger.dump(step=iteration)

            x = time.time()
            self.train()
            print("Train time:", time.time() - x)

        callback.on_training_end()

        return self

    def _get_torch_save_params(self) -> Tuple[List[str], List[str]]:
        # Only the policy and its optimizer are saved via th.save.
        state_dicts = ["policy", "policy.optimizer"]

        return state_dicts, []

    def _excluded_save_params(self) -> List[str]:
        """
        Returns the names of the parameters that should be excluded from being
        saved by pickling. E.g. replay buffers are skipped by default
        as they take up a lot of space. PyTorch variables should be excluded
        with this so they can be stored with ``th.save``.

        :return: List of parameters that should be excluded from being saved with pickle.
        """
        # The parameter snapshots used by the restore mechanism are large and
        # reproducible, so they are not pickled.
        return super()._excluded_save_params() + ["last_policy_saved"]