Spaces:
Runtime error
Runtime error
Commit ·
6299d2b
1
Parent(s): 91977f9
add: Pusher env
Browse files
- envs/__init__.py +9 -3
- envs/mujoco/ant_translator.py +1 -1
- envs/mujoco/pusher_policies.py +15 -0
- envs/mujoco/pusher_translator.py +93 -0
- record_reflexion.csv +1 -0
- shell/test_mujoco_hopper.sh +12 -0
- shell/test_mujoco_invertedPendulum.sh +4 -4
- shell/test_mujoco_pusher.sh +27 -0
envs/__init__.py
CHANGED
|
@@ -108,8 +108,9 @@ from .mujoco import reacher_translator, reacher_policies
|
|
| 108 |
from .mujoco import hopper_translator, hopper_policies
|
| 109 |
from .mujoco import walker2d_translator, walker2d_policies
|
| 110 |
|
| 111 |
-
|
| 112 |
-
|
|
|
|
| 113 |
|
| 114 |
REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
|
| 115 |
REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
|
|
@@ -135,12 +136,17 @@ REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTr
|
|
| 135 |
REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
|
| 136 |
|
| 137 |
|
| 138 |
-
|
| 139 |
REGISTRY["halfcheetah_init_translator"] = halfcheetah_translator.GameDescriber
|
| 140 |
REGISTRY["halfcheetah_basic_translator"] = halfcheetah_translator.BasicStateSequenceTranslator
|
| 141 |
REGISTRY["halfcheetah_policies"] = [halfcheetah_policies.pseudo_random_policy, halfcheetah_policies.real_random_policy]
|
| 142 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
|
|
|
|
| 144 |
REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
|
| 145 |
REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
|
| 146 |
REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
|
|
|
|
| 108 |
from .mujoco import hopper_translator, hopper_policies
|
| 109 |
from .mujoco import walker2d_translator, walker2d_policies
|
| 110 |
|
| 111 |
+
|
| 112 |
+
|
| 113 |
+
|
| 114 |
|
| 115 |
REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
|
| 116 |
REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
|
|
|
|
| 136 |
REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
|
| 137 |
|
| 138 |
|
| 139 |
+
from .mujoco import halfcheetah_translator, halfcheetah_policies
|
| 140 |
REGISTRY["halfcheetah_init_translator"] = halfcheetah_translator.GameDescriber
|
| 141 |
REGISTRY["halfcheetah_basic_translator"] = halfcheetah_translator.BasicStateSequenceTranslator
|
| 142 |
REGISTRY["halfcheetah_policies"] = [halfcheetah_policies.pseudo_random_policy, halfcheetah_policies.real_random_policy]
|
| 143 |
|
| 144 |
+
from .mujoco import pusher_translator, pusher_policies
|
| 145 |
+
REGISTRY["pusher_init_translator"] = pusher_translator.GameDescriber
|
| 146 |
+
REGISTRY["pusher_basic_translator"] = pusher_translator.BasicStateSequenceTranslator
|
| 147 |
+
REGISTRY["pusher_policies"] = [pusher_policies.pseudo_random_policy, pusher_policies.real_random_policy]
|
| 148 |
|
| 149 |
+
from .mujoco import ant_translator, ant_policies
|
| 150 |
REGISTRY["ant_init_translator"] = ant_translator.GameDescriber
|
| 151 |
REGISTRY["ant_basic_translator"] = ant_translator.BasicStateSequenceTranslator
|
| 152 |
REGISTRY["ant_policies"] = [ant_policies.pseudo_random_policy, ant_policies.real_random_policy]
|
envs/mujoco/ant_translator.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
-
'''
|
| 2 |
Action Space Box(-1.0, 1.0, (8,), float32)
|
| 3 |
Observation Space Box(-inf, inf, (27,), float64)
|
| 4 |
'''
|
|
|
|
| 1 |
+
'''Ant
|
| 2 |
Action Space Box(-1.0, 1.0, (8,), float32)
|
| 3 |
Observation Space Box(-inf, inf, (27,), float64)
|
| 4 |
'''
|
envs/mujoco/pusher_policies.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
def pseudo_random_policy(state, pre_action):
    """Sample a random torque vector for the 7 Pusher actuators.

    Each component is drawn independently as ``4 * random.random() - 2``,
    i.e. uniformly from the half-open interval [-2, 2).

    Args:
        state: Current observation. Accepted for interface parity with other
            policies; not consulted.
        pre_action: Previously taken action. Accepted for interface parity;
            not consulted.

    Returns:
        A list of 7 floats, one torque per joint.
    """
    return [4 * random.random() - 2 for _ in range(7)]


# Attach the human-readable description when the function is defined, so that
# reading ``pseudo_random_policy.description`` works even before the policy has
# been called once (previously it was only set as a side effect of a call).
pseudo_random_policy.description = "Select action randomly"
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def real_random_policy(state, pre_action=1):
    """Sample a random torque vector for the 7 Pusher actuators.

    Each component is drawn independently as ``4 * random.random() - 2``,
    i.e. uniformly from the half-open interval [-2, 2).

    Args:
        state: Current observation. Accepted for interface parity with other
            policies; not consulted.
        pre_action: Previously taken action (defaults to 1). Accepted for
            interface parity; not consulted.

    Returns:
        A list of 7 floats, one torque per joint.
    """
    return [4 * random.random() - 2 for _ in range(7)]


# Attach the human-readable description when the function is defined, so that
# reading ``real_random_policy.description`` works even before the policy has
# been called once (previously it was only set as a side effect of a call).
real_random_policy.description = "Select action with a random policy"
|
envs/mujoco/pusher_translator.py
ADDED
|
@@ -0,0 +1,93 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''Pusher
|
| 2 |
+
Action Space Box(-2.0, 2.0, (7,), float32)
|
| 3 |
+
Observation Space Box(-inf, inf, (23,), float64)
|
| 4 |
+
'''
|
| 5 |
+
import math
|
| 6 |
+
|
| 7 |
+
class BasicLevelTranslator:
    """Render a single Pusher observation vector as one English sentence."""

    def __init__(self):
        pass

    def translate(self, state):
        """Describe one observation.

        The observation is read positionally: entries 0-6 are joint angles,
        7-13 joint angular velocities, 14-16 the fingertip position, 17-19 the
        object position and everything from index 20 on the goal position.
        Angles and velocities are passed through ``math.degrees`` before
        formatting, so they are treated as radian quantities.

        Args:
            state: Indexable, sliceable sequence of numbers.

        Returns:
            A comma-separated description terminated by a period.
        """
        joint_names = (
            "panning shoulder",
            "shoulder lifting joint",
            "shoulder rolling joint",
            "elbow joint",
            "forearm rolling joint",
            "wrist flexing joint",
            "wrist rolling joint",
        )

        # Convert everything up front, then index explicitly so that a state
        # that is too short still fails with IndexError.
        angles_deg = list(map(math.degrees, state[:7]))
        rates_deg = list(map(math.degrees, state[7:14]))
        positions = (
            ("Fingertip", state[14:17]),
            ("Object", state[17:20]),
            ("Goal", state[20:]),
        )

        fragments = []
        for idx, joint in enumerate(joint_names):
            fragments.append(
                "Rotation of the {}: {:.2f} degrees".format(joint, angles_deg[idx])
            )
        for idx, joint in enumerate(joint_names):
            fragments.append(
                "Rotational velocity of the {}: {:.2f} degrees/s".format(joint, rates_deg[idx])
            )
        for label, xyz in positions:
            fragments.append(
                "{} coordinates (x, y, z): ({:.2f}, {:.2f}, {:.2f})".format(
                    label, xyz[0], xyz[1], xyz[2]
                )
            )

        return ", ".join(fragments) + "."
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class GameDescriber:
    """Supplies the static natural-language framing for the Pusher task."""

    def __init__(self, args):
        """Store the settings read from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` (compared against 1)
                and ``max_episode_len``.
        """
        self.max_episode_len = args.max_episode_len
        self.is_only_local_obs = args.is_only_local_obs == 1
        # Both lookup tables start empty for this environment.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no termination text is produced here."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state preview is produced here."""
        return ""

    def describe_goal(self):
        """Return the one-sentence statement of the task objective."""
        goal_text = (
            "The goal is to move the target cylinder (object) to the goal "
            "position using the robot's end effector (fingertip)."
        )
        return goal_text

    def describe_game(self):
        """Return the paragraph introducing the environment."""
        sentences = (
            "In the Pusher game, you control a multi-jointed robot arm to manipulate "
            "a target cylinder (object) and place it in a goal position using the "
            "robot's fingertip (end effector).",
            "The robot has shoulder, elbow, forearm, and wrist joints that you can "
            "control with torque values.",
            "The observation space includes joint angles, angular velocities of "
            "joints, fingertip coordinates, object coordinates, and goal coordinates.",
            "The reward is based on the distance between the fingertip and the "
            "object, the distance between the object and the goal, and control "
            "penalties for large actions.",
        )
        return " ".join(sentences)

    def describe_action(self):
        """Return the prompt asking for the next 7-value torque list."""
        prompt = (
            "Your next move: \n Please provide a list of 7 numerical values "
            "within the range [-2, 2], representing the torques applied to the "
            "robot's joints (shoulder, elbow, forearm, and wrist)."
        )
        return prompt
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describe a sequence of transition records, or only the latest state."""

    def translate(self, infos, is_current=False):
        """Turn transition records into text.

        Args:
            infos: Sequence of dicts. Each record is read through the keys
                ``'state'``, ``'action'`` (indexed 0-6), ``'reward'`` and
                ``'next_state'``.
            is_current: When true, only the ``'state'`` of the last record is
                described.

        Returns:
            A single string when ``is_current`` is true; otherwise a list with
            one multi-line description per record.
        """
        if is_current:
            # Only the most recent observation matters in this mode.
            return BasicLevelTranslator().translate(infos[-1]['state'])

        torque_labels = (
            "Shoulder Pan",
            "Shoulder Lift",
            "Shoulder Roll",
            "Elbow Flex",
            "Forearm Roll",
            "Wrist Flex",
            "Wrist Roll",
        )

        transitions = []
        for step in infos:
            assert 'state' in step, "info should contain state information"

            before = BasicLevelTranslator().translate(step['state'])

            torques = step['action']
            torque_text = ", ".join(
                "{}: {:.2f}".format(label, torques[idx])
                for idx, label in enumerate(torque_labels)
            )
            action_text = "Take Action: Apply Torques - " + torque_text

            reward_text = "Result: Reward of {:.2f}".format(step['reward'])
            after = BasicLevelTranslator().translate(step['next_state'])

            transitions.append(
                "\n".join((before, action_text, reward_text, "Transit to " + after))
            )
        return transitions
|
| 93 |
+
|
record_reflexion.csv
CHANGED
|
@@ -16,4 +16,5 @@ Hopper-v4,1,expert,3542.2
|
|
| 16 |
Walker2d-v4,1,expert,5000.0
|
| 17 |
Swimmer-v4,1,expert,44.4
|
| 18 |
Reacher-v4,1,expert,-2.6
|
|
|
|
| 19 |
|
|
|
|
| 16 |
Walker2d-v4,1,expert,5000.0
|
| 17 |
Swimmer-v4,1,expert,44.4
|
| 18 |
Reacher-v4,1,expert,-2.6
|
| 19 |
+
Pusher-v4,1,expert,-52.3
|
| 20 |
|
shell/test_mujoco_hopper.sh
CHANGED
|
@@ -1,5 +1,17 @@
|
|
| 1 |
# Hopper-v4
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
# REFLEXION
|
| 4 |
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 5 |
|
|
|
|
| 1 |
# Hopper-v4
|
| 2 |
|
| 3 |
+
# COT
|
| 4 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
| 5 |
+
|
| 6 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 7 |
+
|
| 8 |
+
# SPP
|
| 9 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
| 10 |
+
|
| 11 |
+
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
# REFLEXION
|
| 16 |
python main_reflexion.py --env_name Hopper-v4 --init_summarizer hopper_init_translator --curr_summarizer hopper_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 17 |
|
shell/test_mujoco_invertedPendulum.sh
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
# InvertedPendulum-v4
|
| 2 |
|
| 3 |
# COT
|
| 4 |
-
python main_reflexion.py --env_name InvertedPendulum-
|
| 5 |
|
| 6 |
-
python main_reflexion.py --env_name InvertedPendulum-
|
| 7 |
|
| 8 |
# SPP
|
| 9 |
-
python main_reflexion.py --env_name InvertedPendulum-
|
| 10 |
|
| 11 |
-
python main_reflexion.py --env_name InvertedPendulum-
|
| 12 |
|
| 13 |
|
| 14 |
|
|
|
|
| 1 |
# InvertedPendulum-v4
|
| 2 |
|
| 3 |
# COT
|
| 4 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
| 5 |
|
| 6 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 7 |
|
| 8 |
# SPP
|
| 9 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
| 10 |
|
| 11 |
+
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 12 |
|
| 13 |
|
| 14 |
|
shell/test_mujoco_pusher.sh
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Pusher-v4
|
| 2 |
+
|
| 3 |
+
# COT
|
| 4 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
| 5 |
+
|
| 6 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 7 |
+
|
| 8 |
+
# SPP
|
| 9 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
| 10 |
+
|
| 11 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# REFLEXION
|
| 15 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 16 |
+
|
| 17 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
| 18 |
+
|
| 19 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# exe
|
| 23 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
| 24 |
+
|
| 25 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
| 26 |
+
|
| 27 |
+
python main_reflexion.py --env_name Pusher-v4 --init_summarizer pusher_init_translator --curr_summarizer pusher_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|