Spaces:
Runtime error
Runtime error
Commit ·
8f842da
1
Parent(s): 2ec5014
Add the Reacher env; all MuJoCo envs now support the COT, SPP, SELF-REFLEXION, and EXE methods under the L1 & L3 settings.
Browse files- envs/__init__.py +13 -2
- envs/mujoco/reacher_policies.py +15 -0
- envs/mujoco/reacher_translator.py +67 -0
- envs/mujoco/swimmer_policies.py +15 -0
- envs/mujoco/swimmer_translator.py +80 -0
- record_reflexion.csv +6 -1
- test_atari.sh → shell/test_atari.sh +0 -0
- shell/test_mujoco_ant.sh +12 -0
- shell/test_mujoco_invertedDoublePendulum.sh +11 -0
- shell/test_mujoco_invertedPendulum.sh +12 -3
- shell/test_mujoco_reacher.sh +27 -0
- shell/test_mujoco_swimmer.sh +27 -0
- shell/test_mujoco_walker2d.sh +12 -0
- test_reflexion.sh → shell/test_reflexion.sh +0 -0
envs/__init__.py
CHANGED
|
@@ -100,6 +100,11 @@ REGISTRY["RepresentedPong_basic_policies"] = [
|
|
| 100 |
|
| 101 |
from .mujoco import invertedPendulum_translator, invertedPendulum_policies
|
| 102 |
from .mujoco import invertedDoublePendulum_translator, invertedDoublePendulum_policies
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
from .mujoco import hopper_translator, hopper_policies
|
| 104 |
from .mujoco import walker2d_translator, walker2d_policies
|
| 105 |
|
|
@@ -109,16 +114,22 @@ from .mujoco import ant_translator, ant_policies
|
|
| 109 |
REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
|
| 110 |
REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
|
| 111 |
REGISTRY["invertedPendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
|
| 112 |
-
|
| 113 |
REGISTRY["invertedDoublePendulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
|
| 114 |
REGISTRY["invertedDoublePendulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
|
| 115 |
REGISTRY["invertedDoublePendulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
|
| 116 |
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
REGISTRY["hopper_init_translator"] = hopper_translator.GameDescriber
|
| 119 |
REGISTRY["hopper_basic_translator"] = hopper_translator.BasicStateSequenceTranslator
|
| 120 |
REGISTRY["hopper_policies"] = [hopper_policies.pseudo_random_policy, hopper_policies.real_random_policy]
|
| 121 |
-
|
| 122 |
REGISTRY["walker2d_init_translator"] = walker2d_translator.GameDescriber
|
| 123 |
REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTranslator
|
| 124 |
REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
|
|
|
|
| 100 |
|
| 101 |
from .mujoco import invertedPendulum_translator, invertedPendulum_policies
|
| 102 |
from .mujoco import invertedDoublePendulum_translator, invertedDoublePendulum_policies
|
| 103 |
+
|
| 104 |
+
from .mujoco import swimmer_translator, swimmer_policies
|
| 105 |
+
|
| 106 |
+
from .mujoco import reacher_translator, reacher_policies
|
| 107 |
+
|
| 108 |
from .mujoco import hopper_translator, hopper_policies
|
| 109 |
from .mujoco import walker2d_translator, walker2d_policies
|
| 110 |
|
|
|
|
| 114 |
REGISTRY["invertedPendulum_init_translator"] = invertedPendulum_translator.GameDescriber
|
| 115 |
REGISTRY["invertedPendulum_basic_translator"] = invertedPendulum_translator.BasicStateSequenceTranslator
|
| 116 |
REGISTRY["invertedPendulum_policies"] = [invertedPendulum_policies.pseudo_random_policy, invertedPendulum_policies.real_random_policy]
|
|
|
|
| 117 |
REGISTRY["invertedDoublePendulum_init_translator"] = invertedDoublePendulum_translator.GameDescriber
|
| 118 |
REGISTRY["invertedDoublePendulum_basic_translator"] = invertedDoublePendulum_translator.BasicStateSequenceTranslator
|
| 119 |
REGISTRY["invertedDoublePendulum_policies"] = [invertedDoublePendulum_policies.pseudo_random_policy, invertedDoublePendulum_policies.real_random_policy]
|
| 120 |
|
| 121 |
|
| 122 |
+
REGISTRY["swimmer_init_translator"] = swimmer_translator.GameDescriber
|
| 123 |
+
REGISTRY["swimmer_basic_translator"] = swimmer_translator.BasicStateSequenceTranslator
|
| 124 |
+
REGISTRY["swimmer_policies"] = [swimmer_policies.pseudo_random_policy, swimmer_policies.real_random_policy]
|
| 125 |
+
|
| 126 |
+
REGISTRY["reacher_init_translator"] = reacher_translator.GameDescriber
|
| 127 |
+
REGISTRY["reacher_basic_translator"] = reacher_translator.BasicStateSequenceTranslator
|
| 128 |
+
REGISTRY["reacher_policies"] = [reacher_policies.pseudo_random_policy, reacher_policies.real_random_policy]
|
| 129 |
+
|
| 130 |
REGISTRY["hopper_init_translator"] = hopper_translator.GameDescriber
|
| 131 |
REGISTRY["hopper_basic_translator"] = hopper_translator.BasicStateSequenceTranslator
|
| 132 |
REGISTRY["hopper_policies"] = [hopper_policies.pseudo_random_policy, hopper_policies.real_random_policy]
|
|
|
|
| 133 |
REGISTRY["walker2d_init_translator"] = walker2d_translator.GameDescriber
|
| 134 |
REGISTRY["walker2d_basic_translator"] = walker2d_translator.BasicStateSequenceTranslator
|
| 135 |
REGISTRY["walker2d_policies"] = [walker2d_policies.pseudo_random_policy, walker2d_policies.real_random_policy]
|
envs/mujoco/reacher_policies.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
def pseudo_random_policy(state, pre_action=1):
    """Return a random two-component action for the Reacher task.

    Args:
        state: Current observation. Accepted for interface compatibility;
            it is not read.
        pre_action: Previously taken action. Not read.

    Returns:
        A list of two floats, each equal to ``2 * random.random() - 1``
        and therefore in the interval [-1, 1).
    """
    return [2 * random.random() - 1 for _ in range(2)]


# Attach the description when the module is loaded so that it can be read
# before the policy has been called for the first time.
pseudo_random_policy.description = "Select action randomly"
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random two-component action for the Reacher task.

    Args:
        state: Current observation. Accepted for interface compatibility;
            it is not read.
        pre_action: Previously taken action. Not read.

    Returns:
        A list of two floats, each equal to ``2 * random.random() - 1``
        and therefore in the interval [-1, 1).
    """
    return [2 * random.random() - 1 for _ in range(2)]


# Attach the description when the module is loaded so that it can be read
# before the policy has been called for the first time.
real_random_policy.description = "Select action with a random policy"
|
envs/mujoco/reacher_translator.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''Reacher
|
| 2 |
+
Action Space Box(-1.0, 1.0, (2,), float32)
|
| 3 |
+
|
| 4 |
+
Observation Space Box(-inf, inf, (11,), float64)
|
| 5 |
+
'''
|
| 6 |
+
class BasicLevelTranslator:
    """Renders a single Reacher observation as an English description."""

    def __init__(self):
        """Create the translator; it keeps no state."""

    def translate(self, state):
        """Describe one observation.

        Args:
            state: An iterable that unpacks into exactly eleven numbers, in
                this order: cos(arm1), cos(arm2), sin(arm1), sin(arm2),
                target x, target y, angular velocity of arm1, angular
                velocity of arm2, and the three components of the
                fingertip-to-target difference.

        Returns:
            A single-line string with every value formatted to two decimals.
        """
        (cos_1, cos_2, sin_1, sin_2,
         goal_x, goal_y, omega_1, omega_2,
         delta_x, delta_y, delta_z) = state

        sentences = [
            f"Arm1 has a cosine angle of {cos_1:.2f} and a sine angle of {sin_1:.2f}.",
            f"Arm2 has a cosine angle of {cos_2:.2f} and a sine angle of {sin_2:.2f}.",
            f"Target position is at ({goal_x:.2f}, {goal_y:.2f}).",
            f"Arm1's angular velocity is {omega_1:.2f} rad/s, and Arm2's is {omega_2:.2f} rad/s.",
            f"Vector difference between fingertip and target is ({delta_x:.2f}, {delta_y:.2f}, {delta_z:.2f}).",
        ]
        # Sentences are separated by exactly one space.
        return " ".join(sentences)
|
| 21 |
+
|
| 22 |
+
class GameDescriber:
    """Provides the fixed text prompts that describe the Reacher task."""

    def __init__(self, args):
        """Store the settings read from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` and
                ``max_episode_len`` attributes.
        """
        local_only = args.is_only_local_obs == 1  # True only when the value equals 1
        self.is_only_local_obs = local_only
        self.max_episode_len = args.max_episode_len
        # Both lookup tables start out empty.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no terminal-state text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state text is produced."""
        return ""

    def describe_goal(self):
        """Return the one-sentence statement of the task goal."""
        goal = "The goal is to control a two-jointed robot arm to move its end effector (fingertip) close to a randomly spawned target."
        return goal

    def describe_game(self):
        """Return the overview of the game, its observations and its rewards."""
        sentences = (
            "In the Reacher game, you control a two-jointed robot arm.",
            "The objective is to maneuver the arm's fingertip close to a target.",
            "The observation space includes the cosine and sine of the arm angles, coordinates of the target, "
            "angular velocities of the arms, and the vector from the fingertip to the target.",
            "The episode ends after 50 timesteps or if any state space value becomes non-finite.",
            "Rewards are given based on the distance of the fingertip from the target and the magnitude of actions applied.",
        )
        return " ".join(sentences)

    def describe_action(self):
        """Return the prompt asking for the two joint torques."""
        prompt = "Your next move: \n Please provide two numerical values representing the torques applied at the two hinge joints. "
        prompt += "Each value should be within the range of [-1, 1]."
        return prompt
|
| 49 |
+
|
| 50 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describes recorded Reacher transitions using the single-state translator."""

    def translate(self, infos, is_current=False):
        """Describe either the latest state or every recorded transition.

        Args:
            infos: Sequence of dicts. Each dict is read for the keys
                ``'state'``, ``'action'`` (indexed at 0 and 1), ``'reward'``
                and ``'next_state'``.
            is_current: When true, only ``infos[-1]['state']`` is described.

        Returns:
            A string when ``is_current`` is true, otherwise a list with one
            string per entry of ``infos``.
        """
        describe_state = BasicLevelTranslator().translate

        if is_current:
            return describe_state(infos[-1]['state'])

        transitions = []
        for step in infos:
            assert 'state' in step, "info should contain state information"

            before = describe_state(step['state'])
            torque_1 = step['action'][0]
            torque_2 = step['action'][1]
            action_text = f"Take Action: Apply Torque at Joint 1: {torque_1:.2f}, Joint 2 Torque: {torque_2:.2f}"
            reward_text = "Result: Reward of {:.2f}, ".format(step['reward'])
            after = describe_state(step['next_state'])

            # The separators contain a literal backslash followed by "n",
            # not a newline character.
            transitions.append(
                before + ".\\n " + action_text + " \\n " + reward_text + " \\n Transit to " + after
            )
        return transitions
|
envs/mujoco/swimmer_policies.py
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
def pseudo_random_policy(state, pre_action=1):
    """Return a random two-component action for the Swimmer task.

    Args:
        state: Current observation. Accepted for interface compatibility;
            it is not read.
        pre_action: Previously taken action. Not read.

    Returns:
        A list of two floats, each equal to ``2 * random.random() - 1``
        and therefore in the interval [-1, 1).
    """
    return [2 * random.random() - 1 for _ in range(2)]


# Attach the description when the module is loaded so that it can be read
# before the policy has been called for the first time.
pseudo_random_policy.description = "Select action randomly"
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def real_random_policy(state, pre_action=1):
    """Return a random two-component action for the Swimmer task.

    Args:
        state: Current observation. Accepted for interface compatibility;
            it is not read.
        pre_action: Previously taken action. Not read.

    Returns:
        A list of two floats, each equal to ``2 * random.random() - 1``
        and therefore in the interval [-1, 1).
    """
    return [2 * random.random() - 1 for _ in range(2)]


# Attach the description when the module is loaded so that it can be read
# before the policy has been called for the first time.
real_random_policy.description = "Select action with a random policy"
|
envs/mujoco/swimmer_translator.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
'''Swimmer
|
| 2 |
+
Action Space Box(-1.0, 1.0, (2,), float32)
|
| 3 |
+
|
| 4 |
+
Observation Space Box(-inf, inf, (8,), float64)
|
| 5 |
+
'''
|
| 6 |
+
|
| 7 |
+
class BasicLevelTranslator:
    """Renders a single Swimmer observation as a multi-line description."""

    def translate(self, state):
        """Describe one observation.

        Args:
            state: Indexable sequence; elements 0 through 7 are read.

        Returns:
            Eight lines joined by newlines, one per element, each value
            formatted to two decimals.
        """
        line_templates = (
            "Angle of the front tip: {:.2f} rad",
            "Angle of the first rotor: {:.2f} rad",
            "Angle of the second rotor: {:.2f} rad",
            "Velocity of the tip along the x-axis: {:.2f} m/s",
            "Velocity of the tip along the y-axis: {:.2f} m/s",
            "Angular velocity of front tip: {:.2f} rad/s",
            "Angular velocity of the first rotor: {:.2f} rad/s",
            "Angular velocity of the second rotor: {:.2f} rad/s",
        )
        # Index explicitly so that a state with fewer than eight elements
        # still raises instead of being silently truncated.
        lines = [
            template.format(state[position])
            for position, template in enumerate(line_templates)
        ]
        return "\n".join(lines)
|
| 20 |
+
|
| 21 |
+
class GameDescriber:
    """Provides the fixed text prompts that describe the Swimmer task."""

    def __init__(self, args):
        """Store the settings read from ``args``.

        Args:
            args: Object exposing ``is_only_local_obs`` and
                ``max_episode_len`` attributes.
        """
        local_only = args.is_only_local_obs == 1  # True only when the value equals 1
        self.is_only_local_obs = local_only
        self.max_episode_len = args.max_episode_len
        # Both lookup tables start out empty.
        self.action_desc_dict = dict()
        self.reward_desc_dict = dict()

    def translate_terminate_state(self, state, episode_len, max_episode_len):
        """Return an empty string; no terminal-state text is produced."""
        return ""

    def translate_potential_next_state(self, state, action):
        """Return an empty string; no next-state text is produced."""
        return ""

    def describe_goal(self):
        """Return the statement of the task goal."""
        sentences = [
            "The goal in the Swimmer environment is to move as fast as possible towards the right "
            "by applying torque to the rotors and utilizing fluid friction.",
            "The swimmer consists of three or more segments connected by rotors, "
            "and the objective is to achieve efficient swimming motion.",
        ]
        return " ".join(sentences)

    def describe_game(self):
        """Return the overview of the environment and its observations."""
        sentences = [
            "In the Swimmer environment, you control a swimmer consisting of three or more segments connected by rotors.",
            "Your goal is to make the swimmer move as fast as possible to the right in a two-dimensional pool.",
            "You can achieve this by applying torques to the rotors and utilizing fluid friction.",
            "The environment provides observations of the swimmer's angles, velocities, and angular velocities.",
        ]
        return " ".join(sentences)

    def describe_action(self):
        """Return the prompt asking for the two rotor torques."""
        prompt = "Your next move: \nPlease provide a list of two numerical values, each within the range of [-1, 1], "
        prompt += "representing the torques to be applied to the two rotors of the swimmer. "
        prompt += "These torques will help control the swimmer's movement and achieve efficient swimming."
        return prompt
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
class BasicStateSequenceTranslator(BasicLevelTranslator):
    """Describes recorded Swimmer transitions using the single-state translator."""

    def translate(self, infos, is_current=False):
        """Describe either the latest state or every recorded transition.

        Args:
            infos: Sequence of dicts. Each dict is read for the keys
                ``'state'``, ``'action'`` (indexed at 0 and 1), ``'reward'``
                and ``'next_state'``.
            is_current: When true, only ``infos[-1]['state']`` is described.

        Returns:
            A string when ``is_current`` is true, otherwise a list with one
            newline-separated string per entry of ``infos``.
        """
        describe_state = BasicLevelTranslator().translate

        if is_current:
            return describe_state(infos[-1]['state'])

        transitions = []
        for step in infos:
            assert 'state' in step, "info should contain state information"
            before = describe_state(step['state'])
            rotor_1 = step['action'][0]
            rotor_2 = step['action'][1]
            action_text = "Torques Applied: " + f"Rotor 1: {rotor_1:.2f}, Rotor 2: {rotor_2:.2f}"
            reward_text = "Result: Reward of {:.2f}".format(step['reward'])
            after = describe_state(step['next_state'])
            transitions.append(
                "\n".join([before, action_text, reward_text, "Transit to", after])
            )
        return transitions
|
record_reflexion.csv
CHANGED
|
@@ -10,5 +10,10 @@ FrozenLake-v1,1,expert,200.0
|
|
| 10 |
MountainCarContinuous-v0,1,expert,200.0
|
| 11 |
RepresentedBoxing-v0,1,expert,200.0
|
| 12 |
RepresentedPong-v0,1,expert,200.0
|
| 13 |
-
Ant-v4,1,expert,5000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 14 |
|
|
|
|
| 10 |
MountainCarContinuous-v0,1,expert,200.0
|
| 11 |
RepresentedBoxing-v0,1,expert,200.0
|
| 12 |
RepresentedPong-v0,1,expert,200.0
|
| 13 |
+
Ant-v4,1,expert,5000.2
|
| 14 |
+
HalfCheetah-v4,1,expert,12138.8
|
| 15 |
+
Hopper-v4,1,expert,3542.2
|
| 16 |
+
Walker2d-v4,1,expert,5000.0
|
| 17 |
+
Swimmer-v4,1,expert,44.4
|
| 18 |
+
Reacher-v4,1,expert,-2.6
|
| 19 |
|
test_atari.sh → shell/test_atari.sh
RENAMED
|
File without changes
|
shell/test_mujoco_ant.sh
CHANGED
|
@@ -1,6 +1,18 @@
|
|
| 1 |
|
| 2 |
# Ant-v4
|
| 3 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
# REFLEXION
|
| 5 |
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 6 |
|
|
|
|
| 1 |
|
| 2 |
# Ant-v4
|
| 3 |
|
| 4 |
+
# COT
|
| 5 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
| 6 |
+
|
| 7 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 8 |
+
|
| 9 |
+
# SPP
|
| 10 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
| 11 |
+
|
| 12 |
+
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
|
| 16 |
# REFLEXION
|
| 17 |
python main_reflexion.py --env_name Ant-v4 --init_summarizer ant_init_translator --curr_summarizer ant_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 18 |
|
shell/test_mujoco_invertedDoublePendulum.sh
CHANGED
|
@@ -1,5 +1,16 @@
|
|
| 1 |
# InvertedDoublePendulum-v4
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
# REFLEXION
|
| 4 |
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 5 |
|
|
|
|
| 1 |
# InvertedDoublePendulum-v4
|
| 2 |
|
| 3 |
+
# COT
|
| 4 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
| 5 |
+
|
| 6 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 7 |
+
|
| 8 |
+
# SPP
|
| 9 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
| 10 |
+
|
| 11 |
+
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 12 |
+
|
| 13 |
+
|
| 14 |
# REFLEXION
|
| 15 |
python main_reflexion.py --env_name InvertedDoublePendulum-v4 --init_summarizer invertedDoublePendulum_init_translator --curr_summarizer invertedDoublePendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 16 |
|
shell/test_mujoco_invertedPendulum.sh
CHANGED
|
@@ -1,16 +1,25 @@
|
|
| 1 |
# InvertedPendulum-v4
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
# REFLEXION
|
| 4 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 5 |
|
| 6 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
| 7 |
|
| 8 |
-
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
| 9 |
|
| 10 |
|
| 11 |
# exe
|
| 12 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
| 13 |
|
| 14 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
| 15 |
-
|
| 16 |
-
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
|
|
|
| 1 |
# InvertedPendulum-v4
|
| 2 |
|
| 3 |
+
# COT
|
| 4 |
+
# Use the same InvertedPendulum-v4 env id as the REFLEXION and exe runs in this script.
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
| 5 |
+
|
| 6 |
+
# Use the same InvertedPendulum-v4 env id as the REFLEXION and exe runs in this script.
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 7 |
+
|
| 8 |
+
# SPP
|
| 9 |
+
# Use the same InvertedPendulum-v4 env id as the REFLEXION and exe runs in this script.
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
| 10 |
+
|
| 11 |
+
# Use the same InvertedPendulum-v4 env id as the REFLEXION and exe runs in this script.
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
# REFLEXION
|
| 16 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 17 |
|
| 18 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
| 19 |
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
# exe
|
| 23 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
| 24 |
|
| 25 |
python main_reflexion.py --env_name InvertedPendulum-v4 --init_summarizer invertedPendulum_init_translator --curr_summarizer invertedPendulum_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
|
|
|
|
|
shell/test_mujoco_reacher.sh
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Reacher-v4
|
| 2 |
+
|
| 3 |
+
# COT
|
| 4 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
| 5 |
+
|
| 6 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 7 |
+
|
| 8 |
+
# SPP
|
| 9 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
| 10 |
+
|
| 11 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# REFLEXION
|
| 15 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 16 |
+
|
| 17 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
| 18 |
+
|
| 19 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# exe
|
| 23 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
| 24 |
+
|
| 25 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
| 26 |
+
|
| 27 |
+
python main_reflexion.py --env_name Reacher-v4 --init_summarizer reacher_init_translator --curr_summarizer reacher_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
shell/test_mujoco_swimmer.sh
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Swimmer-v4
|
| 2 |
+
|
| 3 |
+
# COT
|
| 4 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
| 5 |
+
|
| 6 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 7 |
+
|
| 8 |
+
# SPP
|
| 9 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
| 10 |
+
|
| 11 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
# REFLEXION
|
| 15 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 16 |
+
|
| 17 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider reflexion_actor --prompt_level 3 --num_trails 5 --distiller reflect_distiller
|
| 18 |
+
|
| 19 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider reflexion_actor --prompt_level 5 --num_trails 1 --distiller reflect_distiller
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
# exe
|
| 23 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider exe_actor --prompt_level 1 --num_trails 1 --distiller guide_generator --api_type openai
|
| 24 |
+
|
| 25 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider exe_actor --prompt_level 3 --num_trails 5 --distiller guide_generator
|
| 26 |
+
|
| 27 |
+
python main_reflexion.py --env_name Swimmer-v4 --init_summarizer swimmer_init_translator --curr_summarizer swimmer_basic_translator --decider exe_actor --prompt_level 5 --num_trails 1 --distiller guide_generator
|
shell/test_mujoco_walker2d.sh
CHANGED
|
@@ -1,5 +1,17 @@
|
|
| 1 |
# Walker2d-v4
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
# REFLEXION
|
| 4 |
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 5 |
|
|
|
|
| 1 |
# Walker2d-v4
|
| 2 |
|
| 3 |
+
# COT
|
| 4 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider cot_actor --prompt_level 1 --num_trails 1
|
| 5 |
+
|
| 6 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider cot_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 7 |
+
|
| 8 |
+
# SPP
|
| 9 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider spp_actor --prompt_level 1 --num_trails 1
|
| 10 |
+
|
| 11 |
+
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider spp_actor --prompt_level 3 --num_trails 5 --distiller traj_distiller
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
|
| 15 |
# REFLEXION
|
| 16 |
python main_reflexion.py --env_name Walker2d-v4 --init_summarizer walker2d_init_translator --curr_summarizer walker2d_basic_translator --decider reflexion_actor --prompt_level 1 --num_trails 1 --distiller reflect_distiller
|
| 17 |
|
test_reflexion.sh → shell/test_reflexion.sh
RENAMED
|
File without changes
|