xfu314's picture
Add phantom project with submodules and dependencies
96da58e
"""
Convenience script to tune a camera view in a mujoco environment.
Allows keyboard presses to move a camera around in the viewer, and
then prints the final position and quaternion you should set
for your camera in the mujoco XML file.
"""
import argparse
import time
import xml.etree.ElementTree as ET
import numpy as np
from pynput.keyboard import Controller, Key, Listener
import robosuite
import robosuite.utils.transform_utils as T
from robosuite.utils.camera_utils import CameraMover
from robosuite.utils.mjcf_utils import find_elements, find_parent
# some settings
DELTA_POS_KEY_PRESS = 0.05 # delta camera position per key press
DELTA_ROT_KEY_PRESS = 1 # delta camera angle per key press
class KeyboardHandler:
def __init__(self, camera_mover):
"""
Store internal state here.
Args:
camera_mover (CameraMover): Playback camera class
cam_body_id (int): id corresponding to parent body of camera element
"""
self.camera_mover = camera_mover
# make a thread to listen to keyboard and register our callback functions
self.listener = Listener(on_press=self.on_press, on_release=self.on_release)
# start listening
self.listener.start()
def on_press(self, key):
"""
Key handler for key presses.
Args:
key (int): keycode corresponding to the key that was pressed
"""
try:
# controls for moving rotation
if key == Key.up:
# rotate up
self.camera_mover.rotate_camera(point=None, axis=[1.0, 0.0, 0.0], angle=DELTA_ROT_KEY_PRESS)
elif key == Key.down:
# rotate down
self.camera_mover.rotate_camera(point=None, axis=[-1.0, 0.0, 0.0], angle=DELTA_ROT_KEY_PRESS)
elif key == Key.left:
# rotate left
self.camera_mover.rotate_camera(point=None, axis=[0.0, 1.0, 0.0], angle=DELTA_ROT_KEY_PRESS)
elif key == Key.right:
# rotate right
self.camera_mover.rotate_camera(point=None, axis=[0.0, -1.0, 0.0], angle=DELTA_ROT_KEY_PRESS)
# controls for moving position
elif key.char == "w":
# move forward
self.camera_mover.move_camera(direction=[0.0, 0.0, -1.0], scale=DELTA_POS_KEY_PRESS)
elif key.char == "s":
# move backward
self.camera_mover.move_camera(direction=[0.0, 0.0, 1.0], scale=DELTA_POS_KEY_PRESS)
elif key.char == "a":
# move left
self.camera_mover.move_camera(direction=[-1.0, 0.0, 0.0], scale=DELTA_POS_KEY_PRESS)
elif key.char == "d":
# move right
self.camera_mover.move_camera(direction=[1.0, 0.0, 0.0], scale=DELTA_POS_KEY_PRESS)
elif key.char == "r":
# move up
self.camera_mover.move_camera(direction=[0.0, 1.0, 0.0], scale=DELTA_POS_KEY_PRESS)
elif key.char == "f":
# move down
self.camera_mover.move_camera(direction=[0.0, -1.0, 0.0], scale=DELTA_POS_KEY_PRESS)
elif key.char == ".":
# rotate counterclockwise
self.camera_mover.rotate_camera(point=None, axis=[0.0, 0.0, 1.0], angle=DELTA_ROT_KEY_PRESS)
elif key.char == "/":
# rotate clockwise
self.camera_mover.rotate_camera(point=None, axis=[0.0, 0.0, -1.0], angle=DELTA_ROT_KEY_PRESS)
except AttributeError as e:
pass
def on_release(self, key):
"""
Key handler for key releases.
Args:
key: [NOT USED]
"""
pass
def print_command(char, info):
"""
Prints out the command + relevant info entered by user
Args:
char (str): Command entered
info (str): Any additional info to print
"""
char += " " * (10 - len(char))
print("{}\t{}".format(char, info))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("--env", type=str, default="Lift")
parser.add_argument("--robots", nargs="+", type=str, default="Sawyer", help="Which robot(s) to use in the env")
args = parser.parse_args()
print("\nWelcome to the camera tuning script! You will be able to tune a camera view")
print("by moving it around using your keyboard. The controls are printed below.")
print("")
print_command("Keys", "Command")
print_command("w-s", "zoom the camera in/out")
print_command("a-d", "pan the camera left/right")
print_command("r-f", "pan the camera up/down")
print_command("arrow keys", "rotate the camera to change view direction")
print_command(".-/", "rotate the camera view without changing view direction")
print("")
# read camera XML tag from user input
inp = input(
"\nPlease paste a camera name below \n"
"OR xml tag below (e.g. <camera ... />) \n"
"OR leave blank for an example:\n"
)
if len(inp) == 0:
if args.env != "Lift":
raise Exception("ERROR: env must be Lift to run default example.")
print("\nUsing an example tag corresponding to the frontview camera.")
print("This xml tag was copied from robosuite/models/assets/arenas/table_arena.xml")
inp = '<camera mode="fixed" name="frontview" pos="1.6 0 1.45" quat="0.56 0.43 0.43 0.56"/>'
# remember the tag and infer some properties
from_tag = "<" in inp
notify_str = (
"NOTE: using the following xml tag:\n"
if from_tag
else "NOTE: using the following camera (initialized at default sim location)\n"
)
print(notify_str)
print("{}\n".format(inp))
cam_tree = ET.fromstring(inp) if from_tag else ET.Element("camera", attrib={"name": inp})
CAMERA_NAME = cam_tree.get("name")
# make the environment
env = robosuite.make(
args.env,
robots=args.robots,
has_renderer=True,
has_offscreen_renderer=False,
ignore_done=True,
use_camera_obs=False,
control_freq=100,
)
env.reset()
# Create the camera mover
camera_mover = CameraMover(
env=env,
camera=CAMERA_NAME,
)
# Make sure we're using the camera that we're modifying
camera_id = env.sim.model.camera_name2id(CAMERA_NAME)
env.viewer.set_camera(camera_id=camera_id)
# Infer initial camera pose
if from_tag:
initial_file_camera_pos = np.array(cam_tree.get("pos").split(" ")).astype(float)
initial_file_camera_quat = T.convert_quat(np.array(cam_tree.get("quat").split(" ")).astype(float), to="xyzw")
# Set these values as well
camera_mover.set_camera_pose(pos=initial_file_camera_pos, quat=initial_file_camera_quat)
# Optionally set fov if specified
cam_fov = cam_tree.get("fovy", None)
if cam_fov is not None:
env.sim.model.cam_fovy[camera_id] = float(cam_fov)
else:
initial_file_camera_pos, initial_file_camera_quat = camera_mover.get_camera_pose()
# Define initial file camera pose
initial_file_camera_pose = T.make_pose(initial_file_camera_pos, T.quat2mat(initial_file_camera_quat))
# remember difference between camera pose in initial tag and absolute camera pose in world
initial_world_camera_pos, initial_world_camera_quat = camera_mover.get_camera_pose()
initial_world_camera_pose = T.make_pose(initial_world_camera_pos, T.quat2mat(initial_world_camera_quat))
world_in_file = initial_file_camera_pose.dot(T.pose_inv(initial_world_camera_pose))
# register callbacks to handle key presses in the viewer
key_handler = KeyboardHandler(camera_mover=camera_mover)
# just spin to let user interact with window
spin_count = 0
while True:
action = np.zeros(env.action_dim)
obs, reward, done, _ = env.step(action)
env.render()
spin_count += 1
if spin_count % 500 == 0:
# convert from world coordinates to file coordinates (xml subtree)
camera_pos, camera_quat = camera_mover.get_camera_pose()
world_camera_pose = T.make_pose(camera_pos, T.quat2mat(camera_quat))
file_camera_pose = world_in_file.dot(world_camera_pose)
# TODO: Figure out why numba causes black screen of death (specifically, during mat2pose --> mat2quat call below)
camera_pos, camera_quat = T.mat2pose(file_camera_pose)
camera_quat = T.convert_quat(camera_quat, to="wxyz")
print("\n\ncurrent camera tag you should copy")
cam_tree.set("pos", "{} {} {}".format(camera_pos[0], camera_pos[1], camera_pos[2]))
cam_tree.set("quat", "{} {} {} {}".format(camera_quat[0], camera_quat[1], camera_quat[2], camera_quat[3]))
print(ET.tostring(cam_tree, encoding="utf8").decode("utf8"))