|
|
from control_joints import * |
|
|
from read_joints import * |
|
|
from realsense import * |
|
|
import os |
|
|
import cv2 |
|
|
import numpy as np |
|
|
import torch |
|
|
import time |
|
|
import argparse |
|
|
import sys |
|
|
import json |
|
|
|
|
|
sys.path.insert(0, 'policy/ACT_DP_multitask') |
|
|
from policy_anyrobot import ACTDiffusionPolicy |
|
|
from t5_encoder import T5Embedder |
|
|
from utils import convert_weight |
|
|
import yaml |
|
|
|
|
|
def get_image(cameras):
    """
    Get the latest images from all cameras and return them as a dictionary.

    Polls every camera three times so that stale frames get flushed and the
    last successful read wins. Cameras return BGR frames; the returned dict
    holds RGB (channel-reversed) frames keyed by camera name. Each raw BGR
    frame is also written to disk for debugging.

    Args:
        cameras: iterable of camera objects exposing ``.name`` and
            ``.get_latest_image()`` (returns an HxWx3 BGR array or None).

    Returns:
        dict mapping camera name -> RGB image array (H, W, 3).
    """
    obs_image = dict()
    for i in range(3):
        for cam in cameras:
            color_image = cam.get_latest_image()
            if color_image is not None:
                # BGR -> RGB; a later poll overwrites an earlier frame.
                obs_image[cam.name] = color_image[:,:,::-1]

                # Debug dump of the raw BGR frame.
                filename = f"{cam.name}_image_{i}.png"
                cv2.imwrite(filename, color_image)
                # Fix: previously printed a constant placeholder instead of
                # the file that was actually written.
                print(f"Saved image: {filename}")
    return obs_image
|
|
|
|
|
def get_observation(obs_image, joint):
    """Pack camera frames and joint readings into a policy observation.

    Each HxWxC image is transposed to CxHxW and re-keyed under the policy's
    camera naming scheme; the joint vector is stored under 'qpos'.

    Args:
        obs_image: dict with 'head_camera', 'left_camera', 'right_camera'
            entries, each an HxWxC array.
        joint: current joint values (stored as-is).

    Returns:
        dict with 'images' (policy-keyed CHW arrays) and 'qpos'.
    """
    # Raw camera name -> key expected by the policy.
    cam_key_map = {
        'head_camera': 'cam_high',
        'left_camera': 'cam_left_wrist',
        'right_camera': 'cam_right_wrist',
    }
    images = {
        policy_key: np.moveaxis(obs_image[src_key], -1, 0)
        for src_key, policy_key in cam_key_map.items()
    }
    return {'images': images, 'qpos': joint}
|
|
|
|
|
def encode_obs(observations, camera_names):
    """Convert an observation dict into batched policy input tensors.

    Args:
        observations: dict with 'images' (camera name -> CHW array) and
            'qpos' (joint values convertible to a float array).
        camera_names: ordered camera keys to stack along the camera axis.

    Returns:
        (image_data, qpos_data):
            image_data: float tensor (1, n_cams, C, H, W), scaled to [0, 1].
            qpos_data:  float tensor (1, dof).
    """
    stacked = np.stack(
        [observations['images'][name] for name in camera_names], axis=0
    )
    # Prepend the batch dimension and normalize pixels to [0, 1].
    image_data = torch.from_numpy(stacked).unsqueeze(0).float() / 255.0

    qpos = np.array(observations['qpos'], dtype=np.float32)
    qpos_data = torch.from_numpy(qpos).unsqueeze(0)

    return image_data, qpos_data
|
|
|
|
|
def get_model(config_file, ckpt_file, device):
    """Build the policy from its JSON config and load checkpoint weights.

    Args:
        config_file: path to the policy_config.json produced at training time.
        ckpt_file: path to a .ckpt file containing 'state_dict' and 'stats'.
        device: torch device the policy is moved to.

    Returns:
        (policy, camera_names): the policy in eval mode and the ordered list
        of camera names from the training config.
    """
    with open(config_file, "r", encoding="utf-8") as file:
        policy_config = json.load(file)
    print(f"Loading policy config from {config_file}")
    policy = ACTDiffusionPolicy(policy_config)
    print(f"Loading model from {ckpt_file}")
    # Deserialize the checkpoint once; it holds both the weights and the
    # normalization stats (previously torch.load ran twice on the same file).
    # NOTE(review): weights_only=False unpickles arbitrary objects — only
    # load checkpoints from trusted sources.
    checkpoint = torch.load(ckpt_file, weights_only=False)
    policy.load_state_dict(convert_weight(checkpoint["state_dict"]))
    policy.to(device)
    policy.eval()
    stats = checkpoint["stats"]
    print('Resetting observation normalization stats')
    policy.reset_obs(stats, norm_type=policy_config["norm_type"])
    camera_names = policy_config["camera_names"]
    return policy, camera_names
|
|
|
|
|
def get_language_encoder(device, model_path=None, config_path=None):
    """Load the T5 tokenizer and text encoder used for language conditioning.

    Args:
        device: torch device the encoder is placed on.
        model_path: optional override for the pretrained T5 snapshot
            directory. Defaults to the original machine-local HF cache path.
        config_path: optional override for the training base.yaml whose
            dataset.tokenizer_max_length sets the max token length.

    Returns:
        (tokenizer, text_encoder) from the constructed T5Embedder.
    """
    if model_path is None:
        # NOTE(review): machine-specific cache path; pass model_path when
        # deploying on another host.
        model_path = '/data/gjx/.cache/huggingface/hub/models--google--t5-v1_1-xxl/snapshots/3db67ab1af984cf10548a73467f0e5bca2aaaeb2'
    if config_path is None:
        config_path = os.path.join('policy/ACT_DP_multitask', "base.yaml")

    with open(config_path, "r") as fp:
        config = yaml.safe_load(fp)

    text_embedder = T5Embedder(
        from_pretrained=model_path,
        model_max_length=config["dataset"]["tokenizer_max_length"],
        device=device,
        use_offload_folder=None,
    )
    return text_embedder.tokenizer, text_embedder.model
|
|
|
|
|
def get_language_embed(tokenizer, text_encoder, language_instruction, device):
    """Encode an instruction string into a single embedding vector.

    The instruction is tokenized, run through the text encoder without
    gradients, and the per-token hidden states are mean-pooled into one
    (hidden_dim,) CPU tensor.

    Args:
        tokenizer: callable returning a dict with an 'input_ids' tensor.
        text_encoder: model whose output exposes .last_hidden_state.
        language_instruction: instruction text to embed.
        device: device the token ids are moved to before encoding.

    Returns:
        1-D CPU tensor: hidden states averaged over the token dimension.
    """
    encoded = tokenizer(
        language_instruction,
        return_tensors="pt",
        padding="longest",
        truncation=True,
    )
    token_ids = encoded["input_ids"].to(device).view(1, -1)

    with torch.no_grad():
        hidden = text_encoder(token_ids).last_hidden_state.detach().cpu()

    # Drop the batch dim, then mean-pool across tokens.
    return hidden.squeeze(0).mean(0)
|
|
|
|
|
def test_code(left_can="can_left_1", right_can="can_right_1"):
    """Smoke-test the full inference stack on CPU with random inputs.

    Loads the policy and language encoder, reads the instruction file,
    opens the joint controller, and runs one forward pass with dummy
    image/qpos tensors. No cameras or joint readers are required.

    Args:
        left_can: CAN bus name for the left arm controller.
        right_can: CAN bus name for the right arm controller.
            These used to be read from undefined globals (only bound inside
            the __main__ branch), so calling test_code() standalone raised
            NameError; they are now parameters with Player-1 defaults.
    """
    print("Testing the code...")

    device = 'cpu'
    ckpt_dir = 'policy/ACT_DP_multitask/checkpoints/real_fintune_50_2000/act_dp'
    config_path = os.path.join(ckpt_dir, 'policy_config.json')
    ckpt_path = os.path.join(ckpt_dir, 'policy_lastest_seed_0.ckpt')
    policy, cameras_name = get_model(config_path, ckpt_path, device)

    instruction_file = 'instruction.txt'
    with open(instruction_file, 'r') as f:
        instruction = f.readline().strip().strip('"')
    print(f"Instruction: {instruction}")
    tokenizer, text_encoder = get_language_encoder(device)
    task_emb = get_language_embed(tokenizer, text_encoder, instruction, device)

    # Constructed for its side effect (presumably opens the CAN buses —
    # TODO confirm); the object itself is not used by this smoke test.
    controller = ControlJoints(left_can=left_can, right_can=right_can)

    # Dummy observation: 3 cameras of 3x480x640 pixels, 14-DOF qpos.
    image = torch.rand(1, 3, 3, 480, 640)
    qpos = torch.rand(1, 14)
    actions = policy.get_action(qpos.float().to(device), image.float().to(device), task_emb.float().to(device))
    print(f"Actions shape: {actions.shape}")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Report the CUDA/cuDNN environment so deployment logs capture it.
    print(torch.backends.cudnn.enabled)
    print(torch.backends.cudnn.version())
    print(torch.version.cuda)
    print(torch.__version__)

    parser = argparse.ArgumentParser(description="Deploy the action for a specific player")
    parser.add_argument("--ckpt_path", type=str, default='policy/ACT_DP_multitask/checkpoints/real_fintune_50_2000/act_dp')
    # Parse once, up front (previously re-parsed further down the script).
    args = parser.parse_args()

    # PLAYER selects which robot (camera serials + CAN buses) this process
    # drives. Error messages below replace the original mojibake literals.
    player_value = os.getenv("PLAYER")
    if player_value is None:
        raise ValueError("Environment variable PLAYER is not set")
    try:
        player_value = int(player_value)
    except ValueError:
        raise ValueError("Environment variable PLAYER must be an integer")

    if player_value == 1:
        print("Player 1")
        cameras = [
            RealSenseCam("337322073280", "left_camera"),
            RealSenseCam("337322074191", "head_camera"),
            RealSenseCam("337122072617", "right_camera"),
        ]
        left_can, right_can = "can_left_1", "can_right_1"
    elif player_value == 2:
        print("Player 2")
        cameras = [
            RealSenseCam("250122079815", "left_camera"),
            RealSenseCam("048522073543", "head_camera"),
            RealSenseCam("030522070109", "right_camera"),
        ]
        left_can, right_can = "can_left_2", "can_right_2"
    else:
        raise ValueError("PLAYER must be 1 or 2")
    reader = JointReader(left_can=left_can, right_can=right_can)

    # Enumerate connected RealSense devices for diagnostics.
    ctx = rs.context()
    if len(ctx.devices) > 0:
        print("Found RealSense devices:")
        for d in ctx.devices:
            name = d.get_info(rs.camera_info.name)
            serial_number = d.get_info(rs.camera_info.serial_number)
            print(f"Device: {name}, Serial Number: {serial_number}")
    else:
        print("No Intel RealSense devices connected")

    for cam in cameras:
        cam.start()

    # Warm up: let the cameras settle and flush stale frames.
    for i in range(10):
        print(f"Warm up: {i}", end="\r")
        for cam in cameras:
            color_image = cam.get_latest_image()
        time.sleep(0.15)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    ckpt_dir = args.ckpt_path
    config_path = os.path.join(ckpt_dir, 'policy_config.json')
    ckpt_path = os.path.join(ckpt_dir, 'policy_epoch_600_seed_0.ckpt')
    policy, camera_names = get_model(config_path, ckpt_path, device)
    print('camera_names:', camera_names)

    instruction_file = 'instruction.txt'
    with open(instruction_file, 'r') as f:
        instruction = f.readline().strip()
    print(f"Using instruction: {instruction}")
    tokenizer, text_encoder = get_language_encoder(device)
    print('Loading language tokenizer and encoder...')
    task_emb = get_language_embed(tokenizer, text_encoder, instruction, device)

    controller = ControlJoints(left_can=left_can, right_can=right_can)
    max_timestep = 600
    step = 0
    # Main control loop: observe -> infer an action chunk -> execute it.
    while step < max_timestep:
        obs_image = get_image(cameras)
        joint = reader.get_joint_value()

        observation = get_observation(obs_image, joint)
        image, qpos = encode_obs(observation, camera_names)
        actions = policy.get_action(qpos.float().to(device), image.float().to(device), task_emb.float().to(device))
        print(f"Step: {step}/{max_timestep}, Action: {actions.shape}")
        # Execute the first 30 actions of the predicted chunk; each executed
        # action counts as one timestep.
        for action in actions[0:30]:
            controller.control(action)
            step += 1

            time.sleep(0.05)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|