import os
import sys

import cv2
import numpy as np
import torch
import yaml

# Make the project root (two levels up from this file) importable so the
# local `wm` package resolves when this is run as a standalone script.
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..")))

from wm.dataset.dataset import RoboticsDatasetWrapper


def _integrate_unicycle(actions, dt=0.1):
    """Integrate per-step (v, w) unicycle commands into a 2-D path.

    Args:
        actions: array-like of shape [T, 2]; column 0 is linear velocity v,
            column 1 is angular velocity w.
        dt: integration time step in seconds (default 0.1, i.e. 10 FPS).

    Returns:
        (path, thetas): ``path`` is an ``np.ndarray`` of shape [T, 2] holding
        (x, y) positions starting at the origin; ``thetas`` is a list of T
        headings in radians starting at 0.
    """
    x = y = theta = 0.0
    path = [[x, y]]
    thetas = [theta]
    # T positions arise from T-1 velocity commands; the final action is unused.
    for t in range(len(actions) - 1):
        v = actions[t, 0]
        w = actions[t, 1]
        # Semi-implicit Euler: update the heading first, then the position.
        theta += w * dt
        x += v * np.cos(theta) * dt
        y += v * np.sin(theta) * dt
        path.append([x, y])
        thetas.append(theta)
    return np.array(path), thetas


def create_recon_overlay_video(config_path, sample_idx=0, output_dir="results/visualizations"):
    """Render one RECON dataset sample as an MP4 with its action path overlaid.

    Integrates the sample's (v, w) actions into an (x, y) trajectory, maps it
    into pixel space (robot forward -> image up, robot left -> image left),
    and draws the fading path, current position, and heading arrow onto each
    observation frame.

    Args:
        config_path: path to a YAML config file.
        sample_idx: index of the dataset sample to visualize.
        output_dir: directory the MP4 is written to (created if missing).

    Raises:
        RuntimeError: if the OpenCV video writer cannot be opened.
    """
    os.makedirs(output_dir, exist_ok=True)

    # 1. Load config.
    # NOTE(review): the config is parsed but not consumed — the dataset name
    # is hard-coded to "recon" below. Confirm whether this is intentional.
    with open(config_path, 'r') as f:
        config = yaml.safe_load(f)

    # 2. Setup dataset (force RECON).
    print("Loading dataset: recon")
    dataset = RoboticsDatasetWrapper.get_dataset("recon")

    # 3. Fetch one sample.
    sample = dataset[sample_idx]
    obs = sample['obs']         # [T, 3, H, W], values in [0, 1]
    actions = sample['action']  # [T, 2] -> (v, w)

    if isinstance(obs, torch.Tensor):
        obs = obs.numpy()
    if isinstance(actions, torch.Tensor):
        actions = actions.numpy()

    T, C, H, W = obs.shape

    # 4. Integrate actions (v, w) -> (x, y). dt = 1/fps; default is 10 FPS.
    path, thetas = _integrate_unicycle(actions, dt=0.1)

    # 5. Scale the path to pixels, fitting it into the middle ~60% of the
    # frame (0.3 of min dimension on each side of center).
    max_disp = np.abs(path).max()
    scale = (min(H, W) * 0.3) / max_disp if max_disp > 0 else 1.0

    # Map to pixel coordinates relative to the image center:
    #   robot X (forward) -> pixel -Y (up)
    #   robot Y (left)    -> pixel -X (left)
    pixel_path = np.zeros_like(path)
    pixel_path[:, 0] = W // 2 - path[:, 1] * scale
    pixel_path[:, 1] = H // 2 - path[:, 0] * scale

    # 6. Video writer setup.
    video_path = os.path.join(output_dir, f"recon_overlay_video_sample_{sample_idx}.mp4")
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(video_path, fourcc, 10.0, (W, H))
    if not out.isOpened():
        raise RuntimeError(f"Could not open video writer for {video_path}")

    print(f"Generating RECON overlay video: {W}x{H}, {T} frames")
    try:
        for t in range(T):
            # [C, H, W] float in [0, 1] -> [H, W, C] uint8 BGR for OpenCV.
            frame = (np.transpose(obs[t], (1, 2, 0)) * 255).astype(np.uint8)
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)

            # Path history up to time t; color fades blue (start) -> red (end).
            for i in range(1, t + 1):
                pt1 = (int(pixel_path[i - 1, 0]), int(pixel_path[i - 1, 1]))
                pt2 = (int(pixel_path[i, 0]), int(pixel_path[i, 1]))
                color = (int(255 * (1 - i / T)), 0, int(255 * (i / T)))
                cv2.line(frame, pt1, pt2, color, 2, cv2.LINE_AA)

            # Current position (green dot).
            curr_pos = (int(pixel_path[t, 0]), int(pixel_path[t, 1]))
            cv2.circle(frame, curr_pos, 5, (0, 255, 0), -1, cv2.LINE_AA)

            # Heading arrow (white). Heading 0 points up (robot X -> pixel -Y);
            # heading pi/2 points left (robot Y -> pixel -X).
            curr_theta = thetas[t]
            arrow_len = 20
            adx = -np.sin(curr_theta) * arrow_len
            ady = -np.cos(curr_theta) * arrow_len
            arrow_end = (int(curr_pos[0] + adx), int(curr_pos[1] + ady))
            cv2.arrowedLine(frame, curr_pos, arrow_end, (255, 255, 255), 2, tipLength=0.3)

            # HUD: step index and current commanded velocities.
            cv2.putText(frame, f"Step: {t}", (10, 25),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)
            v, w = actions[t]
            cv2.putText(frame, f"v: {v:.2f} w: {w:.2f}", (10, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 255), 2)

            out.write(frame)
    finally:
        # Release the writer even if rendering fails, so a partial file is
        # flushed and the handle is not leaked.
        out.release()

    print(f"RECON overlay video saved to: {video_path}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", type=str,
                        default="wm/config/fulltraj_dit/lang_table.yaml")
    parser.add_argument("--sample", type=int, default=0)
    args = parser.parse_args()
    create_recon_overlay_video(args.config, args.sample)