Spaces:

gpue
/

nova-sim

Paused

Georg committed on Jan 16

Commit

436f91f

1 Parent(s): 375d9f7

Enhance mujoco_server.py and UR5 environment for new UR5e T-Push scene support

- Added support for a new UR5e T-Push scene, including environment initialization and task reward calculation.
- Updated robot switching logic to accommodate the new robot type and scene.
- Enhanced WebSocket API to handle gym-style interactions for the new scene, including reset and step functionalities.
- Modified UI elements to reflect the addition of the UR5e T-Push robot and its specific controls.
- Updated README.md to document the new gym-style WebSocket API and example payloads for the T-Push scene.

Files changed (4) hide show

README.md +30 -2
mujoco_server.py +219 -23
robots/ur5/model/scene_t_push.xml +367 -0
robots/ur5/ur5_env.py +27 -6

README.md CHANGED Viewed

@@ -30,6 +30,7 @@ A unified MuJoCo-based robot simulation platform with web interface for multiple
 - Real-time MuJoCo physics simulation
 - Web-based video streaming interface
 - WebSocket-based state/command communication
 - Interactive camera controls (rotate, zoom, pan)
 - Robot switching without restart
 - Keyboard and button controls for locomotion
@@ -39,11 +40,20 @@ A unified MuJoCo-based robot simulation platform with web interface for multiple
 ### Native (Recommended for Development)
 ```bash
 # Install dependencies
 pip install mujoco gymnasium flask flask-sock opencv-python torch numpy
-# Optional: For PyMPC gait controller
-pip install jax jaxlib quadruped-pympc gym-quadruped
 # Start the server
 python mujoco_server.py
@@ -51,6 +61,24 @@ python mujoco_server.py
 # Open browser at http://localhost:3004/nova-sim/api/v1
 ```
 ### Docker
 ```bash

 - Real-time MuJoCo physics simulation
 - Web-based video streaming interface
 - WebSocket-based state/command communication
+- Gym-style WebSocket API for RL/IL clients
 - Interactive camera controls (rotate, zoom, pan)
 - Robot switching without restart
 - Keyboard and button controls for locomotion
 ### Native (Recommended for Development)
 ```bash
+# Create and activate a virtualenv
+python3 -m venv .venv
+source .venv/bin/activate
 # Install dependencies
 pip install mujoco gymnasium flask flask-sock opencv-python torch numpy
+# Optional: For PyMPC gait controller (Quadruped-PyMPC isn't on PyPI)
+pip install jax jaxlib gym-quadruped
+# Quadruped-PyMPC needs submodules; install from a local clone
+git clone --recurse-submodules https://github.com/iit-DLSLab/Quadruped-PyMPC
+cd Quadruped-PyMPC
+pip install -e .
 # Start the server
 python mujoco_server.py
 # Open browser at http://localhost:3004/nova-sim/api/v1
 ```
+If you see `ModuleNotFoundError: No module named 'cv2'`, make sure the venv is activated
+and `opencv-python` is installed in it.
+## Gym WebSocket API (RL/IL)
+The gym-style API is exposed at `ws://localhost:3004/nova-sim/api/v1/gym/ws`.
+It supports `reset`, `step`, `configure`, and `get_spaces`.
+Example request payloads:
+```json
+{"type": "configure", "data": {"robot": "ur5_t_push"}}
+{"type": "reset"}
+{"type": "step", "data": {"action": [0,0,0,0,0,0,0], "render": false}}
+```
+The server responds with `gym_reset`, `gym_step`, `gym_spaces`, or `gym_configured` messages.
 ### Docker
 ```bash

mujoco_server.py CHANGED Viewed

@@ -3,6 +3,7 @@ import sys
 import time
 import threading
 import json
 import cv2
 import numpy as np
 import mujoco
@@ -43,12 +44,13 @@ TARGET_FPS = int(os.environ.get('TARGET_FPS', 30 if IN_DOCKER else 60))
 SIM_STEPS_PER_FRAME = int(os.environ.get('SIM_STEPS_PER_FRAME', 10 if IN_DOCKER else 5))
 # Current robot type
-current_robot = "g1"  # "g1", "spot", or "ur5"
 # Environment instances (lazy loaded)
 env_g1 = None
 env_spot = None
 env_ur5 = None
 env = None  # Active environment
 # Simulation state
@@ -98,15 +100,27 @@ def init_spot():
     return env_spot
-def init_ur5():
     """Initialize UR5e environment."""
-    global env_ur5
     if env_ur5 is None:
-        # Import UR5Env from robots/ur5 directory
-        ur5_dir = os.path.join(_nova_sim_dir, 'robots', 'ur5')
-        sys.path.insert(0, ur5_dir)
-        from ur5_env import UR5Env
-        sys.path.pop(0)
         env_ur5 = UR5Env(render_mode="rgb_array", width=RENDER_WIDTH, height=RENDER_HEIGHT)
         env_ur5.reset()
     return env_ur5
@@ -135,11 +149,17 @@ def switch_robot(robot_type):
         cam.lookat = np.array([0, 0, 0.4])
         cam.distance = 2.5
     elif robot_type == "ur5":
-        env = init_ur5()
         cam.lookat = np.array([0.3, 0, 0.6])
         cam.distance = 1.8
         cam.azimuth = 150
         cam.elevation = -25
     else:
         print(f"Unknown robot type: {robot_type}")
         return
@@ -166,7 +186,7 @@ def broadcast_state():
         steps = env.steps
         # UR5 has different state structure
-        if current_robot == "ur5":
             ee_pos = env.get_end_effector_pos()
             ee_quat = env.get_end_effector_quat()
             target = env.get_target()
@@ -190,7 +210,8 @@ def broadcast_state():
                     'joint_targets': [float(j) for j in joint_targets],
                     'control_mode': control_mode,
                     'use_orientation': use_orientation,
-                    'steps': int(steps)
                 }
             })
         else:
@@ -255,7 +276,7 @@ def simulation_loop():
                     env.step_with_controller(dt=sim_dt)
                 # Update camera to follow robot (not for UR5 which is stationary)
-                if camera_follow and current_robot != "ur5":
                     robot_pos = env.data.qpos[:3]
                     cam.lookat[0] = robot_pos[0]
                     cam.lookat[1] = robot_pos[1]
@@ -334,7 +355,7 @@ def handle_ws_message(data):
         if current_robot == "g1":
             cam.distance = 3.0
             cam.lookat = np.array([0.0, 0.0, 0.8])
-        elif current_robot == "ur5":
             cam.distance = 1.8
             cam.lookat = np.array([0.3, 0.0, 0.6])
             cam.azimuth = 150
@@ -381,7 +402,7 @@ def handle_ws_message(data):
         y = payload.get('y', 0.0)
         z = payload.get('z', 0.6)
         with mujoco_lock:
-            if env is not None and current_robot == "ur5":
                 env.set_target(x, y, z)
     elif msg_type == 'gripper':
@@ -395,14 +416,14 @@ def handle_ws_message(data):
         else:
             value = payload.get('value', 128)
         with mujoco_lock:
-            if env is not None and current_robot == "ur5":
                 env.set_gripper(value)
     elif msg_type == 'control_mode':
         payload = data.get('data', {})
         mode = payload.get('mode', 'ik')
         with mujoco_lock:
-            if env is not None and current_robot == "ur5":
                 env.set_control_mode(mode)
     elif msg_type == 'joint_positions':
@@ -410,7 +431,7 @@ def handle_ws_message(data):
         positions = payload.get('positions', [])
         if len(positions) == 6:
             with mujoco_lock:
-                if env is not None and current_robot == "ur5":
                     env.set_joint_positions(positions)
     elif msg_type == 'arm_orientation':
@@ -419,17 +440,101 @@ def handle_ws_message(data):
         pitch = payload.get('pitch', np.pi/2)
         yaw = payload.get('yaw', 0.0)
         with mujoco_lock:
-            if env is not None and current_robot == "ur5":
                 env.set_target_orientation(roll, pitch, yaw)
     elif msg_type == 'use_orientation':
         payload = data.get('data', {})
         use = payload.get('enabled', True)
         with mujoco_lock:
-            if env is not None and current_robot == "ur5":
                 env.set_use_orientation(use)
 @sock.route(f'{API_PREFIX}/ws')
 def websocket_handler(ws):
     """Handle WebSocket connections."""
@@ -460,6 +565,87 @@ def websocket_handler(ws):
         print('WebSocket client disconnected')
 # Serve UI at /nova-sim (no redirect)
 @app.route('/nova-sim')
 @app.route('/nova-sim/')
@@ -679,6 +865,7 @@ def index():
                 EE Pos: <span id="ee_pos">0.00, 0.00, 0.00</span><br>
                 EE Ori: <span id="ee_ori">0.00, 0.00, 0.00</span><br>
                 Gripper: <span id="gripper_val">50%</span><br>
                 Mode: <span id="control_mode_display">IK</span> | Steps: <span id="arm_step_val">0</span>
             </div>
         </div>
@@ -711,6 +898,7 @@ def index():
                         <option value="g1">Unitree G1 (Humanoid)</option>
                         <option value="spot">Boston Dynamics Spot (Quadruped)</option>
                         <option value="ur5">Universal Robots UR5e (Arm)</option>
                     </select>
                     <div class="robot-info" id="robot_info">
                         29 DOF humanoid with RL walking policy
@@ -870,13 +1058,15 @@ def index():
             const robotInfoText = {
                 'g1': '29 DOF humanoid with RL walking policy',
                 'spot': '12 DOF quadruped with trot gait controller',
-                'ur5': '6 DOF robot arm with Robotiq gripper'
             };
             const robotTitles = {
                 'g1': 'Unitree G1 Humanoid',
                 'spot': 'Boston Dynamics Spot',
-                'ur5': 'Universal Robots UR5e'
             };
             const locomotionControls = document.getElementById('locomotion_controls');
@@ -924,7 +1114,7 @@ def index():
                         if (msg.type === 'state') {
                             const data = msg.data;
-                            if (data.robot === 'ur5') {
                                 // UR5 state
                                 const ee = data.end_effector;
                                 document.getElementById('ee_pos').innerText =
@@ -942,6 +1132,12 @@ def index():
                                 document.getElementById('gripper_val').innerText =
                                     ((255 - data.gripper) / 255 * 100).toFixed(0) + '% open';
                                 document.getElementById('arm_step_val').innerText = data.steps;
                                 // Update joint position display (actual positions)
                                 if (data.joint_positions) {
@@ -1055,7 +1251,7 @@ def index():
                 robotInfo.innerText = robotInfoText[robot] || '';
                 // Toggle controls based on robot type
-                if (robot === 'ur5') {
                     locomotionControls.classList.add('hidden');
                     armControls.classList.add('active');
                     document.getElementById('locomotion_state').style.display = 'none';

 import time
 import threading
 import json
+import base64
 import cv2
 import numpy as np
 import mujoco
 SIM_STEPS_PER_FRAME = int(os.environ.get('SIM_STEPS_PER_FRAME', 10 if IN_DOCKER else 5))
 # Current robot type
+current_robot = "g1"  # "g1", "spot", "ur5", or "ur5_t_push"
 # Environment instances (lazy loaded)
 env_g1 = None
 env_spot = None
 env_ur5 = None
+env_ur5_t_push = None
 env = None  # Active environment
 # Simulation state
     return env_spot
def init_ur5(scene_name="scene"):
    """Initialize (lazily) and return a UR5e environment for the given scene.

    Two environments are cached in module globals: one for the T-push scene
    (``scene_name == "scene_t_push"``) and one default environment used for
    every other ``scene_name`` value. Each is created and reset on first use
    and returned unchanged on later calls.

    Args:
        scene_name: ``"scene_t_push"`` selects the T-push environment; any
            other value selects the default UR5e environment.

    Returns:
        The cached ``UR5Env`` instance for the requested scene.
    """
    global env_ur5, env_ur5_t_push

    # Import UR5Env from robots/ur5 directory. The directory is only put on
    # sys.path for the duration of the import; try/finally guarantees it is
    # taken off again even when the import fails, and removing by value (not
    # pop(0)) stays correct if the import itself touched sys.path.
    ur5_dir = os.path.join(_nova_sim_dir, 'robots', 'ur5')
    sys.path.insert(0, ur5_dir)
    try:
        from ur5_env import UR5Env
    finally:
        if ur5_dir in sys.path:
            sys.path.remove(ur5_dir)

    if scene_name == "scene_t_push":
        if env_ur5_t_push is None:
            env_ur5_t_push = UR5Env(
                render_mode="rgb_array",
                width=RENDER_WIDTH,
                height=RENDER_HEIGHT,
                scene_name="scene_t_push",
            )
            env_ur5_t_push.reset()
        return env_ur5_t_push

    if env_ur5 is None:
        env_ur5 = UR5Env(render_mode="rgb_array", width=RENDER_WIDTH, height=RENDER_HEIGHT)
        env_ur5.reset()
    return env_ur5
         cam.lookat = np.array([0, 0, 0.4])
         cam.distance = 2.5
     elif robot_type == "ur5":
+        env = init_ur5("scene")
         cam.lookat = np.array([0.3, 0, 0.6])
         cam.distance = 1.8
         cam.azimuth = 150
         cam.elevation = -25
+    elif robot_type == "ur5_t_push":
+        env = init_ur5("scene_t_push")
+        cam.lookat = np.array([0.5, 0, 0.55])
+        cam.distance = 1.9
+        cam.azimuth = 150
+        cam.elevation = -25
     else:
         print(f"Unknown robot type: {robot_type}")
         return
         steps = env.steps
         # UR5 has different state structure
+        if current_robot in ("ur5", "ur5_t_push"):
             ee_pos = env.get_end_effector_pos()
             ee_quat = env.get_end_effector_quat()
             target = env.get_target()
                     'joint_targets': [float(j) for j in joint_targets],
                     'control_mode': control_mode,
                     'use_orientation': use_orientation,
+                    'steps': int(steps),
+                    'reward': env.get_task_reward()
                 }
             })
         else:
                     env.step_with_controller(dt=sim_dt)
                 # Update camera to follow robot (not for UR5 which is stationary)
+                if camera_follow and current_robot not in ("ur5", "ur5_t_push"):
                     robot_pos = env.data.qpos[:3]
                     cam.lookat[0] = robot_pos[0]
                     cam.lookat[1] = robot_pos[1]
         if current_robot == "g1":
             cam.distance = 3.0
             cam.lookat = np.array([0.0, 0.0, 0.8])
+        elif current_robot in ("ur5", "ur5_t_push"):
             cam.distance = 1.8
             cam.lookat = np.array([0.3, 0.0, 0.6])
             cam.azimuth = 150
         y = payload.get('y', 0.0)
         z = payload.get('z', 0.6)
         with mujoco_lock:
+            if env is not None and current_robot in ("ur5", "ur5_t_push"):
                 env.set_target(x, y, z)
     elif msg_type == 'gripper':
         else:
             value = payload.get('value', 128)
         with mujoco_lock:
+            if env is not None and current_robot in ("ur5", "ur5_t_push"):
                 env.set_gripper(value)
     elif msg_type == 'control_mode':
         payload = data.get('data', {})
         mode = payload.get('mode', 'ik')
         with mujoco_lock:
+            if env is not None and current_robot in ("ur5", "ur5_t_push"):
                 env.set_control_mode(mode)
     elif msg_type == 'joint_positions':
         positions = payload.get('positions', [])
         if len(positions) == 6:
             with mujoco_lock:
+                if env is not None and current_robot in ("ur5", "ur5_t_push"):
                     env.set_joint_positions(positions)
     elif msg_type == 'arm_orientation':
         pitch = payload.get('pitch', np.pi/2)
         yaw = payload.get('yaw', 0.0)
         with mujoco_lock:
+            if env is not None and current_robot in ("ur5", "ur5_t_push"):
                 env.set_target_orientation(roll, pitch, yaw)
     elif msg_type == 'use_orientation':
         payload = data.get('data', {})
         use = payload.get('enabled', True)
         with mujoco_lock:
+            if env is not None and current_robot in ("ur5", "ur5_t_push"):
                 env.set_use_orientation(use)
+def _serialize_space(space):
+    if hasattr(space, "low") and hasattr(space, "high"):
+        return {
+            "type": "box",
+            "shape": list(space.shape),
+            "low": space.low.tolist(),
+            "high": space.high.tolist(),
+            "dtype": str(space.dtype),
+        }
+    return {"type": "unknown"}
+def _resolve_robot_scene(robot, scene):
+    if robot == "ur5_t_push" and scene is None:
+        return "ur5", "scene_t_push"
+    if robot == "ur5" and scene is None:
+        return "ur5", "scene"
+    return robot, scene
def _create_env(robot, scene):
    """Build a fresh environment instance for a gym session.

    Args:
        robot: ``"g1"``, ``"spot"`` or ``"ur5"``.
        scene: Optional scene name; only used for ``"ur5"``, where a truthy
            value is forwarded as ``scene_name``.

    Returns:
        A newly constructed environment (not yet reset).

    Raises:
        ValueError: If ``robot`` is not one of the supported identifiers.
    """
    if robot == "g1":
        return G1Env(render_mode="rgb_array", width=RENDER_WIDTH, height=RENDER_HEIGHT)

    if robot == "spot":
        spot_dir = os.path.join(_nova_sim_dir, 'robots', 'spot')
        # Expose the robot directory on sys.path only while importing; the
        # finally clause restores sys.path even if the import raises.
        sys.path.insert(0, spot_dir)
        try:
            from spot_env import SpotEnv
        finally:
            if spot_dir in sys.path:
                sys.path.remove(spot_dir)
        return SpotEnv(render_mode="rgb_array", width=RENDER_WIDTH, height=RENDER_HEIGHT, controller_type='pympc')

    if robot == "ur5":
        ur5_dir = os.path.join(_nova_sim_dir, 'robots', 'ur5')
        sys.path.insert(0, ur5_dir)
        try:
            from ur5_env import UR5Env
        finally:
            if ur5_dir in sys.path:
                sys.path.remove(ur5_dir)
        env_kwargs = {
            "render_mode": "rgb_array",
            "width": RENDER_WIDTH,
            "height": RENDER_HEIGHT,
        }
        # Leave scene_name out entirely when no scene was requested so the
        # environment's own default applies.
        if scene:
            env_kwargs["scene_name"] = scene
        return UR5Env(**env_kwargs)

    raise ValueError(f"Unsupported robot for gym: {robot}")
class GymSession:
    """Per-connection wrapper around a single gym-style environment.

    The wrapped environment is used through ``reset`` (returning
    ``(obs, info)``), ``step`` (returning a 5-tuple), ``render`` and
    ``close``. The most recent observation is kept in ``last_obs``.
    """

    def __init__(self, robot="ur5", scene=None):
        """Create and reset the environment for ``robot`` / ``scene``."""
        robot, scene = _resolve_robot_scene(robot, scene)
        self.robot = robot
        self.scene = scene
        self.env = _create_env(robot, scene)
        self.last_obs, _ = self.env.reset()

    def configure(self, robot, scene=None):
        """Switch the session to a different robot / scene.

        The replacement environment is created and reset *before* the
        current one is closed, so a failed configure (for example an
        unsupported robot) leaves the session on its existing, still-open
        environment instead of a closed one.
        """
        robot, scene = _resolve_robot_scene(robot, scene)
        new_env = _create_env(robot, scene)
        try:
            first_obs, _ = new_env.reset()
        except Exception:
            # Do not leak the half-initialized replacement.
            new_env.close()
            raise

        old_env = self.env
        self.robot = robot
        self.scene = scene
        self.env = new_env
        self.last_obs = first_obs
        if old_env is not None:
            old_env.close()

    def reset(self, seed=None):
        """Reset the environment and return ``(obs, info)``."""
        obs, info = self.env.reset(seed=seed)
        self.last_obs = obs
        return obs, info

    def step(self, action):
        """Apply ``action`` (converted to a float32 array) and return the step tuple."""
        action = np.array(action, dtype=np.float32)
        obs, reward, terminated, truncated, info = self.env.step(action)
        self.last_obs = obs
        return obs, reward, terminated, truncated, info

    def render_jpeg(self):
        """Render a frame and return it as base64-encoded JPEG text.

        Returns None when the environment yields no frame or JPEG encoding
        fails.
        """
        frame = self.env.render()
        if frame is None:
            return None
        # The frame is converted RGB -> BGR before being handed to OpenCV.
        frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        ret, buffer = cv2.imencode('.jpg', frame_bgr)
        if not ret:
            return None
        return base64.b64encode(buffer.tobytes()).decode("ascii")

    def close(self):
        """Close the environment; safe to call more than once."""
        if self.env is not None:
            self.env.close()
            self.env = None
 @sock.route(f'{API_PREFIX}/ws')
 def websocket_handler(ws):
     """Handle WebSocket connections."""
         print('WebSocket client disconnected')
def _build_gym_response(session, msg_type, payload):
    """Execute one gym request against ``session`` and return the reply dict.

    Handles ``reset``, ``step``, ``configure`` and ``get_spaces``; any other
    ``msg_type`` yields a ``gym_error`` reply. Exceptions raised by the
    session propagate to the caller.
    """
    if msg_type == "reset":
        obs, info = session.reset(seed=payload.get("seed"))
        return {
            "type": "gym_reset",
            "data": {"obs": obs.tolist(), "info": info},
        }

    if msg_type == "step":
        action = payload.get("action", [])
        obs, reward, terminated, truncated, info = session.step(action)
        response = {
            "type": "gym_step",
            "data": {
                "obs": obs.tolist(),
                "reward": float(reward),
                "terminated": bool(terminated),
                "truncated": bool(truncated),
                "info": info,
            },
        }
        if payload.get("render", False):
            response["data"]["frame_jpeg"] = session.render_jpeg()
        return response

    if msg_type == "configure":
        session.configure(payload.get("robot", "ur5"), payload.get("scene"))
        return {
            "type": "gym_configured",
            "data": {"robot": session.robot, "scene": session.scene},
        }

    if msg_type == "get_spaces":
        return {
            "type": "gym_spaces",
            "data": {
                "action_space": _serialize_space(session.env.action_space),
                "observation_space": _serialize_space(session.env.observation_space),
            },
        }

    return {
        "type": "gym_error",
        "message": f"Unknown message type: {msg_type}",
    }


@sock.route(f'{API_PREFIX}/gym/ws')
def gym_websocket_handler(ws):
    """Gym-style WebSocket API for RL/IL clients.

    Each connection gets its own ``GymSession``. Messages are JSON objects
    with a ``type``, an optional ``data`` payload and an optional ``id``
    that is echoed back on the reply. The session is always closed when the
    connection ends.
    """
    session = GymSession()
    try:
        while True:
            message = ws.receive()
            if message is None:
                break

            try:
                data = json.loads(message)
            except ValueError:
                # json.JSONDecodeError and undecodable bytes are both ValueErrors.
                ws.send(json.dumps({"type": "gym_error", "message": "Invalid JSON"}))
                continue

            # Valid JSON that is not an object (e.g. a list or number) has no
            # .get(); report it instead of letting the handler crash.
            if not isinstance(data, dict):
                ws.send(json.dumps({
                    "type": "gym_error",
                    "message": "Invalid message: expected a JSON object",
                }))
                continue

            msg_type = data.get("type")
            # An explicit null payload is treated like a missing one.
            payload = data.get("data") or {}
            msg_id = data.get("id")

            try:
                if msg_type == "close":
                    response = {"type": "gym_closed"}
                else:
                    response = _build_gym_response(session, msg_type, payload)
                if msg_id is not None:
                    response["id"] = msg_id
                ws.send(json.dumps(response))
            except Exception as e:
                # Boundary handler: report any failure to the client and keep
                # the connection alive for further requests.
                error_response = {"type": "gym_error", "message": str(e)}
                if msg_id is not None:
                    error_response["id"] = msg_id
                ws.send(json.dumps(error_response))
                continue

            if msg_type == "close":
                break
    finally:
        session.close()
 # Serve UI at /nova-sim (no redirect)
 @app.route('/nova-sim')
 @app.route('/nova-sim/')
                 EE Pos: <span id="ee_pos">0.00, 0.00, 0.00</span><br>
                 EE Ori: <span id="ee_ori">0.00, 0.00, 0.00</span><br>
                 Gripper: <span id="gripper_val">50%</span><br>
+                Reward: <span id="arm_reward">-</span><br>
                 Mode: <span id="control_mode_display">IK</span> | Steps: <span id="arm_step_val">0</span>
             </div>
         </div>
                         <option value="g1">Unitree G1 (Humanoid)</option>
                         <option value="spot">Boston Dynamics Spot (Quadruped)</option>
                         <option value="ur5">Universal Robots UR5e (Arm)</option>
+                        <option value="ur5_t_push">UR5e T-Push Scene</option>
                     </select>
                     <div class="robot-info" id="robot_info">
                         29 DOF humanoid with RL walking policy
             const robotInfoText = {
                 'g1': '29 DOF humanoid with RL walking policy',
                 'spot': '12 DOF quadruped with trot gait controller',
+                'ur5': '6 DOF robot arm with Robotiq gripper',
+                'ur5_t_push': 'UR5e T-push task with stick tool'
             };
             const robotTitles = {
                 'g1': 'Unitree G1 Humanoid',
                 'spot': 'Boston Dynamics Spot',
+                'ur5': 'Universal Robots UR5e',
+                'ur5_t_push': 'UR5e T-Push Scene'
             };
             const locomotionControls = document.getElementById('locomotion_controls');
                         if (msg.type === 'state') {
                             const data = msg.data;
+                            if (data.robot === 'ur5' || data.robot === 'ur5_t_push') {
                                 // UR5 state
                                 const ee = data.end_effector;
                                 document.getElementById('ee_pos').innerText =
                                 document.getElementById('gripper_val').innerText =
                                     ((255 - data.gripper) / 255 * 100).toFixed(0) + '% open';
                                 document.getElementById('arm_step_val').innerText = data.steps;
+                                const rewardEl = document.getElementById('arm_reward');
+                                if (data.reward === null || data.reward === undefined) {
+                                    rewardEl.innerText = '-';
+                                } else {
+                                    rewardEl.innerText = data.reward.toFixed(3);
+                                }
                                 // Update joint position display (actual positions)
                                 if (data.joint_positions) {
                 robotInfo.innerText = robotInfoText[robot] || '';
                 // Toggle controls based on robot type
+                if (robot === 'ur5' || robot === 'ur5_t_push') {
                     locomotionControls.classList.add('hidden');
                     armControls.classList.add('active');
                     document.getElementById('locomotion_state').style.display = 'none';

robots/ur5/model/scene_t_push.xml ADDED Viewed

	@@ -0,0 +1,367 @@

+<mujoco model="ur5e_with_gripper">
+  <compiler angle="radian" meshdir="assets" autolimits="true"/>
+  <option integrator="implicitfast" cone="elliptic" impratio="10"/>
+  <!-- Wandelbots Corporate Design Colors:
+       Primary Dark: #01040f (0.004, 0.016, 0.059)
+       Light/Secondary: #bcbeec (0.737, 0.745, 0.925)
+       Accent: #211c44 (0.129, 0.110, 0.267)
+       Highlight: #8b7fef (0.545, 0.498, 0.937)
+  -->
+  <visual>
+    <headlight diffuse="0.6 0.6 0.6" ambient="0.35 0.35 0.4" specular="0 0 0"/>
+    <rgba haze="0.02 0.04 0.12 1"/>
+    <global azimuth="120" elevation="-20"/>
+  </visual>
+  <asset>
+    <!-- Wandelbots gradient skybox - deep purple to near black -->
+    <texture type="skybox" builtin="gradient" rgb1="0.13 0.11 0.27" rgb2="0.004 0.016 0.059" width="512" height="3072"/>
+    <!-- Ground with Wandelbots purple accent -->
+    <texture type="2d" name="groundplane" builtin="checker" mark="edge" rgb1="0.08 0.07 0.15" rgb2="0.04 0.04 0.08"
+      markrgb="0.55 0.5 0.94" width="300" height="300"/>
+    <material name="groundplane" texture="groundplane" texuniform="true" texrepeat="5 5" reflectance="0.15"/>
+    <!-- Table with subtle purple tint -->
+    <material name="table" rgba="0.18 0.16 0.25 1" specular="0.4" shininess="0.4"/>
+    <!-- Target marker with Wandelbots highlight color -->
+    <material name="target_mat" rgba="0.55 0.5 0.94 0.6" specular="0.5" shininess="0.5"/>
+    <!-- T-push scene materials -->
+    <material name="t_target_mat" rgba="0.2 0.7 0.35 0.25" specular="0.2" shininess="0.2"/>
+    <material name="t_object_mat" rgba="0.55 0.65 0.98 1" specular="0.3" shininess="0.2"/>
+    <material name="stick_mat" rgba="0.6 0.6 0.62 1" specular="0.4" shininess="0.3"/>
+    <!-- UR5e materials - with Wandelbots accent colors -->
+    <material name="black" rgba="0.02 0.02 0.04 1" specular="0.5" shininess="0.25"/>
+    <material name="jointgray" rgba="0.22 0.22 0.26 1" specular="0.5" shininess="0.25"/>
+    <material name="linkgray" rgba="0.74 0.75 0.82 1" specular="0.5" shininess="0.25"/>
+    <!-- Wandelbots purple accent instead of UR blue -->
+    <material name="urblue" rgba="0.55 0.5 0.94 1" specular="0.6" shininess="0.35"/>
+    <!-- Gripper materials -->
+    <material name="metal" rgba="0.58 0.58 0.58 1"/>
+    <material name="silicone" rgba="0.1882 0.1882 0.1882 1"/>
+    <material name="gray" rgba="0.4627 0.4627 0.4627 1"/>
+    <!-- UR5e meshes -->
+    <mesh file="base_0.obj"/>
+    <mesh file="base_1.obj"/>
+    <mesh file="shoulder_0.obj"/>
+    <mesh file="shoulder_1.obj"/>
+    <mesh file="shoulder_2.obj"/>
+    <mesh file="upperarm_0.obj"/>
+    <mesh file="upperarm_1.obj"/>
+    <mesh file="upperarm_2.obj"/>
+    <mesh file="upperarm_3.obj"/>
+    <mesh file="forearm_0.obj"/>
+    <mesh file="forearm_1.obj"/>
+    <mesh file="forearm_2.obj"/>
+    <mesh file="forearm_3.obj"/>
+    <mesh file="wrist1_0.obj"/>
+    <mesh file="wrist1_1.obj"/>
+    <mesh file="wrist1_2.obj"/>
+    <mesh file="wrist2_0.obj"/>
+    <mesh file="wrist2_1.obj"/>
+    <mesh file="wrist2_2.obj"/>
+    <mesh file="wrist3.obj"/>
+    <!-- Gripper meshes -->
+    <mesh name="base_mount" file="base_mount.stl" scale="0.001 0.001 0.001"/>
+    <mesh name="base_g" file="base.stl" scale="0.001 0.001 0.001"/>
+    <mesh name="driver" file="driver.stl" scale="0.001 0.001 0.001"/>
+    <mesh name="coupler" file="coupler.stl" scale="0.001 0.001 0.001"/>
+    <mesh name="follower" file="follower.stl" scale="0.001 0.001 0.001"/>
+    <mesh name="pad" file="pad.stl" scale="0.001 0.001 0.001"/>
+    <mesh name="silicone_pad" file="silicone_pad.stl" scale="0.001 0.001 0.001"/>
+    <mesh name="spring_link" file="spring_link.stl" scale="0.001 0.001 0.001"/>
+  </asset>
+  <default>
+    <default class="ur5e">
+      <joint axis="0 1 0" range="-6.28319 6.28319" armature="0.1"/>
+      <general gaintype="fixed" biastype="affine" ctrlrange="-6.2831 6.2831" gainprm="2000" biasprm="0 -2000 -400" forcerange="-150 150"/>
+      <default class="size3">
+        <default class="size3_limited">
+          <joint range="-3.1415 3.1415"/>
+          <general ctrlrange="-3.1415 3.1415"/>
+        </default>
+      </default>
+      <default class="size1">
+        <general gainprm="500" biasprm="0 -500 -100" forcerange="-28 28"/>
+      </default>
+      <default class="visual">
+        <geom type="mesh" contype="0" conaffinity="0" group="2"/>
+      </default>
+      <default class="collision">
+        <geom type="capsule" group="3"/>
+        <default class="eef_collision">
+          <geom type="cylinder"/>
+        </default>
+      </default>
+      <site size="0.001" rgba="0.5 0.5 0.5 0.3" group="4"/>
+    </default>
+    <default class="gripper">
+      <general biastype="affine"/>
+      <joint axis="1 0 0"/>
+      <default class="driver_j">
+        <joint range="0 0.8" armature="0.005" damping="0.1" solimplimit="0.95 0.99 0.001" solreflimit="0.005 1"/>
+      </default>
+      <default class="follower_j">
+        <joint range="-0.872664 0.872664" armature="0.001" pos="0 -0.018 0.0065" solimplimit="0.95 0.99 0.001" solreflimit="0.005 1"/>
+      </default>
+      <default class="spring_link_j">
+        <joint range="-0.29670597283 0.8" armature="0.001" stiffness="0.05" springref="2.62" damping="0.00125"/>
+      </default>
+      <default class="coupler_j">
+        <joint range="-1.57 0" armature="0.001" solimplimit="0.95 0.99 0.001" solreflimit="0.005 1"/>
+      </default>
+      <default class="visual_g">
+        <geom type="mesh" contype="0" conaffinity="0" group="2"/>
+      </default>
+      <default class="collision_g">
+        <geom type="mesh" group="3"/>
+        <default class="pad_box1">
+          <geom mass="0" type="box" pos="0 -0.0026 0.028125" size="0.011 0.004 0.009375" friction="0.7"
+            solimp="0.95 0.99 0.001" solref="0.004 1" priority="1" rgba="0.55 0.55 0.55 1"/>
+        </default>
+        <default class="pad_box2">
+          <geom mass="0" type="box" pos="0 -0.0026 0.009375" size="0.011 0.004 0.009375" friction="0.6"
+            solimp="0.95 0.99 0.001" solref="0.004 1" priority="1" rgba="0.45 0.45 0.45 1"/>
+        </default>
+      </default>
+    </default>
+  </default>
+  <worldbody>
+    <light pos="0 0 3.5" dir="0 0 -1" directional="true"/>
+    <geom name="floor" size="0 0 0.05" type="plane" material="groundplane"/>
+    <!-- Table -->
+    <body name="table" pos="0 0 0">
+      <geom name="table_top" type="box" pos="0.5 0 0.4" size="0.4 0.6 0.02" material="table"/>
+      <geom name="table_leg1" type="box" pos="0.2 0.4 0.2" size="0.03 0.03 0.2" material="table"/>
+      <geom name="table_leg2" type="box" pos="0.2 -0.4 0.2" size="0.03 0.03 0.2" material="table"/>
+      <geom name="table_leg3" type="box" pos="0.8 0.4 0.2" size="0.03 0.03 0.2" material="table"/>
+      <geom name="table_leg4" type="box" pos="0.8 -0.4 0.2" size="0.03 0.03 0.2" material="table"/>
+    </body>
+    <!-- Target visualization sphere (for IK target).
+         Declared as a mocap body, so its pose is set from code rather than by the physics;
+         the sphere has contype=0 and conaffinity=0 and therefore never produces contacts.
+         ur5_env.py looks this body up by the name "target". -->
+    <body name="target" pos="0.4 0.0 0.6" mocap="true">
+      <geom name="target_vis" type="sphere" size="0.03" material="target_mat" contype="0" conaffinity="0"/>
+    </body>
+    <!-- UR5e robot mounted on table edge -->
+    <body name="base" pos="0 0 0.42" quat="0 0 0 -1" childclass="ur5e">
+      <inertial mass="4.0" pos="0 0 0" diaginertia="0.00443333156 0.00443333156 0.0072"/>
+      <geom mesh="base_0" material="black" class="visual"/>
+      <geom mesh="base_1" material="jointgray" class="visual"/>
+      <body name="shoulder_link" pos="0 0 0.163">
+        <inertial mass="3.7" pos="0 0 0" diaginertia="0.0102675 0.0102675 0.00666"/>
+        <joint name="shoulder_pan_joint" class="size3" axis="0 0 1"/>
+        <geom mesh="shoulder_0" material="urblue" class="visual"/>
+        <geom mesh="shoulder_1" material="black" class="visual"/>
+        <geom mesh="shoulder_2" material="jointgray" class="visual"/>
+        <geom class="collision" size="0.06 0.06" pos="0 0 -0.04"/>
+        <body name="upper_arm_link" pos="0 0.138 0" quat="1 0 1 0">
+          <inertial mass="8.393" pos="0 0 0.2125" diaginertia="0.133886 0.133886 0.0151074"/>
+          <joint name="shoulder_lift_joint" class="size3"/>
+          <geom mesh="upperarm_0" material="linkgray" class="visual"/>
+          <geom mesh="upperarm_1" material="black" class="visual"/>
+          <geom mesh="upperarm_2" material="jointgray" class="visual"/>
+          <geom mesh="upperarm_3" material="urblue" class="visual"/>
+          <geom class="collision" pos="0 -0.04 0" quat="1 1 0 0" size="0.06 0.06"/>
+          <geom class="collision" size="0.05 0.2" pos="0 0 0.2"/>
+          <body name="forearm_link" pos="0 -0.131 0.425">
+            <inertial mass="2.275" pos="0 0 0.196" diaginertia="0.0311796 0.0311796 0.004095"/>
+            <joint name="elbow_joint" class="size3_limited"/>
+            <geom mesh="forearm_0" material="urblue" class="visual"/>
+            <geom mesh="forearm_1" material="linkgray" class="visual"/>
+            <geom mesh="forearm_2" material="black" class="visual"/>
+            <geom mesh="forearm_3" material="jointgray" class="visual"/>
+            <geom class="collision" pos="0 0.08 0" quat="1 1 0 0" size="0.055 0.06"/>
+            <geom class="collision" size="0.038 0.19" pos="0 0 0.2"/>
+            <body name="wrist_1_link" pos="0 0 0.392" quat="1 0 1 0">
+              <inertial mass="1.219" pos="0 0.127 0" diaginertia="0.0025599 0.0025599 0.0021942"/>
+              <joint name="wrist_1_joint" class="size1"/>
+              <geom mesh="wrist1_0" material="black" class="visual"/>
+              <geom mesh="wrist1_1" material="urblue" class="visual"/>
+              <geom mesh="wrist1_2" material="jointgray" class="visual"/>
+              <geom class="collision" pos="0 0.05 0" quat="1 1 0 0" size="0.04 0.07"/>
+              <body name="wrist_2_link" pos="0 0.127 0">
+                <inertial mass="1.219" pos="0 0 0.1" diaginertia="0.0025599 0.0025599 0.0021942"/>
+                <joint name="wrist_2_joint" axis="0 0 1" class="size1"/>
+                <geom mesh="wrist2_0" material="black" class="visual"/>
+                <geom mesh="wrist2_1" material="urblue" class="visual"/>
+                <geom mesh="wrist2_2" material="jointgray" class="visual"/>
+                <geom class="collision" size="0.04 0.06" pos="0 0 0.04"/>
+                <geom class="collision" pos="0 0.02 0.1" quat="1 1 0 0" size="0.04 0.04"/>
+                <body name="wrist_3_link" pos="0 0 0.1">
+                  <inertial mass="0.1889" pos="0 0.0771683 0" quat="1 0 0 1"
+                    diaginertia="0.000132134 9.90863e-05 9.90863e-05"/>
+                  <joint name="wrist_3_joint" class="size1"/>
+                  <geom material="linkgray" mesh="wrist3" class="visual"/>
+                  <geom class="eef_collision" pos="0 0.08 0" quat="1 1 0 0" size="0.04 0.02"/>
+                  <site name="attachment_site" pos="0 0.1 0" quat="-1 1 0 0"/>
+                  <!-- Gripper attached to wrist -->
+                  <body name="gripper_base_mount" pos="0 0.1 0" quat="-1 1 0 0" childclass="gripper">
+                    <body name="gripper_base_mount_inner" pos="0 0 0.007">
+                      <geom class="visual_g" mesh="base_mount" material="black"/>
+                      <geom class="collision_g" mesh="base_mount"/>
+                      <body name="gripper_base" pos="0 0 0.0038" quat="1 0 0 -1">
+                        <inertial mass="0.777441" pos="0 -2.70394e-05 0.0354675" quat="1 -0.00152849 0 0"
+                          diaginertia="0.000260285 0.000225381 0.000152708"/>
+                        <geom class="visual_g" mesh="base_g" material="black"/>
+                        <geom class="collision_g" mesh="base_g"/>
+                        <site name="pinch" pos="0 0 0.145" type="sphere" group="5" rgba="0.9 0.9 0.9 1" size="0.005"/>
+                        <!-- End-effector site for IK -->
+                        <site name="ee_site" pos="0 0 0.16" type="sphere" size="0.01" rgba="1 0 0 0.5"/>
+                        <!-- Stick tool for T-push task -->
+                        <geom name="push_stick" type="capsule" fromto="0 0 0.16 0 0.18 0.16" size="0.008" material="stick_mat" mass="0.02" friction="1 0.01 0.01"/>
+                        <!-- Right-hand side 4-bar linkage -->
+                        <body name="right_driver" pos="0 0.0306011 0.054904">
+                          <inertial mass="0.00899563" pos="2.96931e-12 0.0177547 0.00107314" quat="0.681301 0.732003 0 0"
+                            diaginertia="1.72352e-06 1.60906e-06 3.22006e-07"/>
+                          <joint name="right_driver_joint" class="driver_j"/>
+                          <geom class="visual_g" mesh="driver" material="gray"/>
+                          <geom class="collision_g" mesh="driver"/>
+                          <body name="right_coupler" pos="0 0.0315 -0.0041">
+                            <inertial mass="0.0140974" pos="0 0.00301209 0.0232175" quat="0.705636 -0.0455904 0.0455904 0.705636"
+                              diaginertia="4.16206e-06 3.52216e-06 8.88131e-07"/>
+                            <joint name="right_coupler_joint" class="coupler_j"/>
+                            <geom class="visual_g" mesh="coupler" material="black"/>
+                            <geom class="collision_g" mesh="coupler"/>
+                          </body>
+                        </body>
+                        <body name="right_spring_link" pos="0 0.0132 0.0609">
+                          <inertial mass="0.0221642" pos="-8.65005e-09 0.0181624 0.0212658" quat="0.663403 -0.244737 0.244737 0.663403"
+                            diaginertia="8.96853e-06 6.71733e-06 2.63931e-06"/>
+                          <joint name="right_spring_link_joint" class="spring_link_j"/>
+                          <geom class="visual_g" mesh="spring_link" material="black"/>
+                          <geom class="collision_g" mesh="spring_link"/>
+                          <body name="right_follower" pos="0 0.055 0.0375">
+                            <inertial mass="0.0125222" pos="0 -0.011046 0.0124786" quat="1 0.1664 0 0"
+                              diaginertia="2.67415e-06 2.4559e-06 6.02031e-07"/>
+                            <joint name="right_follower_joint" class="follower_j"/>
+                            <geom class="visual_g" mesh="follower" material="black"/>
+                            <geom class="collision_g" mesh="follower"/>
+                            <body name="right_pad" pos="0 -0.0189 0.01352">
+                              <geom class="pad_box1" name="right_pad1"/>
+                              <geom class="pad_box2" name="right_pad2"/>
+                              <inertial mass="0.0035" pos="0 -0.0025 0.0185" quat="0.707107 0 0 0.707107"
+                                diaginertia="4.73958e-07 3.64583e-07 1.23958e-07"/>
+                              <geom class="visual_g" mesh="pad"/>
+                              <body name="right_silicone_pad">
+                                <geom class="visual_g" mesh="silicone_pad" material="black"/>
+                              </body>
+                            </body>
+                          </body>
+                        </body>
+                        <!-- Left-hand side 4-bar linkage -->
+                        <body name="left_driver" pos="0 -0.0306011 0.054904" quat="0 0 0 1">
+                          <inertial mass="0.00899563" pos="0 0.0177547 0.00107314" quat="0.681301 0.732003 0 0"
+                            diaginertia="1.72352e-06 1.60906e-06 3.22006e-07"/>
+                          <joint name="left_driver_joint" class="driver_j"/>
+                          <geom class="visual_g" mesh="driver" material="gray"/>
+                          <geom class="collision_g" mesh="driver"/>
+                          <body name="left_coupler" pos="0 0.0315 -0.0041">
+                            <inertial mass="0.0140974" pos="0 0.00301209 0.0232175" quat="0.705636 -0.0455904 0.0455904 0.705636"
+                              diaginertia="4.16206e-06 3.52216e-06 8.88131e-07"/>
+                            <joint name="left_coupler_joint" class="coupler_j"/>
+                            <geom class="visual_g" mesh="coupler" material="black"/>
+                            <geom class="collision_g" mesh="coupler"/>
+                          </body>
+                        </body>
+                        <body name="left_spring_link" pos="0 -0.0132 0.0609" quat="0 0 0 1">
+                          <inertial mass="0.0221642" pos="-8.65005e-09 0.0181624 0.0212658" quat="0.663403 -0.244737 0.244737 0.663403"
+                            diaginertia="8.96853e-06 6.71733e-06 2.63931e-06"/>
+                          <joint name="left_spring_link_joint" class="spring_link_j"/>
+                          <geom class="visual_g" mesh="spring_link" material="black"/>
+                          <geom class="collision_g" mesh="spring_link"/>
+                          <body name="left_follower" pos="0 0.055 0.0375">
+                            <inertial mass="0.0125222" pos="0 -0.011046 0.0124786" quat="1 0.1664 0 0"
+                              diaginertia="2.67415e-06 2.4559e-06 6.02031e-07"/>
+                            <joint name="left_follower_joint" class="follower_j"/>
+                            <geom class="visual_g" mesh="follower" material="black"/>
+                            <geom class="collision_g" mesh="follower"/>
+                            <body name="left_pad" pos="0 -0.0189 0.01352">
+                              <geom class="pad_box1" name="left_pad1"/>
+                              <geom class="pad_box2" name="left_pad2"/>
+                              <inertial mass="0.0035" pos="0 -0.0025 0.0185" quat="1 0 0 1"
+                                diaginertia="4.73958e-07 3.64583e-07 1.23958e-07"/>
+                              <geom class="visual_g" mesh="pad"/>
+                              <body name="left_silicone_pad">
+                                <geom class="visual_g" mesh="silicone_pad" material="black"/>
+                              </body>
+                            </body>
+                          </body>
+                        </body>
+                      </body>
+                    </body>
+                  </body>
+                </body>
+              </body>
+            </body>
+          </body>
+        </body>
+      </body>
+    </body>
+    <!-- T-shaped target (visual marker).
+         The body has no joint, so it stays fixed in the world, and both geoms use
+         contype=0 / conaffinity=0, so they never produce contacts.
+         Stem and cap use the same x-y offsets and half-sizes as the t_object geoms;
+         only the half-height differs (0.002 here, 0.01 on t_object).
+         ur5_env.py looks this body up by the name "t_target" for the task reward. -->
+    <body name="t_target" pos="0.62 -0.18 0.425">
+      <geom name="t_target_stem" type="box" pos="0 -0.05 0" size="0.02 0.07 0.002" material="t_target_mat" contype="0" conaffinity="0"/>
+      <geom name="t_target_cap" type="box" pos="0 0.03 0" size="0.08 0.02 0.002" material="t_target_mat" contype="0" conaffinity="0"/>
+    </body>
+    <!-- Movable T-shaped object to push into target.
+         The free joint "t_object_joint" gives the body 7 qpos entries (3 position + 4 quaternion).
+         Start pose z=0.43 is the table surface (0.42) plus the geoms' half-height (0.01).
+         The same pose (0.45 0.2 0.43, quaternion 1 0 0 0) appears at the end of the "home"
+         keyframe qpos and in the reset call in ur5_env.py; keep the three in sync.
+         Geom masses are 0.3 (stem) and 0.15 (cap), 0.45 in total. -->
+    <body name="t_object" pos="0.45 0.2 0.43">
+      <freejoint name="t_object_joint"/>
+      <geom name="t_object_stem" type="box" pos="0 -0.05 0" size="0.02 0.07 0.01" material="t_object_mat" mass="0.3" friction="1 0.005 0.005"/>
+      <geom name="t_object_cap" type="box" pos="0 0.03 0" size="0.08 0.02 0.01" material="t_object_mat" mass="0.15" friction="1 0.005 0.005"/>
+    </body>
+  </worldbody>
+  <!-- Contact exclusions: collision checking is switched off for these six body pairs,
+       all inside the gripper linkage (gripper_base against each driver and each spring
+       link; each coupler against the follower on the same side).
+       NOTE(review): presumably these parts touch or overlap by construction; confirm. -->
+  <contact>
+    <exclude body1="gripper_base" body2="left_driver"/>
+    <exclude body1="gripper_base" body2="right_driver"/>
+    <exclude body1="gripper_base" body2="left_spring_link"/>
+    <exclude body1="gripper_base" body2="right_spring_link"/>
+    <exclude body1="right_coupler" body2="right_follower"/>
+    <exclude body1="left_coupler" body2="left_follower"/>
+  </contact>
+  <!-- Fixed tendon "split": its length is 0.5 * right_driver_joint + 0.5 * left_driver_joint,
+       i.e. the mean of the two driver joint positions. The "gripper" actuator acts on it. -->
+  <tendon>
+    <fixed name="split">
+      <joint joint="right_driver_joint" coef="0.5"/>
+      <joint joint="left_driver_joint" coef="0.5"/>
+    </fixed>
+  </tendon>
+  <!-- Loop closure and finger coupling for the gripper.
+       connect: ties each follower to the coupler on the same side at the anchor point
+       "0 0 0" (anchor coordinates are given in body1's frame), closing each 4-bar linkage.
+       joint: polycoef "0 1 0 0 0" is the identity polynomial, so right_driver_joint is
+       constrained to follow left_driver_joint one to one. -->
+  <equality>
+    <connect anchor="0 0 0" body1="right_follower" body2="right_coupler" solimp="0.95 0.99 0.001" solref="0.005 1"/>
+    <connect anchor="0 0 0" body1="left_follower" body2="left_coupler" solimp="0.95 0.99 0.001" solref="0.005 1"/>
+    <joint joint1="right_driver_joint" joint2="left_driver_joint" polycoef="0 1 0 0 0" solimp="0.95 0.99 0.001" solref="0.005 1"/>
+  </equality>
+  <!-- Actuators, in ctrl order: indices 0 to 5 are the six arm joints and index 6 is the
+       gripper (ur5_env.py writes data.ctrl[:6] and data.ctrl[6] in that layout, and the
+       "home" keyframe ctrl has the same 7 entries). Do not reorder. -->
+  <actuator>
+    <!-- UR5e joint actuators: gains and limits are taken from the size3, size3_limited
+         and size1 default classes referenced by each element. -->
+    <general class="size3" name="shoulder_pan" joint="shoulder_pan_joint"/>
+    <general class="size3" name="shoulder_lift" joint="shoulder_lift_joint"/>
+    <general class="size3_limited" name="elbow" joint="elbow_joint"/>
+    <general class="size1" name="wrist_1" joint="wrist_1_joint"/>
+    <general class="size1" name="wrist_2" joint="wrist_2_joint"/>
+    <general class="size1" name="wrist_3" joint="wrist_3_joint"/>
+    <!-- Gripper actuator: acts on tendon "split"; ctrl range 0 to 255, force limited to +/-5.
+         NOTE(review): no default class is named on this element. biasprm only has an effect
+         when biastype="affine" is inherited from the defaults that apply here; confirm.
+         With an affine bias the law is force = 0.3137255*ctrl - 100*length - 10*velocity,
+         so ctrl=255 corresponds to a tendon length of 0.8. -->
+    <general name="gripper" tendon="split" forcerange="-5 5" ctrlrange="0 255" gainprm="0.3137255 0 0" biasprm="0 -100 -10"/>
+  </actuator>
+  <keyframe>
+    <!-- Official MuJoCo Menagerie UR5e home pose with gripper open (0=open) -->
+    <!-- qpos layout (21 values): 6 arm joints, then 8 gripper linkage joints (all 0),
+         then the t_object free joint as position (0.45 0.2 0.43) + quaternion (1 0 0 0).
+         ctrl layout (7 values): 6 arm targets equal to the arm qpos, then gripper command 0.
+         Adding or removing a joint or actuator in this scene requires updating both lists. -->
+    <key name="home" qpos="-1.5708 -1.5708 1.5708 -1.5708 -1.5708 0 0 0 0 0 0 0 0 0 0.45 0.2 0.43 1 0 0 0"
+         ctrl="-1.5708 -1.5708 1.5708 -1.5708 -1.5708 0 0"/>
+  </keyframe>
+</mujoco>

robots/ur5/ur5_env.py CHANGED Viewed

@@ -43,7 +43,7 @@ class UR5Env(gym.Env):
         0.0,      # wrist_3 - no rotation
     ], dtype=np.float32)
-    def __init__(self, render_mode=None, width=1280, height=720):
         """Initialize UR5e environment.
         Args:
@@ -55,7 +55,8 @@ class UR5Env(gym.Env):
         # Load model
         ur5_dir = os.path.dirname(os.path.abspath(__file__))
-        model_path = os.path.join(ur5_dir, "model", "scene.xml")
         self.model = mujoco.MjModel.from_xml_path(model_path)
         # Override framebuffer size
@@ -71,6 +72,8 @@ class UR5Env(gym.Env):
         # Find site IDs
         self.ee_site_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "ee_site")
         self.target_body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "target")
         # Action space: 6 joint positions + 1 gripper (0-255)
         self.action_space = spaces.Box(
@@ -97,6 +100,7 @@ class UR5Env(gym.Env):
         self.render_mode = render_mode
         self.width = width
         self.height = height
         self.renderer = None
         self.steps = 0
@@ -132,6 +136,15 @@ class UR5Env(gym.Env):
         # Direct joint targets (used when control_mode is 'joint')
         self._joint_targets = self.DEFAULT_HOME_POSE.copy()
     def set_target(self, x: float, y: float, z: float, update_joint_targets=True):
         """Set target position for IK controller.
@@ -290,6 +303,15 @@ class UR5Env(gym.Env):
         """Compatibility method - returns zeros for arm robots."""
         return np.array([0.0, 0.0, 0.0], dtype=np.float32)
     def reset(self, seed=None, options=None):
         super().reset(seed=seed)
@@ -300,10 +322,9 @@ class UR5Env(gym.Env):
         self.data.ctrl[:6] = self.DEFAULT_HOME_POSE.copy()
         self.data.ctrl[6] = 0  # Gripper open (Robotiq: 0=open, 255=closed)
-        # Reset box position
-        box_qpos_start = 6 + 8  # 6 arm joints + 8 gripper joints
-        self.data.qpos[box_qpos_start:box_qpos_start+3] = [0.5, 0.2, 0.45]
-        self.data.qpos[box_qpos_start+3:box_qpos_start+7] = [1, 0, 0, 0]
         # Compute forward kinematics to get EE pose from home joints
         mujoco.mj_forward(self.model, self.data)

         0.0,      # wrist_3 - no rotation
     ], dtype=np.float32)
+    def __init__(self, render_mode=None, width=1280, height=720, scene_name="scene"):
         """Initialize UR5e environment.
         Args:
         # Load model
         ur5_dir = os.path.dirname(os.path.abspath(__file__))
+        scene_file = f"{scene_name}.xml" if not scene_name.endswith(".xml") else scene_name
+        model_path = os.path.join(ur5_dir, "model", scene_file)
         self.model = mujoco.MjModel.from_xml_path(model_path)
         # Override framebuffer size
         # Find site IDs
         self.ee_site_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_SITE, "ee_site")
         self.target_body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "target")
+        self.t_object_body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "t_object")
+        self.t_target_body_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_BODY, "t_target")
         # Action space: 6 joint positions + 1 gripper (0-255)
         self.action_space = spaces.Box(
         self.render_mode = render_mode
         self.width = width
         self.height = height
+        self.scene_name = scene_name
         self.renderer = None
         self.steps = 0
         # Direct joint targets (used when control_mode is 'joint')
         self._joint_targets = self.DEFAULT_HOME_POSE.copy()
+    def _reset_freejoint(self, joint_name: str, pos, quat):
+        """Write a pose into the ``data.qpos`` slice of a named joint, if it exists.
+
+        Looks the joint up by name in ``self.model``. When the name is not found
+        (``mj_name2id`` returns -1) the call is a silent no-op, which lets
+        ``reset`` name objects that only exist in some scenes.
+
+        Args:
+            joint_name: Name of the joint to reset. The joint type is not
+                checked; 7 consecutive qpos entries are overwritten, so this
+                assumes a free joint.
+            pos: Three values written to ``qpos[adr:adr + 3]``.
+            quat: Four values written to ``qpos[adr + 3:adr + 7]``
+                (callers in this file pass ``[1, 0, 0, 0]``).
+        """
+        joint_id = mujoco.mj_name2id(self.model, mujoco.mjtObj.mjOBJ_JOINT, joint_name)
+        if joint_id == -1:
+            # Joint is absent from the loaded scene: nothing to reset.
+            return
+        # Start index of this joint's coordinates inside data.qpos.
+        qpos_adr = self.model.jnt_qposadr[joint_id]
+        # NOTE(review): only qpos is written here; data.qvel for this joint is left
+        # untouched. Confirm the caller clears velocities before relying on this.
+        self.data.qpos[qpos_adr:qpos_adr + 3] = pos
+        self.data.qpos[qpos_adr + 3:qpos_adr + 7] = quat
     def set_target(self, x: float, y: float, z: float, update_joint_targets=True):
         """Set target position for IK controller.
         """Compatibility method - returns zeros for arm robots."""
         return np.array([0.0, 0.0, 0.0], dtype=np.float32)
+    def get_task_reward(self):
+        """Return the T-push task reward, or None when the scene has no such task.
+
+        The reward is the negated Euclidean distance, in the x-y plane only,
+        between the world positions of the ``t_object`` and ``t_target``
+        bodies. It is therefore at most 0 and reaches 0 when the two body
+        origins coincide in x-y. Height and orientation are not considered.
+
+        Returns:
+            A Python float, or None if either body id is -1 (the body was not
+            found in the loaded model).
+        """
+        if self.t_object_body_id == -1 or self.t_target_body_id == -1:
+            # Scene without the T-push bodies: no task reward is defined.
+            return None
+        # data.xpos is indexed by body id; rows are read as (x, y, z).
+        t_object_pos = self.data.xpos[self.t_object_body_id]
+        t_target_pos = self.data.xpos[self.t_target_body_id]
+        # Use only the first two components so that z is ignored.
+        planar_dist = np.linalg.norm(t_object_pos[:2] - t_target_pos[:2])
+        return -float(planar_dist)
     def reset(self, seed=None, options=None):
         super().reset(seed=seed)
         self.data.ctrl[:6] = self.DEFAULT_HOME_POSE.copy()
         self.data.ctrl[6] = 0  # Gripper open (Robotiq: 0=open, 255=closed)
+        # Reset task objects if present
+        self._reset_freejoint("box_joint", [0.5, 0.2, 0.45], [1, 0, 0, 0])
+        self._reset_freejoint("t_object_joint", [0.45, 0.2, 0.43], [1, 0, 0, 0])
         # Compute forward kinematics to get EE pose from home joints
         mujoco.mj_forward(self.model, self.data)