Spaces:

gpue
/

nova-sim

Paused

Georg committed on Feb 3

Commit

c1ce8c4

1 Parent(s): 8dfd85d

Enhance episode control and jogging management in mujoco_server.py and nova_jogger.py

- Added episode duration tracking and watchdog functionality to automatically truncate episodes exceeding the maximum duration.
- Introduced timestamps for teleoperation commands to improve debugging and state management.
- Updated jogging state management to ensure proper handling of active jogging commands and WebSocket connection states in nova_jogger.py.
- Refactored episode control state initialization and reset logic for better clarity and functionality.

Files changed (3) hide show

mujoco_server.py +56 -2
robots/ur5/model/scene_t_push.xml +1 -1
robots/ur5/nova_jogger.py +39 -4

mujoco_server.py CHANGED Viewed

@@ -99,6 +99,8 @@ needs_robot_switch = None  # Robot to switch to
 episode_control_state = {
     "terminate": False,
     "truncate": False,
 }
 # Perception state - stores latest detected object poses
@@ -130,6 +132,8 @@ last_teleop_action: dict[str, Any] = {
     "j6": 0.0,
 }
 teleop_lock = threading.Lock()
 # Homing state
 homing_state = {
@@ -191,6 +195,11 @@ def _reset_active_environment() -> None:
         if env is not None:
             env.reset()
     _reset_camera_for_current_robot()
 def _schedule_robot_switch(robot: str, scene: Optional[str]) -> None:
@@ -696,6 +705,12 @@ def switch_robot(robot_type, scene_name=None):
     current_scene = active_scene
     env.reset()
     # Initialize gripper value in teleop_action for UR5
     with teleop_lock:
         if robot_type in ("ur5", "ur5_t_push"):
@@ -1052,6 +1067,7 @@ def _signal_episode_control(action: str):
     with episode_control_lock:
         episode_control_state[action] = True
     status_label = "terminated" if action == "terminate" else "truncated"
     payload = {
         "action": action,
@@ -1064,6 +1080,11 @@ def _signal_episode_control(action: str):
     with mujoco_lock:
         if env is not None:
             env.reset()
 def _consume_episode_control_flags():
@@ -1193,7 +1214,7 @@ def simulation_loop():
     def _run_iteration():
         nonlocal broadcast_counter
-        global needs_robot_switch, latest_frame
         if needs_robot_switch is not None:
             with mujoco_lock:
@@ -1203,6 +1224,28 @@ def simulation_loop():
                 )
             needs_robot_switch = None
         with mujoco_lock:
             if env is None:
                 return
@@ -1528,7 +1571,7 @@ def _step_multi_axis_homing_locked(tolerance: float = 0.01) -> dict:
 def handle_ws_message(ws, data):
     """Handle incoming WebSocket message."""
-    global needs_robot_switch, camera_follow, t_target_visible, last_teleop_action
     msg_type = data.get('type')
@@ -1584,9 +1627,13 @@ def handle_ws_message(ws, data):
                     any_joint = any(abs(v) > 0.001 for v in joint_velocities)
                     any_cartesian = any(abs(v) > 0.001 for v in cartesian_translation + cartesian_rotation)
                     # Apply the appropriate jogging mode
                     if any_joint:
                         env.start_multi_joint_jog(joint_velocities)
                     elif any_cartesian:
                         # Convert m/s to mm/s for translations
                         translation_mm_s = [v * 1000.0 for v in cartesian_translation]
@@ -1597,9 +1644,11 @@ def handle_ws_message(ws, data):
                             tcp_id='Flange',
                             coord_system_id='world'
                         )
                     else:
                         # No active velocity - stop jogging
                         env.stop_jog()
                 else:
                     # For locomotion robots: use vx, vy, vyaw
                     env.set_command(vx, vy, vyaw)
@@ -1628,6 +1677,11 @@ def handle_ws_message(ws, data):
         with mujoco_lock:
             if env is not None:
                 obs, info = env.reset(seed=seed)
     elif msg_type == 'switch_robot':
         payload = data.get('data', {})

 episode_control_state = {
     "terminate": False,
     "truncate": False,
+    "start_time": 0.0,  # Timestamp when episode started
+    "max_duration": 120.0,  # Max episode duration in seconds (safety limit)
 }
 # Perception state - stores latest detected object poses
     "j6": 0.0,
 }
 teleop_lock = threading.Lock()
+last_teleop_command_time = 0.0  # Timestamp of last teleop command (for debugging)
+jogging_active = False  # Track if jogging is currently active
 # Homing state
 homing_state = {
         if env is not None:
             env.reset()
     _reset_camera_for_current_robot()
+    # Track episode start time for duration watchdog
+    with episode_control_lock:
+        episode_control_state["start_time"] = time.time()
+        episode_control_state["terminate"] = False
+        episode_control_state["truncate"] = False
 def _schedule_robot_switch(robot: str, scene: Optional[str]) -> None:
     current_scene = active_scene
     env.reset()
+    # Track episode start time for duration watchdog
+    with episode_control_lock:
+        episode_control_state["start_time"] = time.time()
+        episode_control_state["terminate"] = False
+        episode_control_state["truncate"] = False
     # Initialize gripper value in teleop_action for UR5
     with teleop_lock:
         if robot_type in ("ur5", "ur5_t_push"):
     with episode_control_lock:
         episode_control_state[action] = True
+        episode_control_state["start_time"] = 0.0  # Reset timer
     status_label = "terminated" if action == "terminate" else "truncated"
     payload = {
         "action": action,
     with mujoco_lock:
         if env is not None:
             env.reset()
+    # Track new episode start time
+    with episode_control_lock:
+        episode_control_state["start_time"] = time.time()
+        episode_control_state["terminate"] = False
+        episode_control_state["truncate"] = False
 def _consume_episode_control_flags():
     def _run_iteration():
         nonlocal broadcast_counter
+        global needs_robot_switch, latest_frame, jogging_active, last_teleop_command_time
         if needs_robot_switch is not None:
             with mujoco_lock:
                 )
             needs_robot_switch = None
+        # Check episode duration watchdog
+        current_time = time.time()
+        with episode_control_lock:
+            episode_start = episode_control_state.get("start_time", 0.0)
+            max_duration = episode_control_state.get("max_duration", 120.0)
+            if episode_start > 0 and (current_time - episode_start) > max_duration:
+                # Episode has exceeded max duration - automatically truncate
+                print(f"[Watchdog] Episode exceeded max duration ({max_duration}s) - truncating episode")
+                episode_control_state["truncate"] = True
+                episode_control_state["start_time"] = 0.0  # Reset timer
+                # Stop any active jogging
+                if jogging_active:
+                    with mujoco_lock:
+                        if env is not None and current_robot in ("ur5", "ur5_t_push"):
+                            stop_jog_fn = getattr(env, "stop_jog", None)
+                            if callable(stop_jog_fn):
+                                try:
+                                    stop_jog_fn()
+                                    jogging_active = False
+                                except Exception as e:
+                                    print(f"[Watchdog] Error stopping jogging during truncation: {e}")
         with mujoco_lock:
             if env is None:
                 return
 def handle_ws_message(ws, data):
     """Handle incoming WebSocket message."""
+    global needs_robot_switch, camera_follow, t_target_visible, last_teleop_action, last_teleop_command_time, jogging_active
     msg_type = data.get('type')
                     any_joint = any(abs(v) > 0.001 for v in joint_velocities)
                     any_cartesian = any(abs(v) > 0.001 for v in cartesian_translation + cartesian_rotation)
+                    # Update teleop command timestamp
+                    last_teleop_command_time = time.time()
                     # Apply the appropriate jogging mode
                     if any_joint:
                         env.start_multi_joint_jog(joint_velocities)
+                        jogging_active = True
                     elif any_cartesian:
                         # Convert m/s to mm/s for translations
                         translation_mm_s = [v * 1000.0 for v in cartesian_translation]
                             tcp_id='Flange',
                             coord_system_id='world'
                         )
+                        jogging_active = True
                     else:
                         # No active velocity - stop jogging
                         env.stop_jog()
+                        jogging_active = False
                 else:
                     # For locomotion robots: use vx, vy, vyaw
                     env.set_command(vx, vy, vyaw)
         with mujoco_lock:
             if env is not None:
                 obs, info = env.reset(seed=seed)
+        # Track episode start time for duration watchdog
+        with episode_control_lock:
+            episode_control_state["start_time"] = time.time()
+            episode_control_state["terminate"] = False
+            episode_control_state["truncate"] = False
     elif msg_type == 'switch_robot':
         payload = data.get('data', {})

robots/ur5/model/scene_t_push.xml CHANGED Viewed

@@ -113,7 +113,7 @@
     </body>
     <!-- UR5e robot mounted on table edge -->
-    <body name="base" pos="0 0 0.53" quat="0 0 0 -1" childclass="ur5e">
       <inertial mass="4.0" pos="0 0 0" diaginertia="0.00443333156 0.00443333156 0.0072"/>
       <geom mesh="base_0" material="black" class="visual"/>
       <geom mesh="base_1" material="jointgray" class="visual"/>

     </body>
     <!-- UR5e robot mounted on table edge -->
+    <body name="base" pos="0 0 0.54" quat="0 0 0 -1" childclass="ur5e">
       <inertial mass="4.0" pos="0 0 0" diaginertia="0.00443333156 0.00443333156 0.0072"/>
       <geom mesh="base_0" material="black" class="visual"/>
       <geom mesh="base_1" material="jointgray" class="visual"/>

robots/ur5/nova_jogger.py CHANGED Viewed

@@ -427,7 +427,18 @@ class NovaJogger:
                 self._current_command = None
             # Send one final stop command using v2 API
             if self._jogger_ws:
                 # Option 1: Use PauseJoggingRequest (v2 API preferred method)
                 # Option 2: Send zero velocities (backward compatible)
                 # Using zero velocities for better compatibility with existing code
@@ -451,7 +462,16 @@ class NovaJogger:
                         "message_type": "PauseJoggingRequest"
                     }
-                self._jogger_ws.send(json.dumps(command))
             print("[Nova Jogger] Stopped (cleared command)")
             return True
@@ -462,7 +482,11 @@ class NovaJogger:
     def disconnect(self):
         """Disconnect from the jogger."""
-        # Stop the send thread first
         self._stop_send_thread = True
         if self._send_thread is not None:
             self._send_thread.join(timeout=2.0)
@@ -476,14 +500,25 @@ class NovaJogger:
         if self._jogger_ws:
             try:
                 self.stop()
-                self._jogger_ws.close()
             except Exception as e:
-                print(f"[Nova Jogger] Error during disconnect: {e}")
             finally:
                 self._jogger_ws = None
                 self._connected = False
                 self._current_mode = None
     def is_connected(self) -> bool:
         """Check if jogger is connected."""

                 self._current_command = None
             # Send one final stop command using v2 API
+            # Check if WebSocket is open before sending
             if self._jogger_ws:
+                # Check WebSocket state - only send if connection is open
+                try:
+                    ws_state = getattr(self._jogger_ws, 'socket', None)
+                    if ws_state is None or (hasattr(ws_state, 'fileno') and ws_state.fileno() == -1):
+                        # WebSocket is closed, skip sending
+                        print("[Nova Jogger] Stopped (WebSocket already closed)")
+                        return True
+                except:
+                    pass  # If we can't check state, try sending anyway
                 # Option 1: Use PauseJoggingRequest (v2 API preferred method)
                 # Option 2: Send zero velocities (backward compatible)
                 # Using zero velocities for better compatibility with existing code
                         "message_type": "PauseJoggingRequest"
                     }
+                try:
+                    self._jogger_ws.send(json.dumps(command))
+                except Exception as send_error:
+                    # Ignore errors related to closed connections
+                    error_str = str(send_error).lower()
+                    if "close frame" in error_str or "closed" in error_str:
+                        print("[Nova Jogger] Stopped (connection closed during stop)")
+                        return True
+                    # Re-raise other errors
+                    raise
             print("[Nova Jogger] Stopped (cleared command)")
             return True
     def disconnect(self):
         """Disconnect from the jogger."""
+        # Clear current command first to stop the send loop
+        with self._lock:
+            self._current_command = None
+        # Stop the send thread
         self._stop_send_thread = True
         if self._send_thread is not None:
             self._send_thread.join(timeout=2.0)
         if self._jogger_ws:
             try:
+                # Try to send a stop command before closing
                 self.stop()
             except Exception as e:
+                # Ignore errors during stop - we're disconnecting anyway
+                error_str = str(e).lower()
+                if "close frame" not in error_str and "closed" not in error_str:
+                    print(f"[Nova Jogger] Error during stop before disconnect: {e}")
+            try:
+                # Close the WebSocket connection
+                self._jogger_ws.close(timeout=1.0)
+            except Exception as e:
+                # Ignore close errors - connection might already be closed
+                pass
             finally:
                 self._jogger_ws = None
                 self._connected = False
                 self._current_mode = None
+                print("[Nova Jogger] Disconnected")
     def is_connected(self) -> bool:
         """Check if jogger is connected."""