Georg committed on
Commit
25f4c68
·
1 Parent(s): 8772f94
Files changed (2) hide show
  1. frontend/index.html +33 -1
  2. mujoco_server.py +187 -7
frontend/index.html CHANGED
@@ -1065,7 +1065,13 @@
1065
  <div class="control-group">
1066
  <label style="display: flex; align-items: center; gap: 8px; cursor: pointer;">
1067
  <input type="checkbox" id="cam_follow" checked onchange="setCameraFollow()">
1068
- Camera Follow Robot
 
 
 
 
 
 
1069
  </label>
1070
  </div>
1071
  </div>
@@ -1991,6 +1997,27 @@
1991
  if (robotSelect.value !== expectedValue) {
1992
  robotSelect.value = expectedValue;
1993
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1994
  // Toggle controls based on robot type
1995
  if (robot === 'ur5' || robot === 'ur5_t_push') {
1996
  locomotionControls.classList.add('hidden');
@@ -2120,6 +2147,11 @@
2120
  send('camera_follow', { follow });
2121
  }
2122
 
 
 
 
 
 
2123
  // UR5 controls
2124
  let currentControlMode = 'ik';
2125
 
 
1065
  <div class="control-group">
1066
  <label style="display: flex; align-items: center; gap: 8px; cursor: pointer;">
1067
  <input type="checkbox" id="cam_follow" checked onchange="setCameraFollow()">
1068
+ <span id="cam_follow_label">Camera Follow Robot</span>
1069
+ </label>
1070
+ </div>
1071
+ <div class="control-group" id="target_visibility_control" style="display: none;">
1072
+ <label style="display: flex; align-items: center; gap: 8px; cursor: pointer;">
1073
+ <input type="checkbox" id="target_visible" checked onchange="setTargetVisibility()">
1074
+ Show Target T
1075
  </label>
1076
  </div>
1077
  </div>
 
1997
  if (robotSelect.value !== expectedValue) {
1998
  robotSelect.value = expectedValue;
1999
  }
2000
+
2001
+ // Update camera follow label based on robot type
2002
+ const camFollowLabel = document.getElementById('cam_follow_label');
2003
+ if (camFollowLabel) {
2004
+ if (robot === 'ur5' || robot === 'ur5_t_push') {
2005
+ camFollowLabel.innerText = 'Camera Follow Object';
2006
+ } else {
2007
+ camFollowLabel.innerText = 'Camera Follow Robot';
2008
+ }
2009
+ }
2010
+
2011
+ // Show/hide target visibility control (only for push-T scene)
2012
+ const targetVisibilityControl = document.getElementById('target_visibility_control');
2013
+ if (targetVisibilityControl) {
2014
+ if (robot === 'ur5_t_push') {
2015
+ targetVisibilityControl.style.display = 'block';
2016
+ } else {
2017
+ targetVisibilityControl.style.display = 'none';
2018
+ }
2019
+ }
2020
+
2021
  // Toggle controls based on robot type
2022
  if (robot === 'ur5' || robot === 'ur5_t_push') {
2023
  locomotionControls.classList.add('hidden');
 
2147
  send('camera_follow', { follow });
2148
  }
2149
 
2150
+ function setTargetVisibility() {
2151
+ const visible = document.getElementById('target_visible').checked;
2152
+ send('toggle_target_visibility', { visible });
2153
+ }
2154
+
2155
  // UR5 controls
2156
  let currentControlMode = 'ik';
2157
 
mujoco_server.py CHANGED
@@ -98,6 +98,13 @@ episode_control_state = {
98
  "terminate": False,
99
  "truncate": False,
100
  }
 
 
 
 
 
 
 
101
  episode_control_lock = threading.Lock()
102
 
103
  # Latest teleoperation action (for trainer state)
@@ -154,6 +161,9 @@ cam.lookat = np.array([0, 0, 0.8])
154
  # Camera follow mode
155
  camera_follow = True
156
 
 
 
 
157
  def _reset_camera_for_current_robot() -> None:
158
  """Reset the orbit camera to the defaults for the active robot."""
159
  cam.azimuth = 135
@@ -223,6 +233,7 @@ AVAILABLE_ACTIONS = [
223
  "switch_robot",
224
  "camera",
225
  "camera_follow",
 
226
  "teleop_action",
227
  "home",
228
  "stop_home",
@@ -568,7 +579,8 @@ def _get_scene_objects():
568
  t_object_id = getattr(env, 't_object_body_id', -1)
569
  if t_object_id >= 0:
570
  pos = env.data.xpos[t_object_id]
571
- quat = env._get_body_quat(t_object_id) if hasattr(env, '_get_body_quat') else np.array([1.0, 0.0, 0.0, 0.0])
 
572
  scene_objects.append({
573
  'name': 't_object',
574
  'position': {'x': float(pos[0]), 'y': float(pos[1]), 'z': float(pos[2])},
@@ -579,7 +591,7 @@ def _get_scene_objects():
579
  t_target_id = getattr(env, 't_target_body_id', -1)
580
  if t_target_id >= 0:
581
  pos = env.data.xpos[t_target_id]
582
- quat = env._get_body_quat(t_target_id) if hasattr(env, '_get_body_quat') else np.array([1.0, 0.0, 0.0, 0.0])
583
  scene_objects.append({
584
  'name': 't_target',
585
  'position': {'x': float(pos[0]), 'y': float(pos[1]), 'z': float(pos[2])},
@@ -591,7 +603,7 @@ def _get_scene_objects():
591
  box_id = mujoco.mj_name2id(env.model, mujoco.mjtObj.mjOBJ_BODY, "box")
592
  if box_id >= 0:
593
  pos = env.data.xpos[box_id]
594
- quat = env._get_body_quat(box_id) if hasattr(env, '_get_body_quat') else np.array([1.0, 0.0, 0.0, 0.0])
595
  scene_objects.append({
596
  'name': 'box',
597
  'position': {'x': float(pos[0]), 'y': float(pos[1]), 'z': float(pos[2])},
@@ -887,8 +899,112 @@ def _consume_episode_control_flags():
887
  return terminate, truncate
888
 
889
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
890
  def simulation_loop():
891
- global running, latest_frame, camera_follow, renderer, needs_robot_switch, env
892
 
893
  print("Starting simulation loop...")
894
 
@@ -933,8 +1049,24 @@ def simulation_loop():
933
  for _ in range(physics_steps_per_frame):
934
  env.step_with_controller(dt=sim_dt)
935
 
936
- # Update camera to follow robot (not for UR5 which is stationary)
937
- if camera_follow and current_robot not in ("ur5", "ur5_t_push"):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
938
  robot_pos = env.data.qpos[:3]
939
  cam.lookat[0] = robot_pos[0]
940
  cam.lookat[1] = robot_pos[1]
@@ -943,6 +1075,24 @@ def simulation_loop():
943
  # Render
944
  if renderer is not None:
945
  renderer.update_scene(env.data, camera=cam)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
946
  frame = renderer.render()
947
  else:
948
  return
@@ -988,6 +1138,22 @@ def simulation_loop():
988
  cam_obj.azimuth = -yaw + 90.0
989
  cam_obj.elevation = -pitch
990
  renderer_obj.update_scene(env.data, camera=cam_obj)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
991
  overlay_frame = renderer_obj.render()
992
  if overlay_frame is None:
993
  continue
@@ -1048,7 +1214,7 @@ def generate_overlay_frames(name: str):
1048
 
1049
  def handle_ws_message(ws, data):
1050
  """Handle incoming WebSocket message."""
1051
- global needs_robot_switch, camera_follow, last_teleop_action
1052
 
1053
  msg_type = data.get('type')
1054
 
@@ -1342,6 +1508,10 @@ def handle_ws_message(ws, data):
1342
  payload = data.get('data', {})
1343
  camera_follow = payload.get('follow', True)
1344
 
 
 
 
 
1345
  elif msg_type == 'teleop_action':
1346
  payload = data.get('data', {})
1347
  # Accept both old format (dx/dy/dz) and new format (vx/vy/vz)
@@ -1457,6 +1627,16 @@ def handle_ws_message(ws, data):
1457
  if env is not None and current_robot in ("ur5", "ur5_t_push"):
1458
  env.set_use_orientation(use)
1459
 
 
 
 
 
 
 
 
 
 
 
1460
  elif msg_type == 'set_nova_mode':
1461
  payload = data.get('data', {})
1462
  enabled = bool(payload.get('enabled', False))
 
98
  "terminate": False,
99
  "truncate": False,
100
  }
101
+
102
+ # Perception state - stores latest detected object poses
103
+ perception_state = {
104
+ "poses": [], # List of {object_id, position, orientation, confidence}
105
+ "last_update": 0, # Timestamp of last update
106
+ }
107
+ perception_lock = threading.Lock()
108
  episode_control_lock = threading.Lock()
109
 
110
  # Latest teleoperation action (for trainer state)
 
161
  # Camera follow mode
162
  camera_follow = True
163
 
164
+ # Target visibility for push-T scene
165
+ t_target_visible = True
166
+
167
  def _reset_camera_for_current_robot() -> None:
168
  """Reset the orbit camera to the defaults for the active robot."""
169
  cam.azimuth = 135
 
233
  "switch_robot",
234
  "camera",
235
  "camera_follow",
236
+ "toggle_target_visibility",
237
  "teleop_action",
238
  "home",
239
  "stop_home",
 
579
  t_object_id = getattr(env, 't_object_body_id', -1)
580
  if t_object_id >= 0:
581
  pos = env.data.xpos[t_object_id]
582
+ # MuJoCo stores quaternions in data.xquat as [w, x, y, z]
583
+ quat = env.data.xquat[t_object_id]
584
  scene_objects.append({
585
  'name': 't_object',
586
  'position': {'x': float(pos[0]), 'y': float(pos[1]), 'z': float(pos[2])},
 
591
  t_target_id = getattr(env, 't_target_body_id', -1)
592
  if t_target_id >= 0:
593
  pos = env.data.xpos[t_target_id]
594
+ quat = env.data.xquat[t_target_id]
595
  scene_objects.append({
596
  'name': 't_target',
597
  'position': {'x': float(pos[0]), 'y': float(pos[1]), 'z': float(pos[2])},
 
603
  box_id = mujoco.mj_name2id(env.model, mujoco.mjtObj.mjOBJ_BODY, "box")
604
  if box_id >= 0:
605
  pos = env.data.xpos[box_id]
606
+ quat = env.data.xquat[box_id]
607
  scene_objects.append({
608
  'name': 'box',
609
  'position': {'x': float(pos[0]), 'y': float(pos[1]), 'z': float(pos[2])},
 
899
  return terminate, truncate
900
 
901
 
902
+ def _quat_to_mat(quat):
903
+ """Convert quaternion (w, x, y, z) to 3x3 rotation matrix.
904
+
905
+ Args:
906
+ quat: dict with keys 'w', 'x', 'y', 'z' or tuple/list [w, x, y, z]
907
+
908
+ Returns:
909
+ 3x3 rotation matrix as numpy array
910
+ """
911
+ if isinstance(quat, dict):
912
+ w, x, y, z = quat['w'], quat['x'], quat['y'], quat['z']
913
+ else:
914
+ w, x, y, z = quat
915
+
916
+ # Normalize quaternion
917
+ norm = math.sqrt(w*w + x*x + y*y + z*z)
918
+ if norm < 1e-10:
919
+ return np.eye(3)
920
+ w, x, y, z = w/norm, x/norm, y/norm, z/norm
921
+
922
+ # Convert to rotation matrix
923
+ mat = np.array([
924
+ [1 - 2*(y*y + z*z), 2*(x*y - w*z), 2*(x*z + w*y)],
925
+ [ 2*(x*y + w*z), 1 - 2*(x*x + z*z), 2*(y*z - w*x)],
926
+ [ 2*(x*z - w*y), 2*(y*z + w*x), 1 - 2*(x*x + y*y)]
927
+ ])
928
+ return mat
929
+
930
+
931
+ def _render_perception_frames(renderer_obj, model, data):
932
+ """Render bounding boxes for detected object poses.
933
+
934
+ Draws wireframe boxes at detected poses, color-coded by confidence.
935
+ Green (high confidence >0.8), Yellow (medium 0.5-0.8), Red (low <0.5).
936
+ """
937
+ with perception_lock:
938
+ poses = perception_state.get("poses", [])
939
+ last_update = perception_state.get("last_update", 0)
940
+
941
+ # Don't render if data is too old (> 1 second)
942
+ if time.time() - last_update > 1.0:
943
+ return
944
+
945
+ # Render bounding box for each detected pose
946
+ for pose in poses:
947
+ try:
948
+ pos = pose.get("position", {})
949
+ orientation = pose.get("orientation", {})
950
+ confidence = pose.get("confidence", 0.0)
951
+ dimensions = pose.get("dimensions")
952
+
953
+ # Skip if no dimensions provided
954
+ if not dimensions:
955
+ continue
956
+
957
+ # Extract position
958
+ px = float(pos.get("x", 0.0))
959
+ py = float(pos.get("y", 0.0))
960
+ pz = float(pos.get("z", 0.0))
961
+
962
+ # Extract dimensions
963
+ if isinstance(dimensions, dict):
964
+ width = float(dimensions.get("width", 0.05))
965
+ height = float(dimensions.get("height", 0.05))
966
+ depth = float(dimensions.get("depth", 0.05))
967
+ elif isinstance(dimensions, (list, tuple)):
968
+ width, height, depth = float(dimensions[0]), float(dimensions[1]), float(dimensions[2])
969
+ else:
970
+ continue
971
+
972
+ # Color based on confidence: high=green, medium=yellow, low=red
973
+ if confidence > 0.8:
974
+ rgba = [0.0, 1.0, 0.0, 0.5] # Green
975
+ elif confidence > 0.5:
976
+ rgba = [1.0, 1.0, 0.0, 0.5] # Yellow
977
+ else:
978
+ rgba = [1.0, 0.0, 0.0, 0.5] # Red
979
+
980
+ # Convert quaternion to rotation matrix
981
+ rot_mat = _quat_to_mat(orientation)
982
+
983
+ # Render as box geometry
984
+ # MuJoCo box size is half-extents, so divide by 2
985
+ mujoco.mjv_initGeom(
986
+ renderer_obj.scene.geoms[renderer_obj.scene.ngeom],
987
+ type=mujoco.mjtGeom.mjGEOM_BOX,
988
+ size=[width/2, height/2, depth/2],
989
+ pos=[px, py, pz],
990
+ mat=rot_mat.flatten(),
991
+ rgba=rgba
992
+ )
993
+ renderer_obj.scene.ngeom += 1
994
+
995
+ # Limit to max geoms to avoid overflow
996
+ if renderer_obj.scene.ngeom >= renderer_obj.scene.maxgeom - 10:
997
+ break
998
+
999
+ except Exception as e:
1000
+ # Silently skip invalid poses
1001
+ pass
1002
+
1003
+
1004
+
1005
+
1006
  def simulation_loop():
1007
+ global running, latest_frame, camera_follow, t_target_visible, renderer, needs_robot_switch, env
1008
 
1009
  print("Starting simulation loop...")
1010
 
 
1049
  for _ in range(physics_steps_per_frame):
1050
  env.step_with_controller(dt=sim_dt)
1051
 
1052
+ # Update camera to follow robot or object
1053
+ if current_robot in ("ur5", "ur5_t_push"):
1054
+ # For UR5: follow t_object when enabled, table center when disabled
1055
+ if camera_follow:
1056
+ # Follow t_object if it exists
1057
+ t_object_id = getattr(env, 't_object_body_id', -1)
1058
+ if t_object_id >= 0:
1059
+ object_pos = env.data.xpos[t_object_id]
1060
+ cam.lookat[0] = object_pos[0]
1061
+ cam.lookat[1] = object_pos[1]
1062
+ cam.lookat[2] = object_pos[2]
1063
+ else:
1064
+ # Center on table
1065
+ cam.lookat[0] = 0.5
1066
+ cam.lookat[1] = 0.0
1067
+ cam.lookat[2] = 0.42
1068
+ elif camera_follow:
1069
+ # For locomotion robots: follow robot base
1070
  robot_pos = env.data.qpos[:3]
1071
  cam.lookat[0] = robot_pos[0]
1072
  cam.lookat[1] = robot_pos[1]
 
1075
  # Render
1076
  if renderer is not None:
1077
  renderer.update_scene(env.data, camera=cam)
1078
+
1079
+ # Hide t_target if visibility is disabled (push-T scene only)
1080
+ if current_robot == "ur5_t_push" and not t_target_visible:
1081
+ t_target_id = getattr(env, 't_target_body_id', -1)
1082
+ if t_target_id >= 0:
1083
+ # Set alpha to 0 for all geoms belonging to t_target body
1084
+ for i in range(renderer.scene.ngeom):
1085
+ geom = renderer.scene.geoms[i]
1086
+ # Check if this geom belongs to t_target body by comparing position
1087
+ target_pos = env.data.xpos[t_target_id]
1088
+ geom_pos = geom.pos
1089
+ # If geom is very close to target body, it likely belongs to it
1090
+ dist = np.linalg.norm(np.array(geom_pos) - target_pos)
1091
+ if dist < 0.15: # Within 15cm - likely part of the T-shape
1092
+ geom.rgba[3] = 0.0 # Set alpha to 0 (invisible)
1093
+
1094
+ # Add perception visualization markers
1095
+ _render_perception_frames(renderer, env.model, env.data)
1096
  frame = renderer.render()
1097
  else:
1098
  return
 
1138
  cam_obj.azimuth = -yaw + 90.0
1139
  cam_obj.elevation = -pitch
1140
  renderer_obj.update_scene(env.data, camera=cam_obj)
1141
+
1142
+ # Hide t_target if visibility is disabled (push-T scene only)
1143
+ if current_robot == "ur5_t_push" and not t_target_visible:
1144
+ t_target_id = getattr(env, 't_target_body_id', -1)
1145
+ if t_target_id >= 0:
1146
+ # Set alpha to 0 for all geoms belonging to t_target body
1147
+ for i in range(renderer_obj.scene.ngeom):
1148
+ geom = renderer_obj.scene.geoms[i]
1149
+ # Check if this geom belongs to t_target body by comparing position
1150
+ target_pos = env.data.xpos[t_target_id]
1151
+ geom_pos = geom.pos
1152
+ # If geom is very close to target body, it likely belongs to it
1153
+ dist = np.linalg.norm(np.array(geom_pos) - target_pos)
1154
+ if dist < 0.15: # Within 15cm - likely part of the T-shape
1155
+ geom.rgba[3] = 0.0 # Set alpha to 0 (invisible)
1156
+
1157
  overlay_frame = renderer_obj.render()
1158
  if overlay_frame is None:
1159
  continue
 
1214
 
1215
  def handle_ws_message(ws, data):
1216
  """Handle incoming WebSocket message."""
1217
+ global needs_robot_switch, camera_follow, t_target_visible, last_teleop_action
1218
 
1219
  msg_type = data.get('type')
1220
 
 
1508
  payload = data.get('data', {})
1509
  camera_follow = payload.get('follow', True)
1510
 
1511
+ elif msg_type == 'toggle_target_visibility':
1512
+ payload = data.get('data', {})
1513
+ t_target_visible = payload.get('visible', True)
1514
+
1515
  elif msg_type == 'teleop_action':
1516
  payload = data.get('data', {})
1517
  # Accept both old format (dx/dy/dz) and new format (vx/vy/vz)
 
1627
  if env is not None and current_robot in ("ur5", "ur5_t_push"):
1628
  env.set_use_orientation(use)
1629
 
1630
+ elif msg_type == 'perception_update':
1631
+ # Handle perception updates from training system
1632
+ payload = data.get('data', {})
1633
+ poses = payload.get('poses', [])
1634
+ timestamp = payload.get('timestamp', time.time())
1635
+ with perception_lock:
1636
+ perception_state["poses"] = poses
1637
+ perception_state["last_update"] = timestamp
1638
+ # print(f"[Perception] Received {len(poses)} pose(s) at {timestamp:.2f}")
1639
+
1640
  elif msg_type == 'set_nova_mode':
1641
  payload = data.get('data', {})
1642
  enabled = bool(payload.get('enabled', False))