# NOTE: stray listing metadata (file size, git-blame commit hashes, and a run
# of line numbers) was removed from the top of this file; it was extraction
# residue, not part of the Python source.
import os
import json
import tempfile
import imageio
import numpy as np
import mujoco
# Extract final object rotations in Euler angles for interpretability (optional)
from scipy.spatial.transform import Rotation as R
# Path to the MuJoCo XML scene description (tray plus free-joint bodies).
MODEL_PATH = "assets/tray.xml"
N_OBJECTS = 5 # number of dynamic blocks to randomize
# Simulation step at which the pusher body is given its velocity.
PUSH_START_STEP = 50
# Total number of mj_step calls (and rendered frames) per run.
SIM_STEPS = 200
IMPACT_STEP = 60 # is a good starting point, just after the pusher activates.
def quat_to_rotmat(q):
    """Convert a quaternion to a 3x3 rotation matrix.

    Args:
        q: Quaternion in MuJoCo's scalar-first [w, x, y, z] order. The input
           is normalized internally, so slightly drifted (non-unit)
           quaternions are handled correctly.

    Returns:
        np.ndarray of shape (3, 3): the corresponding rotation matrix.
    """
    q = np.asarray(q, dtype=float)
    # The expansion below is only a rotation matrix for a *unit* quaternion;
    # normalize first so numerical drift in q cannot skew the result.
    w, x, y, z = q / np.linalg.norm(q)
    return np.array([
        [1 - 2*y**2 - 2*z**2, 2*x*y - 2*z*w,       2*x*z + 2*y*w],
        [2*x*y + 2*z*w,       1 - 2*x**2 - 2*z**2, 2*y*z - 2*x*w],
        [2*x*z - 2*y*w,       2*y*z + 2*x*w,       1 - 2*x**2 - 2*y**2],
    ])
def classify_orientation_stability(data, model, alpha_deg=45):
    """
    Classify each object's stability from its tilt angle.

    An object counts as stable when its local z-axis is within
    ``alpha_deg`` degrees of world-up.

    Returns a list of booleans, True if the object is stable.
    """
    tilt_limit = np.cos(np.deg2rad(alpha_deg))
    gravity = np.array([0, 0, -1])
    flags = []
    for idx in range(N_OBJECTS):
        quat = data.qpos[idx*7 + 3 : idx*7 + 7]
        # Local z-axis of the object expressed in world coordinates.
        z_axis_world = quat_to_rotmat(quat)[:, 2]
        # Negated dot product because gravity points downward.
        alignment = -np.dot(gravity, z_axis_world)
        flags.append(alignment > tilt_limit)
    return flags
def classify_orientation_stability_verbose(data, model, alpha_deg=45):
    """Classify each object's tilt stability with a human-readable reason.

    Args:
        data: MuJoCo data object; qpos is read as N_OBJECTS free joints
              (7 DOF each: position then [w, x, y, z] quaternion).
        model: MuJoCo model (unused; kept for a consistent classifier signature).
        alpha_deg: Maximum tilt angle in degrees still considered stable.

    Returns:
        List[dict]: one {"stable": bool, "reason": str} entry per object.
    """
    results = []
    cos_alpha = np.cos(np.deg2rad(alpha_deg))
    g = np.array([0, 0, -1])
    for i in range(N_OBJECTS):
        q = data.qpos[i*7 + 3 : i*7 + 7]
        rotmat = quat_to_rotmat(q)
        obj_z_world = rotmat[:, 2]  # local z-axis in world coordinates
        # Clip to [-1, 1] so floating-point error can never push the value
        # outside arccos's domain (which would produce NaN angles).
        cos_theta = np.clip(-np.dot(g, obj_z_world), -1.0, 1.0)
        if cos_theta > cos_alpha:
            results.append({"stable": True, "reason": "upright (tilt within limit)"})
        else:
            angle_deg = np.rad2deg(np.arccos(cos_theta))
            results.append({
                "stable": False,
                "reason": f"tilt angle {angle_deg:.2f}° exceeds {alpha_deg}°"
            })
    return results
def classify_stability(data, model):
    """
    Classify object stability from position alone.

    An object is "stable" when it sits above the floor (z > 0.01) and its
    x/y coordinates remain inside the tray bounds of ±0.3.

    Args:
        data: MuJoCo data object containing the simulation state; qpos is
              read as N_OBJECTS free joints (7 DOF each).
        model: MuJoCo model (unused here, passed alongside data by convention).

    Returns:
        List of booleans indicating whether each object is stable.
    """
    tray_half_extent = 0.3
    min_height = 0.01
    flags = []
    for idx in range(N_OBJECTS):
        x, y, z = data.qpos[idx*7 : idx*7 + 3]
        within_tray = abs(x) < tray_half_extent and abs(y) < tray_half_extent
        flags.append(bool(within_tray and z > min_height))
    return flags
# Extend the classifier to explain why something is unstable:
def classify_stability_verbose(data, model):
    """Position-based stability check that also reports why an object failed.

    Returns a list of {"stable": bool, "reason": str} dicts, one per object.
    """
    half_extent = 0.3
    report = []
    for idx in range(N_OBJECTS):
        px, py, pz = data.qpos[idx*7 : idx*7 + 3]
        problems = []
        if pz < 0.01:
            problems.append("z too low (possibly toppled)")
        if abs(px) > half_extent or abs(py) > half_extent:
            problems.append("outside tray bounds")
        if problems:
            report.append({"stable": False, "reason": ", ".join(problems)})
        else:
            report.append({"stable": True, "reason": "object upright and within tray"})
    return report
# Create LLM-Friendly Prompt Template
def format_llm_prompt(physics_state):
    """
    Convert a physics-state dictionary into a JSON-serializable list of
    per-object dicts suitable for prompting a large language model (LLM).

    Each object is described by its:
    - 3D position (rounded to 3 decimals)
    - orientation in ZYX Euler angles, degrees (rounded to 1 decimal)
    - linear velocity (rounded to 3 decimals)
    - angular velocity (rounded to 3 decimals)
    - stability status ("Stable" or "Unstable")

    Parameters:
        physics_state (dict): Physics properties of each object, with keys:
            - "positions_xyz": list of shape (N, 3) position vectors [x, y, z]
            - "orientations_euler": list of shape (N, 3) [z, y, x] angles in degrees
            - "velocities_linear": list of shape (N, 3) linear velocity vectors
            - "velocities_angular": list of shape (N, 3) angular velocity vectors
            - "stable_flags": list of N bools, True when the object is stable

    Returns:
        List[dict]: one formatted, JSON-serializable dict per object.
    """
    formatted = []
    # enumerate over positions drives the object count; the remaining lists
    # are expected to be parallel (same length N).
    for i, position in enumerate(physics_state["positions_xyz"]):
        formatted.append({
            "id": i,
            "position": np.round(position, 3).tolist(),
            "orientation_euler_zyx_deg": np.round(physics_state["orientations_euler"][i], 1).tolist(),
            "linear_velocity": np.round(physics_state["velocities_linear"][i], 3).tolist(),
            "angular_velocity": np.round(physics_state["velocities_angular"][i], 3).tolist(),
            "status": "Stable" if physics_state["stable_flags"][i] else "Unstable",
        })
    return formatted
def format_as_natural_language_prompt(scene_description, task_description=None):
    """
    Render a structured physics state as a natural-language prompt for LLMs.

    Args:
        scene_description (List[dict]): Output of format_llm_prompt().
        task_description (str, optional): Instruction prepended to the
            summary, e.g. "Describe which blocks are unstable and why".

    Returns:
        str: Natural-language prompt, one line per object.
    """
    parts = []
    if task_description:
        parts.append(task_description.strip())
        parts.append("")  # blank line separating the task from the summary
    parts.append("Here is the scene summary:")
    parts.extend(
        f"Object {obj['id']} is at position {obj['position']} with orientation "
        f"{obj['orientation_euler_zyx_deg']} degrees. "
        f"It has linear velocity {obj['linear_velocity']} and angular velocity "
        f"{obj['angular_velocity']}. "
        f"Status: {obj['status']}."
        for obj in scene_description
    )
    return "\n".join(parts)
def run_tray_simulation(
    seed=0,
    num_objects=N_OBJECTS,
    azimuth=45,
    elevation=-25,
    distance=0.6,
    unique_id=None # Unique file naming
):
    """Run one randomized tray simulation, render it, and export state.

    Objects are dropped at random positions on the tray; a pusher body is
    given a velocity toward the tray center at PUSH_START_STEP, and SIM_STEPS
    steps are simulated and rendered. Outputs (keyframe PNGs, a GIF, and a
    JSON snapshot of the final physics state) are written under the system
    temp directory, named with ``unique_id``.

    Assumes the qpos layout is: num_objects free joints (7 DOF each) followed
    by the pusher's free joint — TODO confirm against the tray.xml model.

    Args:
        seed: NumPy RNG seed controlling the random object placement.
        num_objects: Number of dynamic free-joint objects before the pusher.
        azimuth: Free-camera yaw angle in degrees.
        elevation: Free-camera pitch angle in degrees.
        distance: Free-camera distance (zoom) from the look-at point.
        unique_id: Suffix used to build unique output filenames.

    Returns:
        Tuple of (gif_path, stability_flags, physics_state, prompt, json_path).
    """
    np.random.seed(seed)
    model = mujoco.MjModel.from_xml_path(MODEL_PATH)
    data = mujoco.MjData(model)
    # Explicitly reset the simulation state
    mujoco.mj_resetData(model, data)
    # Body id of the tray (used only by the commented-out impulse code below).
    tray_id = mujoco.mj_name2id(
        model,
        mujoco.mjtObj.mjOBJ_BODY,
        "tray"
    )
    # Apply a transient force to the tray
    tray_force_applied = True # False # One-time trigger
    # Locate pusher DOF index (last one in qpos if you have 10 objects)
    # Compute DOF offsets: free joints use 7 qpos entries but 6 qvel entries.
    pusher_idx = num_objects * 7
    pusher_vel_idx = num_objects * 6
    # Pusher setup
    pusher_pos = np.array([0.35, -0.35, 0.08]) # Slightly outside tray corner
    pusher_quat = [1, 0, 0, 0]
    data.qpos[pusher_idx:pusher_idx+3] = pusher_pos
    data.qpos[pusher_idx+3:pusher_idx+7] = pusher_quat
    # Compute velocity towards center (horizontal only: z component zeroed).
    target = np.array([0.0, 0.0, 0.05])
    direction = target - pusher_pos
    direction[2] = 0.0
    direction /= np.linalg.norm(direction)
    push_velocity = 0.15 * direction # Adjust magnitude
    # Object initialization: random positions above the tray, identity
    # orientation, zero initial velocity.
    for i in range(num_objects):
        obj_start = i * 7 # 7 DOF per free joint body
        pos = np.random.uniform(low=[-0.1, -0.1, 0.05], high=[0.1, 0.1, 0.15])
        quat = [1, 0, 0, 0] # no rotation
        data.qpos[obj_start:obj_start+3] = pos
        data.qpos[obj_start+3:obj_start+7] = quat
        data.qvel[i*6:i*6+6] = 0
        print(f"Object {i} initial position: {pos}") # Debug print
    # Renderer setup
    renderer = mujoco.Renderer(model, width=480, height=480) # width=640, height=640
    # Custom camera parameters
    scene_option = mujoco.MjvOption()
    # Enable contact visualization in the renderer to debug collisions
    # Check if contact points appear when the puck hits the tray. If not,
    # collisions are not being detected.
    #scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = True
    #scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = True
    #scene_option.flags[mujoco.mjtVisFlag.mjVIS_JOINT] = True # if you want to see joint frames or anchors to understand the articulated structure.
    #scene_option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = True # if the model uses transparent geoms and you want them shown correctly.
    cam = mujoco.MjvCamera()
    cam.type = mujoco.mjtCamera.mjCAMERA_FREE
    cam.lookat[:] = np.array([0.0, 0.0, 0.02]) # Center of the tray
    cam.distance = distance # Zoom
    cam.azimuth = azimuth # Yaw angle
    cam.elevation = elevation # Pitch angle
    frames = []
    # Steps at which a still PNG is saved (membership test uses the keys).
    # NOTE(review): t iterates over range(SIM_STEPS) == [0, 200), so the
    # 200 keyframe is never reached — confirm whether 199 was intended.
    keyframes = {0: None, 25: None, 50: None, 200: None}
    for t in range(SIM_STEPS):
        if t == PUSH_START_STEP:
            # Activate pusher
            #data.qvel[pusher_vel_idx:pusher_vel_idx+3] = [-0.05, 0.05, 0.0] # toward center
            data.qvel[pusher_vel_idx:pusher_vel_idx+3] = push_velocity # toward center
        #if t == IMPACT_STEP and not tray_force_applied:
        #    mujoco.mj_applyFT(
        #        model,
        #        data,
        #        np.array([5.0, 0.0, 0.0]), # force (e.g. impulse to the right; adjust magnitude)
        #        np.array([0.0, 0.0, 0.0]), # torque
        #        np.array([0.0, 0.0, 0.0]), # point in global frame (apply at body origin)
        #        tray_id, # body ID
        #        data.qfrc_applied # force accumulator to apply into
        #    )
        #    tray_force_applied = True
        mujoco.mj_step(model, data)
        renderer.update_scene(
            data,
            camera=cam,
            scene_option=scene_option
        )
        img = renderer.render()
        if t in keyframes:
            #imageio.imwrite(f"/tmp/frame_{t}.png", img)
            imageio.imwrite(f"/tmp/frame_{t}_{unique_id}.png", img)
        frames.append(img)
    renderer.close()
    # Position-based classification of the final state.
    stability_flags = classify_stability(data, model)
    # Optional: print or return
    print("Stability:", stability_flags)
    # Extract final object rotations in Euler angles for interpretability.
    euler_angles = []
    for i in range(num_objects):
        quat = data.qpos[i*7+3 : i*7+7] # qw, qx, qy, qz
        # Convert to Euler (yaw-pitch-roll); SciPy expects scalar-last
        # [qx, qy, qz, qw], hence the reordering.
        rot = R.from_quat([quat[1], quat[2], quat[3], quat[0]]) # MuJoCo uses [qw, qx, qy, qz]
        euler = rot.as_euler('zyx', degrees=True)
        euler_angles.append(euler.tolist())
    # Fully structured snapshot of: Positions,
    # Orientations (in both quaternion and Euler angle forms),
    # Linear and angular velocities, and Stability flags
    physics_state = {
        "positions_xyz": data.qpos[:num_objects * 7].reshape(num_objects, 7)[:, :3].tolist(), # [x,y,z,qw,qx,qy,qz]
        "orientations_quat": data.qpos[:num_objects * 7].reshape(num_objects, 7)[:, 3:].tolist(),
        "orientations_euler": euler_angles,
        "velocities_linear": data.qvel[:num_objects * 6].reshape(num_objects, 6)[:, :3].tolist(), # [vx,vy,vz,wx,wy,wz]
        "velocities_angular": data.qvel[:num_objects * 6].reshape(num_objects, 6)[:, 3:].tolist(),
        "stable_flags": stability_flags,
        #"stable_verbose": classify_stability_verbose(data, model), more interpretable to LLMs
    }
    # Save JSON Snapshot (optional, for LLM input/debugging)
    #json_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{seed}_state.json")
    # Save JSON with unique ID
    json_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{unique_id}_state.json")
    with open(json_path, "w") as f:
        json.dump(physics_state, f, indent=2)
    # LLM-friendly prompt output
    formatted = format_llm_prompt(physics_state)
    prompt = format_as_natural_language_prompt(
        formatted,
        task_description="Explain which objects are likely to fall if the tray is tilted slightly to the right."
    )
    # Save to GIF
    #gif_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{seed}.gif")
    # Save GIF with unique ID
    gif_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{unique_id}.gif")
    imageio.mimsave(gif_path, frames, fps=20)
    return gif_path, stability_flags, physics_state, prompt, json_path # optionally also return json_path