# llmphy / tray_sim.py
# Author: chrisjcc — commit d74efa4 (verified): "Removed Depth and Segmentation Code"
import os
import json
import tempfile
import imageio
import numpy as np
import mujoco
# Extract final object rotations in Euler angles for interpretability (optional)
from scipy.spatial.transform import Rotation as R
# Path to the MuJoCo XML scene describing the tray and its free bodies.
MODEL_PATH = "assets/tray.xml"
N_OBJECTS = 5  # number of dynamic blocks to randomize
PUSH_START_STEP = 50  # simulation step at which the pusher is given its velocity
SIM_STEPS = 200  # total number of physics steps per run
IMPACT_STEP = 60  # is a good starting point, just after the pusher activates.
def quat_to_rotmat(q):
    """Convert a unit quaternion (w, x, y, z) into a 3x3 rotation matrix."""
    w, x, y, z = q
    # Precompute the pairwise products that appear in the standard
    # quaternion-to-matrix formula.
    xx, yy, zz = x * x, y * y, z * z
    xy, xz, yz = x * y, x * z, y * z
    wx, wy, wz = w * x, w * y, w * z
    return np.array([
        [1 - 2 * (yy + zz), 2 * (xy - wz),     2 * (xz + wy)],
        [2 * (xy + wz),     1 - 2 * (xx + zz), 2 * (yz - wx)],
        [2 * (xz - wy),     2 * (yz + wx),     1 - 2 * (xx + yy)],
    ])
def classify_orientation_stability(data, model, alpha_deg=45, n_objects=None):
    """
    Check object stability using orientation (tilt of the body's local z-axis).

    An object is stable when the angle between its local z-axis and the world
    up direction is below `alpha_deg`.

    Args:
        data: MuJoCo data object; qpos holds 7 values per free body
            [x, y, z, qw, qx, qy, qz].
        model: MuJoCo model (unused, passed alongside data by convention).
        alpha_deg (float): Maximum allowed tilt angle in degrees.
        n_objects (int, optional): Number of free bodies to check.
            Defaults to the module-level N_OBJECTS.

    Returns:
        List[bool]: True for each object whose tilt is within the limit.
    """
    if n_objects is None:
        n_objects = N_OBJECTS
    cos_alpha = np.cos(np.deg2rad(alpha_deg))
    stable_objects = []
    for i in range(n_objects):
        q = data.qpos[i*7 + 3 : i*7 + 7]  # [qw, qx, qy, qz]
        # For a unit quaternion, the world z-component of the body's local
        # z-axis is the (2, 2) entry of the rotation matrix, 1 - 2*(x^2 + y^2).
        # This equals -dot(g, R[:, 2]) for gravity direction g = [0, 0, -1],
        # i.e. cos(tilt angle) — no need to build the full rotation matrix.
        cos_theta = 1.0 - 2.0 * (q[1]**2 + q[2]**2)
        stable_objects.append(bool(cos_theta > cos_alpha))
    return stable_objects
def classify_orientation_stability_verbose(data, model, alpha_deg=45, n_objects=None):
    """
    Orientation-based stability check that also reports the offending tilt angle.

    Same criterion as classify_orientation_stability: stable when the body's
    local z-axis is within `alpha_deg` of the world up direction.

    Args:
        data: MuJoCo data object; qpos holds 7 values per free body.
        model: MuJoCo model (unused, passed alongside data by convention).
        alpha_deg (float): Maximum allowed tilt angle in degrees.
        n_objects (int, optional): Number of free bodies to check.
            Defaults to the module-level N_OBJECTS.

    Returns:
        List[dict]: One {"stable": bool, "reason": str} entry per object.
    """
    if n_objects is None:
        n_objects = N_OBJECTS
    cos_alpha = np.cos(np.deg2rad(alpha_deg))
    results = []
    for i in range(n_objects):
        q = data.qpos[i*7 + 3 : i*7 + 7]  # [qw, qx, qy, qz]
        # cos(tilt) via the R[2][2] entry of the rotation matrix; see
        # classify_orientation_stability for the derivation.
        cos_theta = 1.0 - 2.0 * (q[1]**2 + q[2]**2)
        if cos_theta > cos_alpha:
            results.append({"stable": True, "reason": "upright (tilt within limit)"})
        else:
            # Clip before arccos: rounding can push |cos_theta| past 1.0,
            # which would make arccos return NaN.
            angle_deg = np.rad2deg(np.arccos(np.clip(cos_theta, -1.0, 1.0)))
            results.append({
                "stable": False,
                "reason": f"tilt angle {angle_deg:.2f}° exceeds {alpha_deg}°"
            })
    return results
def classify_stability(data, model, n_objects=None, tray_bounds=0.3, min_height=0.01):
    """
    Classify object stability based on position data.

    An object counts as stable when it is still above the tray surface
    (z > min_height) and within the horizontal tray bounds
    (|x| < tray_bounds and |y| < tray_bounds).

    Args:
        data: MuJoCo data object; qpos holds 7 values per free body
            [x, y, z, qw, qx, qy, qz].
        model: MuJoCo model (unused in this function but typically passed
            alongside data).
        n_objects (int, optional): Number of free bodies to check.
            Defaults to the module-level N_OBJECTS.
        tray_bounds (float): Half-width of the tray in the x/y plane.
        min_height (float): Minimum z position for an object to count as upright.

    Returns:
        List[bool]: True for each object judged stable.
    """
    if n_objects is None:
        n_objects = N_OBJECTS
    stable_objects = []
    for i in range(n_objects):
        x, y, z = data.qpos[i*7 : i*7 + 3]
        inside = abs(x) < tray_bounds and abs(y) < tray_bounds
        stable_objects.append(bool(inside and z > min_height))
    return stable_objects
# Extend the classifier to explain why something is unstable:
def classify_stability_verbose(data, model, n_objects=None, tray_bounds=0.3, min_height=0.01):
    """
    Position-based stability check that also reports why an object failed.

    Uses the same thresholds as classify_stability — stable requires
    z > min_height and |x|, |y| < tray_bounds — so the two classifiers
    always agree, including at the exact threshold values.

    Args:
        data: MuJoCo data object; qpos holds 7 values per free body.
        model: MuJoCo model (unused, passed alongside data by convention).
        n_objects (int, optional): Number of free bodies to check.
            Defaults to the module-level N_OBJECTS.
        tray_bounds (float): Half-width of the tray in the x/y plane.
        min_height (float): Minimum z position for an object to count as upright.

    Returns:
        List[dict]: One {"stable": bool, "reason": str} entry per object.
    """
    if n_objects is None:
        n_objects = N_OBJECTS
    flags = []
    for i in range(n_objects):
        pos = data.qpos[i*7 : i*7 + 3]
        reasons = []
        # `<=` / `>=` (not strict `<` / `>`) keeps the boundary cases
        # consistent with classify_stability, which requires z strictly
        # greater than min_height and |x|, |y| strictly inside the bounds.
        if pos[2] <= min_height:
            reasons.append("z too low (possibly toppled)")
        if abs(pos[0]) >= tray_bounds or abs(pos[1]) >= tray_bounds:
            reasons.append("outside tray bounds")
        if not reasons:
            flags.append({"stable": True, "reason": "object upright and within tray"})
        else:
            flags.append({"stable": False, "reason": ", ".join(reasons)})
    return flags
# Create LLM-Friendly Prompt Template
def format_llm_prompt(physics_state):
    """
    Build a JSON-serializable per-object summary of a simulated scene.

    Each entry describes one object's 3D position, ZYX Euler orientation
    (in degrees), linear velocity, angular velocity, and a "Stable" /
    "Unstable" status, with values rounded for readability — suitable for
    prompting a large language model (LLM).

    Parameters:
        physics_state (dict): Physics properties per object with keys:
            - "positions_xyz": (N, 3) list of positions [x, y, z]
            - "orientations_euler": (N, 3) list of orientations [z, y, x] in degrees
            - "velocities_linear": (N, 3) list of linear velocity vectors
            - "velocities_angular": (N, 3) list of angular velocity vectors
            - "stable_flags": N booleans, True when the object is stable

    Returns:
        List[dict]: One formatted dictionary per object, JSON-serializable.
    """
    formatted = []
    for idx, position in enumerate(physics_state["positions_xyz"]):
        formatted.append({
            "id": idx,
            "position": np.round(position, 3).tolist(),
            "orientation_euler_zyx_deg": np.round(physics_state["orientations_euler"][idx], 1).tolist(),
            "linear_velocity": np.round(physics_state["velocities_linear"][idx], 3).tolist(),
            "angular_velocity": np.round(physics_state["velocities_angular"][idx], 3).tolist(),
            "status": "Stable" if physics_state["stable_flags"][idx] else "Unstable",
        })
    return formatted
def format_as_natural_language_prompt(scene_description, task_description=None):
    """
    Render a structured physics state as a plain-English prompt for LLMs.

    Args:
        scene_description (List[dict]): Output of format_llm_prompt().
        task_description (str, optional): Instruction prepended before the
            scene summary, such as "Describe which blocks are unstable and why"
            or "Predict which objects might move further if the tray tilts".

    Returns:
        str: Natural language prompt for an LLM.
    """
    parts = []
    if task_description:
        parts.append(task_description.strip())
        parts.append("")  # blank line for separation
    parts.append("Here is the scene summary:")
    parts.extend(
        f"Object {obj['id']} is at position {obj['position']} with orientation {obj['orientation_euler_zyx_deg']} degrees. "
        f"It has linear velocity {obj['linear_velocity']} and angular velocity {obj['angular_velocity']}. "
        f"Status: {obj['status']}."
        for obj in scene_description
    )
    return "\n".join(parts)
def run_tray_simulation(
    seed=0,
    num_objects=N_OBJECTS,
    azimuth=45,
    elevation=-25,
    distance=0.6,
    unique_id=None  # Unique file naming
):
    """
    Run one seeded tray simulation: drop randomized blocks, push them with a
    sliding pusher, render the run, and summarize the final physics state.

    Args:
        seed (int): NumPy RNG seed for reproducible object placement.
        num_objects (int): Number of dynamic free bodies (excluding the pusher).
        azimuth (float): Camera yaw angle in degrees.
        elevation (float): Camera pitch angle in degrees.
        distance (float): Camera distance from the look-at point (zoom).
        unique_id: Identifier embedded in all output file names.

    Returns:
        tuple: (gif_path, stability_flags, physics_state, prompt, json_path)
            - gif_path (str): Animated GIF of the whole run.
            - stability_flags (List[bool]): Per-object position-based stability.
            - physics_state (dict): Final positions, orientations (quaternion
              and Euler), velocities, and stability flags.
            - prompt (str): Natural-language LLM prompt describing the scene.
            - json_path (str): JSON snapshot of physics_state.
    """
    np.random.seed(seed)
    model = mujoco.MjModel.from_xml_path(MODEL_PATH)
    data = mujoco.MjData(model)
    # Explicitly reset the simulation state.
    mujoco.mj_resetData(model, data)
    # Tray body id — kept for force-application experiments via mj_applyFT
    # (e.g. a transient impulse on the tray at IMPACT_STEP).
    tray_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, "tray")
    # DOF offsets: each free body contributes 7 qpos entries
    # [x, y, z, qw, qx, qy, qz] and 6 qvel entries [vx, vy, vz, wx, wy, wz].
    # The pusher is the last free body, after the num_objects blocks.
    pusher_idx = num_objects * 7
    pusher_vel_idx = num_objects * 6
    # Pusher setup: start slightly outside the tray corner.
    pusher_pos = np.array([0.35, -0.35, 0.08])
    pusher_quat = [1, 0, 0, 0]
    data.qpos[pusher_idx:pusher_idx+3] = pusher_pos
    data.qpos[pusher_idx+3:pusher_idx+7] = pusher_quat
    # Horizontal unit vector from the pusher toward the tray center,
    # scaled to the push speed.
    target = np.array([0.0, 0.0, 0.05])
    direction = target - pusher_pos
    direction[2] = 0.0
    direction /= np.linalg.norm(direction)
    push_velocity = 0.15 * direction  # Adjust magnitude
    # Object initialization: random positions above the tray, identity
    # orientation, starting at rest.
    for i in range(num_objects):
        obj_start = i * 7  # 7 DOF per free joint body
        pos = np.random.uniform(low=[-0.1, -0.1, 0.05], high=[0.1, 0.1, 0.15])
        quat = [1, 0, 0, 0]  # no rotation
        data.qpos[obj_start:obj_start+3] = pos
        data.qpos[obj_start+3:obj_start+7] = quat
        data.qvel[i*6:i*6+6] = 0
        print(f"Object {i} initial position: {pos}")  # Debug print
    # Renderer and camera setup.
    renderer = mujoco.Renderer(model, width=480, height=480)
    scene_option = mujoco.MjvOption()
    # NOTE: to debug collisions, enable contact visualization here, e.g.
    # scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = True
    # scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = True
    cam = mujoco.MjvCamera()
    cam.type = mujoco.mjtCamera.mjCAMERA_FREE
    cam.lookat[:] = np.array([0.0, 0.0, 0.02])  # Center of the tray
    cam.distance = distance   # Zoom
    cam.azimuth = azimuth     # Yaw angle
    cam.elevation = elevation # Pitch angle
    frames = []
    # Steps at which a still PNG is written to disk.  The final keyframe is
    # SIM_STEPS - 1: `t` never reaches SIM_STEPS inside the loop, so the
    # previous hard-coded value of 200 was never captured.
    keyframe_steps = {0, 25, 50, SIM_STEPS - 1}
    try:
        for t in range(SIM_STEPS):
            if t == PUSH_START_STEP:
                # Activate the pusher by setting its linear velocity.
                data.qvel[pusher_vel_idx:pusher_vel_idx+3] = push_velocity  # toward center
            mujoco.mj_step(model, data)
            renderer.update_scene(data, camera=cam, scene_option=scene_option)
            img = renderer.render()
            if t in keyframe_steps:
                imageio.imwrite(f"/tmp/frame_{t}_{unique_id}.png", img)
            frames.append(img)
    finally:
        # Always release the GL context, even if stepping/rendering fails.
        renderer.close()
    stability_flags = classify_stability(data, model)
    print("Stability:", stability_flags)
    # Final orientations as ZYX Euler angles (degrees) for interpretability.
    euler_angles = []
    for i in range(num_objects):
        quat = data.qpos[i*7+3 : i*7+7]  # MuJoCo stores [qw, qx, qy, qz]
        # SciPy expects [qx, qy, qz, qw], so reorder before converting.
        rot = R.from_quat([quat[1], quat[2], quat[3], quat[0]])
        euler_angles.append(rot.as_euler('zyx', degrees=True).tolist())
    # Fully structured snapshot: positions, orientations (quaternion and
    # Euler), linear/angular velocities, and stability flags.
    qpos_per_obj = data.qpos[:num_objects * 7].reshape(num_objects, 7)  # [x,y,z,qw,qx,qy,qz]
    qvel_per_obj = data.qvel[:num_objects * 6].reshape(num_objects, 6)  # [vx,vy,vz,wx,wy,wz]
    physics_state = {
        "positions_xyz": qpos_per_obj[:, :3].tolist(),
        "orientations_quat": qpos_per_obj[:, 3:].tolist(),
        "orientations_euler": euler_angles,
        "velocities_linear": qvel_per_obj[:, :3].tolist(),
        "velocities_angular": qvel_per_obj[:, 3:].tolist(),
        "stable_flags": stability_flags,
    }
    # Save JSON snapshot with unique ID (for LLM input/debugging).
    json_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{unique_id}_state.json")
    with open(json_path, "w") as f:
        json.dump(physics_state, f, indent=2)
    # LLM-friendly prompt output.
    formatted = format_llm_prompt(physics_state)
    prompt = format_as_natural_language_prompt(
        formatted,
        task_description="Explain which objects are likely to fall if the tray is tilted slightly to the right."
    )
    # Save the full run as a GIF with unique ID.
    gif_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{unique_id}.gif")
    imageio.mimsave(gif_path, frames, fps=20)
    return gif_path, stability_flags, physics_state, prompt, json_path