# llmphy / tray_sim.py
# Author: chrisjcc — commit d74efa4 (verified): "Removed Depth and Segmentation Code"
import os
import json
import tempfile
import imageio
import numpy as np
import mujoco
# Extract final object rotations in Euler angles for interpretability (optional)
from scipy.spatial.transform import Rotation as R
# Path to the MuJoCo XML scene describing the tray and its free bodies.
MODEL_PATH = "assets/tray.xml"
N_OBJECTS = 5  # number of dynamic blocks to randomize
PUSH_START_STEP = 50  # simulation step at which the pusher is given its velocity
SIM_STEPS = 200  # total number of physics steps per run
IMPACT_STEP = 60  # is a good starting point, just after the pusher activates.
def quat_to_rotmat(q):
    """Convert a unit quaternion (w, x, y, z) into a 3x3 rotation matrix."""
    w, x, y, z = q
    # Precompute the pairwise products that appear in the standard
    # quaternion-to-matrix formula.
    xx, yy, zz = x * x, y * y, z * z
    xy, xz, yz = x * y, x * z, y * z
    wx, wy, wz = w * x, w * y, w * z
    return np.array([
        [1 - 2 * (yy + zz), 2 * (xy - wz),     2 * (xz + wy)],
        [2 * (xy + wz),     1 - 2 * (xx + zz), 2 * (yz - wx)],
        [2 * (xz - wy),     2 * (yz + wx),     1 - 2 * (xx + yy)],
    ])
def classify_orientation_stability(data, model, alpha_deg=45, n_objects=None):
    """
    Check object stability using orientation (tilt of the body's local z-axis).

    An object is stable when the angle between its local z-axis and the world
    up direction is below `alpha_deg`.

    Args:
        data: MuJoCo data object; qpos holds 7 values per free body
            [x, y, z, qw, qx, qy, qz].
        model: MuJoCo model (unused, passed alongside data by convention).
        alpha_deg (float): Maximum allowed tilt angle in degrees.
        n_objects (int, optional): Number of free bodies to check.
            Defaults to the module-level N_OBJECTS.

    Returns:
        List[bool]: True for each object whose tilt is within the limit.
    """
    if n_objects is None:
        n_objects = N_OBJECTS
    cos_alpha = np.cos(np.deg2rad(alpha_deg))
    stable_objects = []
    for i in range(n_objects):
        q = data.qpos[i*7 + 3 : i*7 + 7]  # [qw, qx, qy, qz]
        # For a unit quaternion, the world z-component of the body's local
        # z-axis is the (2, 2) entry of the rotation matrix, 1 - 2*(x^2 + y^2).
        # This equals -dot(g, R[:, 2]) for gravity direction g = [0, 0, -1],
        # i.e. cos(tilt angle) — no need to build the full rotation matrix.
        cos_theta = 1.0 - 2.0 * (q[1]**2 + q[2]**2)
        stable_objects.append(bool(cos_theta > cos_alpha))
    return stable_objects
def classify_orientation_stability_verbose(data, model, alpha_deg=45, n_objects=None):
    """
    Orientation-based stability check that also reports the offending tilt angle.

    Same criterion as classify_orientation_stability: stable when the body's
    local z-axis is within `alpha_deg` of the world up direction.

    Args:
        data: MuJoCo data object; qpos holds 7 values per free body.
        model: MuJoCo model (unused, passed alongside data by convention).
        alpha_deg (float): Maximum allowed tilt angle in degrees.
        n_objects (int, optional): Number of free bodies to check.
            Defaults to the module-level N_OBJECTS.

    Returns:
        List[dict]: One {"stable": bool, "reason": str} entry per object.
    """
    if n_objects is None:
        n_objects = N_OBJECTS
    cos_alpha = np.cos(np.deg2rad(alpha_deg))
    results = []
    for i in range(n_objects):
        q = data.qpos[i*7 + 3 : i*7 + 7]  # [qw, qx, qy, qz]
        # cos(tilt) via the R[2][2] entry of the rotation matrix; see
        # classify_orientation_stability for the derivation.
        cos_theta = 1.0 - 2.0 * (q[1]**2 + q[2]**2)
        if cos_theta > cos_alpha:
            results.append({"stable": True, "reason": "upright (tilt within limit)"})
        else:
            # Clip before arccos: rounding can push |cos_theta| past 1.0,
            # which would make arccos return NaN.
            angle_deg = np.rad2deg(np.arccos(np.clip(cos_theta, -1.0, 1.0)))
            results.append({
                "stable": False,
                "reason": f"tilt angle {angle_deg:.2f}° exceeds {alpha_deg}°"
            })
    return results
def classify_stability(data, model, n_objects=None, tray_bounds=0.3, min_height=0.01):
    """
    Classify object stability based on position data.

    An object counts as stable when it is still above the tray surface
    (z > min_height) and within the horizontal tray bounds
    (|x| < tray_bounds and |y| < tray_bounds).

    Args:
        data: MuJoCo data object; qpos holds 7 values per free body
            [x, y, z, qw, qx, qy, qz].
        model: MuJoCo model (unused in this function but typically passed
            alongside data).
        n_objects (int, optional): Number of free bodies to check.
            Defaults to the module-level N_OBJECTS.
        tray_bounds (float): Half-width of the tray in the x/y plane.
        min_height (float): Minimum z position for an object to count as upright.

    Returns:
        List[bool]: True for each object judged stable.
    """
    if n_objects is None:
        n_objects = N_OBJECTS
    stable_objects = []
    for i in range(n_objects):
        x, y, z = data.qpos[i*7 : i*7 + 3]
        inside = abs(x) < tray_bounds and abs(y) < tray_bounds
        stable_objects.append(bool(inside and z > min_height))
    return stable_objects
# Extend the classifier to explain why something is unstable:
def classify_stability_verbose(data, model, n_objects=None, tray_bounds=0.3, min_height=0.01):
    """
    Position-based stability check that also reports why an object failed.

    Uses the same thresholds as classify_stability — stable requires
    z > min_height and |x|, |y| < tray_bounds — so the two classifiers
    always agree, including at the exact threshold values.

    Args:
        data: MuJoCo data object; qpos holds 7 values per free body.
        model: MuJoCo model (unused, passed alongside data by convention).
        n_objects (int, optional): Number of free bodies to check.
            Defaults to the module-level N_OBJECTS.
        tray_bounds (float): Half-width of the tray in the x/y plane.
        min_height (float): Minimum z position for an object to count as upright.

    Returns:
        List[dict]: One {"stable": bool, "reason": str} entry per object.
    """
    if n_objects is None:
        n_objects = N_OBJECTS
    flags = []
    for i in range(n_objects):
        pos = data.qpos[i*7 : i*7 + 3]
        reasons = []
        # `<=` / `>=` (not strict `<` / `>`) keeps the boundary cases
        # consistent with classify_stability, which requires z strictly
        # greater than min_height and |x|, |y| strictly inside the bounds.
        if pos[2] <= min_height:
            reasons.append("z too low (possibly toppled)")
        if abs(pos[0]) >= tray_bounds or abs(pos[1]) >= tray_bounds:
            reasons.append("outside tray bounds")
        if not reasons:
            flags.append({"stable": True, "reason": "object upright and within tray"})
        else:
            flags.append({"stable": False, "reason": ", ".join(reasons)})
    return flags
# Create LLM-Friendly Prompt Template
def format_llm_prompt(physics_state):
    """
    Build a JSON-serializable per-object summary of a simulated scene.

    Each entry describes one object's 3D position, ZYX Euler orientation
    (in degrees), linear velocity, angular velocity, and a "Stable" /
    "Unstable" status, with values rounded for readability — suitable for
    prompting a large language model (LLM).

    Parameters:
        physics_state (dict): Physics properties per object with keys:
            - "positions_xyz": (N, 3) list of positions [x, y, z]
            - "orientations_euler": (N, 3) list of orientations [z, y, x] in degrees
            - "velocities_linear": (N, 3) list of linear velocity vectors
            - "velocities_angular": (N, 3) list of angular velocity vectors
            - "stable_flags": N booleans, True when the object is stable

    Returns:
        List[dict]: One formatted dictionary per object, JSON-serializable.
    """
    formatted = []
    for idx, position in enumerate(physics_state["positions_xyz"]):
        formatted.append({
            "id": idx,
            "position": np.round(position, 3).tolist(),
            "orientation_euler_zyx_deg": np.round(physics_state["orientations_euler"][idx], 1).tolist(),
            "linear_velocity": np.round(physics_state["velocities_linear"][idx], 3).tolist(),
            "angular_velocity": np.round(physics_state["velocities_angular"][idx], 3).tolist(),
            "status": "Stable" if physics_state["stable_flags"][idx] else "Unstable",
        })
    return formatted
def format_as_natural_language_prompt(scene_description, task_description=None):
    """
    Render a structured physics state as a plain-English prompt for LLMs.

    Args:
        scene_description (List[dict]): Output of format_llm_prompt().
        task_description (str, optional): Instruction prepended before the
            scene summary, such as "Describe which blocks are unstable and why"
            or "Predict which objects might move further if the tray tilts".

    Returns:
        str: Natural language prompt for an LLM.
    """
    parts = []
    if task_description:
        parts.append(task_description.strip())
        parts.append("")  # blank line for separation
    parts.append("Here is the scene summary:")
    parts.extend(
        f"Object {obj['id']} is at position {obj['position']} with orientation {obj['orientation_euler_zyx_deg']} degrees. "
        f"It has linear velocity {obj['linear_velocity']} and angular velocity {obj['angular_velocity']}. "
        f"Status: {obj['status']}."
        for obj in scene_description
    )
    return "\n".join(parts)
def run_tray_simulation(
    seed=0,
    num_objects=N_OBJECTS,
    azimuth=45,
    elevation=-25,
    distance=0.6,
    unique_id=None  # Unique file naming
):
    """
    Run one seeded tray simulation: drop randomized blocks, push them with a
    sliding pusher, render the run, and summarize the final physics state.

    Args:
        seed (int): NumPy RNG seed for reproducible object placement.
        num_objects (int): Number of dynamic free bodies (excluding the pusher).
        azimuth (float): Camera yaw angle in degrees.
        elevation (float): Camera pitch angle in degrees.
        distance (float): Camera distance from the look-at point (zoom).
        unique_id: Identifier embedded in all output file names.

    Returns:
        tuple: (gif_path, stability_flags, physics_state, prompt, json_path)
            - gif_path (str): Animated GIF of the whole run.
            - stability_flags (List[bool]): Per-object position-based stability.
            - physics_state (dict): Final positions, orientations (quaternion
              and Euler), velocities, and stability flags.
            - prompt (str): Natural-language LLM prompt describing the scene.
            - json_path (str): JSON snapshot of physics_state.
    """
    np.random.seed(seed)
    model = mujoco.MjModel.from_xml_path(MODEL_PATH)
    data = mujoco.MjData(model)
    # Explicitly reset the simulation state.
    mujoco.mj_resetData(model, data)
    # Tray body id — kept for force-application experiments via mj_applyFT
    # (e.g. a transient impulse on the tray at IMPACT_STEP).
    tray_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_BODY, "tray")
    # DOF offsets: each free body contributes 7 qpos entries
    # [x, y, z, qw, qx, qy, qz] and 6 qvel entries [vx, vy, vz, wx, wy, wz].
    # The pusher is the last free body, after the num_objects blocks.
    pusher_idx = num_objects * 7
    pusher_vel_idx = num_objects * 6
    # Pusher setup: start slightly outside the tray corner.
    pusher_pos = np.array([0.35, -0.35, 0.08])
    pusher_quat = [1, 0, 0, 0]
    data.qpos[pusher_idx:pusher_idx+3] = pusher_pos
    data.qpos[pusher_idx+3:pusher_idx+7] = pusher_quat
    # Horizontal unit vector from the pusher toward the tray center,
    # scaled to the push speed.
    target = np.array([0.0, 0.0, 0.05])
    direction = target - pusher_pos
    direction[2] = 0.0
    direction /= np.linalg.norm(direction)
    push_velocity = 0.15 * direction  # Adjust magnitude
    # Object initialization: random positions above the tray, identity
    # orientation, starting at rest.
    for i in range(num_objects):
        obj_start = i * 7  # 7 DOF per free joint body
        pos = np.random.uniform(low=[-0.1, -0.1, 0.05], high=[0.1, 0.1, 0.15])
        quat = [1, 0, 0, 0]  # no rotation
        data.qpos[obj_start:obj_start+3] = pos
        data.qpos[obj_start+3:obj_start+7] = quat
        data.qvel[i*6:i*6+6] = 0
        print(f"Object {i} initial position: {pos}")  # Debug print
    # Renderer and camera setup.
    renderer = mujoco.Renderer(model, width=480, height=480)
    scene_option = mujoco.MjvOption()
    # NOTE: to debug collisions, enable contact visualization here, e.g.
    # scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = True
    # scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = True
    cam = mujoco.MjvCamera()
    cam.type = mujoco.mjtCamera.mjCAMERA_FREE
    cam.lookat[:] = np.array([0.0, 0.0, 0.02])  # Center of the tray
    cam.distance = distance   # Zoom
    cam.azimuth = azimuth     # Yaw angle
    cam.elevation = elevation # Pitch angle
    frames = []
    # Steps at which a still PNG is written to disk.  The final keyframe is
    # SIM_STEPS - 1: `t` never reaches SIM_STEPS inside the loop, so the
    # previous hard-coded value of 200 was never captured.
    keyframe_steps = {0, 25, 50, SIM_STEPS - 1}
    try:
        for t in range(SIM_STEPS):
            if t == PUSH_START_STEP:
                # Activate the pusher by setting its linear velocity.
                data.qvel[pusher_vel_idx:pusher_vel_idx+3] = push_velocity  # toward center
            mujoco.mj_step(model, data)
            renderer.update_scene(data, camera=cam, scene_option=scene_option)
            img = renderer.render()
            if t in keyframe_steps:
                imageio.imwrite(f"/tmp/frame_{t}_{unique_id}.png", img)
            frames.append(img)
    finally:
        # Always release the GL context, even if stepping/rendering fails.
        renderer.close()
    stability_flags = classify_stability(data, model)
    print("Stability:", stability_flags)
    # Final orientations as ZYX Euler angles (degrees) for interpretability.
    euler_angles = []
    for i in range(num_objects):
        quat = data.qpos[i*7+3 : i*7+7]  # MuJoCo stores [qw, qx, qy, qz]
        # SciPy expects [qx, qy, qz, qw], so reorder before converting.
        rot = R.from_quat([quat[1], quat[2], quat[3], quat[0]])
        euler_angles.append(rot.as_euler('zyx', degrees=True).tolist())
    # Fully structured snapshot: positions, orientations (quaternion and
    # Euler), linear/angular velocities, and stability flags.
    qpos_per_obj = data.qpos[:num_objects * 7].reshape(num_objects, 7)  # [x,y,z,qw,qx,qy,qz]
    qvel_per_obj = data.qvel[:num_objects * 6].reshape(num_objects, 6)  # [vx,vy,vz,wx,wy,wz]
    physics_state = {
        "positions_xyz": qpos_per_obj[:, :3].tolist(),
        "orientations_quat": qpos_per_obj[:, 3:].tolist(),
        "orientations_euler": euler_angles,
        "velocities_linear": qvel_per_obj[:, :3].tolist(),
        "velocities_angular": qvel_per_obj[:, 3:].tolist(),
        "stable_flags": stability_flags,
    }
    # Save JSON snapshot with unique ID (for LLM input/debugging).
    json_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{unique_id}_state.json")
    with open(json_path, "w") as f:
        json.dump(physics_state, f, indent=2)
    # LLM-friendly prompt output.
    formatted = format_llm_prompt(physics_state)
    prompt = format_as_natural_language_prompt(
        formatted,
        task_description="Explain which objects are likely to fall if the tray is tilted slightly to the right."
    )
    # Save the full run as a GIF with unique ID.
    gif_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{unique_id}.gif")
    imageio.mimsave(gif_path, frames, fps=20)
    return gif_path, stability_flags, physics_state, prompt, json_path