# NOTE: stray listing metadata (file size, git-blame commit hashes, and a run
# of line numbers) was removed from the top of this file; it was extraction
# residue, not part of the Python source.
import os
import json
import tempfile
import imageio
import numpy as np
import mujoco
# Extract final object rotations in Euler angles for interpretability (optional)
from scipy.spatial.transform import Rotation as R
# Path to the MuJoCo XML scene description (tray plus free-joint bodies).
MODEL_PATH = "assets/tray.xml"
N_OBJECTS = 5 # number of dynamic blocks to randomize
# Simulation step at which the pusher body is given its velocity.
PUSH_START_STEP = 50
# Total number of mj_step calls (and rendered frames) per run.
SIM_STEPS = 200
IMPACT_STEP = 60 # is a good starting point, just after the pusher activates.
def quat_to_rotmat(q):
    """Convert a quaternion to a 3x3 rotation matrix.

    Args:
        q: Quaternion in MuJoCo's scalar-first [w, x, y, z] order. The input
           is normalized internally, so slightly drifted (non-unit)
           quaternions are handled correctly.

    Returns:
        np.ndarray of shape (3, 3): the corresponding rotation matrix.
    """
    q = np.asarray(q, dtype=float)
    # The expansion below is only a rotation matrix for a *unit* quaternion;
    # normalize first so numerical drift in q cannot skew the result.
    w, x, y, z = q / np.linalg.norm(q)
    return np.array([
        [1 - 2*y**2 - 2*z**2, 2*x*y - 2*z*w,       2*x*z + 2*y*w],
        [2*x*y + 2*z*w,       1 - 2*x**2 - 2*z**2, 2*y*z - 2*x*w],
        [2*x*z - 2*y*w,       2*y*z + 2*x*w,       1 - 2*x**2 - 2*y**2],
    ])
def classify_orientation_stability(data, model, alpha_deg=45):
    """
    Classify each object's stability from its tilt angle.

    An object counts as stable when its local z-axis is within
    ``alpha_deg`` degrees of world-up.

    Returns a list of booleans, True if the object is stable.
    """
    tilt_limit = np.cos(np.deg2rad(alpha_deg))
    gravity = np.array([0, 0, -1])
    flags = []
    for idx in range(N_OBJECTS):
        quat = data.qpos[idx*7 + 3 : idx*7 + 7]
        # Local z-axis of the object expressed in world coordinates.
        z_axis_world = quat_to_rotmat(quat)[:, 2]
        # Negated dot product because gravity points downward.
        alignment = -np.dot(gravity, z_axis_world)
        flags.append(alignment > tilt_limit)
    return flags
def classify_orientation_stability_verbose(data, model, alpha_deg=45):
    """Classify each object's tilt stability with a human-readable reason.

    Args:
        data: MuJoCo data object; qpos is read as N_OBJECTS free joints
              (7 DOF each: position then [w, x, y, z] quaternion).
        model: MuJoCo model (unused; kept for a consistent classifier signature).
        alpha_deg: Maximum tilt angle in degrees still considered stable.

    Returns:
        List[dict]: one {"stable": bool, "reason": str} entry per object.
    """
    results = []
    cos_alpha = np.cos(np.deg2rad(alpha_deg))
    g = np.array([0, 0, -1])
    for i in range(N_OBJECTS):
        q = data.qpos[i*7 + 3 : i*7 + 7]
        rotmat = quat_to_rotmat(q)
        obj_z_world = rotmat[:, 2]  # local z-axis in world coordinates
        # Clip to [-1, 1] so floating-point error can never push the value
        # outside arccos's domain (which would produce NaN angles).
        cos_theta = np.clip(-np.dot(g, obj_z_world), -1.0, 1.0)
        if cos_theta > cos_alpha:
            results.append({"stable": True, "reason": "upright (tilt within limit)"})
        else:
            angle_deg = np.rad2deg(np.arccos(cos_theta))
            results.append({
                "stable": False,
                "reason": f"tilt angle {angle_deg:.2f}° exceeds {alpha_deg}°"
            })
    return results
def classify_stability(data, model):
    """
    Classify object stability from position alone.

    An object is "stable" when it sits above the floor (z > 0.01) and its
    x/y coordinates remain inside the tray bounds of ±0.3.

    Args:
        data: MuJoCo data object containing the simulation state; qpos is
              read as N_OBJECTS free joints (7 DOF each).
        model: MuJoCo model (unused here, passed alongside data by convention).

    Returns:
        List of booleans indicating whether each object is stable.
    """
    tray_half_extent = 0.3
    min_height = 0.01
    flags = []
    for idx in range(N_OBJECTS):
        x, y, z = data.qpos[idx*7 : idx*7 + 3]
        within_tray = abs(x) < tray_half_extent and abs(y) < tray_half_extent
        flags.append(bool(within_tray and z > min_height))
    return flags
# Extend the classifier to explain why something is unstable:
def classify_stability_verbose(data, model):
    """Position-based stability check that also reports why an object failed.

    Returns a list of {"stable": bool, "reason": str} dicts, one per object.
    """
    half_extent = 0.3
    report = []
    for idx in range(N_OBJECTS):
        px, py, pz = data.qpos[idx*7 : idx*7 + 3]
        problems = []
        if pz < 0.01:
            problems.append("z too low (possibly toppled)")
        if abs(px) > half_extent or abs(py) > half_extent:
            problems.append("outside tray bounds")
        if problems:
            report.append({"stable": False, "reason": ", ".join(problems)})
        else:
            report.append({"stable": True, "reason": "object upright and within tray"})
    return report
# Create LLM-Friendly Prompt Template
def format_llm_prompt(physics_state):
    """
    Convert a physics-state dictionary into a JSON-serializable list of
    per-object dicts suitable for prompting a large language model (LLM).

    Each object is described by its:
    - 3D position (rounded to 3 decimals)
    - orientation in ZYX Euler angles, degrees (rounded to 1 decimal)
    - linear velocity (rounded to 3 decimals)
    - angular velocity (rounded to 3 decimals)
    - stability status ("Stable" or "Unstable")

    Parameters:
        physics_state (dict): Physics properties of each object, with keys:
            - "positions_xyz": list of shape (N, 3) position vectors [x, y, z]
            - "orientations_euler": list of shape (N, 3) [z, y, x] angles in degrees
            - "velocities_linear": list of shape (N, 3) linear velocity vectors
            - "velocities_angular": list of shape (N, 3) angular velocity vectors
            - "stable_flags": list of N bools, True when the object is stable

    Returns:
        List[dict]: one formatted, JSON-serializable dict per object.
    """
    formatted = []
    # enumerate over positions drives the object count; the remaining lists
    # are expected to be parallel (same length N).
    for i, position in enumerate(physics_state["positions_xyz"]):
        formatted.append({
            "id": i,
            "position": np.round(position, 3).tolist(),
            "orientation_euler_zyx_deg": np.round(physics_state["orientations_euler"][i], 1).tolist(),
            "linear_velocity": np.round(physics_state["velocities_linear"][i], 3).tolist(),
            "angular_velocity": np.round(physics_state["velocities_angular"][i], 3).tolist(),
            "status": "Stable" if physics_state["stable_flags"][i] else "Unstable",
        })
    return formatted
def format_as_natural_language_prompt(scene_description, task_description=None):
    """
    Render a structured physics state as a natural-language prompt for LLMs.

    Args:
        scene_description (List[dict]): Output of format_llm_prompt().
        task_description (str, optional): Instruction prepended to the
            summary, e.g. "Describe which blocks are unstable and why".

    Returns:
        str: Natural-language prompt, one line per object.
    """
    parts = []
    if task_description:
        parts.append(task_description.strip())
        parts.append("")  # blank line separating the task from the summary
    parts.append("Here is the scene summary:")
    parts.extend(
        f"Object {obj['id']} is at position {obj['position']} with orientation "
        f"{obj['orientation_euler_zyx_deg']} degrees. "
        f"It has linear velocity {obj['linear_velocity']} and angular velocity "
        f"{obj['angular_velocity']}. "
        f"Status: {obj['status']}."
        for obj in scene_description
    )
    return "\n".join(parts)
def run_tray_simulation(
    seed=0,
    num_objects=N_OBJECTS,
    azimuth=45,
    elevation=-25,
    distance=0.6,
    unique_id=None # Unique file naming
):
    """Run one randomized tray simulation, render it, and export state.

    Objects are dropped at random positions on the tray; a pusher body is
    given a velocity toward the tray center at PUSH_START_STEP, and SIM_STEPS
    steps are simulated and rendered. Outputs (keyframe PNGs, a GIF, and a
    JSON snapshot of the final physics state) are written under the system
    temp directory, named with ``unique_id``.

    Assumes the qpos layout is: num_objects free joints (7 DOF each) followed
    by the pusher's free joint — TODO confirm against the tray.xml model.

    Args:
        seed: NumPy RNG seed controlling the random object placement.
        num_objects: Number of dynamic free-joint objects before the pusher.
        azimuth: Free-camera yaw angle in degrees.
        elevation: Free-camera pitch angle in degrees.
        distance: Free-camera distance (zoom) from the look-at point.
        unique_id: Suffix used to build unique output filenames.

    Returns:
        Tuple of (gif_path, stability_flags, physics_state, prompt, json_path).
    """
    np.random.seed(seed)
    model = mujoco.MjModel.from_xml_path(MODEL_PATH)
    data = mujoco.MjData(model)
    # Explicitly reset the simulation state
    mujoco.mj_resetData(model, data)
    # Body id of the tray (used only by the commented-out impulse code below).
    tray_id = mujoco.mj_name2id(
        model,
        mujoco.mjtObj.mjOBJ_BODY,
        "tray"
    )
    # Apply a transient force to the tray
    tray_force_applied = True # False # One-time trigger
    # Locate pusher DOF index (last one in qpos if you have 10 objects)
    # Compute DOF offsets: free joints use 7 qpos entries but 6 qvel entries.
    pusher_idx = num_objects * 7
    pusher_vel_idx = num_objects * 6
    # Pusher setup
    pusher_pos = np.array([0.35, -0.35, 0.08]) # Slightly outside tray corner
    pusher_quat = [1, 0, 0, 0]
    data.qpos[pusher_idx:pusher_idx+3] = pusher_pos
    data.qpos[pusher_idx+3:pusher_idx+7] = pusher_quat
    # Compute velocity towards center (horizontal only: z component zeroed).
    target = np.array([0.0, 0.0, 0.05])
    direction = target - pusher_pos
    direction[2] = 0.0
    direction /= np.linalg.norm(direction)
    push_velocity = 0.15 * direction # Adjust magnitude
    # Object initialization: random positions above the tray, identity
    # orientation, zero initial velocity.
    for i in range(num_objects):
        obj_start = i * 7 # 7 DOF per free joint body
        pos = np.random.uniform(low=[-0.1, -0.1, 0.05], high=[0.1, 0.1, 0.15])
        quat = [1, 0, 0, 0] # no rotation
        data.qpos[obj_start:obj_start+3] = pos
        data.qpos[obj_start+3:obj_start+7] = quat
        data.qvel[i*6:i*6+6] = 0
        print(f"Object {i} initial position: {pos}") # Debug print
    # Renderer setup
    renderer = mujoco.Renderer(model, width=480, height=480) # width=640, height=640
    # Custom camera parameters
    scene_option = mujoco.MjvOption()
    # Enable contact visualization in the renderer to debug collisions
    # Check if contact points appear when the puck hits the tray. If not,
    # collisions are not being detected.
    #scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTPOINT] = True
    #scene_option.flags[mujoco.mjtVisFlag.mjVIS_CONTACTFORCE] = True
    #scene_option.flags[mujoco.mjtVisFlag.mjVIS_JOINT] = True # if you want to see joint frames or anchors to understand the articulated structure.
    #scene_option.flags[mujoco.mjtVisFlag.mjVIS_TRANSPARENT] = True # if the model uses transparent geoms and you want them shown correctly.
    cam = mujoco.MjvCamera()
    cam.type = mujoco.mjtCamera.mjCAMERA_FREE
    cam.lookat[:] = np.array([0.0, 0.0, 0.02]) # Center of the tray
    cam.distance = distance # Zoom
    cam.azimuth = azimuth # Yaw angle
    cam.elevation = elevation # Pitch angle
    frames = []
    # Steps at which a still PNG is saved (membership test uses the keys).
    # NOTE(review): t iterates over range(SIM_STEPS) == [0, 200), so the
    # 200 keyframe is never reached — confirm whether 199 was intended.
    keyframes = {0: None, 25: None, 50: None, 200: None}
    for t in range(SIM_STEPS):
        if t == PUSH_START_STEP:
            # Activate pusher
            #data.qvel[pusher_vel_idx:pusher_vel_idx+3] = [-0.05, 0.05, 0.0] # toward center
            data.qvel[pusher_vel_idx:pusher_vel_idx+3] = push_velocity # toward center
        #if t == IMPACT_STEP and not tray_force_applied:
        #    mujoco.mj_applyFT(
        #        model,
        #        data,
        #        np.array([5.0, 0.0, 0.0]), # force (e.g. impulse to the right; adjust magnitude)
        #        np.array([0.0, 0.0, 0.0]), # torque
        #        np.array([0.0, 0.0, 0.0]), # point in global frame (apply at body origin)
        #        tray_id, # body ID
        #        data.qfrc_applied # force accumulator to apply into
        #    )
        #    tray_force_applied = True
        mujoco.mj_step(model, data)
        renderer.update_scene(
            data,
            camera=cam,
            scene_option=scene_option
        )
        img = renderer.render()
        if t in keyframes:
            #imageio.imwrite(f"/tmp/frame_{t}.png", img)
            imageio.imwrite(f"/tmp/frame_{t}_{unique_id}.png", img)
        frames.append(img)
    renderer.close()
    # Position-based classification of the final state.
    stability_flags = classify_stability(data, model)
    # Optional: print or return
    print("Stability:", stability_flags)
    # Extract final object rotations in Euler angles for interpretability.
    euler_angles = []
    for i in range(num_objects):
        quat = data.qpos[i*7+3 : i*7+7] # qw, qx, qy, qz
        # Convert to Euler (yaw-pitch-roll); SciPy expects scalar-last
        # [qx, qy, qz, qw], hence the reordering.
        rot = R.from_quat([quat[1], quat[2], quat[3], quat[0]]) # MuJoCo uses [qw, qx, qy, qz]
        euler = rot.as_euler('zyx', degrees=True)
        euler_angles.append(euler.tolist())
    # Fully structured snapshot of: Positions,
    # Orientations (in both quaternion and Euler angle forms),
    # Linear and angular velocities, and Stability flags
    physics_state = {
        "positions_xyz": data.qpos[:num_objects * 7].reshape(num_objects, 7)[:, :3].tolist(), # [x,y,z,qw,qx,qy,qz]
        "orientations_quat": data.qpos[:num_objects * 7].reshape(num_objects, 7)[:, 3:].tolist(),
        "orientations_euler": euler_angles,
        "velocities_linear": data.qvel[:num_objects * 6].reshape(num_objects, 6)[:, :3].tolist(), # [vx,vy,vz,wx,wy,wz]
        "velocities_angular": data.qvel[:num_objects * 6].reshape(num_objects, 6)[:, 3:].tolist(),
        "stable_flags": stability_flags,
        #"stable_verbose": classify_stability_verbose(data, model), more interpretable to LLMs
    }
    # Save JSON Snapshot (optional, for LLM input/debugging)
    #json_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{seed}_state.json")
    # Save JSON with unique ID
    json_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{unique_id}_state.json")
    with open(json_path, "w") as f:
        json.dump(physics_state, f, indent=2)
    # LLM-friendly prompt output
    formatted = format_llm_prompt(physics_state)
    prompt = format_as_natural_language_prompt(
        formatted,
        task_description="Explain which objects are likely to fall if the tray is tilted slightly to the right."
    )
    # Save to GIF
    #gif_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{seed}.gif")
    # Save GIF with unique ID
    gif_path = os.path.join(tempfile.gettempdir(), f"tray_sim_{unique_id}.gif")
    imageio.mimsave(gif_path, frames, fps=20)
    return gif_path, stability_flags, physics_state, prompt, json_path # optionally also return json_path