diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..fc5a340abcf2e5202e33ecd96c02b09d67df318b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text + +*_model filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 8ed5ecbeea69382c6fcc0fb9faf2f67c2fe88cd6..5d4db0d447d9a3ecac0ecfcb45b5805a7d6238b3 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,47 @@ ---- -license: gpl-2.0 ---- +--- +license: gpl-2.0 +--- + +# AugMPCModels + +Deployment bundles for [AugMPC](https://github.com/AndrePatri/AugMPC), published alongside the preprint: + +- Andrea Patrizi, Carlo Rizzardo, Arturo Laurenzi, Francesco Ruscelli, Luca Rossini, Nikos G. Tsagarakis +- *RL-Augmented MPC for Non-Gaited Legged and Hybrid Locomotion* +- arXiv: [2603.10878](https://arxiv.org/abs/2603.10878) + +## Repository structure + +Each model is distributed as a **bundle**, not just as a checkpoint file. A bundle may include: + +- the policy checkpoint +- environment and deployment configuration files +- robot description files (`URDF`, `SRDF`) +- controller/world-interface helpers needed for reproduction +- a `bundle.yaml` manifest describing the contents + +Current layout: + +```text +bundles/ + centauro/ + / + bundle.yaml + _model + *.yaml + *.py + *.urdf + *.srdf + other/ +``` + +## Usage model + +The intended runtime flow is: + +1. resolve or download a bundle locally, +2. read `bundle.yaml`, +3. locate the checkpoint file and companion configs, +4. launch AugMPC using the resolved local bundle path. + +This repository is therefore meant to back a bundle resolver, rather than a direct `torch.load()` call on a remote file. 
diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/bundle.yaml b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/bundle.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c72ce176647c7b366a64b740c968c778559001af --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/bundle.yaml @@ -0,0 +1,107 @@ +bundle_format: augmpc_model_bundle_v1 +bundle_name: d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl +checkpoint_file: d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl_model +preserved_training_cfgs: + - ibrido_run__2026_01_19_14_51_36_ID/training_cfg_centauro_no_yaw_ub_cloop_perception.sh +framework: + repos: + AugMPC: + commit: f2ff28085ea76d2b548841de6f363f7183480f86 + branch: ibrido + remote: git@github.com:AndrePatri/AugMPC.git + dirty: false + AugMPCEnvs: + commit: 46c5d4161cb4b249b3a2e50c93c6bc2aa087f027 + branch: ibrido + remote: git@github.com:AndrePatri/AugMPCEnvs.git + dirty: false + AugMPCModels: + commit: 8b15c800c0f5684c61fdaf4847156ff71df61ebc + branch: main + remote: https://huggingface.co/AndrePatri/AugMPCModels + dirty: true + CentauroHybridMPC: + commit: 640889d4c3b9c8d360b5a3ccde6fc2bd8f139891 + branch: ibrido + remote: git@github.com:ADVRHumanoids/CentauroHybridMPC.git + dirty: false + EigenIPC: + commit: 7c1c3ecd08716e61ed164a6e0bb788aa716705ca + branch: devel + remote: git@github.com:AndrePatri/EigenIPC.git + dirty: false + IBRIDO: + commit: 0ceb5f3e0508b6ecdce12bcc8f0dcbcde8f29a93 + branch: main + remote: git@github.com:AndrePatri/IBRIDO.git + dirty: false + IsaacLab: + commit: a520a883ce996d855cc9d5255d71fd1c1307633f + branch: HEAD + remote: git@github.com:isaac-sim/IsaacLab.git + dirty: true + KyonRLStepping: + commit: 2bea2b8d70078974869df0549e90fc27ff31f851 + 
branch: ibrido + remote: git@github.com:ADVRHumanoids/KyonRLStepping.git + dirty: false + MPCHive: + commit: 45b4fc692850cef607020dda2e32fd708e7fff62 + branch: devel + remote: git@github.com:AndrePatri/MPCHive.git + dirty: false + MPCViz: + commit: 806d03efcc9d8ab1fb04991a159c19ba89bfb85b + branch: ros2_humble + remote: git@github.com:AndrePatri/MPCViz.git + dirty: false + adarl: + commit: f585499e49fa05fdd070a77f3211c0996599b87b + branch: ibrido + remote: git@github.com:c-rizz/adarl.git + dirty: false + casadi: + commit: 79cebe3b416dee22abc87de0076b80a5b97bd345 + branch: optional_float + remote: git@github.com:AndrePatri/casadi.git + dirty: true + horizon: + commit: 3b084317709ff9b88d4a07ac5436f5988b39eece + branch: ibrido + remote: git@github.com:AndrePatri/horizon.git + dirty: true + iit-centauro-ros-pkg: + commit: 6069807ebb37a6d7df39430a02685e09ed9b167a + branch: ibrido_ros2 + remote: git@github.com:AndrePatri/iit-centauro-ros-pkg.git + dirty: false + iit-dagana-ros-pkg: + commit: f7ecd6c84a0b7f3320c1b7de6449cbcd4445d2fe + branch: ibrido_ros2 + remote: git@github.com:AndrePatri/iit-dagana-ros-pkg.git + dirty: false + iit-kyon-description: + commit: 50fd7c8909b3ddfd1ebbe67c61d6b775b86df6b1 + branch: ibrido_ros2 + remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git + dirty: false + iit-kyon-description-mpc: + commit: 3a92bee28405172e8f6c628b4498703724d35bf5 + branch: ibrido_ros2 + remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git + dirty: false + iit-kyon-ros-pkg: + commit: 298917efffb63dbca540e0aedbd21b41bf393fbf + branch: ibrido_ros2_simple + remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git + dirty: false + phase_manager: + commit: 9925d18c0d7a55d013cbaa4998c61d85a3a8944f + branch: ibrido + remote: git@github.com:AndrePatri/phase_manager.git + dirty: false + unitree_ros: + commit: c75a622b8782d11dd6aa4c2ebd3b0f9c13a56aae + branch: ibrido + remote: git@github.com:AndrePatri/unitree_ros.git + dirty: true diff --git 
a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID.srdf b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID.srdf new file mode 100644 index 0000000000000000000000000000000000000000..adf957c2d948b8f62f760014fc1b9a95cb3e8cfb --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID.srdf @@ -0,0 +1,147 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID.urdf b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID.urdf new file mode 100644 index 0000000000000000000000000000000000000000..70256a67050070e647bdcf5cf3b142c14b5db50b --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_big_wheels_no_yaw_ub_2026_01_19_14_51_36_ID.urdf @@ -0,0 +1,1569 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_rhc.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..d34283b2a945a53bd53a2acd30d8423e8842e3e6 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_rhc.py @@ -0,0 +1,144 @@ +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc + +import numpy as np + +from typing import Dict + +from centaurohybridmpc.utils.sysutils import PathsGetter + +class CentauroRhc(HybridQuadRhc): + + def __init__(self, + srdf_path: str, + urdf_path: str, + robot_name: str, # used for shared memory namespaces + codegen_dir: str, + n_nodes: float = 31, + dt: float = 0.03, + injection_node: int = 10, + max_solver_iter = 1, # defaults to rt-iteration + open_loop: bool = True, + close_loop_all: bool = False, + dtype = np.float32, + verbose = False, + debug = False, + refs_in_hor_frame = True, + timeout_ms: int = 60000, + custom_opts: Dict = {} + ): + + paths = PathsGetter() + self._files_suffix="" + if open_loop: + self._files_suffix="_open" + + self._add_upper_body=False + if ("add_upper_body" in custom_opts) and \ + (custom_opts["add_upper_body"]): + self._add_upper_body=True + self._files_suffix+="_ub" + + config_path=paths.RHCCONFIGPATH_NO_WHEELS+self._files_suffix+".yaml" + + super().__init__(srdf_path=srdf_path, + urdf_path=urdf_path, + config_path=config_path, + robot_name=robot_name, # used for 
shared memory namespaces + codegen_dir=codegen_dir, + n_nodes=n_nodes, + dt=dt, + injection_node=injection_node, + max_solver_iter=max_solver_iter, # defaults to rt-iteration + open_loop=open_loop, + close_loop_all=close_loop_all, + dtype=dtype, + verbose=verbose, + debug=debug, + refs_in_hor_frame=refs_in_hor_frame, + timeout_ms=timeout_ms, + custom_opts=custom_opts) + + self._fail_idx_scale=1e-9 + self._fail_idx_thresh_open_loop=1e0 + self._fail_idx_thresh_close_loop=10 + + if open_loop: + self._fail_idx_thresh=self._fail_idx_thresh_open_loop + else: + self._fail_idx_thresh=self._fail_idx_thresh_close_loop + + # adding some additional config files for jnt imp control + self._rhc_fpaths.append(paths.JNT_IMP_CONFIG_XBOT+".yaml") + self._rhc_fpaths.append(paths.JNT_IMP_CONFIG+".yaml") + + def _set_rhc_pred_idx(self): + self._pred_node_idx=round((self._n_nodes-1)*2/3) + + def _set_rhc_cmds_idx(self): + self._rhc_cmds_node_idx=2 + + def _config_override(self): + paths = PathsGetter() + if ("control_wheels" in self._custom_opts): + if self._custom_opts["control_wheels"]: + self.config_path = paths.RHCCONFIGPATH_WHEELS+self._files_suffix+".yaml" + if ("fix_yaw" in self._custom_opts) and \ + (self._custom_opts["fix_yaw"]): + self.config_path = paths.RHCCONFIGPATH_WHEELS_NO_YAW+self._files_suffix+".yaml" + if ("replace_continuous_joints" in self._custom_opts) and \ + (not self._custom_opts["replace_continuous_joints"]): + # use continuous joints -> different config + self.config_path = paths.RHCCONFIGPATH_WHEELS_CONTINUOUS+self._files_suffix+".yaml" + if ("fix_yaw" in self._custom_opts) and \ + (self._custom_opts["fix_yaw"]): + self.config_path = paths.RHCCONFIGPATH_WHEELS_CONTINUOUS_NO_YAW+self._files_suffix+".yaml" + + else: + self._custom_opts["control_wheels"]=False + + if not self._custom_opts["control_wheels"]: + self._fixed_jnt_patterns=self._fixed_jnt_patterns+\ + ["j_wheel", + "ankle_yaw"] + self._custom_opts["replace_continuous_joints"]=True + + def 
_init_problem(self): + + if not self._custom_opts["control_wheels"]: + self._yaw_vertical_weight=120.0 + else: + self._yaw_vertical_weight=50.0 + + fixed_jnts_patterns=[ + "d435_head", + "velodyne_joint"] + + if not self._add_upper_body: + fixed_jnts_patterns.append("j_arm") + fixed_jnts_patterns.append("torso") + + if ("fix_yaw" in self._custom_opts) and \ + (self._custom_opts["fix_yaw"]): + fixed_jnts_patterns.append("ankle_yaw") + + flight_duration_sec=0.5 # [s] + flight_duration=int(flight_duration_sec/self._dt) + post_flight_duration_sec=0.2 # [s] + post_flight_duration=int(post_flight_duration_sec/self._dt) + + step_height=0.1 + if ("step_height" in self._custom_opts): + step_height=self._custom_opts["step_height"] + + super()._init_problem(fixed_jnt_patterns=fixed_jnts_patterns, + wheels_patterns=["wheel_"], + foot_linkname="wheel_1", + flight_duration=flight_duration, + post_flight_stance=post_flight_duration, + step_height=step_height, + keep_yaw_vert=True, + yaw_vertical_weight=self._yaw_vertical_weight, + vertical_landing=True, + vertical_land_weight=10.0, + phase_force_reg=5e-2, + vel_bounds_weight=1.0) \ No newline at end of file diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_rhc_wheels_continuous_no_yaw_ub.yaml b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_rhc_wheels_continuous_no_yaw_ub.yaml new file mode 100644 index 0000000000000000000000000000000000000000..606ec4458856a6953c6892733edf3e61d38a60a0 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/centauro_rhc_wheels_continuous_no_yaw_ub.yaml @@ -0,0 +1,368 @@ +solver: + type: ilqr + ipopt.linear_solver: ma57 + ipopt.tol: 0.1 +# ilqr.merit_der_threshold: 1e-3 +# ilqr.defect_norm_threshold: 1e-3 + ipopt.constr_viol_tol: 0.01 + ilqr.constraint_violation_threshold: 1e-2 +# 
ipopt.hessian_approximation: exact + ipopt.print_level: 5 + ipopt.suppress_all_output: 'yes' + ipopt.sb: 'yes' + ilqr.suppress_all_output: 'yes' + ilqr.codegen_enabled: true + ilqr.codegen_workdir: /tmp/tyhio + ilqr.enable_gn: true + ilqr.hxx_reg_base: 0.0 + ilqr.n_threads: 0 + print_time: 0 + +constraints: + - contact_1 + - contact_2 + - contact_3 + - contact_4 + +costs: + - z_contact_1 + - z_contact_2 + - z_contact_3 + - z_contact_4 + # - vz_contact_1 + # - vz_contact_2 + # - vz_contact_3 + # - vz_contact_4 + # - xy_contact_1 + # - xy_contact_2 + # - xy_contact_3 + # - xy_contact_4 + # - vxy_contact_1 + # - vxy_contact_2 + # - vxy_contact_3 + # - vxy_contact_4 + - base_lin_velxy + - base_lin_velz + - base_omega + - base_capture + - joint_posture_capture + - joint_posture_capture_torso + - joint_posture_capture_ub + - v_root_regularization + - v_jnts_regularization + - a_regularization + - joint_v_reg_wheels + - joint_a_reg_wheels + # - force_regularization + +.define: + - &w_base_omega 15. + - &w_base_vxy 20. + - &w_base_vz 20. + - &w_base_z 15. + - &w_contact_z 175.0 + - &w_contact_vz 250.0 + - &w_contact_xy 60.0 + - &w_contact_vxy 50.0 + - &w_base_capture 200. 
+ - &w_capture_postural 30.0 + - &w_capture_postural_torso 30.0 + - &w_capture_postural_ub 5.0 + - &w_v_root 2e-1 + - &w_v_jnts 2e0 + - &w_v_wheels 1e0 + - &w_a 4e-1 + - &w_a_wheels 8e-1 + - &wheel_radius 0.124 + +base_lin_velxy: + type: Cartesian + distal_link: base_link + indices: [0, 1] + nodes: ${range(1, N-5)} + cartesian_type: velocity + weight: *w_base_vxy + +base_lin_velz: + type: Cartesian + distal_link: base_link + indices: [2] + nodes: ${range(1, N-5)} + cartesian_type: velocity + weight: *w_base_vz + +base_omega: + type: Cartesian + distal_link: base_link + indices: [3, 4, 5] + nodes: ${range(1, N-5)} + cartesian_type: velocity + weight: *w_base_omega + +base_capture: + type: Cartesian + distal_link: base_link + indices: [0, 1, 2, 3, 4, 5] + nodes: ${range(N-5, N+1)} + cartesian_type: velocity + weight: *w_base_capture + +# =============================== + +rolling_contact_1: + type: Rolling + frame: wheel_1 + radius: *wheel_radius + +rolling_contact_2: + type: Rolling + frame: wheel_2 + radius: *wheel_radius + +rolling_contact_3: + type: Rolling + frame: wheel_3 + radius: *wheel_radius + +rolling_contact_4: + type: Rolling + frame: wheel_4 + radius: *wheel_radius + +# ================================== + +interaction_contact_1: + type: VertexForce + frame: contact_1 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_1 + +interaction_contact_2: + type: VertexForce + frame: contact_2 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_2 + +interaction_contact_3: + type: VertexForce + frame: contact_3 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_3 + +interaction_contact_4: + type: VertexForce + frame: contact_4 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_4 + +contact_1: + type: Contact + subtask: [interaction_contact_1, rolling_contact_1] + +contact_2: + type: Contact + subtask: [interaction_contact_2, 
rolling_contact_2] + +contact_3: + type: Contact + subtask: [interaction_contact_3, rolling_contact_3] + +contact_4: + type: Contact + subtask: [interaction_contact_4, rolling_contact_4] + +joint_posture_capture: + type: Postural + weight: *w_capture_postural + indices: [0, 1, 2, 3, + 6, 7, 8, 9, + 12, 13, 14, 15, + 18, 19, 20, 21] + nodes: ${range(N-5, N+1)} + +joint_posture_capture_torso: + type: Postural + weight: *w_capture_postural_torso + indices: [24] + nodes: ${range(N-5, N+1)} + +joint_posture_capture_ub: + type: Postural + weight: *w_capture_postural_ub + indices: [ + 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36] + nodes: ${range(N-5, N+1)} + +v_root_regularization: + type: Regularization + nodes: ${range(0, 1)} + indices: [0, 1, 2, 3, 4, 5] + weight: + v: *w_v_root + +v_jnts_regularization: + type: Regularization + nodes: ${range(0, N+1)} + indices: [6, 7, 8, 9, + 11, 12, 13, 14, + 16, 17, 18, 19, + 21, 21, 23, 24, + 26, + 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38] + weight: + v: *w_v_jnts + +a_regularization: + type: Regularization + nodes: ${range(0, N+1)} + indices: [0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, + 11, 12, 13, 14, + 16, 17, 18, 19, + 21, 21, 23, 24, + 26, + 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38] + weight: + a: *w_a # 0.01 + +joint_v_reg_wheels: + type: Regularization + nodes: ${range(0, N+1)} + indices: [10, 15, 20, 25] + weight: + v: *w_v_wheels + +joint_a_reg_wheels: + type: Regularization + nodes: ${range(0, N+1)} + indices: [10, 15, 20, 25] + weight: + a: *w_a_wheels # 0.01 + +# flight phase traj tracking +z_contact_1: + type: Cartesian + distal_link: contact_1 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +z_contact_2: + type: Cartesian + distal_link: contact_2 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +z_contact_3: + type: Cartesian + distal_link: contact_3 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +z_contact_4: + type: Cartesian + distal_link: 
contact_4 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +xy_contact_1: + type: Cartesian + distal_link: contact_1 + indices: [0, 1] + cartesian_type: position + weight: *w_contact_xy + +xy_contact_2: + type: Cartesian + distal_link: contact_2 + indices: [0, 1] + cartesian_type: position + weight: *w_contact_xy + +xy_contact_3: + type: Cartesian + distal_link: contact_3 + indices: [0, 1] + cartesian_type: position + weight: *w_contact_xy + +xy_contact_4: + type: Cartesian + distal_link: contact_4 + indices: [0, 1] + cartesian_type: position + weight: *w_contact_xy + +vz_contact_1: + type: Cartesian + distal_link: contact_1 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vz_contact_2: + type: Cartesian + distal_link: contact_2 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vz_contact_3: + type: Cartesian + distal_link: contact_3 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vz_contact_4: + type: Cartesian + distal_link: contact_4 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vxy_contact_1: + type: Cartesian + distal_link: contact_1 + indices: [0, 1] + cartesian_type: velocity + weight: *w_contact_vxy + +vxy_contact_2: + type: Cartesian + distal_link: contact_2 + indices: [0, 1] + cartesian_type: velocity + weight: *w_contact_vxy + +vxy_contact_3: + type: Cartesian + distal_link: contact_3 + indices: [0, 1] + cartesian_type: velocity + weight: *w_contact_vxy + +vxy_contact_4: + type: Cartesian + distal_link: contact_4 + indices: [0, 1] + cartesian_type: velocity + weight: *w_contact_vxy \ No newline at end of file diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl_model 
b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl_model new file mode 100644 index 0000000000000000000000000000000000000000..e334e145aea292997059970e3e06c1849cbd5849 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl_model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c9df84000339df696ddf1be50309bbbb823ad25881edfe072b9afe15a41fce4 +size 10021303 diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/fake_pos_track_env_phase_control.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/fake_pos_track_env_phase_control.py new file mode 100644 index 0000000000000000000000000000000000000000..3e36dc2e8606d09d797d81e58efc05f61f2489a9 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/fake_pos_track_env_phase_control.py @@ -0,0 +1,198 @@ +from typing import Dict + +import os + +import torch + +from EigenIPC.PyEigenIPC import VLevel + +from mpc_hive.utilities.math_utils_torch import world2base_frame + +from aug_mpc_envs.training_envs.flight_phase_control_env import FlightPhaseControl + +class FakePosTrackEnvPhaseControl(FlightPhaseControl): + """Adds positional goals to the flight-parameter control env so agents chase waypoints while directly setting gait contact flags and flight profiles.""" + + def __init__(self, + namespace: str, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + debug: bool = True, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + 
self._add_env_opt(env_opts, "max_distance", default=5.0) # [m] + self._add_env_opt(env_opts, "min_distance", default=0.0) # [m] + self._add_env_opt(env_opts, "max_vref", default=1.0) # [m/s] + self._add_env_opt(env_opts, "max_dp", default=5.0) # [m] after this, v ref saturates + self._add_env_opt(env_opts, "max_dt", default=env_opts["max_dp"]/ env_opts["max_vref"]) + + FlightPhaseControl.__init__(self, + namespace=namespace, + verbose=verbose, + vlevel=vlevel, + use_gpu=use_gpu, + dtype=dtype, + debug=debug, + override_agent_refs=override_agent_refs, + timeout_ms=timeout_ms, + env_opts=env_opts) + + def get_file_paths(self): + paths=FlightPhaseControl.get_file_paths(self) + paths.append(os.path.abspath(__file__)) + return paths + + def _custom_post_init(self): + FlightPhaseControl._custom_post_init(self) + + # position targets to be reached (wrt robot's pos at ep start) + self._p_trgt_w=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2].detach().clone() + self._p_delta_w=self._p_trgt_w.detach().clone() + self._dp_norm=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device) + self._dp_versor=self._p_trgt_w.detach().clone() + + self._trgt_d=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device) + self._trgt_theta=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device) + + def _update_loc_twist_refs(self): + # this is called at each env substep + + self._compute_twist_ref_w() + + if not self._override_agent_refs: + agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p", + gpu=self._use_gpu) + agent_p_ref_current[:, 0:2]=self._p_trgt_w + + # then convert it to base ref local for the agent + robot_q = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + # rotate agent ref from world to robot base + world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q, + t_out=self._agent_twist_ref_current_base_loc) + # write it to agent refs tensors + 
self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc, + gpu=self._use_gpu) + + def _compute_twist_ref_w(self, env_indxs: torch.Tensor = None): + + # angular refs are not altered + if env_indxs is None: + # we update the position error using the current base position + self._p_delta_w[:, :]=self._p_trgt_w-\ + self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + + self._dp_norm[:, :]=self._p_delta_w.norm(dim=1,keepdim=True)+1e-6 + self._dp_versor[:, :]=self._p_delta_w/self._dp_norm + + # apply for vref saturation + to_be_saturated=self._dp_norm[:, :]>self._env_opts["max_dp"] + self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"] + + # we compute the twist refs for the agent depending of the position error + self._agent_twist_ref_current_w[:, 0:2]=self._dp_norm*self._dp_versor/self._env_opts["max_dt"] + self._agent_twist_ref_current_w[:, 2:3]=0 # no vertical vel + + # apply pof0 using last value of bernoully coeffs + self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel + self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega + else: + self._p_delta_w[env_indxs, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[env_indxs, 0:2] -\ + self._p_trgt_w[env_indxs, :] + + # apply for vref saturation + to_be_saturated=torch.logical_and((self._dp_norm[:, :]>self._env_opts["max_dp"]).flatten(),env_indxs) + self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"] + + self._dp_norm[env_indxs, :]=self._p_delta_w[env_indxs, :].norm(dim=1,keepdim=True)+1e-6 + self._dp_versor[env_indxs, :]=self._p_delta_w[env_indxs, :]/self._dp_norm[env_indxs, :] + + self._agent_twist_ref_current_w[env_indxs, 0:2]=self._dp_norm[env_indxs, :]*self._dp_versor[env_indxs, :]/self._env_opts["max_dt"] + self._agent_twist_ref_current_w[env_indxs, 2:3]=0 # no 
vertical vel + + # apply pof0 using last value of bernoully coeffs + self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :] + self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega + + def _override_refs(self, + env_indxs: torch.Tensor = None): + + # runs at every post_step + self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem + if self._use_gpu: + # copies latest refs to GPU + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False) + + agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p", + gpu=self._use_gpu) + + agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega", + gpu=self._use_gpu) + + # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + \ + # agent_p_ref_current[:, 0:2] + self._p_trgt_w[:, :]=agent_p_ref_current[:, 0:2] # set p target target from shared mem + + self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. 
vel target from shared mem + + def _debug_agent_refs(self): + if self._use_gpu: + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) + self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True) + + def _randomize_task_refs(self, + env_indxs: torch.Tensor = None): + + # we randomize the target position/omega in world frame + if env_indxs is None: + self._trgt_d.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"]) + self._trgt_theta.uniform_(0.0, 2*torch.pi) + + self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] +\ + torch.cat((self._trgt_d*torch.cos(self._trgt_theta) + ,self._trgt_d*torch.sin(self._trgt_theta)), dim=1) + + # randomize just omega + random_uniform=torch.full_like(self._agent_twist_ref_current_w[:, 3:6], fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[:, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6] + + # sample for all envs pof0 + if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients + torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"] + torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega) + + else: + + if env_indxs.any(): + integer_idxs=torch.nonzero(env_indxs).flatten() + + trgt_d_selected=self._trgt_d[integer_idxs, :] + trgt_d_selected.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"]) + self._trgt_d[integer_idxs, :]=trgt_d_selected + + trgt_theta_selected=self._trgt_theta[integer_idxs, :] + trgt_theta_selected.uniform_(0.0, 2*torch.pi) + self._trgt_theta[integer_idxs, :]=trgt_theta_selected + + self._p_trgt_w[integer_idxs, 0:1]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 0:1] +\ + self._trgt_d[integer_idxs, 
:]*torch.cos(self._trgt_theta[integer_idxs, :]) + self._p_trgt_w[integer_idxs, 1:2]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 1:2] +\ + self._trgt_d[integer_idxs, :]*torch.sin(self._trgt_theta[integer_idxs, :]) + + # randomize just omega + random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, 3:6], fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[env_indxs, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6] + + # sample for all envs pof0, then reset to 1 for envs which are not to be randomized + if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients + torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"] + torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega) + self._bernoulli_coeffs_linvel[~env_indxs, :]=1 + self._bernoulli_coeffs_omega[~env_indxs, :]=1 + + self._compute_twist_ref_w(env_indxs=env_indxs) # update linear vel twist refs based on pos error diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/flight_phase_control_env.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/flight_phase_control_env.py new file mode 100644 index 0000000000000000000000000000000000000000..5d9133fe468d93da12d788d4cd93fc3de7636ca6 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/flight_phase_control_env.py @@ -0,0 +1,219 @@ + +from typing import Dict + +import os + +import torch + +from EigenIPC.PyEigenIPC import VLevel + +from mpc_hive.utilities.shared_data.rhc_data import RobotState + +from aug_mpc_envs.training_envs.twist_tracking_env import TwistTrackingEnv + +class 
FlightPhaseControl(TwistTrackingEnv): + """Extends twist tracking with per-contact flight length/apex/end actions so agents learn to modulate aerial phases alongside base velocity tracking.""" + + def __init__(self, + namespace: str, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + debug: bool = True, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + self._add_env_opt(env_opts, "control_flength", default=True) + self._add_env_opt(env_opts, "control_fapex", default=True) + self._add_env_opt(env_opts, "control_fend", default=True) + self._add_env_opt(env_opts, "control_flanding", default=False) + + self._add_env_opt(env_opts, "flength_min", default=8) # substeps + + # temporarily creating robot state client to get some data + robot_state_tmp = RobotState(namespace=namespace, + is_server=False, + safe=False, + verbose=verbose, + vlevel=vlevel, + with_gpu_mirror=False, + with_torch_view=False) + robot_state_tmp.run() + n_contacts = len(robot_state_tmp.contact_names()) + robot_state_tmp.close() + + actions_dim=10 # base size + if env_opts["control_flength"]: + actions_dim+=n_contacts + if env_opts["control_fapex"]: + actions_dim+=n_contacts + if env_opts["control_fend"]: + actions_dim+=n_contacts + if env_opts["control_flanding"]: + actions_dim+=2*n_contacts + + TwistTrackingEnv.__init__(self, + namespace=namespace, + actions_dim=actions_dim, + verbose=verbose, + vlevel=vlevel, + use_gpu=use_gpu, + dtype=dtype, + debug=debug, + override_agent_refs=override_agent_refs, + timeout_ms=timeout_ms, + env_opts=env_opts) + + def get_file_paths(self): + paths=TwistTrackingEnv.get_file_paths(self) + paths.append(os.path.abspath(__file__)) + return paths + + def _custom_post_init(self): + + TwistTrackingEnv._custom_post_init(self) + + self._add_env_opt(self._env_opts, "flength_max", default=self._n_nodes_rhc.mean().item()) # MPC steps (substeps) + + # additional actions 
bounds + + # flight params (length) + if self._env_opts["control_flength"]: + idx=self._actions_map["flight_len_start"] + self._actions_lb[:, idx:(idx+self._n_contacts)]=self._env_opts["flength_min"] + self._actions_ub[:, idx:(idx+self._n_contacts)]=self._env_opts["flength_max"] + self._is_continuous_actions[idx:(idx+self._n_contacts)]=True + # flight params (apex) + if self._env_opts["control_fapex"]: + idx=self._actions_map["flight_apex_start"] + self._actions_lb[:, idx:(idx+self._n_contacts)]=0.05 + self._actions_ub[:, idx:(idx+self._n_contacts)]=0.35 + self._is_continuous_actions[idx:(idx+self._n_contacts)]=True + # flight params (end) + if self._env_opts["control_fend"]: + idx=self._actions_map["flight_end_start"] + self._actions_lb[:, idx:(idx+self._n_contacts)]=0.0 + self._actions_ub[:, idx:(idx+self._n_contacts)]=0.2 + self._is_continuous_actions[idx:(idx+self._n_contacts)]=True + # flight params (landing dx, dy) + if self._env_opts["control_flanding"]: + idx=self._actions_map["flight_land_dx_start"] + self._actions_lb[:, idx:(idx+self._n_contacts)]=-0.5 + self._actions_ub[:, idx:(idx+self._n_contacts)]=0.5 + self._is_continuous_actions[idx:(idx+self._n_contacts)]=True + idx=self._actions_map["flight_land_dy_start"] + self._actions_lb[:, idx:(idx+self._n_contacts)]=-0.5 + self._actions_ub[:, idx:(idx+self._n_contacts)]=0.5 + self._is_continuous_actions[idx:(idx+self._n_contacts)]=True + + # redefine default actions + self.default_action[:, :] = (self._actions_ub+self._actions_lb)/2.0 + # self.default_action[:, ~self._is_continuous_actions] = 1.0 + + if self._env_opts["control_flength"]: + idx=self._actions_map["flight_len_start"] + self.safe_action[:, idx:(idx+self._n_contacts)]=(self._env_opts["flength_max"]+self._env_opts["flength_min"])/3.0 + + if self._env_opts["control_fapex"]: + idx=self._actions_map["flight_apex_start"] + self.safe_action[:, idx:(idx+self._n_contacts)]=0.1 + + if self._env_opts["control_fend"]: + 
idx=self._actions_map["flight_end_start"] + self.safe_action[:, idx:(idx+self._n_contacts)]=0.0 + + if self._env_opts["control_flanding"]: + idx=self._actions_map["flight_land_dx_start"] + self.safe_action[:, idx:(idx+self._n_contacts)]=0.0 + idx=self._actions_map["flight_land_dy_start"] + self.safe_action[:, idx:(idx+self._n_contacts)]=0.0 + + def _set_rhc_refs(self): + TwistTrackingEnv._set_rhc_refs(self) + + action_to_be_applied = self.get_actual_actions() + + if self._env_opts["control_flength"]: + idx=self._actions_map["flight_len_start"] + flen_now=self._rhc_refs.flight_settings_req.get(data_type="len_remain", gpu=self._use_gpu) + flen_now[:, :]=action_to_be_applied[:, idx:(idx+self._n_contacts)] + self._rhc_refs.flight_settings_req.set(data=flen_now, data_type="len_remain", gpu=self._use_gpu) + + if self._env_opts["control_fapex"]: + idx=self._actions_map["flight_apex_start"] + fapex_now=self._rhc_refs.flight_settings_req.get(data_type="apex_dpos", gpu=self._use_gpu) + fapex_now[:, :]=action_to_be_applied[:, idx:(idx+self._n_contacts)] + self._rhc_refs.flight_settings_req.set(data=fapex_now, data_type="apex_dpos", gpu=self._use_gpu) + + if self._env_opts["control_fend"]: + idx=self._actions_map["flight_end_start"] + fend_now=self._rhc_refs.flight_settings_req.get(data_type="end_dpos", gpu=self._use_gpu) + fend_now[:, :]=action_to_be_applied[:, idx:(idx+self._n_contacts)] + self._rhc_refs.flight_settings_req.set(data=fend_now, data_type="end_dpos", gpu=self._use_gpu) + + if self._env_opts["control_flanding"]: + idx=self._actions_map["flight_land_dx_start"] + fland_dx_now=self._rhc_refs.flight_settings_req.get(data_type="land_dx", gpu=self._use_gpu) + fland_dx_now[:, :]=action_to_be_applied[:, idx:(idx+self._n_contacts)] + self._rhc_refs.flight_settings_req.set(data=fland_dx_now, data_type="land_dx", gpu=self._use_gpu) + idx=self._actions_map["flight_land_dy_start"] + fland_dy_now=self._rhc_refs.flight_settings_req.get(data_type="land_dy", gpu=self._use_gpu) + 
fland_dy_now[:, :]=action_to_be_applied[:, idx:(idx+self._n_contacts)] + self._rhc_refs.flight_settings_req.set(data=fland_dy_now, data_type="land_dy", gpu=self._use_gpu) + + def _write_rhc_refs(self): + TwistTrackingEnv._write_rhc_refs(self) + if self._use_gpu: + self._rhc_refs.flight_settings_req.synch_mirror(from_gpu=True,non_blocking=False) + self._rhc_refs.flight_settings_req.synch_all(read=False, retry=True) + + def _get_action_names(self): + + action_names = [""] * self.actions_dim() + action_names[0] = "vx_cmd" # twist commands from agent to RHC controller + action_names[1] = "vy_cmd" + action_names[2] = "vz_cmd" + action_names[3] = "roll_omega_cmd" + action_names[4] = "pitch_omega_cmd" + action_names[5] = "yaw_omega_cmd" + + next_idx=6 + self._actions_map["contact_flag_start"]=next_idx + for i in range(len(self._contact_names)): + contact=self._contact_names[i] + action_names[next_idx] = f"contact_flag_{contact}" + next_idx+=1 + if self._env_opts["control_flength"]: + self._actions_map["flight_len_start"]=next_idx + for i in range(len(self._contact_names)): + contact=self._contact_names[i] + action_names[next_idx+i] = f"flight_len_{contact}" + next_idx+=len(self._contact_names) + if self._env_opts["control_fapex"]: + self._actions_map["flight_apex_start"]=next_idx + for i in range(len(self._contact_names)): + contact=self._contact_names[i] + action_names[next_idx+i] = f"flight_apex_{contact}" + next_idx+=len(self._contact_names) + if self._env_opts["control_fend"]: + self._actions_map["flight_end_start"]=next_idx + for i in range(len(self._contact_names)): + contact=self._contact_names[i] + action_names[next_idx+i] = f"flight_end_{contact}" + next_idx+=len(self._contact_names) + if self._env_opts["control_flanding"]: + self._actions_map["flight_land_dx_start"]=next_idx + for i in range(len(self._contact_names)): + contact=self._contact_names[i] + action_names[next_idx+i] = f"flight_land_dx_{contact}" + next_idx+=len(self._contact_names) + 
self._actions_map["flight_land_dy_start"]=next_idx + for i in range(len(self._contact_names)): + contact=self._contact_names[i] + action_names[next_idx+i] = f"flight_land_dy_{contact}" + next_idx+=len(self._contact_names) + + return action_names + diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/hybrid_quad_rhc.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/hybrid_quad_rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..951af04dd38e267986c7da5e1f71f3ad72c1a2a4 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/hybrid_quad_rhc.py @@ -0,0 +1,1243 @@ +from mpc_hive.controllers.rhc import RHController +# from perf_sleep.pyperfsleep import PerfSleep +# from mpc_hive.utilities.cpu_utils.core_utils import get_memory_usage + +from aug_mpc.controllers.rhc.horizon_based.horizon_imports import * + +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs +from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +import numpy as np + +import os +# import shutil + +import time +from abc import ABC, abstractmethod + +from typing import Dict, List +import re + +class HybridQuadRhc(RHController): + + def __init__(self, + srdf_path: str, + urdf_path: str, + config_path: str, + robot_name: str, # used for shared memory namespaces + codegen_dir: str, + n_nodes:float = 25, + injection_node:int = 10, + dt: float = 0.02, + max_solver_iter = 1, # defaults to rt-iteration + open_loop: bool = True, + close_loop_all: bool = False, + dtype = np.float32, + verbose = False, + debug = False, + refs_in_hor_frame = True, + timeout_ms: int = 60000, + custom_opts: Dict = {}): + + self._refs_in_hor_frame = refs_in_hor_frame + + 
self._injection_node = injection_node + + self._open_loop = open_loop + self._close_loop_all = close_loop_all + + self._codegen_dir = codegen_dir + if not os.path.exists(self._codegen_dir): + os.makedirs(self._codegen_dir) + # else: + # # Directory already exists, delete it and recreate + # shutil.rmtree(self._codegen_dir) + # os.makedirs(self._codegen_dir) + + self.step_counter = 0 + self.sol_counter = 0 + + self.max_solver_iter = max_solver_iter + + self._timer_start = time.perf_counter() + self._prb_update_time = time.perf_counter() + self._phase_shift_time = time.perf_counter() + self._task_ref_update_time = time.perf_counter() + self._rti_time = time.perf_counter() + + self.robot_name = robot_name + + self.config_path = config_path + + self.urdf_path = urdf_path + # read urdf and srdf files + with open(self.urdf_path, 'r') as file: + self.urdf = file.read() + self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + + self._c_timelines = dict() + self._f_reg_timelines = dict() + + self._custom_opts={"replace_continuous_joints": False, + "use_force_feedback": False, + "lin_a_feedback": False, + "is_open_loop": self._open_loop, # fully open (just for db) + "fully_closed": False, # closed loop with full feedback (just for db) + "closed_partial": False, # closed loop with partial feedback + "adaptive_is": True, # closed loop with adaptation + "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase + "alpha_from_outside": False, # alpha set ext. 
from shared memory + "alpha_half": 1.0, + "only_vel_wheels": True, # whether wheels (if present) are just vel controlled + "use_jnt_v_feedback": False + } + + self._custom_opts.update(custom_opts) + + self._alpha_half=self._custom_opts["alpha_half"] + + if self._open_loop: + self._custom_opts["fully_closed"]=False + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + else: + self._custom_opts["is_open_loop"]=False + if self._custom_opts["fully_closed"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["closed_partial"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["fully_closed"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["adaptive_is"]: + self._custom_opts["closed_partial"]=False + self._custom_opts["fully_closed"]=False + + super().__init__(srdf_path=srdf_path, + n_nodes=n_nodes, + dt=dt, + namespace=self.robot_name, + dtype=dtype, + verbose=verbose, + debug=debug, + timeout_ms=timeout_ms) + + self._rhc_fpaths.append(self.config_path) + + def _config_override(self): + pass + + def _post_problem_init(self): + + self.rhc_costs={} + self.rhc_constr={} + + self._fail_idx_scale=0.0 + self._expl_idx_window_size=int(1*self._n_nodes) + self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size)) + self._expl_idx_counter=0 + self._expl_idx_buffer_counter=0 + + self._pred_node_idx=self._n_nodes-1 + + self._nq=self.nq() + self._nq_jnts=self._nq-7# assuming floating base + self._nv=self.nv() + self._nv_jnts=self._nv-6 + + self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype) + self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype) + self._alphas_twist_root=np.zeros((6, 1), dtype=self._dtype) + self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype) + self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype) + self._alphas_q_root[:, :]=1.0 # default to all open + 
self._alphas_q_jnts[:, :]=1.0 + self._alphas_twist_root[:, :]=1.0 + self._alphas_v_jnts[:, :]=1.0 + self._alphas_a[:, :]=1.0 + + def _init_problem(self, + fixed_jnt_patterns: List[str] = None, + wheels_patterns: List[str] = None, + foot_linkname: str = None, + flight_duration: int = 10, + post_flight_stance: int = 3, + step_height: float = 0.12, + keep_yaw_vert: bool = False, + yaw_vertical_weight: float = 2.0, + vertical_landing: bool = False, + vertical_land_weight: float = 1.0, + phase_force_reg: float = 1e-2, + vel_bounds_weight: float = 1.0): + + self._fixed_jnt_patterns=fixed_jnt_patterns + + self._config_override() + + Journal.log(self.__class__.__name__, + "_init_problem", + f" Will use horizon config file at {self.config_path}", + LogType.INFO, + throw_when_excep=True) + + self._vel_bounds_weight=vel_bounds_weight + self._phase_force_reg=phase_force_reg + self._yaw_vertical_weight=yaw_vertical_weight + self._vertical_land_weight=vertical_land_weight + # overrides parent + self._prb = Problem(self._n_intervals, + receding=True, + casadi_type=cs.SX) + self._prb.setDt(self._dt) + + if "replace_continuous_joints" in self._custom_opts: + # continous joints are parametrized in So2 + if self._custom_opts["replace_continuous_joints"]: + self.urdf = self.urdf.replace('continuous', 'revolute') + else: + self.urdf = self.urdf.replace('continuous', 'revolute') + + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + + self._init_robot_homer() + + # handle fixed joints + fixed_joint_map={} + if self._fixed_jnt_patterns is not None: + for jnt_name in self._get_robot_jnt_names(): + for fixed_jnt_pattern in self._fixed_jnt_patterns: + if fixed_jnt_pattern in jnt_name: + fixed_joint_map.update({f"{jnt_name}": + self._homer.get_homing_val(jnt_name=jnt_name)}) + break # do not search for other pattern matches + + if not len(fixed_joint_map)==0: # we need to 
recreate kin dyn and homers + Journal.log(self.__class__.__name__, + "_init_problem", + f"Will fix following joints: \n{str(fixed_joint_map)}", + LogType.INFO, + throw_when_excep=True) + # with the fixed joint map + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map) + # assign again controlled joints names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + # updated robot homer for controlled joints + self._init_robot_homer() + + # handle continuous joints (need to change homing and retrieve + # cont jnts indexes) and homing + self._continuous_joints=self._get_continuous_jnt_names() + # reduced + self._continuous_joints_idxs=[] + self._continuous_joints_idxs_cos=[] + self._continuous_joints_idxs_sin=[] + self._continuous_joints_idxs_red=[] + self._rev_joints_idxs=[] + self._rev_joints_idxs_red=[] + # qfull + self._continuous_joints_idxs_qfull=[] + self._continuous_joints_idxs_cos_qfull=[] + self._continuous_joints_idxs_sin_qfull=[] + self._continuous_joints_idxs_red_qfull=[] + self._rev_joints_idxs_qfull=[] + self._rev_joints_idxs_red_qfull=[] + jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints)) + jnt_names=self._get_robot_jnt_names() + for i in range(len(jnt_names)): + jnt=jnt_names[i] + index=self._get_jnt_id(jnt)# accounting for floating joint + homing_idx=index-7 # homing is only for actuated joints + homing_value=self._homer.get_homing_val(jnt) + if jnt in self._continuous_joints: + jnt_homing[homing_idx]=np.cos(homing_value).item() + jnt_homing[homing_idx+1]=np.sin(homing_value).item() + # just actuated joints + self._continuous_joints_idxs.append(homing_idx) # cos + self._continuous_joints_idxs.append(homing_idx+1) # sin + self._continuous_joints_idxs_cos.append(homing_idx) + self._continuous_joints_idxs_sin.append(homing_idx+1) + self._continuous_joints_idxs_red.append(i) + # q full + self._continuous_joints_idxs_qfull.append(index) # cos + 
self._continuous_joints_idxs_qfull.append(index+1) # sin + self._continuous_joints_idxs_cos_qfull.append(index) + self._continuous_joints_idxs_sin_qfull.append(index+1) + self._continuous_joints_idxs_red_qfull.append(i+7) + else: + jnt_homing[homing_idx]=homing_value + # just actuated joints + self._rev_joints_idxs.append(homing_idx) + self._rev_joints_idxs_red.append(i) + # q full + self._rev_joints_idxs_qfull.append(index) + self._rev_joints_idxs_red_qfull.append(i+7) + + self._jnts_q_reduced=None + if not len(self._continuous_joints)==0: + cont_joints=", ".join(self._continuous_joints) + + Journal.log(self.__class__.__name__, + "_init_problem", + f"The following continuous joints were found: \n{cont_joints}", + LogType.INFO, + throw_when_excep=True) + # preallocating data + self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype) + self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype) + self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + else: + self._custom_opts["replace_continuous_joints"]=True + Journal.log(self.__class__.__name__, + "_init_problem", + f"No continuous joints were found.", + LogType.INFO, + throw_when_excep=True) + + # retrieve wheels indexes (not considering continuous joints, + # ok just for v, eff, etc..) 
+ self._wheel_patterns=wheels_patterns + self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns) + self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81] + + # we can create an init for the base + init = self._base_init.tolist() + jnt_homing + + if foot_linkname is not None: + FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height + ground_level = FK(q=init)['ee_pos'] + self._base_init[2] = -ground_level[2] # override init + + self._model = FullModelInverseDynamics(problem=self._prb, + kd=self._kin_dyn, + q_init=self._homer.get_homing_map(), + base_init=self._base_init) + + self._ti = TaskInterface(prb=self._prb, + model=self._model, + max_solver_iter=self.max_solver_iter, + debug = self._debug, + verbose = self._verbose, + codegen_workdir = self._codegen_dir) + self._ti.setTaskFromYaml(self.config_path) + + # setting initial base pos ref if exists + base_pos = self._ti.getTask('base_height') + if base_pos is not None: + base_pos.setRef(np.atleast_2d(self._base_init).T) + + self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes????? 
+ + self._gm = GaitManager(self._ti, + self._pm, + self._injection_node, + keep_yaw_vert=keep_yaw_vert, + yaw_vertical_weight=self._yaw_vertical_weight, + vertical_landing=vertical_landing, + landing_vert_weight=self._vertical_land_weight, + phase_force_reg=self._phase_force_reg, + custom_opts=self._custom_opts, + flight_duration=flight_duration, + post_flight_stance=post_flight_stance, + step_height=step_height, + dh=0.0) + + self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0) + self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0) + self._ti.model.q.setInitialGuess(self._ti.model.q0) + self._ti.model.v.setInitialGuess(self._ti.model.v0) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f=len(cforces) + for c in cforces: + c.setInitialGuess(np.array(self._f0)/n_contact_f) + + vel_lims = self._model.kd.velocityLimits() + import horizon.utils as utils + self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:])) + self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:])) + + self._meas_lin_a_par=None + # if self._custom_opts["lin_a_feedback"]: + # # acceleration feedback on first node + # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback", + # dim=3, nodes=0) + # base_lin_a_prb=self._prb.getInput().getVars()[0:3] + # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par, + # nodes=[0]) + + # if not self._open_loop: + # # we create a residual cost to be used as an attractor to the measured state on the first node + # # hard constraints injecting meas. states are pure EVIL! 
+ # prb_state=self._prb.getState() + # full_state=prb_state.getVars() + # state_dim=prb_state.getBounds()[0].shape[0] + # meas_state=self._prb.createParameter(name="measured_state", + # dim=state_dim, nodes=0) + # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state), + # nodes=[0]) + + self._ti.finalize() + self._ti.bootstrap() + + self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes + self._ti.load_initial_guess() + + self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we + # know n_dofs -> we assign it (by default = None) + + self.n_contacts = len(self._model.cmap.keys()) + + # remove variables bounds (before they were necessary to have a good + # quality bootstrap solution) + self._q_inf=np.zeros((self.nq(), 1)) + self._q_inf[:, :]=np.inf + self._v_inf=np.zeros((self.nv(), 1)) + self._v_inf[:, :]=np.inf + self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0) + self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0) + + # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb) + + def get_file_paths(self): + # can be overriden by child + paths = super().get_file_paths() + return paths + + def _get_quat_remap(self): + # overrides parent + return [1, 2, 3, 0] # mapping from robot quat. 
to Horizon's quaternion convention

    def _zmp(self, model):
        # Builds and returns a CasADi Function computing the Zero Moment Point
        # (ZMP, xy only) of the model: inputs are symbolic (q, v, a) plus one
        # 3D force per contact frame in model.fmap; output is a 2D expression.

        num = cs.SX([0, 0])
        den = cs.SX([0])
        pos_contact = dict()
        force_val = dict()

        # symbolic state inputs
        q = cs.SX.sym('q', model.nq)
        v = cs.SX.sym('v', model.nv)
        a = cs.SX.sym('a', model.nv)

        com = model.kd.centerOfMass()(q=q, v=v, a=a)['com']

        # accumulate normal-force-weighted xy offsets of each contact w.r.t. the CoM
        n = cs.SX([0, 0, 1])
        for c in model.fmap.keys():
            pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos']
            force_val[c] = cs.SX.sym('force_val', 3)
            num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n)
            den += cs.dot(force_val[c], n)

        # NOTE(review): den is zero if all normal force components vanish
        # (full flight phase) -> division by zero; confirm callers only use
        # this with at least one active contact force.
        zmp = com[0:2] + (num / den)
        input_list = []
        input_list.append(q)
        input_list.append(v)
        input_list.append(a)

        for elem in force_val.values():
            input_list.append(elem)

        f = cs.Function('zmp', input_list, [zmp])
        return f

    def _add_zmp(self):
        # Adds an intermediate residual to the problem penalizing the distance
        # between the ZMP and the mean of the contact frame positions.

        input_zmp = []

        input_zmp.append(self._model.q)
        input_zmp.append(self._model.v)
        input_zmp.append(self._model.a)

        for f_var in self._model.fmap.values():
            input_zmp.append(f_var)

        # mean of the contact frame positions (nominal support center)
        c_mean = cs.SX([0, 0, 0])
        for c_name, f_var in self._model.fmap.items():
            fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos']
            c_mean += fk_c_pos

        c_mean /= len(self._model.cmap.keys())

        zmp_nominal_weight = 10.
        zmp_fun = self._zmp(self._model)(*input_zmp)

        # residual is only registered for models exposing 'wheel_joint_1'
        # NOTE(review): robot-specific guard; confirm this is intentional
        if 'wheel_joint_1' in self._model.kd.joint_names():
            zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2]))

    def _quaternion_multiply(self,
            q1, q2):
        # Hamilton product q1 * q2 of two quaternions in (x, y, z, w)
        # component order; returns a numpy array (x, y, z, w).
        x1, y1, z1, w1 = q1
        x2, y2, z2, w2 = q2
        w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2
        x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2
        y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2
        z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2
        return np.array([x, y, z, w])

    def _get_continuous_jnt_names(self):
        # Parses the URDF string (self.urdf) and returns the names of all
        # joints declared with type "continuous".
        import xml.etree.ElementTree as ET
        root = ET.fromstring(self.urdf)
        continuous_joints = []
        for joint in root.findall('joint'):
            joint_type = joint.get('type')
            if joint_type == 'continuous':
                joint_name = joint.get('name')
                continuous_joints.append(joint_name)
        return continuous_joints

    def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]):
        # Returns the indexes (into the controller-side joint name list) of
        # joints whose name contains any of the given substring patterns.
        jnt_names=self._get_robot_jnt_names()
        wheels_idxs=[]
        for i in range(len(jnt_names)):
            jnt_name=jnt_names[i]
            for wheel_pattern in wheel_patterns:
                if wheel_pattern in jnt_name:
                    wheels_idxs.append(i)
                    break # first matching pattern is enough
        return wheels_idxs

    def _get_jnt_id(self, jnt_name):
        # Index of the joint in the full configuration vector q
        # (as reported by the kinodynamic model, floating base included).
        return self._kin_dyn.joint_iq(jnt_name)

    def _init_rhc_task_cmds(self):

        rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm,
            robot_index_shm=self.controller_index,
            robot_index_view=0, # when using optimize_mem the view if always of shape 1x...
namespace=self.namespace,
            safe=False,
            verbose=self._verbose,
            vlevel=VLevel.V2,
            use_force_feedback=self._custom_opts["use_force_feedback"],
            optimize_mem=True)

        rhc_refs.run()

        # remap joint ordering and quaternion component order to the
        # controller-side conventions
        rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller)
        rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap())

        # default base pose refs taken from the initial base configuration
        rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3],
            q_ref=np.atleast_2d(self._base_init)[:, 3:7])

        return rhc_refs

    def get_vertex_fnames_from_ti(self):
        # Collects the first vertex frame name of each ContactTask found in
        # the task interface's task list; used as the contact name list.
        tasks=self._ti.task_list
        contact_f_names=[]
        for task in tasks:
            if isinstance(task, ContactTask):
                interaction_task=task.dynamics_tasks[0]
                contact_f_names.append(interaction_task.vertex_frames[0])
        return contact_f_names

    def _get_contact_names(self):
        # should get contact names from vertex frames
        # list(self._ti.model.cmap.keys())
        return self.get_vertex_fnames_from_ti()

    def _get_robot_jnt_names(self):
        # Joint names from the kinodynamic model, with bookkeeping entries
        # (universe/world/floating base placeholders) stripped out.
        joints_names = self._kin_dyn.joint_names()
        to_be_removed = ["universe",
            "reference",
            "world",
            "floating",
            "floating_base"]
        for name in to_be_removed:
            if name in joints_names:
                joints_names.remove(name)

        return joints_names

    def _get_ndofs(self):
        # number of controlled dofs = number of model joint names
        return len(self._model.joint_names)

    def nq(self):
        # size of the full configuration vector (floating base included)
        return self._kin_dyn.nq()

    def nv(self):
        # size of the full velocity vector (floating base included)
        return self._kin_dyn.nv()

    def _get_robot_mass(self):
        # total robot mass from the kinodynamic model
        return self._kin_dyn.mass()

    def _get_root_full_q_from_sol(self, node_idx=1):
        # Root pose (position + quaternion, shape 1x7) from the current
        # solution at node_idx, sanitized in place: NaN/inf replaced and
        # values clipped to [-1e3, 1e3].
        root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype)

        np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False)
        np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full)

        return root_q_full

    def _get_full_q_from_sol(self, node_idx=1):
        # Full configuration (root + joints) at node_idx, shape 1 x nq.
        return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype)

    def _get_root_twist_from_sol(self, node_idx=1):
        # provided in world frame
        # NOTE(review): frame claim inherited from the original comment; the
        # variable name below says "local" — confirm which frame is meant.
        twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1,
6)
        # if world_aligned:
        # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
        # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
        # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3]
        # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6]
        return twist_base_local

    def _get_root_a_from_sol(self, node_idx=0):
        # provided in world frame
        # NOTE(review): frame claim inherited from the original comment; the
        # variable name below says "local" — confirm which frame is meant.
        a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6)
        # if world_aligned:
        # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4]
        # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix()
        # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3]
        # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6]
        return a_base_local

    def _get_jnt_q_from_sol(self, node_idx=0,
            reduce: bool = True,
            clamp: bool = True):
        # Joint positions from the solution at node_idx, sanitized in place
        # (NaN/inf -> +-1e3, clipped to [-1e3, 1e3]).
        # If continuous joints exist and reduce=True, their (cos, sin) pairs
        # are collapsed back to an angle via arctan2 (result written into the
        # shared buffer self._jnts_q_reduced — callers must not hold on to it);
        # otherwise the full vector is returned, optionally fmod-clamped.

        full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype)

        np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place
        np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place

        if self._custom_opts["replace_continuous_joints"] or (not reduce):
            if clamp:
                # fmod keeps the sign of the operand -> result in (-2*pi, 2*pi)
                return np.fmod(full_jnts_q, 2*np.pi)
            else:
                return full_jnts_q
        else:
            cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2)
            # copy rev joint vals
            self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1)
            # and continuous
            self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1)
            return self._jnts_q_reduced

    def _get_jnt_v_from_sol(self, node_idx=1):
        # Joint velocities (1 x n_jnts) at node_idx, NaN/inf sanitized in place.
        jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1,
            self._nv_jnts)
        np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place
        # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place

        return jnt_v_sol

    def _get_jnt_a_from_sol(self, node_idx=1):
        # Joint accelerations (1 x n_jnts) at node_idx (no sanitization here).
        return self._get_a_from_sol()[6:, node_idx].reshape(1,
            self._nv_jnts)

    def _get_jnt_eff_from_sol(self, node_idx=0):
        # Joint efforts evaluated on node_idx via the task interface's inverse
        # dynamics; wheel joints are zeroed when only velocity-controlled.
        efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx)

        # if self._custom_opts["only_vel_wheels"]:

        # NOTE(review): slicing then assigning below mutates the view into
        # efforts_on_node (aliasing) — confirm that is acceptable upstream.
        jnt_efforts=efforts_on_node[6:, 0]

        if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v:
            jnt_efforts[self._wheels_idxs_v]=0.0

        return jnt_efforts.reshape(1,
            self._nv_jnts).astype(self._dtype)

    def _get_rhc_cost(self):
        # total optimal cost of the last solution
        return self._ti.solution["opt_cost"]

    def _get_rhc_constr_viol(self):
        # residual norm (constraint violation measure) of the last solution
        return self._ti.solution["residual_norm"]

    def _get_rhc_nodes_cost(self):
        # per-node cost values from the RTI solver, shaped 1 x n
        cost = self._ti.solver_rti.getCostValOnNodes()
        return cost.reshape((1, -1))

    def _get_rhc_nodes_constr_viol(self):
        # per-node constraint violations from the RTI solver, shaped 1 x n
        constr_viol = self._ti.solver_rti.getConstrValOnNodes()
        return constr_viol.reshape((1, -1))

    def _get_rhc_niter_to_sol(self):
        # iterations the solver needed for the last solution
        return self._ti.solution["n_iter2sol"]

    def _set_ig(self):
        # Builds the warm-start initial guess by shifting the previous optimal
        # state/input trajectories one node backwards (receding horizon) and
        # duplicating the last node into the vacated tail. Also writes the
        # guesses into the problem. Returns (xig, uig).

        shift_num = -1 # shift data by one node

        x_opt = self._ti.solution['x_opt']
        u_opt = self._ti.solution['u_opt']

        # building ig for state
        xig = np.roll(x_opt,
            shift_num, axis=1) # rolling state sol.
        for i in range(abs(shift_num)):
            # state on last node is copied to the elements
            # which are "lost" during the shift operation
            xig[:, -1 - i] = x_opt[:, -1]
        # building ig for inputs
        uig = np.roll(u_opt,
            shift_num, axis=1) # rolling state sol.
        for i in range(abs(shift_num)):
            # state on last node is copied to the elements
            # which are "lost" during the shift operation
            uig[:, -1 - i] = u_opt[:, -1]

        # assigning ig
        self._prb.getState().setInitialGuess(xig)
        self._prb.getInput().setInitialGuess(uig)

        return xig, uig

    def _update_open_loop(self):
        # Warm-starts from the shifted previous solution and closes the
        # initial state purely on the previous MPC prediction (no measurements).
        # Returns (q_state, v_state, a_state) used for feedback (a_state None).

        xig, _ = self._set_ig()

        q_state, v_state, a_state=self._set_is_open()

        # robot_state=xig[:, 0]
        # # open loop update:
        # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0
        # # is node 1 in the last opt solution)

        return q_state, v_state, a_state

    def _update_closed_loop(self):
        # Warm-starts from the shifted previous solution and sets the initial
        # state from measurements according to the feedback mode selected in
        # self._custom_opts (adaptive_is / fully_closed / closed_partial are
        # made mutually exclusive in __init__). Falls back to self._set_is()
        # with an error log if no mode is active.

        # set initial guess for controller
        xig, _ = self._set_ig()
        # set initial state
        q_state=None
        v_state=None
        a_state=None
        if self._custom_opts["adaptive_is"]:
            # adaptive closed loop
            q_state, v_state, a_state=self._set_is_adaptive()
        elif self._custom_opts["fully_closed"]:
            q_state, v_state, a_state=self._set_is_full()
        elif self._custom_opts["closed_partial"]:
            q_state, v_state, a_state=self._set_is_partial()
        else:
            Journal.log(self.__class__.__name__,
                "_update_closed_loop",
                "Neither adaptive_is, fully_closed, or closed_partial.",
                LogType.EXCEP,
                throw_when_excep = False)
            q_state, v_state, a_state=self._set_is()

        return q_state, v_state, a_state

    def _set_is_open(self):
        # Open-loop initial-state update: node-0 variables are bounded to the
        # previous solution's node-1 values (no measurements used).
        # Returns (q_state, v_state, None).

        # overriding states with rhc data
        q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1)
        q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1)

        twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1)
        v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_q_full_rhc=q[0:7] # root full q
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_twist_rhc=vel[0:6] # lin v.
        jnts_v_rhc=vel[6:] # jnts v

        self.rhc_refs.set_alpha(alpha=1.0) # fully open

        # close state on known quantities
        root_q_full_rhc.setBounds(lb=q_full_root,
            ub=q_full_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)
        root_twist_rhc.setBounds(lb=twist_root,
            ub=twist_root, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)

        # return state used for feedback
        q_state=np.concatenate((q_full_root, q_jnts),
            axis=0)
        v_state=np.concatenate((twist_root, v_jnts),
            axis=0)

        return (q_state, v_state, None)

    def _set_is_full(self):
        # Fully-closed-loop initial-state update: node-0 variables are bounded
        # to measured root pose/twist and joint positions/velocities read from
        # shared memory; optionally also the measured base linear acceleration
        # (lin_a_feedback). Returns (q_state, v_state, a_state).

        # measurements
        q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1)

        q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1)

        if (not len(self._continuous_joints)==0): # we need to expand some meas. rev jnts to So2
            self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:]
            self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos
            self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin
            q_jnts=self._jnts_q_expanded.reshape(-1,1)

        # rhc variables to be set
        q=self._prb.getVariables("q") # .setBounds()
        root_full_q_rhc=q[0:7] # root p
        jnts_q_rhc=q[7:] # jnts q
        vel=self._prb.getVariables("v")
        root_v_rhc=vel[0:3] # lin v.
        root_omega_rhc=vel[3:6] # omega
        jnts_v_rhc=vel[6:] # jnts v
        acc=self._prb.getVariables("a")
        lin_a_prb=acc[0:3] # lin acc

        self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive

        root_full_q_rhc.setBounds(lb=q_full_root,
            ub=q_full_root, nodes=0)
        jnts_q_rhc.setBounds(lb=q_jnts,
            ub=q_jnts, nodes=0)
        # NOTE(review): the original trailing comment says "leaving lin v of
        # the base free (estimated from constraints)", but the line below DOES
        # bound the base linear velocity to the measurement — confirm intent.
        root_v_rhc.setBounds(lb=v_root,
            ub=v_root, nodes=0) # leaving lin v of the base free (estimated from constraints)
        root_omega_rhc.setBounds(lb=omega,
            ub=omega, nodes=0)
        jnts_v_rhc.setBounds(lb=v_jnts,
            ub=v_jnts, nodes=0)
        if self._custom_opts["lin_a_feedback"]:
            # write base lin acceleration from meas
            # (NOTE(review): original comment text was garbled here)
            lin_a_prb.setBounds(lb=a_root[0:3, :],
                ub=a_root[0:3, :],
                nodes=0)

        # return state used for feedback
        q_state=np.concatenate((q_full_root, q_jnts),
            axis=0)
        v_state=np.concatenate((v_root, omega, v_jnts),
            axis=0)
        a_state=np.concatenate((a_root, a_jnts),
            axis=0)

        return (q_state, v_state, a_state)

    def _set_is_partial(self):

        # measurements
        p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1)
        q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1)
        v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1)
        omega = self.robot_state.root_state.get(data_type="omega",
robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # overriding states with rhc data (-> all overridden state are open looop) + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc # position is always open loop + if not self._custom_opts["estimate_v_root"]: + v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1) + # override v jnts with the ones from controller + if not self._custom_opts["use_jnt_v_feedback"]: + v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # root_twist_from_rhc=self._get_root_twist_from_sol(node_idx=1) + # root_v_from_rhc=root_twist_from_rhc[:, 0:3].reshape(-1, 1) + # root_omega_from_rhc=root_twist_from_rhc[:, 3:6].reshape(-1, 1) + # jnt_q_from_rhc=self._get_jnt_q_from_sol(node_idx=1,reduce=False,clamp=False).reshape(-1, 1) + # jnt_v_from_rhc=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + 
q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root, + ub=q_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints) + else: # get it from controller + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_adaptive(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = 
self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + # getting prediction defects + root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # close state on known quantities, estimate some (e.g. lin vel) and + # open loop if thing start to explode + alpha_now=1.0 + delta=0.0 + if self._custom_opts["alpha_from_outside"]: + alpha_now=self.rhc_refs.get_alpha() + else: # "autotuned" alpha + if self._custom_opts["estimate_v_root"]: # we copmute delta based on jnt v (since we use meas.) 
+ delta=np.max(np.abs(jnt_v_delta)) + else: + delta=np.max(np.abs(omega_root_delta)) + # fail_idx=self._get_failure_index() + # fail_idx=self._get_explosion_idx()/self._fail_idx_thresh + alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0 + + bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1] + self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db + self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db + + self._alphas_q_root[:]=alpha_now # for now single alpha for everything + self._alphas_q_jnts[:]=alpha_now + self._alphas_twist_root[:]=alpha_now + self._alphas_v_jnts[:]=alpha_now + self._alphas_a[:]=alpha_now + if not self._custom_opts["estimate_v_root"]: + self._alphas_twist_root[0:3]=1.0 # open + self._alphas_v_jnts[:]=1.0 # open + + # position is always open loop + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc + + # expaning meas q if continuous joints + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + + # continous joints position is always open loop, but we need a delta vector of matching dimension + q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1) + + self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:] + + self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\ + np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.cos(q_jnts[self._continuous_joints_idxs_red, :]) + self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\ + np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.sin(q_jnts[self._continuous_joints_idxs_red, :]) + + q_jnts=self._jnts_q_expanded.reshape(-1,1) # overriting with expanded jnts + jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overriting with expanded jnts + + self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop + self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop + + # self._alphas_v_jnts[self._continuous_joints_idxs_red, :]=0.0 # open loop + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta, + ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta, + ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0) + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) + else: + root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, + ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0) + 
root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, + ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta, + ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0) + if self._custom_opts["lin_a_feedback"]: + lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _solve(self): + + if self._debug: + return self._db_solve() + else: + return self._min_solve() + + def _min_solve(self): + # minimal solve version -> no debug + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop() # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop() # updates the TO ig and + # initial conditions using robot measurements + + self._pm.shift() # shifts phases of one dt + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + try: + converged = self._ti.rti() # solves the problem + self.sol_counter = self.sol_counter + 1 + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + return False + + def _db_solve(self): + + self._timer_start = time.perf_counter() + + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop() # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop() # updates the TO ig and + # initial conditions using robot measurements + + self._prb_update_time = time.perf_counter() + self._pm.shift() # shifts phases of one dt + self._phase_shift_time = time.perf_counter() + + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + self._task_ref_update_time = time.perf_counter() + + try: + converged = self._ti.rti() # solves the problem + self._rti_time = time.perf_counter() + self.sol_counter = self.sol_counter + 1 + self._update_db_data() + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + if self._verbose: + exception = f"Rti() for controller {self.controller_index} failed" + \ + f" with exception{type(e).__name__}" + Journal.log(self.__class__.__name__, + "solve", + exception, + LogType.EXCEP, + throw_when_excep = False) + self._update_db_data() + return False + + def _get_fail_idx(self): + + self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx() + self._expl_idx_buffer_counter+=1 + self._expl_idx_counter+=1 + if self._expl_idx_counter%self._expl_idx_window_size==0: + self._expl_idx_buffer_counter=0 # restart from 0 + + running_avrg=np.mean(self._explosion_idx_buffer).item() + + return running_avrg + + def _get_explosion_idx(self): + explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale + return explosion_index + + def _update_db_data(self): + + self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start + self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time + self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time - self._phase_shift_time + self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time + 
self.rhc_costs.update(self._ti.solver_rti.getCostsValues()) + self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues()) + + def _reset(self): + + # reset task interface (ig, solvers, etc..) + + # phase manager and sets bootstap as solution + self._gm.reset() + self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution + self._expl_idx_counter=0.0 + self._expl_idx_buffer_counter=0 + + def _get_cost_data(self): + + cost_dict = self._ti.solver_rti.getCostsValues() + cost_names = list(cost_dict.keys()) + cost_dims = [1] * len(cost_names) # costs are always scalar + return cost_names, cost_dims + + def _get_constr_data(self): + + constr_dict = self._ti.solver_rti.getConstraintsValues() + constr_names = list(constr_dict.keys()) + constr_dims = [-1] * len(constr_names) + i = 0 + for constr in constr_dict: + constr_val = constr_dict[constr] + constr_shape = constr_val.shape + constr_dims[i] = constr_shape[0] + i+=1 + return constr_names, constr_dims + + def _get_q_from_sol(self): + full_q=self._ti.solution['q'].astype(self._dtype) + if self._custom_opts["replace_continuous_joints"]: + return full_q + else: + cont_jnts=full_q[self._continuous_joints_idxs_qfull, :] + cos=cont_jnts[::2, :] + sin=cont_jnts[1::2, :] + # copy root + self._full_q_reduced[0:7, :]=full_q[0:7, :] + # copy rev joint vals + self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :] + # and continuous + angle=np.arctan2(sin, cos) + self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle + return self._full_q_reduced + + def _get_v_from_sol(self): + return self._ti.solution['v'].astype(self._dtype) + + def _get_a_from_sol(self): + return self._ti.solution['a'].astype(self._dtype) + + def _get_a_dot_from_sol(self): + return None + + def _get_f_from_sol(self): + # to be overridden by child class + contact_names =self._get_contacts() # we use controller-side names + try: + data=[] + for key in contact_names: + 
contact_f=self._ti.solution["f_" + key].astype(self._dtype) + np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False) + np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f) + data.append(contact_f) + return np.concatenate(data, axis=0) + except: + return None + + def _get_f_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_eff_from_sol(self): + # to be overridden by child class + return None + + def _get_cost_from_sol(self, + cost_name: str): + return self.rhc_costs[cost_name] + + def _get_constr_from_sol(self, + constr_name: str): + return self.rhc_constr[constr_name] \ No newline at end of file diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/isaac_world_interface.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/isaac_world_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..0718536d91c6f1ff0a587b3f084c6debc7e0c302 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/isaac_world_interface.py @@ -0,0 +1,2344 @@ +# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com) +# +# This file is part of AugMPCEnvs and distributed under the General Public License version 2 license. +# +# AugMPCEnvs is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# AugMPCEnvs is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with AugMPCEnvs. 
If not, see . +# +from isaacsim import SimulationApp + +import carb + +import os +import math + +import torch +import numpy as np + +from typing import Dict, List + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal + +from aug_mpc_envs.utils.math_utils import quat_to_omega +from aug_mpc_envs.utils.height_grid_visualizer import HeightGridVisualizer +from aug_mpc_envs.utils.height_sensor import HeightGridSensor + +from aug_mpc.world_interfaces.world_interface_base import AugMPCWorldInterfaceBase +from mpc_hive.utilities.math_utils_torch import world2base_frame,world2base_frame3D + +class IsaacSimEnv(AugMPCWorldInterfaceBase): + + def __init__(self, + robot_names: List[str], + robot_urdf_paths: List[str], + robot_srdf_paths: List[str], + jnt_imp_config_paths: List[str], + n_contacts: List[int], + cluster_dt: List[float], + use_remote_stepping: List[bool], + name: str = "IsaacSimEnv", + num_envs: int = 1, + debug = False, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + n_init_step: int = 0, + timeout_ms: int = 60000, + env_opts: Dict = None, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + override_low_lev_controller: bool = False): + + self._render_step_counter = 0 + + super().__init__(name=name, + robot_names=robot_names, + robot_urdf_paths=robot_urdf_paths, + robot_srdf_paths=robot_srdf_paths, + jnt_imp_config_paths=jnt_imp_config_paths, + n_contacts=n_contacts, + cluster_dt=cluster_dt, + use_remote_stepping=use_remote_stepping, + num_envs=num_envs, + debug=debug, + verbose=verbose, + vlevel=vlevel, + n_init_step=n_init_step, + timeout_ms=timeout_ms, + env_opts=env_opts, + use_gpu=use_gpu, + dtype=dtype, + override_low_lev_controller=override_low_lev_controller) + + def is_running(self): + return self._simulation_app.is_running() + + def _pre_setup(self): + self._backend="torch" + enable_livestream = self._env_opts["enable_livestream"] + enable_viewport = 
self._env_opts["enable_viewport"] + base_isaac_exp = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.kit' + base_isaac_exp_headless = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.headless.kit' + + experience=base_isaac_exp + if self._env_opts["headless"]: + info = f"Will run in headless mode." + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + if enable_livestream: + experience = "" + elif enable_viewport: + exception = f"Using viewport is not supported yet." + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + else: + experience=base_isaac_exp_headless + + self._simulation_app = SimulationApp({"headless": self._env_opts["headless"]}, + experience=experience) + # all imports depending on isaac sim kits have to be done after simulationapp + # from omni.isaac.core.tasks.base_task import BaseTask + self._import_isaac_pkgs() + info = "Using IsaacSim experience file @ " + experience + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + # carb.settings.get_settings().set("/persistent/omnihydra/useSceneGraphInstancing", True) + + if enable_livestream: + info = "Livestream enabled" + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + + self._simulation_app.set_setting("/app/livestream/enabled", True) + self._simulation_app.set_setting("/app/window/drawMouse", True) + self._simulation_app.set_setting("/app/livestream/proto", "ws") + self._simulation_app.set_setting("/app/livestream/websocket/framerate_limit", 120) + self._simulation_app.set_setting("/ngx/enabled", False) + enable_extension("omni.kit.livestream.native") + enable_extension("omni.services.streaming.manager") + self._render = ((not self._env_opts["headless"]) or self._env_opts["render_to_file"]) or enable_livestream or enable_viewport + + self._record = False + 
self._world = None + self._physics_context = None + self._scene = None + self._task = None + self._metadata = None + + self._robots_art_views = {} + self._blink_rigid_prim_views = {} + self._robots_articulations = {} + self._robots_geom_prim_views = {} + self.omni_contact_sensors = {} + + self._solver_position_iteration_count=self._env_opts["solver_position_iteration_count"] + self._solver_velocity_iteration_count=self._env_opts["solver_velocity_iteration_count"] + self._solver_stabilization_thresh=self._env_opts["stabilization_threshold"] + self._solver_position_iteration_counts={} + self._solver_velocity_iteration_counts={} + self._solver_stabilization_threshs={} + self._robot_bodynames={} + self._robot_n_links={} + self._robot_n_dofs={} + self._robot_dof_names={} + self._distr_offset={} # decribed how robots within each env are distributed + self._spawning_radius=self._env_opts["spawning_radius"] # [m] -> default distance between roots of robots in a single + self._height_sensors={} + self._height_imgs={} + self._height_vis={} + self._height_vis_step={} + self._height_vis_step={} + self._height_vis={} + self._terrain_hit_margin = 1.0 + self._terrain_hit_log_period = 1000 + self._terrain_hit_active = {} + self._terrain_hit_counts = {} + self._terrain_hit_counts_last_logged = {} + for robot_name in self._robot_names: + self._terrain_hit_active[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.bool) + self._terrain_hit_counts[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.int32) + self._terrain_hit_counts_last_logged[robot_name] = None + + def _import_isaac_pkgs(self): + # we use global, so that we can create the simulation app inside (and so + # access Isaac's kit) and also expose to all methods the imports + global World, omni_kit, get_context, UsdLux, Sdf, Gf, UsdPhysics, PhysicsSchemaTools, UsdShade, Vt + global enable_extension, set_camera_view, _urdf, move_prim, GridCloner, prim_utils + global 
get_current_stage, Scene, ArticulationView, RigidPrimView, rep + global OmniContactSensors, RlTerrains,OmniJntImpCntrl + global PhysxSchema, UsdGeom + global _sensor, _dynamic_control + global get_prim_at_path + + from pxr import PhysxSchema, UsdGeom, UsdShade, Vt + + from omni.isaac.core.world import World + from omni.usd import get_context + from pxr import UsdLux, Sdf, Gf, UsdPhysics, PhysicsSchemaTools + from omni.isaac.core.utils.extensions import enable_extension + from omni.isaac.core.utils.viewports import set_camera_view + import omni.kit as omni_kit + from omni.importer.urdf import _urdf + from omni.isaac.core.utils.prims import move_prim + from omni.isaac.cloner import GridCloner + import omni.isaac.core.utils.prims as prim_utils + from omni.isaac.core.utils.stage import get_current_stage + from omni.isaac.core.scenes.scene import Scene + from omni.isaac.core.articulations import ArticulationView + from omni.isaac.core.prims import RigidPrimView + + import omni.replicator.core as rep + + from omni.isaac.core.utils.prims import get_prim_at_path + + from omni.isaac.sensor import _sensor + + from omni.isaac.dynamic_control import _dynamic_control + + from aug_mpc_envs.utils.contact_sensor import OmniContactSensors + from aug_mpc_envs.utils.omni_jnt_imp_cntrl import OmniJntImpCntrl + from aug_mpc_envs.utils.terrains import RlTerrains + + def _parse_env_opts(self): + isaac_opts={} + isaac_opts["envs_ns"]="/World/envs" + isaac_opts["template_env_ns"]=isaac_opts["envs_ns"] + "/env_0" + isaac_opts["base_linkname"]="base_link" + isaac_opts["deduce_base_link"]=False + isaac_opts["ground_plane_prim_path"]="/World/terrain" + isaac_opts["physics_prim_path"]="/physicsScene" + isaac_opts["use_gpu"]=True + isaac_opts["use_gpu_pipeline"]=True + isaac_opts["device"]="cuda" + isaac_opts["is_fixed_base"]=False + isaac_opts["merge_fixed_jnts"]=True + isaac_opts["self_collide"]=True + isaac_opts["sim_device"]="cuda" if isaac_opts["use_gpu"] else "cpu" + 
isaac_opts["physics_dt"]=1e-3 + isaac_opts["gravity"] = np.array([0.0, 0.0, -9.81]) + isaac_opts["use_fabric"]=True# Enable/disable reading of physics buffers directly. Default is True. + isaac_opts["replicate_physics"]=True + # isaac_opts["worker_thread_count"]=4 + isaac_opts["solver_type"]=1 # 0: PGS, 1:TGS, defaults to TGS. PGS faster but TGS more stable + isaac_opts["enable_stabilization"]=True + # isaac_opts["bounce_threshold_velocity"] = 0.2 + # isaac_opts["friction_offset_threshold"] = 0.04 + # isaac_opts["friction_correlation_distance"] = 0.025 + # isaac_opts["enable_sleeping"] = True + # Per-actor settings ( can override in actor_options ) + isaac_opts["solver_position_iteration_count"] = 4 # defaults to 4 + isaac_opts["solver_velocity_iteration_count"] = 3 # defaults to 1 + isaac_opts["sleep_threshold"] = 1e-5 # Mass-normalized kinetic energy threshold below which an actor may go to sleep. + # Allowed range [0, max_float). + isaac_opts["stabilization_threshold"] = 1e-4 + # Per-body settings ( can override in actor_options ) + # isaac_opts["enable_gyroscopic_forces"] = True + # isaac_opts["density"] = 1000 # density to be used for bodies that do not specify mass or density + # isaac_opts["max_depenetration_velocity"] = 100.0 + # isaac_opts["solver_velocity_iteration_count"] = 1 + # GPU buffers settings + isaac_opts["gpu_max_rigid_contact_count"] = 512 * 2048 + isaac_opts["gpu_max_rigid_patch_count"] = 80 * 2048 + isaac_opts["gpu_found_lost_pairs_capacity"] = 102400 + isaac_opts["gpu_found_lost_aggregate_pairs_capacity"] = 102400 + isaac_opts["gpu_total_aggregate_pairs_capacity"] = 102400 + # isaac_opts["gpu_max_soft_body_contacts"] = 1024 * 1024 + # isaac_opts["gpu_max_particle_contacts"] = 1024 * 1024 + # isaac_opts["gpu_heap_capacity"] = 64 * 1024 * 1024 + # isaac_opts["gpu_temp_buffer_capacity"] = 16 * 1024 * 1024 + # isaac_opts["gpu_max_num_partitions"] = 8 + + isaac_opts["env_spacing"]=15.0 + isaac_opts["spawning_height"]=1.0 + 
isaac_opts["spawning_radius"]=1.0 + isaac_opts["spawn_height_check_half_extent"]=0.2 + isaac_opts["spawn_height_cushion"]=0.03 + + isaac_opts["enable_height_vis"]=False + isaac_opts["height_vis_radius"]=0.03 + isaac_opts["height_vis_update_period"]=1 + isaac_opts["collision_refinement_level"]=3 # increase cylinder tesselation for smoother wheel contacts + + # rendering helpers + isaac_opts["render_to_file"]=False + isaac_opts["use_follow_camera"]=False # if True, follow robot during rendering in human mode + isaac_opts["render_follow_env_idx"]=0 + isaac_opts["render_follow_robot_idx"]=0 + isaac_opts["render_follow_offset"]=[-0.2, 3.0, 0.1] + isaac_opts["render_follow_target_offset"]=[-0.2, -1.0, 0.0] + isaac_opts["rendering_dt"]=15*isaac_opts["physics_dt"] + isaac_opts["camera_prim_path"]="/OmniverseKit_Persp" + isaac_opts["render_resolution"]=[1280, 720] # [1024, 576] + + isaac_opts["render_panoramic_cam"]=True + isaac_opts["render_panoramic_cam_height"]=2.0 + isaac_opts["render_panoramic_cam_target_xy"]=[10.0, 14.] 
+ isaac_opts["render_panoramic_cam_target_z"]=1.2 + + # ground opts + isaac_opts["use_flat_ground"]=True + isaac_opts["static_friction"]=0.5 + isaac_opts["dynamic_friction"]=0.5 + isaac_opts["restitution"]=0.1 + isaac_opts["ground_type"]="random" + isaac_opts["ground_size"]=800 + isaac_opts["terrain_border"]=isaac_opts["ground_size"]/2 + isaac_opts["dh_ground"]=0.03 + isaac_opts["step_height_lb"]=0.08 + isaac_opts["step_height_ub"]=0.15 + isaac_opts["step_width_lb"]=0.4 + isaac_opts["step_width_ub"]= 1.5 + isaac_opts["contact_prims"] = [] + isaac_opts["sensor_radii"] = 0.1 + isaac_opts["contact_offsets"] = {} + + isaac_opts["enable_livestream"] = False + isaac_opts["enable_viewport"] = False + + isaac_opts["use_diff_vels"] = False + + # random perturbations (impulse, durations, directions are sampled uniformly, force/torque computed accordinly) + isaac_opts["use_random_pertub"]=False + isaac_opts["pert_planar_only"]=True # if True, linear pushes only in xy plane and no torques + + isaac_opts["pert_wrenches_rate"]=10.0 # on average 1 pert every pert_wrenches_rate seconds + isaac_opts["pert_wrenches_min_duration"]=0.6 + isaac_opts["pert_wrenches_max_duration"]=3.5 # [s] + isaac_opts["pert_force_max_weight_scale"]=1.2 # clip force norm to scale*weight + isaac_opts["pert_force_min_weight_scale"]=0.2 # optional min force norm as scale*weight + isaac_opts["pert_torque_max_weight_scale"]=1.0 # clip torque norm to scale*weight*max_ang_impulse_lever + + isaac_opts["pert_target_delta_v"]=2.0 # [m/s] desired max impulse = m*delta_v + isaac_opts["det_pert_rate"]=True + + # max impulse (unitless scale multiplied by weight to get N*s): delta_v/g + isaac_opts["max_lin_impulse_norm"]=isaac_opts["pert_target_delta_v"]/9.81 + isaac_opts["lin_impulse_mag_min"]=1.0 # [0, 1] -> min fraction of max_lin_impulse_norm when sampling + isaac_opts["lin_impulse_mag_max"]=1.0 # [0, 1] -> max fraction of max_lin_impulse_norm when sampling + + isaac_opts["max_ang_impulse_lever"]=0.2 # [m] + 
isaac_opts["max_ang_impulse_norm"]=isaac_opts["max_lin_impulse_norm"]*isaac_opts["max_ang_impulse_lever"] + isaac_opts["terrain_hit_log_period"]=1000 + + # opts definition end + + isaac_opts.update(self._env_opts) # update defaults with provided opts + isaac_opts["rendering_freq"]=int(isaac_opts["rendering_dt"]/isaac_opts["physics_dt"]) + # isaac_opts["rendering_dt"]=isaac_opts["physics_dt"] # forcing rendering_dt==physics_dt + # for some mystic reason simulation is infuenced by the rendering dt (why ??????) + + # modify things + isaac_opts["cloning_offset"] = np.array([[0.0, 0.0, isaac_opts["spawning_height"]]]*self._num_envs) + if not isaac_opts["use_gpu"]: # don't use GPU at all + isaac_opts["use_gpu_pipeline"]=False + isaac_opts["device"]="cpu" + isaac_opts["sim_device"]="cpu" + else: # use GPU + if isaac_opts["use_gpu_pipeline"]: + isaac_opts["device"]="cuda" + isaac_opts["sim_device"]="cuda" + else: # cpu pipeline + isaac_opts["device"]="cpu" + isaac_opts["sim_device"]="cpu" + isaac_opts["use_gpu"]=False + # isaac_opts["sim_device"]=isaac_opts["device"] + # overwrite env opts in case some sim params were missing + self._env_opts=isaac_opts + + # update device flag based on sim opts + self._device=isaac_opts["device"] + self._use_gpu=isaac_opts["use_gpu"] + + def _calc_robot_distrib(self): + + import math + # we distribute robots in a single env. 
along the + # circumference of a circle of given radius + n_robots = len(self._robot_names) + offset_baseangle = 2 * math.pi / n_robots + for i in range(n_robots): + offset_angle = offset_baseangle * (i + 1) + robot_offset_wrt_center = torch.tensor([self._spawning_radius * math.cos(offset_angle), + self._spawning_radius * math.sin(offset_angle), 0], + device=self._device, + dtype=self._dtype) + # list with n references to the original tensor + tensor_list = [robot_offset_wrt_center] * self._num_envs + self._distr_offset[self._robot_names[i]] = torch.stack(tensor_list, dim=0) + + def _init_world(self): + + self._cloner = GridCloner(spacing=self._env_opts["env_spacing"]) + self._cloner.define_base_env(self._env_opts["envs_ns"]) + prim_utils.define_prim(self._env_opts["template_env_ns"]) + self._envs_prim_paths = self._cloner.generate_paths(self._env_opts["envs_ns"] + "/env", + self._num_envs) + + # parse device based on sim_param settings + + info = "Using sim device: " + str(self._env_opts["sim_device"]) + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + + self._world = World( + physics_dt=self._env_opts["physics_dt"], + rendering_dt=self._env_opts["physics_dt"], # == physics dt (rendering is actually done manually by this env) + backend=self._backend, + device=str(self._env_opts["sim_device"]), + physics_prim_path=self._env_opts["physics_prim_path"], + set_defaults = False, # set to True to use the defaults settings [physics_dt = 1.0/ 60.0, + # stage units in meters = 0.01 (i.e in cms), rendering_dt = 1.0 / 60.0, gravity = -9.81 m / s + # ccd_enabled, stabilization_enabled, gpu dynamics turned off, + # broadcast type is MBP, solver type is TGS] + sim_params=self._env_opts + ) + + big_info = "[World] Creating Isaac simulation " + self._name + "\n" + \ + "use_gpu_pipeline: " + str(self._env_opts["use_gpu_pipeline"]) + "\n" + \ + "device: " + str(self._env_opts["sim_device"]) + "\n" +\ + "backend: " + 
str(self._backend) + "\n" +\ + "integration_dt: " + str(self.physics_dt()) + "\n" + \ + "rendering_dt: " + str(self.rendering_dt()) + "\n" + Journal.log(self.__class__.__name__, + "_init_world", + big_info, + LogType.STAT, + throw_when_excep = True) + + # we get the physics context to expose additional low-level ## + # settings of the simulation + self._physics_context = self._world.get_physics_context() + self._physics_scene_path = self._physics_context.prim_path + # self._physics_context.enable_gpu_dynamics(True) + self._physics_context.enable_stablization(True) + self._physics_scene_prim = self._physics_context.get_current_physics_scene_prim() + self._solver_type = self._physics_context.get_solver_type() + + if "gpu_max_rigid_contact_count" in self._env_opts: + self._physics_context.set_gpu_max_rigid_contact_count(self._env_opts["gpu_max_rigid_contact_count"]) + if "gpu_max_rigid_patch_count" in self._env_opts: + self._physics_context.set_gpu_max_rigid_patch_count(self._env_opts["gpu_max_rigid_patch_count"]) + if "gpu_found_lost_pairs_capacity" in self._env_opts: + self._physics_context.set_gpu_found_lost_pairs_capacity(self._env_opts["gpu_found_lost_pairs_capacity"]) + if "gpu_found_lost_aggregate_pairs_capacity" in self._env_opts: + self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(self._env_opts["gpu_found_lost_aggregate_pairs_capacity"]) + if "gpu_total_aggregate_pairs_capacity" in self._env_opts: + self._physics_context.set_gpu_total_aggregate_pairs_capacity(self._env_opts["gpu_total_aggregate_pairs_capacity"]) + if "gpu_max_soft_body_contacts" in self._env_opts: + self._physics_context.set_gpu_max_soft_body_contacts(self._env_opts["gpu_max_soft_body_contacts"]) + if "gpu_max_particle_contacts" in self._env_opts: + self._physics_context.set_gpu_max_particle_contacts(self._env_opts["gpu_max_particle_contacts"]) + if "gpu_heap_capacity" in self._env_opts: + self._physics_context.set_gpu_heap_capacity(self._env_opts["gpu_heap_capacity"]) + if 
"gpu_temp_buffer_capacity" in self._env_opts: + self._physics_context.set_gpu_temp_buffer_capacity(self._env_opts["gpu_temp_buffer_capacity"]) + if "gpu_max_num_partitions" in self._env_opts: + self._physics_context.set_gpu_max_num_partitions(self._env_opts["gpu_max_num_partitions"]) + + # overwriting defaults + # self._physics_context.set_gpu_max_rigid_contact_count(2 * self._physics_context.get_gpu_max_rigid_contact_count()) + # self._physics_context.set_gpu_max_rigid_patch_count(2 * self._physics_context.get_gpu_max_rigid_patch_count()) + # self._physics_context.set_gpu_found_lost_pairs_capacity(2 * self._physics_context.get_gpu_found_lost_pairs_capacity()) + # self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity()) + # self._physics_context.set_gpu_total_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_total_aggregate_pairs_capacity()) + # self._physics_context.set_gpu_heap_capacity(2 * self._physics_context.get_gpu_heap_capacity()) + # self._physics_context.set_gpu_temp_buffer_capacity(20 * self._physics_context.get_gpu_heap_capacity()) + # self._physics_context.set_gpu_max_num_partitions(20 * self._physics_context.get_gpu_temp_buffer_capacity()) + + # GPU buffers + self._gpu_max_rigid_contact_count = self._physics_context.get_gpu_max_rigid_contact_count() + self._gpu_max_rigid_patch_count = self._physics_context.get_gpu_max_rigid_patch_count() + self._gpu_found_lost_pairs_capacity = self._physics_context.get_gpu_found_lost_pairs_capacity() + self._gpu_found_lost_aggregate_pairs_capacity = self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity() + self._gpu_total_aggregate_pairs_capacity = self._physics_context.get_gpu_total_aggregate_pairs_capacity() + self._gpu_max_soft_body_contacts = self._physics_context.get_gpu_max_soft_body_contacts() + self._gpu_max_particle_contacts = self._physics_context.get_gpu_max_particle_contacts() + self._gpu_heap_capacity 
= self._physics_context.get_gpu_heap_capacity() + self._gpu_temp_buffer_capacity = self._physics_context.get_gpu_temp_buffer_capacity() + # self._gpu_max_num_partitions = physics_context.get_gpu_max_num_partitions() # BROKEN->method does not exist + + big_info2 = "[physics context]:" + "\n" + \ + "gpu_max_rigid_contact_count: " + str(self._gpu_max_rigid_contact_count) + "\n" + \ + "gpu_max_rigid_patch_count: " + str(self._gpu_max_rigid_patch_count) + "\n" + \ + "gpu_found_lost_pairs_capacity: " + str(self._gpu_found_lost_pairs_capacity) + "\n" + \ + "gpu_found_lost_aggregate_pairs_capacity: " + str(self._gpu_found_lost_aggregate_pairs_capacity) + "\n" + \ + "gpu_total_aggregate_pairs_capacity: " + str(self._gpu_total_aggregate_pairs_capacity) + "\n" + \ + "gpu_max_soft_body_contacts: " + str(self._gpu_max_soft_body_contacts) + "\n" + \ + "gpu_max_particle_contacts: " + str(self._gpu_max_particle_contacts) + "\n" + \ + "gpu_heap_capacity: " + str(self._gpu_heap_capacity) + "\n" + \ + "gpu_temp_buffer_capacity: " + str(self._gpu_temp_buffer_capacity) + "\n" + \ + "use_gpu_sim: " + str(self._world.get_physics_context().use_gpu_sim) + "\n" + \ + "use_gpu_pipeline: " + str(self._world.get_physics_context().use_gpu_pipeline) + "\n" + \ + "use_fabric: " + str(self._world.get_physics_context().use_fabric) + "\n" + \ + "world device: " + str(self._world.get_physics_context().device) + "\n" + \ + "physics context device: " + str(self._world.get_physics_context().device) + "\n" + + Journal.log(self.__class__.__name__, + "set_task", + big_info2, + LogType.STAT, + throw_when_excep = True) + + self._scene = self._world.scene + self._physics_context = self._world.get_physics_context() + + self._stage = get_context().get_stage() + + # strong, uniform lighting: bright sun + dome fill to cover the whole terrain + prim_utils.define_prim("/World/Lighting", "Xform") + sun_path = "/World/Lighting/SunLight" + dome_path = "/World/Lighting/AmbientDome" + + distantLight = 
UsdLux.DistantLight.Define(self._stage, Sdf.Path(sun_path)) + distantLight.CreateIntensityAttr(450.0) + distantLight.CreateAngleAttr(0.5) # soften shadows a bit + distantLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0)) + # Shadow attr naming differs across versions; set the underlying USD attribute directly. + distantLight.GetPrim().CreateAttribute("shadow:enable", Sdf.ValueTypeNames.Bool).Set(True) + + domeLight = UsdLux.DomeLight.Define(self._stage, Sdf.Path(dome_path)) + domeLight.CreateIntensityAttr(200.0) + domeLight.CreateExposureAttr(1.0) + domeLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0)) + + self._configure_scene() + + # if "enable_viewport" in sim_params: + # self._render = sim_params["enable_viewport"] + + def _get_baselink_candidate(self, + robot_name: str): + + stage=get_current_stage() + all_prims = [prim.GetPath().pathString for prim in stage.Traverse()] + filtered_prims = [prim for prim in all_prims if f"/{robot_name}/" in prim] + + matching=min(filtered_prims, key=len) if filtered_prims else None + + return matching.split('/')[-1] + + def _configure_scene(self): + + # environment + self._fix_base = [self._env_opts["is_fixed_base"]] * len(self._robot_names) + self._self_collide = [self._env_opts["self_collide"]] * len(self._robot_names) + self._merge_fixed = [self._env_opts["merge_fixed_jnts"]] * len(self._robot_names) + + Journal.log(self.__class__.__name__, + "_configure_scene", + "cloning environments...", + LogType.STAT, + throw_when_excep = True) + + self._ground_plane_prim_paths=[] + self._ground_plane=None + self.terrain_generator = None + if not self._env_opts["use_flat_ground"]: + # ensure terrain is large enough to contain all env clones + spacing = float(self._env_opts["env_spacing"]) + num_envs = self._num_envs + num_per_row = max(1, int(math.sqrt(num_envs))) + num_rows = int(math.ceil(num_envs / num_per_row)) + num_cols = int(math.ceil(num_envs / num_rows)) + row_offset = 0.5 * spacing * (num_rows - 1) + col_offset = 0.5 * spacing * 
(num_cols - 1) + margin = spacing # leave a full spacing as border cushion + required_size = 2.0 * (max(row_offset, col_offset) + margin) + + if required_size > self._env_opts["ground_size"]: + old_size = self._env_opts["ground_size"] + self._env_opts["ground_size"] = required_size + self._env_opts["terrain_border"] = self._env_opts["ground_size"] / 2.0 + Journal.log(self.__class__.__name__, + "_configure_scene", + f"Ground size increased from {old_size} to {required_size} to fit {num_envs} envs (spacing {spacing}).", + LogType.WARN, + throw_when_excep = True) + + min_height=-self._env_opts["dh_ground"] + max_height=self._env_opts["dh_ground"] + step=max_height-min_height + if self._env_opts["ground_type"]=="random": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_random_uniform_terrain(terrain_size=self._env_opts["ground_size"], + min_height=min_height, + max_height=max_height, + step=step, + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"]) + elif self._env_opts["ground_type"]=="random_patches": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif_patches" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_random_patched_terrain(terrain_size=self._env_opts["ground_size"], + min_height=min_height, + max_height=max_height, + step=step, + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + patch_ratio=0.8, + 
patch_size=10 + ) + elif self._env_opts["ground_type"]=="slopes": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_slopes" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_sloped_terrain(terrain_size=self._env_opts["ground_size"], + slope=-0.5, + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"] + ) + elif self._env_opts["ground_type"]=="stairs": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stairs" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_stairs_terrain(terrain_size=self._env_opts["ground_size"], + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + ) + elif self._env_opts["ground_type"]=="stepup": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_stepup_terrain( + terrain_size=self._env_opts["ground_size"], + stairs_ratio=0.3, + min_steps=1, + max_steps=1, + pyramid_platform_size=15.0, + position=np.array([0.0, 0.0, 0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + step_height=0.15 + ) + elif self._env_opts["ground_type"]=="stepup_prim": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup_prim" + 
self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_stepup_prim_terrain( + terrain_size=self._env_opts["ground_size"], + stairs_ratio=0.99, + platform_size=50.0, + step_height_lb=self._env_opts["step_height_lb"], + step_height_ub=self._env_opts["step_height_ub"], + min_step_width=self._env_opts.get("step_width_lb", None), + max_step_width=self._env_opts.get("step_width_ub", None), + position=np.array([0.0, 0.0, 0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + n_steps=25, + area_factor=0.7, + random_n_steps=False + ) + # apply a custom checker material to the terrain primitives + mat_path = self._ensure_lightblue_checker_material() + self._apply_checker_material_to_terrain(terrain_root_path=terrain_prim_path, material_path=mat_path) + self._add_checker_overlay_plane(terrain_root_path=terrain_prim_path, material_path=mat_path) + self._add_checker_overlays_on_tiles(terrain_root_path=terrain_prim_path, material_path=mat_path) + else: + ground_type=self._env_opts["ground_type"] + Journal.log(self.__class__.__name__, + "_configure_scene", + f"Terrain type {ground_type} not supported. 
Will default to flat ground.", + LogType.EXCEP, + throw_when_excep = True) + + # add offsets to intial height depending on the terrain heightmap + if self.terrain_generator is not None: + stage = get_current_stage() + up_axis = UsdGeom.GetStageUpAxis(stage) + + spacing = self._env_opts["env_spacing"] + num_envs = self._num_envs + + num_per_row = max(1, int(np.sqrt(num_envs))) + num_rows = int(np.ceil(num_envs / num_per_row)) + num_cols = int(np.ceil(num_envs / num_rows)) + + row_offset = 0.5 * spacing * (num_rows - 1) + col_offset = 0.5 * spacing * (num_cols - 1) + + offsets = np.array(self._env_opts["cloning_offset"], dtype=float) + + for env_idx in range(num_envs): + row = env_idx // num_cols + col = env_idx % num_cols + x = row_offset - row * spacing + y = col * spacing - col_offset + + half_extent = self._env_opts["spawn_height_check_half_extent"] + if up_axis == UsdGeom.Tokens.z: + height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent) + else: + height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent) + + offsets[env_idx][2] += height + self._env_opts["spawn_height_cushion"] + + self._env_opts["cloning_offset"] = offsets + + else: + defaul_prim_path=self._env_opts["ground_plane_prim_path"]+"_default" + self._ground_plane_prim_paths.append(defaul_prim_path) + self._ground_plane=self._scene.add_default_ground_plane(z_position=0, + name="terrain", + prim_path=defaul_prim_path, + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"]) + self._terrain_collision=None + + physics_material=UsdPhysics.MaterialAPI.Apply(self._ground_plane.prim) + physics_material.CreateDynamicFrictionAttr(self._env_opts["dynamic_friction"]) + physics_material.CreateStaticFrictionAttr(self._env_opts["static_friction"]) + physics_material.CreateRestitutionAttr(self._env_opts["restitution"]) + # 
self._ground_plane.apply_physics_material(physics_material) + + self._terrain_collision=PhysxSchema.PhysxCollisionAPI.Get(get_current_stage(), self._ground_plane.prim_path) + self._terrain_material=UsdPhysics.MaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path) + self._terrain_physix_material=PhysxSchema.PhysxMaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path) + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + urdf_path = self._robot_urdf_paths[robot_name] + srdf_path = self._robot_srdf_paths[robot_name] + fix_base = self._fix_base[i] + self_collide = self._self_collide[i] + merge_fixed = self._merge_fixed[i] + + self._generate_rob_descriptions(robot_name=robot_name, + urdf_path=urdf_path, + srdf_path=srdf_path) + self._import_urdf(robot_name, + fix_base=fix_base, + self_collide=self_collide, + merge_fixed=merge_fixed) + + self._cloner.clone( + source_prim_path=self._env_opts["template_env_ns"], + prim_paths=self._envs_prim_paths, + replicate_physics=self._env_opts["replicate_physics"], + position_offsets=self._env_opts["cloning_offset"] + ) # we can clone the environment in which all the robos are + + base_link_name=self._env_opts["base_linkname"] + if self._env_opts["deduce_base_link"]: + base_link_name=self._get_baselink_candidate(robot_name=robot_name) + + self._robots_art_views[robot_name] = ArticulationView(name = robot_name + "ArtView", + prim_paths_expr = self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name, + reset_xform_properties=False) + self._robots_articulations[robot_name] = self._scene.add(self._robots_art_views[robot_name]) + + # height grid sensor (terrain may be None if using flat ground) + self._height_sensors[robot_name] = HeightGridSensor( + terrain_utils=self.terrain_generator if not self._env_opts["use_flat_ground"] else None, + grid_size=int(self._env_opts["height_sensor_pixels"]), + resolution=float(self._env_opts["height_sensor_resolution"]), + 
forward_offset=float(self._env_opts["height_sensor_forward_offset"]), + lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]), + n_envs=self._num_envs, + device=self._device, + dtype=self._dtype) + # ensure shared-data flags are set if a height sensor is active + self._env_opts["height_sensor_pixels"] = int(self._env_opts["height_sensor_pixels"]) + self._env_opts["height_sensor_resolution"] = float(self._env_opts["height_sensor_resolution"]) + self._enable_height_shared = True + self._height_sensor_pixels = self._env_opts["height_sensor_pixels"] + self._height_sensor_resolution = self._env_opts["height_sensor_resolution"] + self._height_vis_step[robot_name] = 0 + if self._env_opts["enable_height_vis"]: + self._height_vis[robot_name] = HeightGridVisualizer( + robot_name=robot_name, + num_envs=self._num_envs, + grid_size=int(self._env_opts["height_sensor_pixels"]), + resolution=float(self._env_opts["height_sensor_resolution"]), + marker_radius=float(self._env_opts["height_vis_radius"]), + forward_offset=float(self._env_opts["height_sensor_forward_offset"]), + lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]), + device=self._device, + dtype=self._dtype) + + self._blink_rigid_prim_views[robot_name] = RigidPrimView(prim_paths_expr=self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name, + name = robot_name + "RigidPrimView") # base link prim views + self._scene.add(self._blink_rigid_prim_views[robot_name]) # need to add so it is properly initialized when resetting world + + # self._robots_geom_prim_views[robot_name] = GeometryPrimView(name = robot_name + "GeomView", + # prim_paths_expr = self._env_ns + "/env*"+ "/" + robot_name, + # # prepare_contact_sensors = True + # ) + # self._robots_geom_prim_views[robot_name].apply_collision_apis() # to be able to apply contact sensors + + # init contact sensors + self._init_contact_sensors(robot_name=robot_name) # IMPORTANT: this has to be called + # after calling the 
clone() method and initializing articulation views!! + + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_1/collisions/mesh_1") + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_2/collisions/mesh_1") + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_3/collisions/mesh_1") + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_4/collisions/mesh_1") + + # filter collisions between default ground plane and custom terrains + # self._cloner.filter_collisions(physicsscene_path = self._physics_context.prim_path, + # collision_root_path = "/World/terrain_collisions", + # prim_paths=[self._ground_plane_prim_paths[1]], + # global_paths=[self._ground_plane_prim_paths[0]] + # ) + + # delete_prim(self._env_opts["ground_plane_prim_path"] + "/SphereLight") # we remove the default spherical light + + # set default camera viewport position and target + camera_position=[4.2, 4.2, 1.5] + camera_target=[0, 0, 0] + # use a single camera prim (configurable) for both viewport and render products + camera_prim = self._env_opts["camera_prim_path"] + self._set_initial_camera_params(camera_position=camera_position, + camera_target=camera_target, + camera_prim_path=camera_prim) + + if self._env_opts["render_to_file"]: + # base output dir + from datetime import datetime + timestamp = datetime.now().strftime("h%H_m%M_s%S_%d_%m_%Y") + self._render_output_dir = f"/tmp/IsaacRenderings/{timestamp}" + res = tuple(int(x) for x in self._env_opts["render_resolution"]) + + # create render product from chosen camera prim + self._render_product = rep.create.render_product(camera_prim, + res, name='rendering_camera') + self._render_writer = rep.WriterRegistry.get("BasicWriter") + self._render_writer.initialize(output_dir=self._render_output_dir, + rgb=True) + self._render_writer.attach([self._render_product]) + # optional top-down capture + if 
self._env_opts["render_panoramic_cam"]: + td_height = float(self._env_opts["render_panoramic_cam_height"]) + td_dir = self._render_output_dir + "/panoramic_cam" + td_offset = self._env_opts["render_panoramic_cam_target_xy"] + td_target_z = float(self._env_opts["render_panoramic_cam_target_z"]) + pos = [8.0, 11.0, td_height] + self._panoramic_cam_camera = rep.create.camera(focal_length=12, + name='rendering_camera_panoramic_cam', + clipping_range = (1, 200), + position = pos, + look_at = [td_offset[0], td_offset[1], td_target_z]) + self._panoramic_cam_render_product = rep.create.render_product(self._panoramic_cam_camera, + res, name='rendering_camera_panoramic_cam') + self._panoramic_cam_writer = rep.WriterRegistry.get("BasicWriter") + self._panoramic_cam_writer.initialize(output_dir=td_dir, rgb=True) + self._panoramic_cam_writer.attach([self._panoramic_cam_render_product]) + + self.apply_collision_filters(self._physics_context.prim_path, + "/World/collisions") + + self._reset_sim() + + self._fill_robot_info_from_world() + # initializes robot state data + + # update solver options + self._update_art_solver_options() + self._get_solver_info() # get again solver option before printing everything + self._print_envs_info() # debug print + + # for n in range(self._n_init_steps): # run some initialization steps + # self._step_sim() + + self._init_robots_state() + + self.scene_setup_completed = True + + Journal.log(self.__class__.__name__, + "set_up_scene", + "finished scene setup...", + LogType.STAT, + throw_when_excep = True) + + self._is = _sensor.acquire_imu_sensor_interface() + self._dyn_control=_dynamic_control.acquire_dynamic_control_interface() + + def _set_contact_links_material(self, prim_path: str): + prim=get_prim_at_path(prim_path) + physics_material=UsdPhysics.MaterialAPI.Apply(prim) + physics_material.CreateDynamicFrictionAttr(0) + physics_material.CreateStaticFrictionAttr(0) + physics_material.CreateRestitutionAttr(1.0) + 
physxMaterialAPI=PhysxSchema.PhysxMaterialAPI.Apply(prim) + physxMaterialAPI.CreateFrictionCombineModeAttr().Set("multiply") # average, min, multiply, max + physxMaterialAPI.CreateRestitutionCombineModeAttr().Set("multiply") + + def _get_lightblue_checker_texture_path(self) -> str: + tex_rel_path = os.path.join(os.path.dirname(__file__), "..", "assets", "textures", "ibrido_terrain_texture.png") + return os.path.abspath(tex_rel_path) + + def _ensure_lightblue_checker_material(self): + """Create (or reuse) a light-blue checker material for primitive terrains.""" + stage = get_current_stage() + mat_path = "/World/Looks/IbridoCheckerMaterial" + mat_prim = stage.GetPrimAtPath(mat_path) + if mat_prim.IsValid(): + return mat_path + + texture_path = self._get_lightblue_checker_texture_path() + + material = UsdShade.Material.Define(stage, mat_path) + + st_reader = UsdShade.Shader.Define(stage, f"{mat_path}/PrimvarReader_st") + st_reader.CreateIdAttr("UsdPrimvarReader_float2") + st_reader.CreateInput("varname", Sdf.ValueTypeNames.Token).Set("st") + st_reader.CreateOutput("result", Sdf.ValueTypeNames.Float2) + + uv_transform = UsdShade.Shader.Define(stage, f"{mat_path}/UVTransform") + uv_transform.CreateIdAttr("UsdTransform2d") + # keep UV scale at 1 here; tiling is controlled via mesh UVs + uv_transform.CreateInput("in", Sdf.ValueTypeNames.Float2).ConnectToSource(st_reader.GetOutput("result")) + uv_transform.CreateInput("scale", Sdf.ValueTypeNames.Float2).Set(Gf.Vec2f(1.0, 1.0)) + uv_transform.CreateOutput("result", Sdf.ValueTypeNames.Float2) + + tex = UsdShade.Shader.Define(stage, f"{mat_path}/CheckerTex") + tex.CreateIdAttr("UsdUVTexture") + tex.CreateInput("file", Sdf.ValueTypeNames.Asset).Set(Sdf.AssetPath(texture_path)) + tex.CreateInput("st", Sdf.ValueTypeNames.Float2).ConnectToSource(uv_transform.GetOutput("result")) + tex.CreateInput("wrapS", Sdf.ValueTypeNames.Token).Set("repeat") + tex.CreateInput("wrapT", Sdf.ValueTypeNames.Token).Set("repeat") + 
tex.CreateInput("minFilter", Sdf.ValueTypeNames.Token).Set("nearest") + tex.CreateInput("magFilter", Sdf.ValueTypeNames.Token).Set("nearest") + # disable mipmaps to avoid blurring sharp edges + tex.CreateInput("mipFilter", Sdf.ValueTypeNames.Token).Set("nearest") + tex.CreateInput("enableMipMap", Sdf.ValueTypeNames.Bool).Set(False) + tex.CreateInput("fallback", Sdf.ValueTypeNames.Color4f).Set(Gf.Vec4f(0.69, 0.85, 1.0, 1.0)) + tex.CreateOutput("rgb", Sdf.ValueTypeNames.Float3) + tex.CreateOutput("a", Sdf.ValueTypeNames.Float) + + pbr = UsdShade.Shader.Define(stage, f"{mat_path}/PBRShader") + pbr.CreateIdAttr("UsdPreviewSurface") + pbr.CreateInput("diffuseColor", Sdf.ValueTypeNames.Color3f).ConnectToSource(tex.GetOutput("rgb")) + pbr.CreateInput("roughness", Sdf.ValueTypeNames.Float).Set(0.45) + pbr.CreateInput("metallic", Sdf.ValueTypeNames.Float).Set(0.0) + pbr.CreateOutput("surface", Sdf.ValueTypeNames.Token) + + material.CreateSurfaceOutput().ConnectToSource(pbr.GetOutput("surface")) + + return mat_path + + def _ensure_groundplane_material(self): + """Guarantee a ground-plane material exists (default checker) and return its path.""" + stage = get_current_stage() + mat_path = "/World/Looks/groundPlaneMaterial" + mat_prim = stage.GetPrimAtPath(mat_path) + if mat_prim.IsValid(): + return mat_path + + # create a temporary default ground plane to spawn the checker material, then delete the geom + tmp_gp_path = "/World/tmp_ground_for_material" + self._scene.add_default_ground_plane(z_position=-1000.0, + name="tmp_ground_for_material", + prim_path=tmp_gp_path, + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"]) + + mat_prim = stage.GetPrimAtPath(mat_path) + prim_utils.delete_prim(tmp_gp_path) + + return mat_path if mat_prim.IsValid() else None + + def _apply_checker_material_to_terrain(self, terrain_root_path: str, material_path: str): + """Bind the checker material to 
all terrain prims (visual only).""" + + stage = get_current_stage() + mat_prim = stage.GetPrimAtPath(material_path) + if not mat_prim.IsValid(): + return + material = UsdShade.Material(mat_prim) + for prim in stage.Traverse(): + path = prim.GetPath().pathString + if not path.startswith(terrain_root_path): + continue + binding = UsdShade.MaterialBindingAPI.Apply(prim) + binding.Bind(material, UsdShade.Tokens.strongerThanDescendants) + + def _add_checker_overlay_plane(self, terrain_root_path: str, material_path: str): + """Create a thin visual-only mesh with UVs so the checker pattern shows up even on cube prims.""" + stage = get_current_stage() + plane_path = f"{terrain_root_path}/visual_checker" + plane_prim = stage.GetPrimAtPath(plane_path) + if plane_prim.IsValid(): + # if already exists, just (re)bind material + mat_prim = stage.GetPrimAtPath(material_path) + if mat_prim.IsValid(): + UsdShade.MaterialBindingAPI.Apply(plane_prim).Bind(UsdShade.Material(mat_prim), UsdShade.Tokens.strongerThanDescendants) + return + + # try to read base slab dimensions/position to size the overlay + slab_path = terrain_root_path + "_slab" + slab_prim = stage.GetPrimAtPath(slab_path) + center = Gf.Vec3f(0.0, 0.0, 0.0) + width = float(self._env_opts.get("ground_size", 50.0)) + length = width + thickness = 0.1 + if slab_prim.IsValid(): + xformable = UsdGeom.Xformable(slab_prim) + for op in xformable.GetOrderedXformOps(): + if op.GetOpType() == UsdGeom.XformOp.TypeTranslate: + center = Gf.Vec3f(op.Get()) + elif op.GetOpType() == UsdGeom.XformOp.TypeScale: + scale = op.Get() + width = float(scale[0]) + length = float(scale[1]) + thickness = float(scale[2]) + + half_w = 0.5 * width + half_l = 0.5 * length + z = center[2] + 0.5 * thickness + 1e-3 # slightly above the slab to avoid z-fighting + + plane = UsdGeom.Mesh.Define(stage, plane_path) + plane.CreatePointsAttr([ + Gf.Vec3f(center[0] - half_w, center[1] - half_l, z), + Gf.Vec3f(center[0] + half_w, center[1] - half_l, z), + 
Gf.Vec3f(center[0] + half_w, center[1] + half_l, z), + Gf.Vec3f(center[0] - half_w, center[1] + half_l, z), + ]) + plane.CreateFaceVertexCountsAttr([4]) + plane.CreateFaceVertexIndicesAttr([0, 1, 2, 3]) + plane.CreateDoubleSidedAttr(True) + + # increase tiling density; adjustable via env opt + uv_repeats = max(1, int((width / 10.0) * 3.0)) + primvars = UsdGeom.PrimvarsAPI(plane) + st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex) + st.Set(Vt.Vec2fArray([ + Gf.Vec2f(0.0, 0.0), + Gf.Vec2f(uv_repeats, 0.0), + Gf.Vec2f(uv_repeats, uv_repeats), + Gf.Vec2f(0.0, uv_repeats), + ])) + st.SetInterpolation(UsdGeom.Tokens.vertex) + + mat_prim = stage.GetPrimAtPath(material_path) + if mat_prim.IsValid(): + material = UsdShade.Material(mat_prim) + UsdShade.MaterialBindingAPI.Apply(plane.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants) + + def _add_checker_overlays_on_tiles(self, terrain_root_path: str, material_path: str): + """Add visual quads on top of each tile cube so the checker texture appears on raised steps.""" + stage = get_current_stage() + mat_prim = stage.GetPrimAtPath(material_path) + if not mat_prim.IsValid(): + return + material = UsdShade.Material(mat_prim) + for prim in stage.Traverse(): + path = prim.GetPath().pathString + if not path.startswith(terrain_root_path): + continue + if prim.GetTypeName() != "Cube": + continue + name = path.split("/")[-1] + if "wall" in name or name.endswith("_slab"): + continue + xformable = UsdGeom.Xformable(prim) + center = Gf.Vec3f(0.0, 0.0, 0.0) + scale = Gf.Vec3f(1.0, 1.0, 1.0) + for op in xformable.GetOrderedXformOps(): + if op.GetOpType() == UsdGeom.XformOp.TypeTranslate: + center = Gf.Vec3f(op.Get()) + elif op.GetOpType() == UsdGeom.XformOp.TypeScale: + scale = Gf.Vec3f(op.Get()) + width, length, height = float(scale[0]), float(scale[1]), float(scale[2]) + half_w = 0.5 * width + half_l = 0.5 * length + z = center[2] + 0.5 * height + 1e-3 + overlay_path = 
f"{path}_checker" + if stage.GetPrimAtPath(overlay_path).IsValid(): + UsdShade.MaterialBindingAPI.Apply(stage.GetPrimAtPath(overlay_path)).Bind(material, UsdShade.Tokens.strongerThanDescendants) + continue + mesh = UsdGeom.Mesh.Define(stage, overlay_path) + mesh.CreatePointsAttr([ + Gf.Vec3f(center[0] - half_w, center[1] - half_l, z), + Gf.Vec3f(center[0] + half_w, center[1] - half_l, z), + Gf.Vec3f(center[0] + half_w, center[1] + half_l, z), + Gf.Vec3f(center[0] - half_w, center[1] + half_l, z), + ]) + mesh.CreateFaceVertexCountsAttr([4]) + mesh.CreateFaceVertexIndicesAttr([0, 1, 2, 3]) + mesh.CreateDoubleSidedAttr(True) + uv_repeats = max(1, int((width / 10.0) * float(self._env_opts.get("checker_uv_density", 3.0)))) + primvars = UsdGeom.PrimvarsAPI(mesh) + st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex) + st.Set(Vt.Vec2fArray([ + Gf.Vec2f(0.0, 0.0), + Gf.Vec2f(uv_repeats, 0.0), + Gf.Vec2f(uv_repeats, uv_repeats), + Gf.Vec2f(0.0, uv_repeats), + ])) + st.SetInterpolation(UsdGeom.Tokens.vertex) + UsdShade.MaterialBindingAPI.Apply(mesh.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants) + + def _is_link(self, prim): + return prim.GetTypeName() == 'Xform' + + def _is_joint(self, prim): + return prim.GetTypeName() == 'PhysicsRevoluteJoint' + + def _create_collision_group(self, group_path, link_paths): + """ + Create a collision group under the given group_path that contains the links. + Args: + group_path (str): Path to create the collision group. + link_paths (List[str]): List of link paths to include in this group. 
+ """ + collision_group = Sdf.PrimSpec( + self._stage.GetRootLayer().GetPrimAtPath(group_path), + group_path.split("/")[-1], + Sdf.SpecifierDef, + "PhysicsCollisionGroup" + ) + # Add the links to the collision group + for link_path in link_paths: + includes_rel = Sdf.RelationshipSpec(collision_group, "collection:colliders:includes", False) + includes_rel.targetPathList.Append(link_path) + + def _add_collision_filter(self, group_path, link1, link2): + """ + Filters collision between two successive links. + + Args: + group_path (str): Path of the collision group. + link1 (str): Path of the first link. + link2 (str): Path of the second link. + """ + # Create a relationship to filter collisions between the two links + filtered_groups = Sdf.RelationshipSpec( + self._stage.GetPrimAtPath(group_path), + "physics:filteredGroups", + False + ) + filtered_groups.targetPathList.Append(link1) + filtered_groups.targetPathList.Append(link2) + + def _render_sim(self, mode="human"): + + if mode == "human": + # follow requested robot/env + if self._env_opts["use_follow_camera"]: + ridx = int(self._env_opts["render_follow_robot_idx"]) + eidx = int(self._env_opts["render_follow_env_idx"]) + if ridx < len(self._robot_names) and eidx < self._num_envs: + rname = self._robot_names[ridx] + pos = self._root_p.get(rname, None) + if pos is not None and pos.shape[0] > eidx: + base = pos[eidx].detach().cpu() + offset = torch.as_tensor(self._env_opts["render_follow_offset"], + device=base.device, dtype=base.dtype) + target_offset = torch.as_tensor(self._env_opts["render_follow_target_offset"], + device=base.device, dtype=base.dtype) + quat = self._root_q.get(rname, None) + if quat is not None and quat.shape[0] > eidx: + q = quat[eidx].detach().cpu() + w, x, y, z = q.unbind(-1) + yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z)) + cy, sy = torch.cos(yaw), torch.sin(yaw) + rot = torch.tensor([[cy, -sy], [sy, cy]], device=base.device, dtype=base.dtype) + offset_xy = torch.matmul(rot, 
offset[:2]) + target_offset_xy = torch.matmul(rot, target_offset[:2]) + offset = torch.stack((offset_xy[0], offset_xy[1], offset[2])) + target_offset = torch.stack((target_offset_xy[0], target_offset_xy[1], target_offset[2])) + eye = (base + offset).tolist() + target = (base + target_offset).tolist() + set_camera_view(eye=eye, target=target, camera_prim_path=self._env_opts["camera_prim_path"]) + + self._world.render() + # optional height grid visualization + if self._env_opts["enable_height_vis"]: + for robot_name, vis in self._height_vis.items(): + # use latest stored states + if robot_name not in self._height_imgs or robot_name not in self._height_sensors: + continue + heights = self._height_imgs.get(robot_name, None) + if heights is None or heights.numel() == 0: + continue + pos_src = self._root_p.get(robot_name, None) + quat_src = self._root_q.get(robot_name, None) + if pos_src is None or quat_src is None: + continue + step = self._height_vis_step.get(robot_name, 0) + period = max(1, int(self._env_opts["height_vis_update_period"])) + if step % period == 0: + try: + vis.update( + base_positions=pos_src, + base_quats=quat_src, + heights=heights) + except Exception as exc: + print(f"[height_vis] update failed for {robot_name}: {exc}") + self._height_vis_step[robot_name] = step + 1 + return None + elif mode == "rgb_array": + # check if viewport is enabled -- if not, then complain because we won't get any data + if not self._render or not self._record: + exception = f"Cannot render '{mode}' when rendering is not enabled. Please check the provided" + \ + "arguments to the environment class at initialization." 
+ Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + # obtain the rgb data + rgb_data = self._rgb_annotator.get_data() + # convert to numpy array + rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) + # return the rgb data + return rgb_data[:, :, :3] + else: + return None + + def _create_viewport_render_product(self, resolution=None): + """Create a render product of the viewport for rendering.""" + + try: + + # create render product + camera_prim = self._env_opts["camera_prim_path"] + res = resolution + if res is None: + res = tuple(int(x) for x in self._env_opts["render_resolution"]) + self._render_product = rep.create.render_product(camera_prim, res) + # create rgb annotator -- used to read data from the render product + self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") + self._rgb_annotator.attach([self._render_product]) + self._record = True + except Exception as e: + carb.log_info("omni.replicator.core could not be imported. 
Skipping creation of render product.") + carb.log_info(str(e)) + + def _close(self): + if self._simulation_app.is_running(): + self._simulation_app.close() + + def _step_world(self): + self._world.step(render=False, step_sim=True) + + if (self._render) and (self._render_step_counter%self._env_opts["rendering_freq"]==0): + # if self._env_opts["render_to_file"]: + # rep.orchestrator.step() + self._render_sim() # manually trigger rendering (World.step(render=True) for some reason + # will step the simulation for a dt==rendering_dt) + self._render_step_counter += 1 + + def _generate_jnt_imp_control(self, robot_name: str): + + jnt_imp_controller = OmniJntImpCntrl(articulation=self._robots_articulations[robot_name], + device=self._device, + dtype=self._dtype, + enable_safety=True, + urdf_path=self._urdf_dump_paths[robot_name], + config_path=self._jnt_imp_config_paths[robot_name], + enable_profiling=False, + debug_checks=self._debug, + override_art_controller=self._override_low_lev_controller) + + return jnt_imp_controller + + def _reset_sim(self): + self._world.reset(soft=False) + + def _reset_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + randomize: bool = False): + + if env_indxs is not None: + if self._debug: + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" 
+ Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + if randomize: + self._randomize_yaw(robot_name=robot_name,env_indxs=env_indxs) + + # root q + self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][env_indxs, :], + orientations=self._root_q_default[robot_name][env_indxs, :], + indices = env_indxs) + # jnts q + self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][env_indxs, :], + indices = env_indxs) + # root v and omega + self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][env_indxs, :], + indices = env_indxs) + # jnts v + concatenated_vel = torch.cat((self._root_v_default[robot_name][env_indxs, :], + self._root_omega_default[robot_name][env_indxs, :]), dim=1) + self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel, + indices = env_indxs) + # jnts eff + self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][env_indxs, :], + indices = env_indxs) + self._reset_perturbations(robot_name=robot_name, env_indxs=env_indxs) + else: + + if randomize: + self._randomize_yaw(robot_name=robot_name,env_indxs=None) + + # root q + self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][:, :], + orientations=self._root_q_default[robot_name][:, :], + indices = None) + # jnts q + self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][:, :], + indices = None) + # root v and omega + self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][:, :], + indices = None) + # jnts v + concatenated_vel = torch.cat((self._root_v_default[robot_name][:, :], + self._root_omega_default[robot_name][:, :]), dim=1) + self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel, + indices = None) + # 
jnts eff + self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][:, :], + indices = None) + self._reset_perturbations(robot_name=robot_name, env_indxs=None) + + # we update the robots state + self._read_root_state_from_robot(env_indxs=env_indxs, + robot_name=robot_name) + self._read_jnts_state_from_robot(env_indxs=env_indxs, + robot_name=robot_name) + + def _reset_perturbations(self, robot_name: str, env_indxs: torch.Tensor = None): + """Clear perturbation state and wrenches for selected envs.""" + if robot_name not in self._pert_active: + return + if env_indxs is None: + self._pert_active[robot_name].zero_() + self._pert_steps_remaining[robot_name].zero_() + self._pert_forces_world[robot_name].zero_() + self._pert_torques_world[robot_name].zero_() + self._pert_det_counter[robot_name].zero_() + else: + self._pert_active[robot_name][env_indxs] = False + self._pert_steps_remaining[robot_name][env_indxs] = 0 + self._pert_forces_world[robot_name][env_indxs, :] = 0 + self._pert_torques_world[robot_name][env_indxs, :] = 0 + self._pert_det_counter[robot_name][env_indxs] = 0 + + def _process_perturbations(self): + + # Iterate over each robot view + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + # Pre-fetch views for code clarity (references, not copies) + active = self._pert_active[robot_name] + steps_rem = self._pert_steps_remaining[robot_name] + forces_world = self._pert_forces_world[robot_name] + torques_world = self._pert_torques_world[robot_name] + planar_only = self._env_opts["pert_planar_only"] if "pert_planar_only" in self._env_opts else False + + # --- 1. Update Active Counters (In-Place) --- + if active.any(): + # In-place subtraction + steps_rem[active] -= 1 + + # --- 2. 
Reset Finished Perturbations (In-Place) --- + # Logic: Active AND (Steps <= 0) + # Note: Creating 'newly_ended' boolean mask is a tiny unavoidable allocation + newly_ended = active & (steps_rem <= 0) + + if newly_ended.any(): + # Use masked_fill_ for in-place zeroing + active.masked_fill_(newly_ended, False) + forces_world[newly_ended, :]=0 + torques_world[newly_ended, :]=0 + steps_rem.masked_fill_(newly_ended, 0) + + # --- 3. Trigger New Perturbations --- + + det_rate = self._env_opts["det_pert_rate"] + if det_rate: + # deterministic spacing: count physics steps and trigger when threshold reached (if not already active) + det_counter = self._pert_det_counter[robot_name] + det_counter += 1 + trigger_mask = (det_counter >= self._pert_det_steps) & (~active) + else: + # Reuse scratch buffer for probability check + # Assumes self._pert_scratch is (num_envs, 1) pre-allocated + self._pert_scratch[robot_name].uniform_(0.0, 1.0) # used for triggering new perturbations + # Check probs against threshold + # Flatten scratch to (N,) to match 'active' mask + trigger_mask = (self._pert_scratch[robot_name].flatten() < self._pert_wrenches_prob) & (~active) + + if trigger_mask.any(): + + # Cache weights (references) + weight = self._weights[robot_name] + + # we now treat the configured "max_*_impulse_*" as the maximum **impulse** (N·s or N·m·s), + # and convert impulse -> force/torque by dividing by the sampled duration (seconds). + # Use per-robot weight scaling as before. 
+ lin_impulse_max = self._env_opts["max_lin_impulse_norm"] * weight + ang_impulse_max = self._env_opts["max_ang_impulse_norm"] * weight + + # --- Force (Impulse) Direction Generation (Reuse _pert_lindir buffer) --- + lindir = self._pert_lindir[robot_name] # (N, 3) + + if planar_only: + # planar push direction from random yaw + yaw_angles = self._pert_scratch[robot_name].uniform_(0.0, 2*math.pi).flatten() + lindir[:, 0] = torch.cos(yaw_angles) + lindir[:, 1] = torch.sin(yaw_angles) + lindir[:, 2] = 0.0 + else: + # 1. Fill with Standard Normal noise in-place + lindir.normal_() + # 2. Normalize in-place + norms = torch.norm(lindir, dim=1, keepdim=True).clamp_min_(1e-6) + lindir.div_(norms) + + # 3. Sample linear impulse magnitudes (reuse scratch) + # scratch has shape (N,1) - uniform [0,1] + self._pert_scratch[robot_name].uniform_(self._env_opts["lin_impulse_mag_min"], self._env_opts["lin_impulse_mag_max"]) + # impulse vectors = unit_dir * (rand * lin_impulse_max) + + lindir.mul_(self._pert_scratch[robot_name] * lin_impulse_max) # now contains linear impulses (N,3) + + # --- Angular (Impulse) Direction Generation (Reuse _pert_angdir buffer) --- + angdir = self._pert_angdir[robot_name] # (N, 3) + + if planar_only: + angdir.zero_() # no torque when planar-only is requested + else: + # 1. Fill with Standard Normal noise + angdir.normal_() + # 2. Normalize + norms = torch.norm(angdir, dim=1, keepdim=True).clamp_min_(1e-6) + angdir.div_(norms) + + # 3. 
Sample angular impulse magnitudes (reuse scratch) + self._pert_scratch[robot_name].uniform_(0.0, 1.0) + angdir.mul_(self._pert_scratch[robot_name] * ang_impulse_max) # now contains angular impulses (N,3) + + # --- Duration Generation (Reuse _pert_durations) --- + # Keep integer steps sampling (same shape/device/dtype) + self._pert_durations[robot_name] = torch.randint_like( + self._pert_durations[robot_name], + low=self._pert_min_steps, + high=self._pert_max_steps + 1 + ) + + # --- convert to float + duration_steps = self._pert_durations[robot_name].to(dtype=lindir.dtype, device=lindir.device) + # duration in seconds (shape (N,)) + duration_seconds = duration_steps * self.physics_dt() + # avoid divide-by-zero + duration_seconds = duration_seconds.clamp_min_(1e-6) + + # compute per-step forces/torques = impulse / duration_seconds + # lindir currently holds linear impulses, angdir holds angular impulses + forces_to_apply = lindir / duration_seconds + torques_to_apply = angdir / duration_seconds + + # Optional clipping based on robot weight (min/max) + f_norm = torch.norm(forces_to_apply, dim=1, keepdim=True).clamp_min_(1e-9) + target_norm = f_norm + + f_clip_scale_max = self._env_opts["pert_force_max_weight_scale"] + if f_clip_scale_max > 0.0: + force_max = self._weights[robot_name] * f_clip_scale_max # (N,1) + target_norm = torch.minimum(target_norm, force_max) + + f_clip_scale_min = self._env_opts["pert_force_min_weight_scale"] + if f_clip_scale_min > 0.0: + force_min = self._weights[robot_name] * f_clip_scale_min + target_norm = torch.maximum(target_norm, force_min) + + forces_to_apply = forces_to_apply * (target_norm / f_norm) + + t_clip_scale = self._env_opts["pert_torque_max_weight_scale"] + if t_clip_scale > 0.0: + torque_max = self._weights[robot_name] * self._env_opts["max_ang_impulse_lever"] * t_clip_scale + t_norm = torch.norm(torques_to_apply, dim=1, keepdim=True).clamp_min_(1e-9) + t_scale = torch.minimum(torch.ones_like(t_norm), torque_max / t_norm) + 
torques_to_apply = torques_to_apply * t_scale + + # --- Update State Buffers --- + # Use boolean indexing to scatter only triggered values + active[trigger_mask] = True + steps_rem[trigger_mask] = self._pert_durations[robot_name][trigger_mask, :].flatten() + forces_world[trigger_mask, :] = forces_to_apply[trigger_mask, :] + torques_world[trigger_mask, :] = torques_to_apply[trigger_mask, :] + if det_rate: + det_counter[trigger_mask] = 0 + + # --- 4. Apply Wrenches (Vectorized) --- + # Only call API if there are active perturbations to minimize overhead + + forces_world[~active, :]=0 + torques_world[~active, :]=0 + + self._blink_rigid_prim_views[robot_name].apply_forces_and_torques_at_pos( + forces=forces_world, + torques=torques_world, + positions=None, # body frame origin + is_global=True + ) + + def _pre_step(self): + + if self._env_opts["use_random_pertub"]: + self._process_perturbations() + + super()._pre_step() + + def _pre_step_db(self): + + if self._env_opts["use_random_pertub"]: + self._process_perturbations() + + super()._pre_step_db() + + def _update_contact_state(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + super()._update_contact_state(robot_name, env_indxs) + + if self._env_opts["use_random_pertub"]: + # write APPLIED perturbations to root wrench (mainly for debug) + self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_forces_world[robot_name][env_indxs, :], + data_type="f", + contact_name="root", + robot_idxs = env_indxs, + gpu=self._use_gpu) + self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_torques_world[robot_name][env_indxs, :], + data_type="t", + contact_name="root", + robot_idxs = env_indxs, + gpu=self._use_gpu) + + def _post_warmup_validation(self, robot_name: str): + """Validate warmup state: base height, tilt, and (if available) contacts.""" + envs = torch.arange(self._num_envs, device=self._device) + + # terrain height query + def _terrain_height(xy: 
torch.Tensor): + if self._env_opts["use_flat_ground"] or self.terrain_generator is None: + return torch.zeros((xy.shape[0],), device=xy.device, dtype=self._dtype) + heights = [] + half_extent = self._env_opts["spawn_height_check_half_extent"] + for k in range(xy.shape[0]): + h = self.terrain_generator.get_max_height_in_rect( + float(xy[k, 0]), float(xy[k, 1]), half_extent=half_extent) + heights.append(h) + return torch.as_tensor(heights, device=xy.device, dtype=self._dtype) + + # base height check + base_xy = self._root_p[robot_name][:, 0:2] + base_z = self._root_p[robot_name][:, 2] + ground_z = _terrain_height(base_xy) + margin = float(self._env_opts["spawn_height_cushion"]) + bad_z = base_z < (ground_z + margin) + + # tilt check (angle between base up and world up) + q = self._root_q[robot_name] + # quaternion to up vector + w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3] + up = torch.stack([ + 2 * (x * z - w * y), + 2 * (y * z + w * x), + 1 - 2 * (x * x + y * y) + ], dim=1) + cos_tilt = up[:, 2].clamp(-1.0, 1.0) + tilt_thresh_deg = 35.0 + bad_tilt = cos_tilt < math.cos(math.radians(tilt_thresh_deg)) + + # contact check (only if sensors exist) + # bad_contact = torch.zeros_like(base_z, dtype=torch.bool) + # if robot_name in self.omni_contact_sensors and self.omni_contact_sensors[robot_name] is not None: + # counts = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device) + # for link in self._contact_names.get(robot_name, []): + # f_contact = self._get_contact_f(robot_name=robot_name, + # contact_link=link, + # env_indxs=None) + # if f_contact is None: + # continue + # # use normal component (assume z-up); ignore tangential forces + # active = f_contact[:, 2] > 1e-3 + # counts += active.int() + # bad_contact = counts < 3 + + failing = torch.nonzero(bad_z | bad_tilt, as_tuple=False).flatten() + if failing.numel() > 0: + # remediate: lift to terrain+margin, upright (preserve yaw), zero root velocities + yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * 
(y * y + z * z)) + safe_z = (ground_z + margin)[failing] + self._root_p[robot_name][failing, 2] = safe_z + cos_h = torch.cos(yaw[failing] / 2) + sin_h = torch.sin(yaw[failing] / 2) + upright = torch.zeros((failing.shape[0], 4), device=self._device, dtype=self._dtype) + upright[:, 0] = cos_h + upright[:, 3] = sin_h + self._root_q[robot_name][failing, :] = upright + self._root_v[robot_name][failing, :] = 0.0 + self._root_omega[robot_name][failing, :] = 0.0 + + msgs = [] + if bad_z.any(): + msgs.append(f"low_z envs {torch.nonzero(bad_z, as_tuple=False).flatten().tolist()}") + if bad_tilt.any(): + msgs.append(f"tilt envs {torch.nonzero(bad_tilt, as_tuple=False).flatten().tolist()}") + Journal.log(self.__class__.__name__, + "_post_warmup_validation", + f"Warmup validation adjusted {robot_name}: " + "; ".join(msgs), + LogType.WARN, + throw_when_excep=False) + return failing + + def _import_urdf(self, + robot_name: str, + fix_base = False, + self_collide = False, + merge_fixed = True): + + import_config=_urdf.ImportConfig() + # status,import_config=omni_kit.commands.execute("URDFCreateImportConfig") + + Journal.log(self.__class__.__name__, + "update_root_offsets", + "importing robot URDF", + LogType.STAT, + throw_when_excep = True) + _urdf.acquire_urdf_interface() + # we overwrite some settings which are bound to be fixed + import_config.merge_fixed_joints = merge_fixed # makes sim more stable + # in case of fixed joints with light objects + import_config.import_inertia_tensor = True + # import_config.convex_decomp = False + import_config.fix_base = fix_base + import_config.self_collision = self_collide + # import_config.distance_scale = 1 + # import_config.make_default_prim = True + # import_config.create_physics_scene = True + # import_config.default_drive_strength = 1047.19751 + # import_config.default_position_drive_damping = 52.35988 + # import_config.default_drive_type = _urdf.UrdfJointTargetType.JOINT_DRIVE_POSITION + # import URDF + success, 
robot_prim_path_default = omni_kit.commands.execute( + "URDFParseAndImportFile", + urdf_path=self._urdf_dump_paths[robot_name], + import_config=import_config, + # get_articulation_root=True, + ) + + robot_base_prim_path = self._env_opts["template_env_ns"] + "/" + robot_name + + if success: + Journal.log(self.__class__.__name__, + "_import_urdf", + "Successfully importedf URDF into IsaacSim", + LogType.STAT) + else: + Journal.log(self.__class__.__name__, + "_import_urdf", + "Failed to import URDF into IsaacSim", + LogType.EXCEP, + throw_when_excep = True) + + # moving default prim to base prim path for cloning + move_prim(robot_prim_path_default, # from + robot_base_prim_path) # to + + robot_base_prim = prim_utils.get_prim_at_path(robot_base_prim_path) + children = prim_utils.get_prim_children(robot_base_prim) + # log imported prim children to the journal (print was getting truncated in logs) + Journal.log(self.__class__.__name__, + "_import_urdf", + f"Imported robot URDF children: {children}", + LogType.STAT) + + # improve collision tesselation for cylinders (e.g., wheels) if requested + # self._apply_collision_refinement(robot_base_prim_path, + # self._env_opts["collision_refinement_level"]) + + return success + + def _apply_collision_refinement(self, robot_base_prim_path: str, refinement_level: int): + """Set refinement level on collision cylinders to avoid coarse faceting.""" + if refinement_level is None: + return + stage = get_current_stage() + coll_prefix = robot_base_prim_path + "/collisions" + count = 0 + for prim in stage.Traverse(): + if not prim.IsValid(): + continue + path_str = prim.GetPath().pathString + if not path_str.startswith(coll_prefix): + continue + if prim.GetTypeName() == "Cylinder": + attr = prim.GetAttribute("refinementLevel") + if not attr.IsValid(): + attr = prim.CreateAttribute("refinementLevel", Sdf.ValueTypeNames.Int) + attr.Set(int(refinement_level)) + count += 1 + Journal.log(self.__class__.__name__, + "_apply_collision_refinement", 
+ f"Applied refinement level {refinement_level} to {count} cylinder collision prims under {coll_prefix}", + LogType.STAT) + + def apply_collision_filters(self, + physicscene_path: str, + coll_root_path: str): + + self._cloner.filter_collisions(physicsscene_path = physicscene_path, + collision_root_path = coll_root_path, + prim_paths=self._envs_prim_paths, + global_paths=self._ground_plane_prim_paths # can collide with these prims + ) + + def _read_root_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + self._get_root_state(numerical_diff=self._env_opts["use_diff_vels"], + env_indxs=env_indxs, + robot_name=robot_name) + + # height grid sensor readout + if robot_name in self._height_sensors: + pos_src = self._root_p[robot_name] if env_indxs is None else self._root_p[robot_name][env_indxs] + quat_src = self._root_q[robot_name] if env_indxs is None else self._root_q[robot_name][env_indxs] + heights = self._height_sensors[robot_name].read(pos_src, quat_src) + if self._env_opts["use_flat_ground"]: + heights.zero_() + if env_indxs is None: + self._height_imgs[robot_name] = heights + else: + # clone to avoid overlapping write/read views + self._height_imgs[robot_name][env_indxs] = heights.clone() + + # print("height image") + # print(self._height_imgs[robot_name][0, :, : ]) + + def _read_jnts_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + self._get_robots_jnt_state( + numerical_diff=self._env_opts["use_diff_vels"], + env_indxs=env_indxs, + robot_name=robot_name) + + def _get_root_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + numerical_diff: bool = False, + base_loc: bool = True): + + # reading = self._is.get_sensor_reading("/World/Cube/Imu_Sensor", + # use_latest_data = True) + + dt=self._cluster_dt[robot_name] # getting diff state always at cluster rate + + # measurements from simulator are in world frame + if env_indxs is not None: + + pose = 
self._robots_art_views[robot_name].get_world_poses( + clone = True, + indices=env_indxs) # tuple: (pos, quat), quat is [w, i, j, k] in Isaac4.2 + + self._root_p[robot_name][env_indxs, :] = pose[0] + + going_to_fly=self._root_p[robot_name][env_indxs, 0:2]>(self._env_opts["terrain_border"]-0.1) + if going_to_fly.any(): + flying=going_to_fly.sum().item() + warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!" + Journal.log(self.__class__.__name__, + "_get_root_state", + warn, + LogType.WARN, + throw_when_excep = True) + self._root_q[robot_name][env_indxs, :] = pose[1] # root orientation + if not numerical_diff: + # we get velocities from the simulation. This is not good since + # these can actually represent artifacts which do not have physical meaning. + # It's better to obtain them by differentiation to avoid issues with controllers, etc... + self._root_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_linear_velocities( + clone = True, + indices=env_indxs) # root lin. velocity + self._root_omega[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_angular_velocities( + clone = True, + indices=env_indxs) # root ang. 
velocity + + # for now obtain root a numerically + self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \ + self._root_v_prev[robot_name][env_indxs, :]) / dt + self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \ + self._root_omega_prev[robot_name][env_indxs, :]) / dt + + self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :] + self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :] + else: + # differentiate numerically + self._root_v[robot_name][env_indxs, :] = (self._root_p[robot_name][env_indxs, :] - \ + self._root_p_prev[robot_name][env_indxs, :]) / dt + self._root_omega[robot_name][env_indxs, :] = quat_to_omega( + self._root_q_prev[robot_name][env_indxs, :], + self._root_q[robot_name][env_indxs, :], + dt) + + self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \ + self._root_v_prev[robot_name][env_indxs, :]) / dt + self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \ + self._root_omega_prev[robot_name][env_indxs, :]) / dt + + # update "previous" data for numerical differentiation + self._root_p_prev[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :] + self._root_q_prev[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :] + self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :] + self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :] + self._track_terrain_hits(robot_name=robot_name, env_indxs=env_indxs) + + else: + # updating data for all environments + pose = self._robots_art_views[robot_name].get_world_poses( + clone = True) # tuple: (pos, quat) + self._root_p[robot_name][:, :] = pose[0] + self._root_q[robot_name][:, :] = pose[1] # root orientation + + going_to_fly=self._root_p[robot_name][:, 0:2]>(self._env_opts["terrain_border"]-0.1) + if 
going_to_fly.any(): + flying=going_to_fly.sum().item() + warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!" + Journal.log(self.__class__.__name__, + "_get_root_state", + warn, + LogType.WARN, + throw_when_excep = True) + + if not numerical_diff: + # we get velocities from the simulation. This is not good since + # these can actually represent artifacts which do not have physical meaning. + # It's better to obtain them by differentiation to avoid issues with controllers, etc... + self._root_v[robot_name][:, :] = self._robots_art_views[robot_name].get_linear_velocities( + clone = True) # root lin. velocity + self._root_omega[robot_name][:, :] = self._robots_art_views[robot_name].get_angular_velocities( + clone = True) # root ang. velocity + + self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \ + self._root_v_prev[robot_name][:, :]) / dt + self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \ + self._root_omega_prev[robot_name][:, :]) / dt + + self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :] + self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :] + else: + # differentiate numerically + self._root_v[robot_name][:, :] = (self._root_p[robot_name][:, :] - \ + self._root_p_prev[robot_name][:, :]) / dt + self._root_omega[robot_name][:, :] = quat_to_omega(self._root_q_prev[robot_name][:, :], + self._root_q[robot_name][:, :], + dt) + + self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \ + self._root_v_prev[robot_name][:, :]) / dt + self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \ + self._root_omega_prev[robot_name][:, :]) / dt + + # update "previous" data for numerical differentiation + self._root_p_prev[robot_name][:, :] = self._root_p[robot_name][:, :] + self._root_q_prev[robot_name][:, :] = self._root_q[robot_name][:, :] + self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :] + 
self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :] + self._track_terrain_hits(robot_name=robot_name, env_indxs=None) + + if base_loc: + # rotate robot twist in base local + twist_w=torch.cat((self._root_v[robot_name], + self._root_omega[robot_name]), + dim=1) + twist_base_loc=torch.cat((self._root_v_base_loc[robot_name], + self._root_omega_base_loc[robot_name]), + dim=1) + world2base_frame(t_w=twist_w,q_b=self._root_q[robot_name],t_out=twist_base_loc) + self._root_v_base_loc[robot_name]=twist_base_loc[:, 0:3] + self._root_omega_base_loc[robot_name]=twist_base_loc[:, 3:6] + + # rotate robot a in base local + a_w=torch.cat((self._root_a[robot_name], + self._root_alpha[robot_name]), + dim=1) + a_base_loc=torch.cat((self._root_a_base_loc[robot_name], + self._root_alpha_base_loc[robot_name]), + dim=1) + world2base_frame(t_w=a_w,q_b=self._root_q[robot_name],t_out=a_base_loc) + self._root_a_base_loc[robot_name]=a_base_loc[:, 0:3] + self._root_alpha_base_loc[robot_name]=a_base_loc[:, 3:6] + + # rotate gravity in base local + world2base_frame3D(v_w=self._gravity_normalized[robot_name],q_b=self._root_q[robot_name], + v_out=self._gravity_normalized_base_loc[robot_name]) + + def _get_robots_jnt_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + numerical_diff: bool = False): + + dt= self.physics_dt() if self._override_low_lev_controller else self._cluster_dt[robot_name] + + # measurements from simulator are in world frame + if env_indxs is not None: + + self._jnts_q[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_positions( + clone = True, + indices=env_indxs) # joint positions + if not numerical_diff: + self._jnts_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_velocities( + clone = True, + indices=env_indxs) # joint velocities + else: + # differentiate numerically + self._jnts_v[robot_name][env_indxs, :] = (self._jnts_q[robot_name][env_indxs, :] - \ + 
self._jnts_q_prev[robot_name][env_indxs, :]) / dt + # update "previous" data for numerical differentiation + self._jnts_q_prev[robot_name][env_indxs, :] = self._jnts_q[robot_name][env_indxs, :] + + self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts( + clone = True, + joint_indices=None, + indices=env_indxs) # measured joint efforts (computed by joint force solver) + + else: + self._jnts_q[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_positions( + clone = True) # joint positions + if not numerical_diff: + self._jnts_v[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_velocities( + clone = True) # joint velocities + else: + self._jnts_v[robot_name][:, :] = (self._jnts_q[robot_name][:, :] - \ + self._jnts_q_prev[robot_name][:, :]) / dt + + self._jnts_q_prev[robot_name][:, :] = self._jnts_q[robot_name][:, :] + + self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts( + clone = True) # measured joint efforts (computed by joint force solver) + + def _get_contact_f(self, + robot_name: str, + contact_link: str, + env_indxs: torch.Tensor) -> torch.Tensor: + + if self.omni_contact_sensors[robot_name] is not None: + return self.omni_contact_sensors[robot_name].get(dt=self.physics_dt(), + contact_link=contact_link, + env_indxs=env_indxs, + clone=False) + + def _set_jnts_to_homing(self, robot_name: str): + self._robots_art_views[robot_name].set_joints_default_state(positions=self._homing, + velocities = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \ + dtype=self._dtype, device=self._device), + efforts = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \ + dtype=self._dtype, device=self._device)) + + def _set_root_to_defconfig(self, robot_name: str): + self._robots_art_views[robot_name].set_default_state(positions=self._root_p_default[robot_name], + orientations=self._root_q_default[robot_name]) + + def 
_zero_angular_velocities(self, robot_name: str, env_indxs: torch.Tensor = None): + """Zero angular velocities and joint velocities for the given robot/envs.""" + if env_indxs is None: + twist = self._robots_art_views[robot_name].get_velocities(clone=True) + # twist[:, 0:2] = 0.0 + twist[:, 3:] = 0.0 # zero angular part, preserve current linear + self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=None) + + # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities( + # indices = None, clone=True) + # jnt_vel[:, :] = 0.0 + + # self._robots_art_views[robot_name].set_joint_velocities( + # velocities = jnt_vel, + # indices = None) + + else: + twist = self._robots_art_views[robot_name].get_velocities(clone=True, indices=env_indxs) + # twist[:, 0:2] = 0.0 + twist[:, 3:] = 0.0 + self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=env_indxs) + + # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities( + # indices = env_indxs, clone=True) + # jnt_vel[:, :] = 0.0 + + # self._robots_art_views[robot_name].set_joint_velocities( + # velocities = jnt_vel, + # indices = env_indxs) + + def _get_solver_info(self): + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + self._solver_position_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_position_iteration_counts() + self._solver_velocity_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_velocity_iteration_counts() + self._solver_stabilization_threshs[robot_name] = self._robots_art_views[robot_name].get_stabilization_thresholds() + + def _update_art_solver_options(self): + + # sets new solver iteration options for specifc articulations + self._get_solver_info() # gets current solver info for the articulations of the + # environments, so that dictionaries are filled properly + + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + # increase by a factor + 
self._solver_position_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_position_iteration_count) + self._solver_velocity_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_velocity_iteration_count) + self._solver_stabilization_threshs[robot_name] = torch.full((self._num_envs,), self._solver_stabilization_thresh) + self._robots_art_views[robot_name].set_solver_position_iteration_counts(self._solver_position_iteration_counts[robot_name]) + self._robots_art_views[robot_name].set_solver_velocity_iteration_counts(self._solver_velocity_iteration_counts[robot_name]) + self._robots_art_views[robot_name].set_stabilization_thresholds(self._solver_stabilization_threshs[robot_name]) + self._get_solver_info() # gets again solver info for articulation, so that it's possible to debug if + # the operation was successful + + def _print_envs_info(self): + + ground_info = f"[Ground info]" + "\n" + \ + "static friction coeff.: " + str(self._terrain_material.GetStaticFrictionAttr().Get()) + "\n" + \ + "dynamics friction coeff.: " + str(self._terrain_material.GetDynamicFrictionAttr().Get()) + "\n" + \ + "restitution coeff.: " + str(self._terrain_material.GetRestitutionAttr().Get()) + "\n" +\ + "friction comb. mode: " + str(self._terrain_physix_material.GetFrictionCombineModeAttr().Get()) + "\n" + \ + "damping comb. mode: " + str(self._terrain_physix_material.GetDampingCombineModeAttr().Get()) + "\n" + \ + "restitution comb. mode: " + str(self._terrain_physix_material.GetRestitutionCombineModeAttr().Get()) + "\n" + + Journal.log(self.__class__.__name__, + "_print_envs_info", + ground_info, + LogType.STAT, + throw_when_excep = True) + + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + task_info = f"[{robot_name}]" + "\n" + \ + "bodies: " + str(self._robots_art_views[robot_name].body_names) + "\n" + \ + "n. 
prims: " + str(self._robots_art_views[robot_name].count) + "\n" + \ + "prims names: " + str(self._robots_art_views[robot_name].prim_paths) + "\n" + \ + "n. bodies: " + str(self._robots_art_views[robot_name].num_bodies) + "\n" + \ + "n. dofs: " + str(self._robots_art_views[robot_name].num_dof) + "\n" + \ + "dof names: " + str(self._robots_art_views[robot_name].dof_names) + "\n" + \ + "solver_position_iteration_counts: " + str(self._solver_position_iteration_counts[robot_name]) + "\n" + \ + "solver_velocity_iteration_counts: " + str(self._solver_velocity_iteration_counts[robot_name]) + "\n" + \ + "stabiliz. thresholds: " + str(self._solver_stabilization_threshs[robot_name]) + # print("dof limits: " + str(self._robots_art_views[robot_name].get_dof_limits())) + # print("effort modes: " + str(self._robots_art_views[robot_name].get_effort_modes())) + # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains())) + # print("dof max efforts: " + str(self._robots_art_views[robot_name].get_max_efforts())) + # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains())) + # print("physics handle valid: " + str(self._robots_art_views[robot_name].is_physics_handle_valid()) + Journal.log(self.__class__.__name__, + "_print_envs_info", + task_info, + LogType.STAT, + throw_when_excep = True) + + def _fill_robot_info_from_world(self): + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + self._robot_bodynames[robot_name] = self._robots_art_views[robot_name].body_names + self._robot_n_links[robot_name] = self._robots_art_views[robot_name].num_bodies + self._robot_n_dofs[robot_name] = self._robots_art_views[robot_name].num_dof + self._robot_dof_names[robot_name] = self._robots_art_views[robot_name].dof_names + + def _set_initial_camera_params(self, + camera_position=[10, 10, 3], + camera_target=[0, 0, 0], + camera_prim_path="/OmniverseKit_Persp"): + set_camera_view(eye=camera_position, + target=camera_target, + 
camera_prim_path=camera_prim_path) + + def _init_contact_sensors(self, robot_name: str): + self.omni_contact_sensors[robot_name]=None + sensor_radii={} + contact_offsets={} + self._contact_names[robot_name]=self._env_opts["contact_prims"] + for contact_prim in self._env_opts["contact_prims"]: + sensor_radii[contact_prim]=self._env_opts["sensor_radii"] + contact_offsets[contact_prim]=np.array([0.0, 0.0, 0.0]) + if not (len(self._env_opts["contact_prims"])==0): + self.omni_contact_sensors[robot_name]=OmniContactSensors( + name=robot_name, + n_envs=self._num_envs, + contact_prims=self._env_opts["contact_prims"], + contact_offsets=contact_offsets, + sensor_radii=sensor_radii, + device=self._device, + dtype=self._dtype, + enable_debug=self._debug, + filter_paths=self._ground_plane_prim_paths) + self.omni_contact_sensors[robot_name].create_contact_sensors( + self._world, + envs_namespace=self._env_opts["envs_ns"]) + + def _init_robots_state(self): + + self._masses = {} + self._weights = {} + + self._pert_active = {} # bool mask: (num_envs,) + self._pert_steps_remaining = {} # int steps: (num_envs,) + self._pert_forces_world = {} # (num_envs,3) + self._pert_torques_world = {} # (num_envs,3) + self._pert_force_local = {} # (num_envs,3) (if needed) + self._pert_torque_local = {} # (num_envs,3) + self._pert_lindir = {} + self._pert_angdir = {} + self._pert_durations = {} + self._pert_scratch = {} + self._pert_det_counter = {} + + # convert durations in seconds to integer physics steps (min 1 step) + self._pert_min_steps = max(1, int(math.ceil(self._env_opts["pert_wrenches_min_duration"] / self.physics_dt()))) + self._pert_max_steps = max(self._pert_min_steps, int(math.ceil(self._env_opts["pert_wrenches_max_duration"] / self.physics_dt()))) + + pert_wrenches_step_rate=self._env_opts["pert_wrenches_rate"]/self.physics_dt() # 1 pert every n physics steps + self._pert_det_steps = max(1, int(round(pert_wrenches_step_rate))) + self._pert_wrenches_prob=min(1.0, 
1.0/pert_wrenches_step_rate) # sampling prob to be used when not deterministic + + self._calc_robot_distrib() + + for i in range(0, len(self._robot_names)): + + robot_name = self._robot_names[i] + + pose = self._robots_art_views[robot_name].get_world_poses( + clone = True) # tuple: (pos, quat) + + # root p (measured, previous, default) + self._root_p[robot_name] = pose[0] + self._root_p_prev[robot_name] = torch.clone(pose[0]) + # print(self._root_p_default[robot_name].device) + self._root_p_default[robot_name] = torch.clone(pose[0]) + self._distr_offset[robot_name] + # root q (measured, previous, default) + self._root_q[robot_name] = pose[1] # root orientation + self._root_q_prev[robot_name] = torch.clone(pose[1]) + self._root_q_default[robot_name] = torch.clone(pose[1]) + # jnt q (measured, previous, default) + self._jnts_q[robot_name] = self._robots_art_views[robot_name].get_joint_positions( + clone = True) # joint positions + self._jnts_q_prev[robot_name] = self._robots_art_views[robot_name].get_joint_positions( + clone = True) + self._jnts_q_default[robot_name] = torch.full((self._jnts_q[robot_name].shape[0], + self._jnts_q[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + + # root v (measured, default) + self._root_v[robot_name] = self._robots_art_views[robot_name].get_linear_velocities( + clone = True) # root lin. velocityù + self._root_v_base_loc[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_v_prev[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_v_default[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + # root omega (measured, default) + self._root_omega[robot_name] = self._robots_art_views[robot_name].get_angular_velocities( + clone = True) # root ang. 
velocity + self._root_omega_prev[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0) + + self._root_omega_base_loc[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0) + self._root_omega_default[robot_name] = torch.full((self._root_omega[robot_name].shape[0], self._root_omega[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + # root a (measured,) + self._root_a[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_a_base_loc[robot_name] = torch.full_like(self._root_a[robot_name], fill_value=0.0) + self._root_alpha[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_alpha_base_loc[robot_name] = torch.full_like(self._root_alpha[robot_name], fill_value=0.0) + + # height grid sensor storage + grid_sz = int(self._env_opts["height_sensor_pixels"]) + self._height_imgs[robot_name] = torch.zeros((self._num_envs, grid_sz, grid_sz), + dtype=self._dtype, + device=self._device) + + # joints v (measured, default) + self._jnts_v[robot_name] = self._robots_art_views[robot_name].get_joint_velocities( + clone = True) # joint velocities + self._jnts_v_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + + # joints efforts (measured, default) + self._jnts_eff[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + self._jnts_eff_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + self._root_pos_offsets[robot_name] = torch.zeros((self._num_envs, 3), + device=self._device) # reference position offses + + self._root_q_offsets[robot_name] = torch.zeros((self._num_envs, 4), + device=self._device) + self._root_q_offsets[robot_name][:, 0] = 1.0 # init to 
valid identity quaternion + + self._update_root_offsets(robot_name) + + # boolean active flag per env + self._pert_active[robot_name] = torch.zeros((self._num_envs,), dtype=torch.bool, device=self._device) + # remaining steps as integer tensor + self._pert_steps_remaining[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device) + # world force & torque (N and N*m) stored as floats + self._pert_forces_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + self._pert_torques_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + # local frame copies (if you want to store local-frame versions) + self._pert_force_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + self._pert_torque_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + + self._pert_lindir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + self._pert_angdir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + + self._pert_durations[robot_name] = torch.zeros((self._num_envs, 1), dtype=torch.int32, device=self._device) + + self._pert_scratch[robot_name] = torch.zeros((self._num_envs, 1), dtype=self._dtype, device=self._device) + self._pert_det_counter[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device) + + self._masses[robot_name] = torch.sum(self._robots_art_views[robot_name].get_body_masses(clone=True), dim=1).to(dtype=self._dtype, device=self._device) + + self._weights[robot_name] = (self._masses[robot_name] * abs(self._env_opts["gravity"][2].item())).reshape((self._num_envs, 1)) + + def _track_terrain_hits(self, robot_name: str, env_indxs: torch.Tensor = None): + """Track transitions into the terrain boundary margin (1 m) and count hits per env.""" + if self._env_opts["use_flat_ground"]: + return + border = 
float(self._env_opts["terrain_border"]) + threshold = max(0.0, border - self._terrain_hit_margin) + state = self._terrain_hit_active[robot_name] + counts = self._terrain_hit_counts[robot_name] + if env_indxs is None: + pos_xy = self._root_p[robot_name][:, 0:2] + hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1) + new_hits = (~state) & hitting + if new_hits.any(): + counts[new_hits] += 1 + state.copy_(hitting) + else: + pos_xy = self._root_p[robot_name][env_indxs, 0:2] + hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1) + prev_state = state[env_indxs] + new_hits = (~prev_state) & hitting + if new_hits.any(): + counts[env_indxs[new_hits]] += 1 + state[env_indxs] = hitting + + def _maybe_log_terrain_hits(self): + """Log boundary hits at low frequency only when counters change.""" + if self._env_opts["use_flat_ground"]: + return + period = int(self._env_opts["terrain_hit_log_period"]) + if period <= 0 or (self.step_counter % period != 0): + return + for robot_name in self._robot_names: + active = self._terrain_hit_active.get(robot_name, None) + if active is None: + continue + active_now = int(active.sum().item()) + if active_now == 0: + continue + counts = self._terrain_hit_counts[robot_name] + last = self._terrain_hit_counts_last_logged.get(robot_name, None) + if last is not None and torch.equal(counts, last): + continue + total_hits = int(counts.sum().item()) + msg = f"{active_now} {robot_name} robots within {self._terrain_hit_margin}m of terrain border. Total hits: {total_hits}." 
+ Journal.log(self.__class__.__name__, + "_terrain_hits", + msg, + LogType.WARN, + throw_when_excep = True) + self._terrain_hit_counts_last_logged[robot_name] = counts.clone() + + def _post_world_step(self) -> bool: + res = super()._post_world_step() + self._maybe_log_terrain_hits() + return res + + def _post_world_step_db(self) -> bool: + res = super()._post_world_step_db() + self._maybe_log_terrain_hits() + return res + + def current_tstep(self): + self._world.current_time_step_index + + def current_time(self): + return self._world.current_time + + def physics_dt(self): + return self._world.get_physics_dt() + + def rendering_dt(self): + return self._env_opts["rendering_dt"] + + def set_physics_dt(self, physics_dt:float): + self._world.set_simulation_dt(physics_dt=physics_dt,rendering_dt=None) + + def set_rendering_dt(self, rendering_dt:float): + self._world.set_simulation_dt(physics_dt=None,rendering_dt=rendering_dt) + + def _robot_jnt_names(self, robot_name: str): + return self._robots_art_views[robot_name].dof_names diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/jnt_imp_config.yaml b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/jnt_imp_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b5dc879097cf3100892255a00a0a5c3f6fb3d586 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/jnt_imp_config.yaml @@ -0,0 +1,44 @@ +XBotInterface: # just used for retrieving homing in sim with xbot_mujoco + urdf_path: $PWD/xmj_env_files/centauro.urdf + srdf_path: $PWD/xmj_env_files/centauro.srdf + +# defaults +motor_pd: + "j_arm*_1": [500, 10] + "j_arm*_2": [500, 10] + "j_arm*_3": [500, 10] + "j_arm*_4": [500, 10] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [3000, 30] + "hip_pitch_*": [3000, 30] + "knee_pitch_*": 
[3000, 30] + "ankle_pitch_*": [1000, 10] + "ankle_yaw_*": [300, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + +startup_motor_pd: # iannis combo + "j_arm*_1": [500, 30] + "j_arm*_2": [500, 30] + "j_arm*_3": [500, 30] + "j_arm*_4": [500, 30] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [200, 60] + "hip_pitch_*": [200, 60] + "knee_pitch_*": [200, 60] + "ankle_pitch_*": [200, 60] + "ankle_yaw_*": [120, 30] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/jnt_imp_config_no_yaw.yaml b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/jnt_imp_config_no_yaw.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4905fef54fcdacafa9924d2e16b1c7e097a0575e --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/jnt_imp_config_no_yaw.yaml @@ -0,0 +1,44 @@ +XBotInterface: # just used for retrieving homing in sim with xbot_mujoco + urdf_path: $PWD/xmj_env_files/centauro.urdf + srdf_path: $PWD/xmj_env_files/centauro.srdf + +# defaults +motor_pd: + "j_arm*_1": [500, 10] + "j_arm*_2": [500, 10] + "j_arm*_3": [500, 10] + "j_arm*_4": [500, 10] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [3000, 30] + "hip_pitch_*": [3000, 30] + "knee_pitch_*": [3000, 30] + "ankle_pitch_*": [1000, 10] + "ankle_yaw_*": [300, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + +startup_motor_pd: # iannis combo + "j_arm*_1": [500, 30] + "j_arm*_2": [500, 30] + "j_arm*_3": [500, 30] + "j_arm*_4": [500, 
30] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [200, 60] + "hip_pitch_*": [200, 60] + "knee_pitch_*": [200, 60] + "ankle_pitch_*": [200, 60] + "ankle_yaw_*": [600, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/launch_control_cluster.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/launch_control_cluster.py new file mode 100644 index 0000000000000000000000000000000000000000..9c484f45d52cd3cff4da3401453a30258d1e53ab --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/launch_control_cluster.py @@ -0,0 +1,106 @@ +import os +import argparse +import multiprocessing as mp +import importlib.util +import inspect + +from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict + +from EigenIPC.PyEigenIPC import Journal, LogType + +this_script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0] + +# Function to dynamically import a module from a specific file path +def import_env_module(env_path): + spec = importlib.util.spec_from_file_location("env_module", env_path) + env_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(env_module) + return env_module + +if __name__ == "__main__": + + # Parse command line arguments for CPU affinity + parser = argparse.ArgumentParser(description="Set CPU affinity for the script.") + parser.add_argument('--ns', type=str, help='Namespace to be used for cluster shared memory') + parser.add_argument('--urdf_path', type=str, help='Robot description package path for URDF ') + parser.add_argument('--srdf_path', type=str, help='Robot description package path for SRDF ') + parser.add_argument('--size', type=int, 
help='cluster size', default=1) + + # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8 + parser.add_argument('--cloop',action='store_true', help='whether to use RHC controllers in closed loop mode') + parser.add_argument('--cluster_dt', type=float, default=0.05, help='dt used by MPC controllers for discretization') + parser.add_argument('--n_nodes', type=int, default=31, help='n nodes used by MPC controllers') + + parser.add_argument('--verbose',action='store_true', help='run in verbose mode') + + parser.add_argument('--enable_debug',action='store_true', help='enable debug mode for cluster client and all controllers') + + parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data") + + parser.add_argument('--no_mp_fork',action='store_true', help='whether to multiprocess with forkserver context') + + parser.add_argument('--set_affinity',action='store_true', help='set affinity to a core for each controller') + + parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="") + parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000) + parser.add_argument('--codegen_override_dir', type=str, help='Path to base dir where codegen is to be loaded', default="") + + parser.add_argument('--custom_args_names', nargs='+', default=None, + help='list of custom arguments names') + parser.add_argument('--custom_args_vals', nargs='+', default=None, + help='list of custom arguments values') + parser.add_argument('--custom_args_dtype', nargs='+', default=None, + help='list of custom arguments data types') + + parser.add_argument('--cluster_client_fname', type=str, + default="aug_mpc.controllers.rhc.hybrid_quad_client", + help="cluster client file import pattern (without extension)") + + args = parser.parse_args() + + # Ensure custom_args_names and custom_args_vals 
have the same length + custom_opts = generate_custom_arg_dict(args=args) + custom_opts.update({"cloop": args.cloop, + "cluster_dt": args.cluster_dt, + "n_nodes": args.n_nodes, + "codegen_override_dir": args.codegen_override_dir}) + if args.no_mp_fork: # this needs to be in the main + mp.set_start_method('spawn') + else: + # mp.set_start_method('forkserver') + mp.set_start_method('fork') + + cluster_module=importlib.import_module(args.cluster_client_fname) + # Get all classes defined in the module + classes_in_module = [name for name, obj in inspect.getmembers(cluster_module, inspect.isclass) + if obj.__module__ == cluster_module.__name__] + if len(classes_in_module) == 1: + cluster_classname=classes_in_module[0] + ClusterClient = getattr(cluster_module, cluster_classname) + cluster_client = ClusterClient(namespace=args.ns, + cluster_size=args.size, + urdf_xacro_path=args.urdf_path, + srdf_xacro_path=args.srdf_path, + open_loop=not args.cloop, + use_mp_fork = not args.no_mp_fork, + verbose=args.verbose, + debug=args.enable_debug, + base_dump_dir=args.dmpdir, + timeout_ms=args.timeout_ms, + custom_opts=custom_opts, + codegen_override=args.codegen_override_dir, + set_affinity=args.set_affinity) + cluster_client.run() + + else: + class_list_str = ", ".join(classes_in_module) + Journal.log("launch_control_cluster.py", + "", + f"Found more than one class in cluster client file {args.cluster_client_fname}. 
Found: {class_list_str}", + LogType.EXCEP, + throw_when_excep = False) + exit() + + # control_cluster_client = + # control_cluster_client.run() # spawns the controllers on separate processes (blocking) + diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/launch_train_env.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/launch_train_env.py new file mode 100644 index 0000000000000000000000000000000000000000..c943400c74b751af34b0f090398ecd25b4ea5e09 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/launch_train_env.py @@ -0,0 +1,357 @@ +from aug_mpc.utils.determinism import deterministic_run + +from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo +from mpc_hive.utilities.shared_data.cluster_data import SharedClusterInfo + +from EigenIPC.PyEigenIPC import VLevel, Journal, LogType +from EigenIPC.PyEigenIPC import StringTensorServer + +import os, argparse, sys, types, inspect + +from perf_sleep.pyperfsleep import PerfSleep + +import importlib.util +import torch +import signal + +algo = None # global to make it accessible by signal handler +exit_request=False +dummy_step_exit_req=False + +def handle_sigint(signum, frame): + global exit_request, dummy_step_exit_req + Journal.log("launch_train_env.py", + "", + f"Received sigint. 
Will stop training.", + LogType.WARN) + exit_request=True + dummy_step_exit_req=True # in case dummy_step_loop was used + +# Function to dynamically import a module from a specific file path +# def import_env_module(env_path): +# spec = importlib.util.spec_from_file_location("env_module", env_path) +# env_module = importlib.util.module_from_spec(spec) +# spec.loader.exec_module(env_module) +# return env_module + +def import_env_module(env_path, local_env_root: str = None): + """ + env_path: full path to the child env .py file to exec + local_env_root: directory where local copies of aug_mpc_envs.training_envs modules live + """ + if local_env_root is not None: + local_env_root = os.path.abspath(local_env_root) + # override aug_mpc_envs.training_envs package to point to the local_env_root + pkg_name = "aug_mpc_envs.training_envs" + if pkg_name not in sys.modules: + mod = types.ModuleType(pkg_name) + mod.__path__ = [local_env_root] # tell Python to look here first + sys.modules[pkg_name] = mod + else: + existing = getattr(sys.modules[pkg_name], "__path__", None) + if existing is None: + sys.modules[pkg_name].__path__ = [local_env_root] + elif local_env_root not in existing: + existing.insert(0, local_env_root) + + # load the module as usual + spec = importlib.util.spec_from_file_location("env_module", env_path) + env_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(env_module) + return env_module + +def log_env_hierarchy(env_class, env_path, env_type="training"): + """ + Logs the env class, its file, and full inheritance hierarchy with file paths. 
+ env_class: the child env class + env_path: file path where the child class was loaded from + env_type: string label, e.g., "training", "evaluation", "resumed_training" + """ + def get_bases_recursive(cls): + """Recursively get all base classes with their file paths.""" + info = [] + for base in cls.__bases__: + try: + file = inspect.getfile(base) + except TypeError: + file = "built-in or unknown" + info.append(f"{base.__name__} (from {file})") + # Recurse unless it's object + if base is not object: + info.extend(get_bases_recursive(base)) + return info + + hierarchy_info = get_bases_recursive(env_class) + hierarchy_str = " -> ".join(hierarchy_info) if hierarchy_info else "No parents" + + Journal.log( + "launch_train_env.py", + "", + f"loading {env_type} env {env_class.__name__} (from {env_path}) " + f"with hierarchy: {hierarchy_str}", + LogType.INFO, + throw_when_excep=True + ) + +def dummy_step_loop(env): + global dummy_step_exit_req + while True: + if dummy_step_exit_req: + return True + step_ok=env.step(action=env.safe_action) # not a busy loop because of MPC in the step + if not step_ok: + return False + +if __name__ == "__main__": + + signal.signal(signal.SIGINT, handle_sigint) + + # Parse command line arguments for CPU affinity + parser = argparse.ArgumentParser(description="Set CPU affinity for the script.") + + parser.add_argument('--run_name', type=str, default=None, help='Name of training run') + parser.add_argument('--ns', type=str, help='Namespace to be used for shared memory') + parser.add_argument('--timeout_ms', type=int, help='Connection timeout after which the script self-terminates', default=60000) + parser.add_argument('--drop_dir', type=str, help='Directory root where all run data will be dumped') + parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="") + parser.add_argument('--seed', type=int, help='Seed', default=1) + parser.add_argument('--use_cpu',action='store_true', help='If set, all 
the training (data included) will be performed on CPU') + + parser.add_argument('--db',action='store_true', help='Whether to enable local data logging for the algorithm (reward metrics, etc.)') + parser.add_argument('--env_db',action='store_true', help='Whether to enable env db data logging on shared mem (e.g. reward metrics are not available for reading anymore)') + parser.add_argument('--full_env_db',action='store_true', help='Whether to enable detailed episodic data storage (data over single transitions)') + parser.add_argument('--rmdb',action='store_true', help='Whether to enable remote debug (e.g. data logging on remote servers)') + + parser.add_argument('--tot_tsteps', type=int, help='Total number of timesteps to be collected', default=int(30e6)) + parser.add_argument('--action_repeat', type=int, help='Frame skipping (1-> no skip)', default=1) + parser.add_argument('--discount_factor', type=float, help='', default=0.99) + parser.add_argument('--obs_norm',action='store_true', help='Whether to enable the use of running normalizer in agent') + parser.add_argument('--obs_rescale',action='store_true', help='Whether to rescale observation depending on their expected range') + parser.add_argument('--add_weight_norm',action='store_true', help='Whether to add weight normalization to agent interal llayers') + parser.add_argument('--add_layer_norm',action='store_true', help='Whether to add layer normalization to agent internal llayers') + parser.add_argument('--add_batch_norm',action='store_true', help='Whether to add batch normalization to agent internal llayers') + + parser.add_argument('--act_rescale_critic',action='store_true', help='Whether to rescale actions provided to critic (if SAC) to be in range [-1, 1]') + parser.add_argument('--use_period_resets',action='store_true', help='') + + parser.add_argument('--sac',action='store_true', help='Use SAC, otherwise PPO, unless dummy is set') + parser.add_argument('--dummy',action='store_true', help='Use dummy agent 
(useful for testing and debugging environments)') + + parser.add_argument('--dump_checkpoints',action='store_true', help='Whether to dump model checkpoints during training') + + parser.add_argument('--demo_envs_perc', type=float, help='[0, 1]', default=0.0) + parser.add_argument('--demo_stop_thresh', type=float, default=None, + help='Performance hreshold above which demonstration envs should be deactivated.') + + parser.add_argument('--expl_envs_perc', type=float, help='[0, 1]', default=0) + + parser.add_argument('--use_rnd',action='store_true', help='Whether to use RND for exploration') + + parser.add_argument('--eval',action='store_true', help='Whether to perform an evaluation run') + parser.add_argument('--n_eval_timesteps', type=int, help='Total number of timesteps to be evaluated', default=int(1e6)) + parser.add_argument('--det_eval',action='store_true', help='Whether to perform a deterministic eval (only action mean is used). Only valid if --eval.') + parser.add_argument('--allow_expl_during_eval',action='store_true', help='Whether to allow expl envs during evaluation (useful to tune exploration)') + + parser.add_argument('--resume',action='store_true', help='Resume a previous training using a checkpoint') + parser.add_argument('--mpath', type=str, help='Model path to be used for policy evaluation', default=None) + parser.add_argument('--mname', type=str, help='Model name', default=None) + parser.add_argument('--override_env',action='store_true', help='Whether to override env when running evaluation') + + parser.add_argument('--anomaly_detect',action='store_true', help='Whether to enable anomaly detection (useful for debug)') + + parser.add_argument('--compression_ratio', type=float, + help='If e.g. 
0.8, the fist layer will be of dimension [input_features_size x (input_features_size*compression_ratio)]', default=-1.0) + parser.add_argument('--actor_lwidth', type=int, help='Actor network layer width', default=128) + parser.add_argument('--critic_lwidth', type=int, help='Critic network layer width', default=256) + parser.add_argument('--actor_n_hlayers', type=int, help='Actor network size', default=3) + parser.add_argument('--critic_n_hlayers', type=int, help='Critic network size', default=4) + + parser.add_argument('--env_fname', type=str, default="twist_tracking_env", help='Training env file name (without extension)') + parser.add_argument('--env_classname', type=str, default="TwistTrackingEnv", help='Training env class name') + parser.add_argument('--override_agent_actions',action='store_true', help='Whether to override agent actions with custom ones from shared mem (useful for db)') + parser.add_argument('--override_agent_refs',action='store_true', help='Whether to override automatically generated agent refs (useful for debug)') + + parser.add_argument('--step_while_setup',action='store_true', help='Continue stepping env with default actions while setting up agent, etc..') + + args = parser.parse_args() + args_dict = vars(args) + + if args.eval and args.resume: + Journal.log("launch_train_env.py", + "", + f"Cannot set both --eval and --resume flags. 
Exiting.", + LogType.EXCEP, + throw_when_excep = True) + + deterministic_run(seed=args.seed, torch_det_algos=False) + + anomaly_detect=False + if args.anomaly_detect: + torch.autograd.set_detect_anomaly(True) + + if (not args.mpath is None) and (not args.mname is None): + mpath_full = os.path.join(args.mpath, args.mname) + else: + mpath_full=None + + env_fname=args.env_fname + env_classname = args.env_classname + env_path="" + env_module=None + if (not args.eval and not args.resume) or (args.override_env): + # if starting a fresh traning or overriding env, load from a fresh env from aug_mpc + env_path = f"aug_mpc_envs.training_envs.{env_fname}" + env_module = importlib.import_module(env_path) + else: + if args.mpath is None: + Journal.log("launch_train_env.py", + "", + f"no mpath provided! Cannot load env. Either provide a mpath or run with --override_env", + LogType.EXCEP, + throw_when_excep = True) + + env_path = os.path.join(args.mpath, env_fname + ".py") + env_module = import_env_module(env_path, local_env_root=args.mpath) + + EnvClass = getattr(env_module, env_classname) + env_type = "training" if not args.eval else "evaluation" + if args.resume: + env_type = "resumed_training" + log_env_hierarchy(EnvClass, env_path, env_type) # db print of env class + + env = EnvClass(namespace=args.ns, + verbose=True, + vlevel=VLevel.V2, + use_gpu=not args.use_cpu, + debug=args.env_db, + override_agent_refs=args.override_agent_refs, + timeout_ms=args.timeout_ms, + env_opts=args_dict) + if not env.is_ready(): # something went wrong + exit() + + dummy_step_thread = None + if args.step_while_setup: + import threading + # spawn step thread (we don't true parallelization, thread is fine) + # start the dummy stepping in a separate thread so setup can continue concurrently + dummy_step_thread = threading.Thread(target=dummy_step_loop, args=(env,), daemon=True) + dummy_step_thread.start() + + # getting some sim info for debugging + sim_data = {} + sim_info_shared = 
SharedEnvInfo(namespace=args.ns, + is_server=False, + safe=False) + sim_info_shared.run() + sim_info_keys = sim_info_shared.param_keys + sim_info_data = sim_info_shared.get().flatten() + for i in range(len(sim_info_keys)): + sim_data[sim_info_keys[i]] = sim_info_data[i] + + # getting come cluster info for debugging + cluster_data={} + cluste_info_shared = SharedClusterInfo(namespace=args.ns, + is_server=False, + safe=False) + cluste_info_shared.run() + cluster_info_keys = cluste_info_shared.param_keys + cluster_info_data = cluste_info_shared.get().flatten() + for i in range(len(cluster_info_keys)): + cluster_data[cluster_info_keys[i]] = cluster_info_data[i] + + custom_args={} + custom_args["uname_host"]="user_host" + try: + username = os.getlogin() # add machine info to db data + hostname = os.uname().nodename + user_host = f"{username}@{hostname}" + custom_args["uname_host"]=user_host + except: + pass + + algo=None + if not args.dummy: + if args.sac: + from aug_mpc.training_algs.sac.sac import SAC + + algo = SAC(env=env, + debug=args.db, + remote_db=args.rmdb, + seed=args.seed) + else: + from aug_mpc.training_algs.ppo.ppo import PPO + + algo = PPO(env=env, + debug=args.db, + remote_db=args.rmdb, + seed=args.seed) + else: + from aug_mpc.training_algs.dummy.dummy import Dummy + + algo=Dummy(env=env, + debug=args.db, + remote_db=args.rmdb, + seed=args.seed) + + custom_args.update(args_dict) + custom_args.update(cluster_data) + custom_args.update(sim_data) + + run_name=env_classname if args.run_name is None else args.run_name + algo.setup(run_name=run_name, + ns=args.ns, + verbose=True, + drop_dir_name=args.drop_dir, + custom_args=custom_args, + comment=args.comment, + eval=args.eval, + resume=args.resume, + model_path=mpath_full, + n_eval_timesteps=args.n_eval_timesteps, + dump_checkpoints=args.dump_checkpoints, + norm_obs=args.obs_norm, + rescale_obs=args.obs_rescale) + + full_drop_dir=algo.drop_dir() + shared_drop_dir = StringTensorServer(length=1, + 
basename="SharedTrainingDropDir", + name_space=args.ns, + verbose=True, + vlevel=VLevel.V2, + force_reconnection=True) + shared_drop_dir.run() + + while True: + if not shared_drop_dir.write_vec([full_drop_dir], 0): + ns=1000000000 + PerfSleep.thread_sleep(ns) + continue + else: + break + + if args.step_while_setup: + # stop dummy step thread and give algo authority on step + dummy_step_exit_req=True + # wait for thread to join + if dummy_step_thread is not None: + dummy_step_thread.join() + Journal.log("launch_train_env.py", + "", + f"Dummy env step thread joined. Moving step authority to algo.", + LogType.INFO) + + eval=args.eval + if args.override_agent_actions: + eval=True + if not eval: + while not exit_request: + if not algo.learn(): + break + else: # eval phase + with torch.no_grad(): # no need for grad computation + while not exit_request: + if not algo.eval(): + break + + algo.done() # make sure to terminate training properly diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/launch_world_interface.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/launch_world_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..9df7e6bae94651dacb1155529832abe82f5cd3e2 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/launch_world_interface.py @@ -0,0 +1,202 @@ +import os +import argparse +import importlib.util +import inspect + +from aug_mpc.utils.rt_factor import RtFactor +from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict +from aug_mpc.utils.determinism import deterministic_run + +from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0] + +# Function 
to dynamically import a module from a specific file path +def import_env_module(env_path): + spec = importlib.util.spec_from_file_location("env_module", env_path) + env_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(env_module) + return env_module + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description="Sim. env launcher") + # Add arguments + parser.add_argument('--robot_name', type=str, help='Alias to be used for the robot and also shared memory') + parser.add_argument('--urdf_path', type=str, help='path to the URDF file description for each robot') + parser.add_argument('--srdf_path', type=str, help='path to the SRDF file description for each robot (used for homing)') + parser.add_argument('--jnt_imp_config_path', type=str, help='path to a valid YAML file containing information on jnt impedance gains') + parser.add_argument('--num_envs', type=int, default=1) + parser.add_argument('--n_contacts', type=int, default=4) + parser.add_argument('--cluster_dt', type=float, default=0.03, help='dt at which the control cluster runs') + parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data") + parser.add_argument('--remote_stepping',action='store_true', + help='Whether to use remote stepping for cluster triggering (to be set during training)') + + # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8 + parser.add_argument('--use_gpu',action='store_true', help='Whether to use gpu simulation') + + parser.add_argument('--enable_debug',action='store_true', help='Whether to enable debug mode (may introduce significant overhead)') + + parser.add_argument('--headless',action='store_true', help='Whether to run simulation in headless mode') + + parser.add_argument('--verbose',action='store_true', help='Enable verbose mode') + + parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', 
default="") + parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000) + parser.add_argument('--physics_dt', type=float, default=5e-4, help='') + + parser.add_argument('--use_custom_jnt_imp',action='store_true', + help='Whether to override the default PD controller with a custom one') + + parser.add_argument('--diff_vels',action='store_true', + help='Whether to obtain velocities by differentiation or not') + + parser.add_argument('--init_timesteps', type=int, help='initialization timesteps', default=None) + parser.add_argument('--seed', type=int, help='seed', default=0) + + parser.add_argument('--custom_args_names', nargs='+', default=None, + help='list of custom arguments names') + parser.add_argument('--custom_args_vals', nargs='+', default=None, + help='list of custom arguments values') + parser.add_argument('--custom_args_dtype', nargs='+', default=None, + help='list of custom arguments data types') + + parser.add_argument('--env_fname', type=str, + default="aug_mpc_envs.world_interfaces.isaac_world_interface", + help="env file import pattern (without extension)") + + args = parser.parse_args() + + deterministic_run(seed=args.seed, torch_det_algos=False) + + default_init_duration=3.0 # [s] + default_init_tsteps=int(default_init_duration/args.physics_dt) + init_tsteps=args.init_timesteps + if init_tsteps is None: + init_tsteps=default_init_tsteps + # Ensure custom_args_names, custom_args_vals, and custom_args_dtype have the same length + custom_opt = generate_custom_arg_dict(args=args) + + Journal.log("launch_world_interface.py", + "", + f"Will warmup world interface for {default_init_duration}s ({default_init_tsteps} physics steps)", + LogType.STAT) + + robot_names = [args.robot_name] + robot_urdf_paths = [args.urdf_path] + robot_srdf_paths = [args.srdf_path] + control_clust_dts = [float(args.cluster_dt)] + use_remote_stepping = [args.remote_stepping] + n_contacts = [args.n_contacts] + 
jnt_imp_config_paths = [args.jnt_imp_config_path] + num_envs = args.num_envs + control_clust_dt = args.cluster_dt # [s]. Dt at which RHC controllers run + headless = args.headless + + # simulation parameters + remote_env_params = {} + remote_env_params["physics_dt"] = args.physics_dt # physics_dt? + remote_env_params["n_envs"] = num_envs + remote_env_params["use_gpu"] = args.use_gpu + remote_env_params["substepping_dt"] = control_clust_dts[0] + remote_env_params["headless"] = headless + remote_env_params["debug_enabled"] = args.enable_debug + remote_env_params["seed"] = args.seed + remote_env_params.update(custom_opt) + # sim info to be broadcasted on shared memory + # adding some data to dict for debugging + + shared_sim_infos = [] + for i in range(len(robot_names)): + shared_sim_infos.append(SharedEnvInfo( + namespace=robot_names[i], + is_server=True, + env_params_dict=remote_env_params, + verbose=True, + vlevel=VLevel.V2, + force_reconnection=True)) + shared_sim_infos[i].run() + + env_module=importlib.import_module(args.env_fname) + classes_in_module = [name for name, obj in inspect.getmembers(env_module, inspect.isclass) + if obj.__module__ == env_module.__name__] + if len(classes_in_module) == 1: + cluster_classname=classes_in_module[0] + Env = getattr(env_module, cluster_classname) + else: + class_list_str = ", ".join(classes_in_module) + Journal.log("launch_world_interface.py", + "", + f"Found more than one class in env file {args.env_fname}. 
Found: {class_list_str}", + LogType.EXCEP, + throw_when_excep = False) + exit() + + env = Env(robot_names=robot_names, + robot_urdf_paths=robot_urdf_paths, + robot_srdf_paths=robot_srdf_paths, + cluster_dt=control_clust_dts, + jnt_imp_config_paths=jnt_imp_config_paths, + n_contacts=n_contacts, + use_remote_stepping=use_remote_stepping, + name=classes_in_module[0], + num_envs=num_envs, + debug=args.enable_debug, + verbose=args.verbose, + vlevel=VLevel.V2, + n_init_step=init_tsteps, + timeout_ms=args.timeout_ms, + env_opts=remote_env_params, + use_gpu=args.use_gpu, + override_low_lev_controller=args.use_custom_jnt_imp) # create environment + env.reset(reset_sim=True) + + rt_factor = RtFactor(dt_nom=env.physics_dt(), + window_size=100) + + while True: + + if rt_factor.reset_due(): + rt_factor.reset() + + step_ok=env.step() + + if not step_ok: + break + + rt_factor.update() + + for i in range(len(robot_names)): + robot_name=robot_names[i] + n_steps = env.cluster_sim_step_counters[robot_name] + sol_counter = env.cluster_servers[robot_name].solution_counter() + trigger_counter = env.cluster_servers[robot_name].trigger_counter() + shared_sim_infos[i].write(dyn_info_name=["sim_rt_factor", + "total_rt_factor", + "env_stepping_dt", + "world_stepping_dt", + "time_to_get_states_from_env", + "cluster_state_update_dt", + "cluster_sol_time", + "n_sim_steps", + "n_cluster_trigger_steps", + "n_cluster_sol_steps", + "sim_time", + "cluster_time"], + val=[rt_factor.get(), + rt_factor.get() * num_envs, + rt_factor.get_avrg_step_time(), + env.debug_data["time_to_step_world"], + env.debug_data["time_to_get_states_from_env"], + env.debug_data["cluster_state_update_dt"][robot_name], + env.debug_data["cluster_sol_time"][robot_name], + n_steps, + trigger_counter, + sol_counter, + env.debug_data["sim_time"][robot_name], + sol_counter*env.cluster_servers[robot_name].cluster_dt() + ]) diff --git 
a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/__init__.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/augmpc_cluster_client.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/augmpc_cluster_client.py new file mode 100644 index 0000000000000000000000000000000000000000..54d6474bce9bc1d3aeab78e2574ae6fc159c5886 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/augmpc_cluster_client.py @@ -0,0 +1,105 @@ +from mpc_hive.cluster_client.control_cluster_client import ControlClusterClient +from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf +from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds +from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds + +from EigenIPC.PyEigenIPC import Journal, LogType + +from typing import List, Dict + +import os + +from abc import abstractmethod + +class AugMpcClusterClient(ControlClusterClient): + + def _import_aux_libs(self): + super()._import_aux_libs() + + def __init__(self, + namespace: str, + urdf_xacro_path: str, + srdf_xacro_path: str, + cluster_size: int, + set_affinity: bool = False, + use_mp_fork: bool = False, + isolated_cores_only: bool = False, + core_ids_override_list: List[int] = None, + verbose: bool = False, + debug: bool = False, + codegen_base_dirname: str = "CodeGen", + base_dump_dir: str = "/tmp", + codegen_override: str = None, + custom_opts: Dict = {}): + + self._base_dump_dir = base_dump_dir + + self._temp_path = base_dump_dir + "/" + 
f"{self.__class__.__name__}" + f"_{namespace}" + + self._codegen_base_dirname = codegen_base_dirname + self._codegen_basedir = self._temp_path + "/" + self._codegen_base_dirname + + self._codegen_override = codegen_override # can be used to manually override + # the default codegen dir + + if not os.path.exists(self._temp_path): + os.makedirs(self._temp_path) + if not os.path.exists(self._codegen_basedir): + os.makedirs(self._codegen_basedir) + + self._urdf_xacro_path = urdf_xacro_path + self._srdf_xacro_path = srdf_xacro_path + self._urdf_path="" + self._srdf_path="" + + super().__init__(namespace = namespace, + cluster_size=cluster_size, + isolated_cores_only = isolated_cores_only, + set_affinity = set_affinity, + use_mp_fork = use_mp_fork, + core_ids_override_list = core_ids_override_list, + verbose = verbose, + debug = debug, + custom_opts=custom_opts) + self._generate_srdf(namespace=namespace) + + self._generate_urdf(namespace=namespace) + + + def codegen_dir(self): + + return self._codegen_basedir + + def codegen_dir_override(self): + + return self._codegen_override + + def _generate_srdf(self,namespace:str): + + custom_xacro_args=extract_custom_xacro_args(self._custom_opts) + cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(), + new_cmds=custom_xacro_args) + + self._urdf_path=generate_urdf(robot_name=namespace, + xacro_path=self._urdf_xacro_path, + dump_path=self._temp_path, + xrdf_cmds=cmds) + + def _generate_urdf(self,namespace:str): + + custom_xacro_args=extract_custom_xacro_args(self._custom_opts) + cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(), + new_cmds=custom_xacro_args) + + self._srdf_path=generate_srdf(robot_name=namespace, + xacro_path=self._srdf_xacro_path, + dump_path=self._temp_path, + xrdf_cmds=cmds) + + @abstractmethod + def _xrdf_cmds(self): + + # to be implemented by parent class ( + # for an example have a look at utils/centauro_xrdf_gen.py) + + pass diff --git 
a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/augmpc_cluster_server.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/augmpc_cluster_server.py new file mode 100644 index 0000000000000000000000000000000000000000..ea5cf7226db1ae834b7bb599c33b426022fcc25d --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/augmpc_cluster_server.py @@ -0,0 +1,43 @@ +from mpc_hive.cluster_server.control_cluster_server import ControlClusterServer +from typing import List +from EigenIPC.PyEigenIPC import VLevel + +class AugMpcClusterServer(ControlClusterServer): + + def __init__(self, + robot_name: str, + cluster_size: int, + cluster_dt: float, + control_dt: float, + jnt_names: List[str], + n_contacts: int, + contact_linknames: List[str] = None, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + debug: bool = False, + use_gpu: bool = True, + force_reconnection: bool = True, + timeout_ms: int = 60000, + enable_height_sensor: bool = False, + height_grid_size: int = None, + height_grid_resolution: float = None): + + self.robot_name = robot_name + + super().__init__( + namespace=self.robot_name, + cluster_size=cluster_size, + cluster_dt=cluster_dt, + control_dt=control_dt, + jnt_names=jnt_names, + n_contacts = n_contacts, + contact_linknames = contact_linknames, + verbose=verbose, + vlevel=vlevel, + debug=debug, + use_gpu=use_gpu, + force_reconnection=force_reconnection, + timeout_ms=timeout_ms, + enable_height_sensor=enable_height_sensor, + height_grid_size=height_grid_size, + height_grid_resolution=height_grid_resolution) diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/__init__.py 
b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/config/rhc_horizon_config.yaml b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/config/rhc_horizon_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db15baee95a0dcc257d9c1ccb788a7aabdb6e456 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/config/rhc_horizon_config.yaml @@ -0,0 +1,200 @@ +# A dummy example of a rhc controller configuration built on top of horizon +# and iLQR + +solver: + type: ilqr + ilqr.tol: 0.01 + ilqr.constr_viol_tol: 0.01 + ilqr.suppress_all_output: 'yes' + ilqr.codegen_enabled: true +# ilqr.codegen_workdir: /tmp/tyhio + ilqr.enable_gn: true + ilqr.hxx_reg_base: 0.0 + ilqr.n_threads: 0 + print_time: 0 + +constraints: + - ball_1_contact + - ball_2_contact + - ball_3_contact + - ball_4_contact + - z_ball_1 + - z_ball_2 + - z_ball_3 + - z_ball_4 + +costs: + - joint_regularization + - joint_posture + - base_position + - base_orientation + +.define: + - &w_base_pos 10 + - &w_base_ori 1 + - &w_ball_z 1 + # - &w_postural 0.0001 + - &w_jnt_v_reg 0.01 + - &w_jnt_a_reg 0.001 + - &w_jnt_f_reg 0.0001 + - &wheel_radius 0.124 + +base_position: + type: Cartesian + distal_link: base_link + indices: [0, 1, 2] + nodes: ${N} + weight: *w_base_pos + +base_orientation: + type: Cartesian + distal_link: base_link + indices: [3, 4, 5] + nodes: ${N} + weight: *w_base_ori + +# =============================== + +rolling_contact_1: + type: Rolling + frame: wheel_1 + radius: *wheel_radius + 
+rolling_contact_2: + type: Rolling + frame: wheel_2 + radius: *wheel_radius + +rolling_contact_3: + type: Rolling + frame: wheel_3 + radius: *wheel_radius + +rolling_contact_4: + type: Rolling + frame: wheel_4 + radius: *wheel_radius + +# contact_1: +# type: Cartesian +# distal_link: ball_1 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# contact_2: +# type: Cartesian +# distal_link: ball_2 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# contact_3: +# type: Cartesian +# distal_link: ball_3 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# contact_4: +# type: Cartesian +# distal_link: ball_4 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# ================================== + +interaction_wheel_1: + type: VertexForce + frame: ball_1 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_1 + +interaction_wheel_2: + type: VertexForce + frame: ball_2 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_2 + +interaction_wheel_3: + type: VertexForce + frame: ball_3 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_3 + +interaction_wheel_4: + type: VertexForce + frame: ball_4 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_4 + +ball_1_contact: + type: Contact + subtask: [interaction_wheel_1, rolling_contact_1] + +ball_2_contact: + type: Contact + subtask: [interaction_wheel_2, rolling_contact_2] + +ball_3_contact: + type: Contact + subtask: [interaction_wheel_3, rolling_contact_3] + +ball_4_contact: + type: Contact + subtask: [interaction_wheel_4, rolling_contact_4] + +# joint_posture: +# type: Postural +# weight: *w_postural +# indices: [0, 1, 2, +# 4, 5, 6, +# 8, 9, 10, +# 12, 13, 14] +# nodes: ${range(N-8, N)} + +# todo: wrong, as the order COUNTS. If I add the contacts after the joint regularization, they wont get considered. +joint_regularization: + type: Regularization + nodes: all # maybe not on first nodes?? 
+ weight: + velocity: *w_jnt_v_reg + acceleration: *w_jnt_a_reg + force: *w_jnt_f_reg + +z_ball_1: + type: Cartesian + distal_link: ball_1 + indices: [2] + cartesian_type: position + weight: *w_ball_z + +z_ball_2: + type: Cartesian + distal_link: ball_2 + indices: [2] + cartesian_type: position + weight: *w_ball_z + +z_ball_3: + type: Cartesian + distal_link: ball_3 + indices: [2] + cartesian_type: position + weight: *w_ball_z + +z_ball_4: + type: Cartesian + distal_link: ball_4 + indices: [2] + cartesian_type: position + weight: *w_ball_z \ No newline at end of file diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/gait_manager.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/gait_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..b3543272d28aab9cd2ebd260c958d7b4510b3721 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/gait_manager.py @@ -0,0 +1,565 @@ +import numpy as np + +from aug_mpc.controllers.rhc.horizon_based.horizon_imports import * + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +from typing import Dict + +class GaitManager: + + def __init__(self, + task_interface: TaskInterface, + phase_manager: pymanager.PhaseManager, + injection_node: int = None, + keep_yaw_vert: bool = False, + yaw_vertical_weight: float = None, + vertical_landing: bool = False, + landing_vert_weight: float = None, + phase_force_reg: float = None, + flight_duration: int = 15, + post_flight_stance: int = 3, + step_height: float = 0.1, + dh: float = 0.0, + custom_opts: Dict = {}): + + self._custom_opts=custom_opts + + self._is_open_loop=False + if "is_open_loop" in self._custom_opts: + self._is_open_loop=self._custom_opts["is_open_loop"] + + 
self.task_interface = task_interface + self._n_nodes_prb=self.task_interface.prb.getNNodes() + + self._phase_manager = phase_manager + self._model=self.task_interface.model + self._q0=self._model.q0 + self._kin_dyn=self.task_interface.model.kd + + # phase weights and regs + self._keep_yaw_vert=keep_yaw_vert + self._yaw_vertical_weight=yaw_vertical_weight + self._vertical_landing=vertical_landing + self._landing_vert_weight=landing_vert_weight + self._phase_force_reg=phase_force_reg + self._total_weight = np.atleast_2d(np.array([0, 0, self._kin_dyn.mass() * 9.81])).T + + self._f_reg_ref={} + + # flight parameters + self._post_flight_stance=post_flight_stance + self._flight_info_now=None + self._flight_duration_max=self._n_nodes_prb-(injection_node+1) + self._flight_duration_min=3 + self._flight_duration_default=flight_duration + # apex bounds/defaults + self._step_height_default=step_height + self._step_height_min=0.0 + self._step_height_max=0.5 + # end height bounds/defaults + self._dh_default=dh + self._dh_min=0.0 + self._dh_max=0.5 + # landing dx, dy bounds/defaults + self._land_dx_default=0.0 + self._land_dx_min=-0.5 + self._land_dx_max=0.5 + self._land_dy_default=0.0 + self._land_dy_min=-0.5 + self._land_dy_max=0.5 + + # timeline data + self._contact_timelines = dict() + self.timeline_names=[] + + self._flight_phases = {} + self._touchdown_phases = {} + self._contact_phases = {} + self._fk_contacts = {} + self._fkd_contacts = {} + self._f_reg_ref = {} + + # reference traj + self._tg = trajectoryGenerator.TrajectoryGenerator() + self._traj_der= [None, 0, 0] + self._traj_second_der=[None, 0, 0] + self._third_traj_der=[None, None, 0] + + self._ref_trjs = {} + self._ref_vtrjs = {} + + if injection_node is None: + self._injection_node = round(self.task_interface.prb.getNNodes()/2.0) + else: + self._injection_node = injection_node + + self._init_contact_timelines() + + self._reset_contact_timelines() + + def _init_contact_timelines(self): + + short_stance_duration=1 
+ flight_phase_short_duration=1 + + self.n_contacts=len(self._model.cmap.keys()) + self._dt=float(self.task_interface.prb.getDt()) + + self._name_to_idx_map={} + + j=0 + for contact in self._model.cmap.keys(): + + self._fk_contacts[contact]=self._kin_dyn.fk(contact) + self._fkd_contacts[contact]=self._kin_dyn.frameVelocity(contact, self._model.kd_frame) + self.timeline_names.append(contact) + self._contact_timelines[contact]=self._phase_manager.createTimeline(f'{contact}_timeline') + # stances + self._contact_phases[contact] = self._contact_timelines[contact].createPhase(short_stance_duration, + f'stance_{contact}_short') + + if self.task_interface.getTask(f'{contact}') is not None: + self._contact_phases[contact].addItem(self.task_interface.getTask(f'{contact}')) + else: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"contact task {contact} not found", + LogType.EXCEP, + throw_when_excep=True) + i=0 + self._f_reg_ref[contact]=[] + for force in self._model.cmap[contact]: + f_ref=self.task_interface.prb.createParameter(name=f"{contact}_force_reg_f{i}_ref", + dim=3) + force_reg=self.task_interface.prb.createResidual(f'{contact}_force_reg_f{i}', self._phase_force_reg * (force - f_ref), + nodes=[]) + self._f_reg_ref[contact].append(f_ref) + self.set_f_reg(contact_name=contact, scale=4) + self._contact_phases[contact].addCost(force_reg, nodes=list(range(0, short_stance_duration))) + i+=1 + + # flights + self._flight_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration, + f'flight_{contact}_short') + + # sanity checks (z trajectory) + self._zpos_task_found=True + self._zvel_task_found=True + self._xypos_task_found=True + self._xyvel_task_found=True + if self.task_interface.getTask(f'z_{contact}') is None: + self._zpos_task_found=False + if self.task_interface.getTask(f'vz_{contact}') is None: + self._zvel_task_found=False + if self.task_interface.getTask(f'xy_{contact}') is None: + self._xypos_task_found=False + 
if self.task_interface.getTask(f'vxy_{contact}') is None: + self._xyvel_task_found=False + if (not self._zpos_task_found) and (not self._zvel_task_found): + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"neither pos or vel task for contacts were found! Aborting.", + LogType.EXCEP, + throw_when_excep=True) + if (not self._zpos_task_found) and self._is_open_loop: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"Running in open loop, but no contact pos task found. Aborting.", + LogType.EXCEP, + throw_when_excep=True) + if self._zpos_task_found and self._xyvel_task_found: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.", + LogType.EXCEP, + throw_when_excep=True) + if self._zvel_task_found and self._xypos_task_found: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.", + LogType.EXCEP, + throw_when_excep=True) + if (not self._xypos_task_found) and (not self._xyvel_task_found): + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"neither pos or vel task for contact {contact} xy were found! Will proceed without xy landing constraints.", + LogType.WARN) + # if (not self._zvel_task_found) and (not self._is_open_loop): + # Journal.log(self.__class__.__name__, + # "_init_contact_timelines", + # f"Running in closed loop, but contact vel task not found. 
Aborting", + # LogType.EXCEP, + # throw_when_excep=True) + + self._ref_trjs[contact]=None + self._ref_vtrjs[contact]=None + if self._zpos_task_found: # we use pos trajectory + self._ref_trjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()]) + init_z_foot = self._fk_contacts[contact](q=self._q0)['ee_pos'].elements()[2] + if self._is_open_loop: + self._ref_trjs[contact][2, :] = np.atleast_2d(init_z_foot) + else: + self._ref_trjs[contact][2, :] = 0.0 # place foot at ground level initially () + + if self._xypos_task_found: # xy + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'xy_{contact}'), + self._ref_trjs[contact][0:2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + self._touchdown_phases[contact]=None + # z + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'z_{contact}'), + self._ref_trjs[contact][2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + + self._touchdown_phases[contact]=None + + else: # foot traj in velocity + # ref vel traj + self._ref_vtrjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()]) # allocate traj + # of max length eual to number of nodes + self._ref_vtrjs[contact][2, :] = np.atleast_2d(0) + + if self._xyvel_task_found: # xy + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vxy_{contact}'), + self._ref_vtrjs[contact][0:2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + # z + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vz_{contact}'), + self._ref_vtrjs[contact][2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + self._touchdown_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration, + f'touchdown_{contact}_short') + # self._touchdown_phases[contact].addItemReference(self.task_interface.getTask(f'vz_{contact}'), + # self._ref_vtrjs[contact][2, 0:1], + # nodes=list(range(0, flight_phase_short_duration))) + + # 
ee_vel=self._fkd_contacts[contact](q=self._model.q, + # qdot=self._model.v)['ee_vel_linear'] + # cstr = self.task_interface.prb.createConstraint(f'{contact}_vert', ee_vel[0:2], []) + # self._flight_phases[contact].addConstraint(cstr, nodes=[0, flight_phase_short_duration-1]) + if self._keep_yaw_vert: + # keep ankle vertical + c_ori = self._model.kd.fk(contact)(q=self._model.q)['ee_rot'][2, :] + cost_ori = self.task_interface.prb.createResidual(f'{contact}_ori', self._yaw_vertical_weight * (c_ori.T - np.array([0, 0, 1]))) + # flight_phase.addCost(cost_ori, nodes=list(range(0, flight_duration+post_landing_stance))) + + if self._vertical_landing and self._touchdown_phases[contact] is not None: + v_xy=self._fkd_contacts[contact](q=self._model.q, qdot=self._model.v)['ee_vel_linear'][0:2] + vertical_landing=self.task_interface.prb.createResidual(f'{contact}_only_vert_v', + self._landing_vert_weight * v_xy, + nodes=[]) + self._touchdown_phases[contact].addCost(vertical_landing, nodes=list(range(0, short_stance_duration))) + + self._name_to_idx_map[contact]=j + + j+=1 + + # current pos [c0, c1, ....], current length, nominal length, nom. apex, no. 
landing height, landing dx, landing dy (local world aligned) + self._flight_info_now=np.zeros(shape=(7*self.n_contacts)) + + self.update() + + def _reset_contact_timelines(self): + + for contact in self._model.cmap.keys(): + + idx=self._name_to_idx_map[contact] + # we follow same order as in shm for more efficient writing + self._flight_info_now[idx]= -1.0 # pos [nodes] + self._flight_info_now[idx+1*self.n_contacts]= -1.0 # duration (remaining) [nodes] + self._flight_info_now[idx+2*self.n_contacts]=self._flight_duration_default # [nodes] + self._flight_info_now[idx+3*self.n_contacts]=self._step_height_default + self._flight_info_now[idx+4*self.n_contacts]=self._dh_default + self._flight_info_now[idx+5*self.n_contacts]=self._land_dx_default + self._flight_info_now[idx+6*self.n_contacts]=self._land_dy_default + # fill timeline with stances + contact_timeline=self._contact_timelines[contact] + contact_timeline.clear() # remove phases + short_stance_phase = contact_timeline.getRegisteredPhase(f'stance_{contact}_short') + while contact_timeline.getEmptyNodes() > 0: + contact_timeline.addPhase(short_stance_phase) + + self.update() + + def reset(self): + # self.phase_manager.clear() + self.task_interface.reset() + self._reset_contact_timelines() + + def set_f_reg(self, + contact_name, + scale: float = 4.0): + f_refs=self._f_reg_ref[contact_name] + for force in f_refs: + ref=self._total_weight/(scale*len(f_refs)) + force.assign(ref) + + def set_flight_duration(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts]=val + + def get_flight_duration(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts] + + def set_step_apexdh(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts]=val + + def get_step_apexdh(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts] 
+ + def set_step_enddh(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts]=val + + def get_step_enddh(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts] + + def get_step_landing_dx(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts] + + def set_step_landing_dx(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts]=val + + def get_step_landing_dy(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts] + + def set_step_landing_dy(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts]=val + + def add_stand(self, contact_name): + # always add stand at the end of the horizon + timeline = self._contact_timelines[contact_name] + if timeline.getEmptyNodes() > 0: + timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short')) + + def add_flight(self, contact_name, + robot_q: np.ndarray): + + timeline = self._contact_timelines[contact_name] + + flights_on_horizon=self._contact_timelines[contact_name].getPhaseIdx(self._flight_phases[contact_name]) + + last_flight_idx=self._injection_node-1 # default to make things work + if not len(flights_on_horizon)==0: # some flight phases are there + last_flight_idx=flights_on_horizon[-1]+self._post_flight_stance + + if last_flight_idx1: + Journal.log(self.__class__.__name__, + "add_flight", + f"Got flight duration {flight_duration_req} < 1!", + LogType.WARN, + throw_when_excep=True) + + # process requests to ensure flight params are valid + # duration + if flight_duration_reqself._flight_duration_max: + flight_duration_req=self._flight_duration_max + # apex height + if flight_apex_reqself._step_height_max: + flight_apex_req=self._step_height_max + # landing height + if 
flight_enddh_reqself._dh_max: + flight_enddh_req=self._dh_max + # landing dx + if flight_land_dx_reqself._land_dx_max: + flight_land_dx_req=self._land_dx_max + # landing dy + if flight_land_dy_reqself._land_dy_max: + flight_land_dy_req=self._land_dy_max + + land_dx_w = flight_land_dx_req + land_dy_w = flight_land_dy_req + if self._xypos_task_found or self._xyvel_task_found: + # landing dx/dy are specified in horizontal frame; rotate into world aligned frame + q_base = robot_q[3:7] + if q_base.ndim == 1: + q_base = q_base.reshape(-1, 1) + q_w = q_base[3, 0] + q_x = q_base[0, 0] + q_y = q_base[1, 0] + q_z = q_base[2, 0] + r11 = 1 - 2 * (q_y * q_y + q_z * q_z) + r21 = 2 * (q_x * q_y + q_z * q_w) + norm = np.hypot(r11, r21) + if norm > 0.0: + cos_yaw = r11 / norm + sin_yaw = r21 / norm + else: + cos_yaw = 1.0 + sin_yaw = 0. + + land_dx_w = flight_land_dx_req * cos_yaw - flight_land_dy_req * sin_yaw + land_dy_w = flight_land_dx_req * sin_yaw + flight_land_dy_req * cos_yaw + + if self._ref_vtrjs[contact_name] is not None and \ + self._ref_trjs[contact_name] is not None: # only allow one mode (pos/velocity traj) + Journal.log(self.__class__.__name__, + "add_flight", + f"Both pos and vel traj for contact {contact_name} are not None! 
This is not allowed, aborting.", + LogType.EXCEP, + throw_when_excep=True) + + # inject pos traj if pos mode + if self._ref_trjs[contact_name] is not None: + # recompute trajectory online (just needed if using pos traj) + foot_pos=self._fk_contacts[contact_name](q=robot_q)['ee_pos'].elements() + starting_pos=foot_pos[2] # compute foot traj (local world aligned) + starting_x_pos=foot_pos[0] + starting_y_pos=foot_pos[1] + # starting_pos=0.0 + self._ref_trjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.from_derivatives( + flight_duration_req, + p_start=starting_pos, + p_goal=starting_pos+flight_enddh_req, + clearance=flight_apex_req, + derivatives=self._traj_der, + second_der=self._traj_second_der, + third_der=self._third_traj_der) + ) + if self._xypos_task_found: + self._ref_trjs[contact_name][0, 0:flight_duration_req]=starting_x_pos+land_dx_w + self._ref_trjs[contact_name][1, 0:flight_duration_req]=starting_y_pos+land_dy_w + + for i in range(flight_duration_req): + res, phase_token=timeline.addPhase(self._flight_phases[contact_name], + pos=self._injection_node+i, + absolute_position=True) + phase_token.setItemReference(f'z_{contact_name}', + self._ref_trjs[contact_name][:, i]) + if self._xypos_task_found: + phase_token.setItemReference(f'xy_{contact_name}', + self._ref_trjs[contact_name][:, i]) + if self._touchdown_phases[contact_name] is not None: + # add touchdown phase for forcing vertical landing + res, phase_token=timeline.addPhase(self._touchdown_phases[contact_name], + pos=self._injection_node+flight_duration_req, + absolute_position=True) + + # inject vel traj if vel mode + if self._ref_vtrjs[contact_name] is not None: + self._ref_vtrjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.derivative_of_trajectory( + flight_duration_req, + p_start=0.0, + p_goal=flight_enddh_req, + clearance=flight_apex_req, + derivatives=self._traj_der, + second_der=self._traj_second_der, + third_der=self._third_traj_der)) + if 
self._xyvel_task_found: # compute vel reference using problem dt and flight length + flight_duration_sec=float(flight_duration_req)*self._dt + self._ref_vtrjs[contact_name][0, 0:flight_duration_req]=land_dx_w/flight_duration_sec + self._ref_vtrjs[contact_name][1, 0:flight_duration_req]=land_dy_w/flight_duration_sec + + for i in range(flight_duration_req): + res, phase_token=timeline.addPhase(self._flight_phases[contact_name], + pos=self._injection_node+i, + absolute_position=True) + phase_token.setItemReference(f'vz_{contact_name}', + self._ref_vtrjs[contact_name][2:3, i:i+1]) + if self._touchdown_phases[contact_name] is not None: + # add touchdown phase for forcing vertical landing + res, phase_token=timeline.addPhase(self._touchdown_phases[contact_name], + pos=self._injection_node+flight_duration_req, + absolute_position=True) + + if timeline.getEmptyNodes() > 0: # fill empty nodes at the end of the horizon, if any, with stance + timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short')) + + def update(self): + self._phase_manager.update() + + def update_flight_info(self, timeline_name): + + # get current position and remaining duration of flight phases over the horizon for a single contact + + # phase indexes over timeline + phase_idxs=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name]) + + if not len(phase_idxs)==0: # at least one flight phase on horizon -> read info from timeline + + # all active phases on timeline + active_phases=self._contact_timelines[timeline_name].getActivePhases() + + phase_idx_start=phase_idxs[0] + # active_nodes_start=active_phases[phase_idx_start].getActiveNodes() + pos_start=active_phases[phase_idx_start].getPosition() + # n_nodes_start=active_phases[phase_idx_start].getNNodes() + + phase_idx_last=phase_idxs[-1] # just get info for last phase on the horizon + # active_nodes_last=active_phases[phase_idx_last].getActiveNodes() + pos_last=active_phases[phase_idx_last].getPosition() 
+ # n_nodes_last=active_phases[phase_idx_last].getNNodes() + + # write to info + self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts]=pos_last + self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts]=pos_last - pos_start + + return True + + return False + + def get_flight_info(self, timeline_name): + return (self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+2*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+3*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+4*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+5*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+6*self.n_contacts]) + + def get_flight_info_all(self): + return self._flight_info_now + + def set_ref_pos(self, + timeline_name:str, + ref_height: np.array = None, + threshold: float = 0.05): + + if ref_height is not None: + self._ref_trjs[timeline_name][2, :]=ref_height + if ref_height>threshold: + self.add_flight(timeline_name=timeline_name) + this_flight_token_idx=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name])[-1] + active_phases=self._contact_timelines[timeline_name].getActivePhases() + active_phases[this_flight_token_idx].setItemReference(f'z_{timeline_name}', + self._ref_trjs[timeline_name]) + else: + self.add_stand(timeline_name=timeline_name) + + def set_force_feedback(self, + timeline_name: str, + force_norm: float): + + flight_tokens=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name]) + contact_tokens=self._contact_phases[timeline_name].getPhaseIdx(self._contact_phases[timeline_name]) + if not len(flight_tokens)==0: + first_flight=flight_tokens[0] + first_flight + + def check_horizon_full(self, + 
timeline_name): + timeline = self._contact_timelines[timeline_name] + if timeline.getEmptyNodes() > 0: + error = f"Empty nodes detected over the horizon for timeline {timeline}! Make sure to fill the whole horizon with valid phases!!" + Journal.log(self.__class__.__name__, + "check_horizon_full", + error, + LogType.EXCEP, + throw_when_excep = True) diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/horizon_imports.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/horizon_imports.py new file mode 100644 index 0000000000000000000000000000000000000000..991636dd18679c11050f97d3c47dd7df2d97eb5e --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/horizon_imports.py @@ -0,0 +1,18 @@ +# robot modeling and automatic differentiation +import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn +import casadi as cs + +# horizon stuff +import horizon.utils.kin_dyn as kd +from horizon.problem import Problem +from horizon.rhc.model_description import FullModelInverseDynamics +from horizon.rhc.taskInterface import TaskInterface +from horizon.rhc.tasks.interactionTask import VertexContact +from horizon.rhc.tasks.contactTask import ContactTask +from horizon.utils import trajectoryGenerator, utils + +# phase managing +import phase_manager.pymanager as pymanager +import phase_manager.pyphase as pyphase +import phase_manager.pytimeline as pytimeline + diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/horizon_imports_glob.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/horizon_imports_glob.py new file mode 100644 index 
0000000000000000000000000000000000000000..679e7d460ab545b0bd2f741904b5ac8af893f897 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/horizon_imports_glob.py @@ -0,0 +1,28 @@ +""" +Dynamically import all necessary Horizon and related dependencies. +This function is intended to be used within the import_child_lib method +of a class, enabling the parent process to load all required libraries. +""" +def import_horizon_global(): + # Global imports to make modules accessible in child processes + global casadi_kin_dyn, cs, kd, Problem, FullModelInverseDynamics + global TaskInterface, VertexContact, ContactTask, trajectoryGenerator, utils + global pymanager, pyphase, pytimeline + + # robot modeling and automatic differentiation + import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn + import casadi as cs + + # horizon stuff + import horizon.utils.kin_dyn as kd + from horizon.problem import Problem + from horizon.rhc.model_description import FullModelInverseDynamics + from horizon.rhc.taskInterface import TaskInterface + from horizon.rhc.tasks.interactionTask import VertexContact + from horizon.rhc.tasks.contactTask import ContactTask + from horizon.utils import trajectoryGenerator, utils + + # phase managing + import phase_manager.pymanager as pymanager + import phase_manager.pyphase as pyphase + import phase_manager.pytimeline as pytimeline diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/hybrid_quad_rhc.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/hybrid_quad_rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..951af04dd38e267986c7da5e1f71f3ad72c1a2a4 --- /dev/null +++ 
b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/hybrid_quad_rhc.py @@ -0,0 +1,1243 @@ +from mpc_hive.controllers.rhc import RHController +# from perf_sleep.pyperfsleep import PerfSleep +# from mpc_hive.utilities.cpu_utils.core_utils import get_memory_usage + +from aug_mpc.controllers.rhc.horizon_based.horizon_imports import * + +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs +from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +import numpy as np + +import os +# import shutil + +import time +from abc import ABC, abstractmethod + +from typing import Dict, List +import re + +class HybridQuadRhc(RHController): + + def __init__(self, + srdf_path: str, + urdf_path: str, + config_path: str, + robot_name: str, # used for shared memory namespaces + codegen_dir: str, + n_nodes:float = 25, + injection_node:int = 10, + dt: float = 0.02, + max_solver_iter = 1, # defaults to rt-iteration + open_loop: bool = True, + close_loop_all: bool = False, + dtype = np.float32, + verbose = False, + debug = False, + refs_in_hor_frame = True, + timeout_ms: int = 60000, + custom_opts: Dict = {}): + + self._refs_in_hor_frame = refs_in_hor_frame + + self._injection_node = injection_node + + self._open_loop = open_loop + self._close_loop_all = close_loop_all + + self._codegen_dir = codegen_dir + if not os.path.exists(self._codegen_dir): + os.makedirs(self._codegen_dir) + # else: + # # Directory already exists, delete it and recreate + # shutil.rmtree(self._codegen_dir) + # os.makedirs(self._codegen_dir) + + self.step_counter = 0 + self.sol_counter = 0 + + self.max_solver_iter = max_solver_iter + + self._timer_start = time.perf_counter() + self._prb_update_time = time.perf_counter() + self._phase_shift_time = time.perf_counter() + self._task_ref_update_time = 
time.perf_counter() + self._rti_time = time.perf_counter() + + self.robot_name = robot_name + + self.config_path = config_path + + self.urdf_path = urdf_path + # read urdf and srdf files + with open(self.urdf_path, 'r') as file: + self.urdf = file.read() + self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + + self._c_timelines = dict() + self._f_reg_timelines = dict() + + self._custom_opts={"replace_continuous_joints": False, + "use_force_feedback": False, + "lin_a_feedback": False, + "is_open_loop": self._open_loop, # fully open (just for db) + "fully_closed": False, # closed loop with full feedback (just for db) + "closed_partial": False, # closed loop with partial feedback + "adaptive_is": True, # closed loop with adaptation + "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase + "alpha_from_outside": False, # alpha set ext. from shared memory + "alpha_half": 1.0, + "only_vel_wheels": True, # whether wheels (if present) are just vel controlled + "use_jnt_v_feedback": False + } + + self._custom_opts.update(custom_opts) + + self._alpha_half=self._custom_opts["alpha_half"] + + if self._open_loop: + self._custom_opts["fully_closed"]=False + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + else: + self._custom_opts["is_open_loop"]=False + if self._custom_opts["fully_closed"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["closed_partial"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["fully_closed"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["adaptive_is"]: + self._custom_opts["closed_partial"]=False + self._custom_opts["fully_closed"]=False + + super().__init__(srdf_path=srdf_path, + n_nodes=n_nodes, + dt=dt, + namespace=self.robot_name, + dtype=dtype, + verbose=verbose, + debug=debug, + timeout_ms=timeout_ms) + + 
self._rhc_fpaths.append(self.config_path) + + def _config_override(self): + pass + + def _post_problem_init(self): + + self.rhc_costs={} + self.rhc_constr={} + + self._fail_idx_scale=0.0 + self._expl_idx_window_size=int(1*self._n_nodes) + self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size)) + self._expl_idx_counter=0 + self._expl_idx_buffer_counter=0 + + self._pred_node_idx=self._n_nodes-1 + + self._nq=self.nq() + self._nq_jnts=self._nq-7# assuming floating base + self._nv=self.nv() + self._nv_jnts=self._nv-6 + + self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype) + self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype) + self._alphas_twist_root=np.zeros((6, 1), dtype=self._dtype) + self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype) + self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype) + self._alphas_q_root[:, :]=1.0 # default to all open + self._alphas_q_jnts[:, :]=1.0 + self._alphas_twist_root[:, :]=1.0 + self._alphas_v_jnts[:, :]=1.0 + self._alphas_a[:, :]=1.0 + + def _init_problem(self, + fixed_jnt_patterns: List[str] = None, + wheels_patterns: List[str] = None, + foot_linkname: str = None, + flight_duration: int = 10, + post_flight_stance: int = 3, + step_height: float = 0.12, + keep_yaw_vert: bool = False, + yaw_vertical_weight: float = 2.0, + vertical_landing: bool = False, + vertical_land_weight: float = 1.0, + phase_force_reg: float = 1e-2, + vel_bounds_weight: float = 1.0): + + self._fixed_jnt_patterns=fixed_jnt_patterns + + self._config_override() + + Journal.log(self.__class__.__name__, + "_init_problem", + f" Will use horizon config file at {self.config_path}", + LogType.INFO, + throw_when_excep=True) + + self._vel_bounds_weight=vel_bounds_weight + self._phase_force_reg=phase_force_reg + self._yaw_vertical_weight=yaw_vertical_weight + self._vertical_land_weight=vertical_land_weight + # overrides parent + self._prb = Problem(self._n_intervals, + receding=True, + casadi_type=cs.SX) + 
self._prb.setDt(self._dt) + + if "replace_continuous_joints" in self._custom_opts: + # continous joints are parametrized in So2 + if self._custom_opts["replace_continuous_joints"]: + self.urdf = self.urdf.replace('continuous', 'revolute') + else: + self.urdf = self.urdf.replace('continuous', 'revolute') + + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + + self._init_robot_homer() + + # handle fixed joints + fixed_joint_map={} + if self._fixed_jnt_patterns is not None: + for jnt_name in self._get_robot_jnt_names(): + for fixed_jnt_pattern in self._fixed_jnt_patterns: + if fixed_jnt_pattern in jnt_name: + fixed_joint_map.update({f"{jnt_name}": + self._homer.get_homing_val(jnt_name=jnt_name)}) + break # do not search for other pattern matches + + if not len(fixed_joint_map)==0: # we need to recreate kin dyn and homers + Journal.log(self.__class__.__name__, + "_init_problem", + f"Will fix following joints: \n{str(fixed_joint_map)}", + LogType.INFO, + throw_when_excep=True) + # with the fixed joint map + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map) + # assign again controlled joints names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + # updated robot homer for controlled joints + self._init_robot_homer() + + # handle continuous joints (need to change homing and retrieve + # cont jnts indexes) and homing + self._continuous_joints=self._get_continuous_jnt_names() + # reduced + self._continuous_joints_idxs=[] + self._continuous_joints_idxs_cos=[] + self._continuous_joints_idxs_sin=[] + self._continuous_joints_idxs_red=[] + self._rev_joints_idxs=[] + self._rev_joints_idxs_red=[] + # qfull + self._continuous_joints_idxs_qfull=[] + self._continuous_joints_idxs_cos_qfull=[] + self._continuous_joints_idxs_sin_qfull=[] + self._continuous_joints_idxs_red_qfull=[] + 
self._rev_joints_idxs_qfull=[] + self._rev_joints_idxs_red_qfull=[] + jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints)) + jnt_names=self._get_robot_jnt_names() + for i in range(len(jnt_names)): + jnt=jnt_names[i] + index=self._get_jnt_id(jnt)# accounting for floating joint + homing_idx=index-7 # homing is only for actuated joints + homing_value=self._homer.get_homing_val(jnt) + if jnt in self._continuous_joints: + jnt_homing[homing_idx]=np.cos(homing_value).item() + jnt_homing[homing_idx+1]=np.sin(homing_value).item() + # just actuated joints + self._continuous_joints_idxs.append(homing_idx) # cos + self._continuous_joints_idxs.append(homing_idx+1) # sin + self._continuous_joints_idxs_cos.append(homing_idx) + self._continuous_joints_idxs_sin.append(homing_idx+1) + self._continuous_joints_idxs_red.append(i) + # q full + self._continuous_joints_idxs_qfull.append(index) # cos + self._continuous_joints_idxs_qfull.append(index+1) # sin + self._continuous_joints_idxs_cos_qfull.append(index) + self._continuous_joints_idxs_sin_qfull.append(index+1) + self._continuous_joints_idxs_red_qfull.append(i+7) + else: + jnt_homing[homing_idx]=homing_value + # just actuated joints + self._rev_joints_idxs.append(homing_idx) + self._rev_joints_idxs_red.append(i) + # q full + self._rev_joints_idxs_qfull.append(index) + self._rev_joints_idxs_red_qfull.append(i+7) + + self._jnts_q_reduced=None + if not len(self._continuous_joints)==0: + cont_joints=", ".join(self._continuous_joints) + + Journal.log(self.__class__.__name__, + "_init_problem", + f"The following continuous joints were found: \n{cont_joints}", + LogType.INFO, + throw_when_excep=True) + # preallocating data + self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype) + self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype) + self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + 
else: + self._custom_opts["replace_continuous_joints"]=True + Journal.log(self.__class__.__name__, + "_init_problem", + f"No continuous joints were found.", + LogType.INFO, + throw_when_excep=True) + + # retrieve wheels indexes (not considering continuous joints, + # ok just for v, eff, etc..) + self._wheel_patterns=wheels_patterns + self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns) + self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81] + + # we can create an init for the base + init = self._base_init.tolist() + jnt_homing + + if foot_linkname is not None: + FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height + ground_level = FK(q=init)['ee_pos'] + self._base_init[2] = -ground_level[2] # override init + + self._model = FullModelInverseDynamics(problem=self._prb, + kd=self._kin_dyn, + q_init=self._homer.get_homing_map(), + base_init=self._base_init) + + self._ti = TaskInterface(prb=self._prb, + model=self._model, + max_solver_iter=self.max_solver_iter, + debug = self._debug, + verbose = self._verbose, + codegen_workdir = self._codegen_dir) + self._ti.setTaskFromYaml(self.config_path) + + # setting initial base pos ref if exists + base_pos = self._ti.getTask('base_height') + if base_pos is not None: + base_pos.setRef(np.atleast_2d(self._base_init).T) + + self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes????? 
+ + self._gm = GaitManager(self._ti, + self._pm, + self._injection_node, + keep_yaw_vert=keep_yaw_vert, + yaw_vertical_weight=self._yaw_vertical_weight, + vertical_landing=vertical_landing, + landing_vert_weight=self._vertical_land_weight, + phase_force_reg=self._phase_force_reg, + custom_opts=self._custom_opts, + flight_duration=flight_duration, + post_flight_stance=post_flight_stance, + step_height=step_height, + dh=0.0) + + self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0) + self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0) + self._ti.model.q.setInitialGuess(self._ti.model.q0) + self._ti.model.v.setInitialGuess(self._ti.model.v0) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f=len(cforces) + for c in cforces: + c.setInitialGuess(np.array(self._f0)/n_contact_f) + + vel_lims = self._model.kd.velocityLimits() + import horizon.utils as utils + self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:])) + self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:])) + + self._meas_lin_a_par=None + # if self._custom_opts["lin_a_feedback"]: + # # acceleration feedback on first node + # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback", + # dim=3, nodes=0) + # base_lin_a_prb=self._prb.getInput().getVars()[0:3] + # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par, + # nodes=[0]) + + # if not self._open_loop: + # # we create a residual cost to be used as an attractor to the measured state on the first node + # # hard constraints injecting meas. states are pure EVIL! 
+ # prb_state=self._prb.getState() + # full_state=prb_state.getVars() + # state_dim=prb_state.getBounds()[0].shape[0] + # meas_state=self._prb.createParameter(name="measured_state", + # dim=state_dim, nodes=0) + # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state), + # nodes=[0]) + + self._ti.finalize() + self._ti.bootstrap() + + self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes + self._ti.load_initial_guess() + + self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we + # know n_dofs -> we assign it (by default = None) + + self.n_contacts = len(self._model.cmap.keys()) + + # remove variables bounds (before they were necessary to have a good + # quality bootstrap solution) + self._q_inf=np.zeros((self.nq(), 1)) + self._q_inf[:, :]=np.inf + self._v_inf=np.zeros((self.nv(), 1)) + self._v_inf[:, :]=np.inf + self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0) + self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0) + + # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb) + + def get_file_paths(self): + # can be overriden by child + paths = super().get_file_paths() + return paths + + def _get_quat_remap(self): + # overrides parent + return [1, 2, 3, 0] # mapping from robot quat. 
to Horizon's quaternion convention + + def _zmp(self, model): + + num = cs.SX([0, 0]) + den = cs.SX([0]) + pos_contact = dict() + force_val = dict() + + q = cs.SX.sym('q', model.nq) + v = cs.SX.sym('v', model.nv) + a = cs.SX.sym('a', model.nv) + + com = model.kd.centerOfMass()(q=q, v=v, a=a)['com'] + + n = cs.SX([0, 0, 1]) + for c in model.fmap.keys(): + pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos'] + force_val[c] = cs.SX.sym('force_val', 3) + num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n) + den += cs.dot(force_val[c], n) + + zmp = com[0:2] + (num / den) + input_list = [] + input_list.append(q) + input_list.append(v) + input_list.append(a) + + for elem in force_val.values(): + input_list.append(elem) + + f = cs.Function('zmp', input_list, [zmp]) + return f + + def _add_zmp(self): + + input_zmp = [] + + input_zmp.append(self._model.q) + input_zmp.append(self._model.v) + input_zmp.append(self._model.a) + + for f_var in self._model.fmap.values(): + input_zmp.append(f_var) + + c_mean = cs.SX([0, 0, 0]) + for c_name, f_var in self._model.fmap.items(): + fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos'] + c_mean += fk_c_pos + + c_mean /= len(self._model.cmap.keys()) + + zmp_nominal_weight = 10. 
+ zmp_fun = self._zmp(self._model)(*input_zmp) + + if 'wheel_joint_1' in self._model.kd.joint_names(): + zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2])) + + def _quaternion_multiply(self, + q1, q2): + x1, y1, z1, w1 = q1 + x2, y2, z2, w2 = q2 + w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 + x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 + y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 + z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 + return np.array([x, y, z, w]) + + def _get_continuous_jnt_names(self): + import xml.etree.ElementTree as ET + root = ET.fromstring(self.urdf) + continuous_joints = [] + for joint in root.findall('joint'): + joint_type = joint.get('type') + if joint_type == 'continuous': + joint_name = joint.get('name') + continuous_joints.append(joint_name) + return continuous_joints + + def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]): + jnt_names=self._get_robot_jnt_names() + wheels_idxs=[] + for i in range(len(jnt_names)): + jnt_name=jnt_names[i] + for wheel_pattern in wheel_patterns: + if wheel_pattern in jnt_name: + wheels_idxs.append(i) + break + return wheels_idxs + + def _get_jnt_id(self, jnt_name): + return self._kin_dyn.joint_iq(jnt_name) + + def _init_rhc_task_cmds(self): + + rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm, + robot_index_shm=self.controller_index, + robot_index_view=0, # when using optimize_mem the view if always of shape 1x... 
+ namespace=self.namespace, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + use_force_feedback=self._custom_opts["use_force_feedback"], + optimize_mem=True) + + rhc_refs.run() + + rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller) + rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap()) + + rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3], + q_ref=np.atleast_2d(self._base_init)[:, 3:7]) + + return rhc_refs + + def get_vertex_fnames_from_ti(self): + tasks=self._ti.task_list + contact_f_names=[] + for task in tasks: + if isinstance(task, ContactTask): + interaction_task=task.dynamics_tasks[0] + contact_f_names.append(interaction_task.vertex_frames[0]) + return contact_f_names + + def _get_contact_names(self): + # should get contact names from vertex frames + # list(self._ti.model.cmap.keys()) + return self.get_vertex_fnames_from_ti() + + def _get_robot_jnt_names(self): + + joints_names = self._kin_dyn.joint_names() + to_be_removed = ["universe", + "reference", + "world", + "floating", + "floating_base"] + for name in to_be_removed: + if name in joints_names: + joints_names.remove(name) + + return joints_names + + def _get_ndofs(self): + return len(self._model.joint_names) + + def nq(self): + return self._kin_dyn.nq() + + def nv(self): + return self._kin_dyn.nv() + + def _get_robot_mass(self): + + return self._kin_dyn.mass() + + def _get_root_full_q_from_sol(self, node_idx=1): + + root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype) + + np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) + np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full) + + return root_q_full + + def _get_full_q_from_sol(self, node_idx=1): + + return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype) + + def _get_root_twist_from_sol(self, node_idx=1): + # provided in world frame + twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1, 
6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3] + # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6] + return twist_base_local + + def _get_root_a_from_sol(self, node_idx=0): + # provided in world frame + a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3] + # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6] + return a_base_local + + def _get_jnt_q_from_sol(self, node_idx=0, + reduce: bool = True, + clamp: bool = True): + + full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype) + + np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place + np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place + + if self._custom_opts["replace_continuous_joints"] or (not reduce): + if clamp: + return np.fmod(full_jnts_q, 2*np.pi) + else: + return full_jnts_q + else: + cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2) + # copy rev joint vals + self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1) + # and continuous + self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1) + return self._jnts_q_reduced + + def _get_jnt_v_from_sol(self, node_idx=1): + + jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place + # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place + + return jnt_v_sol + + def 
_get_jnt_a_from_sol(self, node_idx=1): + + return self._get_a_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + + def _get_jnt_eff_from_sol(self, node_idx=0): + + efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx) + + # if self._custom_opts["only_vel_wheels"]: + + jnt_efforts=efforts_on_node[6:, 0] + + if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v: + jnt_efforts[self._wheels_idxs_v]=0.0 + + return jnt_efforts.reshape(1, + self._nv_jnts).astype(self._dtype) + + def _get_rhc_cost(self): + + return self._ti.solution["opt_cost"] + + def _get_rhc_constr_viol(self): + + return self._ti.solution["residual_norm"] + + def _get_rhc_nodes_cost(self): + + cost = self._ti.solver_rti.getCostValOnNodes() + return cost.reshape((1, -1)) + + def _get_rhc_nodes_constr_viol(self): + + constr_viol = self._ti.solver_rti.getConstrValOnNodes() + return constr_viol.reshape((1, -1)) + + def _get_rhc_niter_to_sol(self): + + return self._ti.solution["n_iter2sol"] + + def _set_ig(self): + + shift_num = -1 # shift data by one node + + x_opt = self._ti.solution['x_opt'] + u_opt = self._ti.solution['u_opt'] + + # building ig for state + xig = np.roll(x_opt, + shift_num, axis=1) # rolling state sol. + for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + xig[:, -1 - i] = x_opt[:, -1] + # building ig for inputs + uig = np.roll(u_opt, + shift_num, axis=1) # rolling state sol. 
+ for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + uig[:, -1 - i] = u_opt[:, -1] + + # assigning ig + self._prb.getState().setInitialGuess(xig) + self._prb.getInput().setInitialGuess(uig) + + return xig, uig + + def _update_open_loop(self): + + xig, _ = self._set_ig() + + q_state, v_state, a_state=self._set_is_open() + + # robot_state=xig[:, 0] + # # open loop update: + # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0 + # # is node 1 in the last opt solution) + + return q_state, v_state, a_state + + def _update_closed_loop(self): + + # set initial guess for controller + xig, _ = self._set_ig() + # set initial state + q_state=None + v_state=None + a_state=None + if self._custom_opts["adaptive_is"]: + # adaptive closed loop + q_state, v_state, a_state=self._set_is_adaptive() + elif self._custom_opts["fully_closed"]: + q_state, v_state, a_state=self._set_is_full() + elif self._custom_opts["closed_partial"]: + q_state, v_state, a_state=self._set_is_partial() + else: + Journal.log(self.__class__.__name__, + "_update_closed_loop", + "Neither adaptive_is, fully_closed, or closed_partial.", + LogType.EXCEP, + throw_when_excep = False) + q_state, v_state, a_state=self._set_is() + + return q_state, v_state, a_state + + def _set_is_open(self): + + # overriding states with rhc data + q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1) + + twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1) + v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_q_full_rhc=q[0:7] # root full q + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_twist_rhc=vel[0:6] # lin v. 
+ jnts_v_rhc=vel[6:] # jnts v + + self.rhc_refs.set_alpha(alpha=1.0) # fully open + + # close state on known quantities + root_q_full_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_twist_rhc.setBounds(lb=twist_root, + ub=twist_root, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((twist_root, v_jnts), + axis=0) + + return (q_state, v_state, None) + + def _set_is_full(self): + + # measurements + q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_full_q_rhc=q[0:7] # root p + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_full_q_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) # leaving lin v of the base free (estimated from constraints) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_partial(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", 
robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # overriding states with rhc data (-> all overridden state are open looop) + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc # position is always open loop + if not self._custom_opts["estimate_v_root"]: + v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1) + # override v jnts with the ones from controller + if not self._custom_opts["use_jnt_v_feedback"]: + v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # root_twist_from_rhc=self._get_root_twist_from_sol(node_idx=1) + # root_v_from_rhc=root_twist_from_rhc[:, 0:3].reshape(-1, 1) + # root_omega_from_rhc=root_twist_from_rhc[:, 3:6].reshape(-1, 1) + # jnt_q_from_rhc=self._get_jnt_q_from_sol(node_idx=1,reduce=False,clamp=False).reshape(-1, 1) + # jnt_v_from_rhc=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + 
q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root, + ub=q_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints) + else: # get it from controller + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_adaptive(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = 
self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + # getting prediction defects + root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # close state on known quantities, estimate some (e.g. lin vel) and + # open loop if thing start to explode + alpha_now=1.0 + delta=0.0 + if self._custom_opts["alpha_from_outside"]: + alpha_now=self.rhc_refs.get_alpha() + else: # "autotuned" alpha + if self._custom_opts["estimate_v_root"]: # we copmute delta based on jnt v (since we use meas.) 
+ delta=np.max(np.abs(jnt_v_delta)) + else: + delta=np.max(np.abs(omega_root_delta)) + # fail_idx=self._get_failure_index() + # fail_idx=self._get_explosion_idx()/self._fail_idx_thresh + alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0 + + bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1] + self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db + self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db + + self._alphas_q_root[:]=alpha_now # for now single alpha for everything + self._alphas_q_jnts[:]=alpha_now + self._alphas_twist_root[:]=alpha_now + self._alphas_v_jnts[:]=alpha_now + self._alphas_a[:]=alpha_now + if not self._custom_opts["estimate_v_root"]: + self._alphas_twist_root[0:3]=1.0 # open + self._alphas_v_jnts[:]=1.0 # open + + # position is always open loop + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc + + # expaning meas q if continuous joints + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + + # continous joints position is always open loop, but we need a delta vector of matching dimension + q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1) + + self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:] + + self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\ + np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.cos(q_jnts[self._continuous_joints_idxs_red, :]) + self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\ + np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.sin(q_jnts[self._continuous_joints_idxs_red, :]) + + q_jnts=self._jnts_q_expanded.reshape(-1,1) # overriting with expanded jnts + jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overriting with expanded jnts + + self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop + self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop + + # self._alphas_v_jnts[self._continuous_joints_idxs_red, :]=0.0 # open loop + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta, + ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta, + ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0) + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) + else: + root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, + ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0) + 
root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, + ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta, + ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0) + if self._custom_opts["lin_a_feedback"]: + lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _solve(self): + + if self._debug: + return self._db_solve() + else: + return self._min_solve() + + def _min_solve(self): + # minimal solve version -> no debug + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop() # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop() # updates the TO ig and + # initial conditions using robot measurements + + self._pm.shift() # shifts phases of one dt + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + try: + converged = self._ti.rti() # solves the problem + self.sol_counter = self.sol_counter + 1 + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + return False + + def _db_solve(self): + + self._timer_start = time.perf_counter() + + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop() # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop() # updates the TO ig and + # initial conditions using robot measurements + + self._prb_update_time = time.perf_counter() + self._pm.shift() # shifts phases of one dt + self._phase_shift_time = time.perf_counter() + + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + self._task_ref_update_time = time.perf_counter() + + try: + converged = self._ti.rti() # solves the problem + self._rti_time = time.perf_counter() + self.sol_counter = self.sol_counter + 1 + self._update_db_data() + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + if self._verbose: + exception = f"Rti() for controller {self.controller_index} failed" + \ + f" with exception{type(e).__name__}" + Journal.log(self.__class__.__name__, + "solve", + exception, + LogType.EXCEP, + throw_when_excep = False) + self._update_db_data() + return False + + def _get_fail_idx(self): + + self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx() + self._expl_idx_buffer_counter+=1 + self._expl_idx_counter+=1 + if self._expl_idx_counter%self._expl_idx_window_size==0: + self._expl_idx_buffer_counter=0 # restart from 0 + + running_avrg=np.mean(self._explosion_idx_buffer).item() + + return running_avrg + + def _get_explosion_idx(self): + explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale + return explosion_index + + def _update_db_data(self): + + self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start + self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time + self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time - self._phase_shift_time + self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time + 
self.rhc_costs.update(self._ti.solver_rti.getCostsValues()) + self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues()) + + def _reset(self): + + # reset task interface (ig, solvers, etc..) + + # phase manager and sets bootstap as solution + self._gm.reset() + self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution + self._expl_idx_counter=0.0 + self._expl_idx_buffer_counter=0 + + def _get_cost_data(self): + + cost_dict = self._ti.solver_rti.getCostsValues() + cost_names = list(cost_dict.keys()) + cost_dims = [1] * len(cost_names) # costs are always scalar + return cost_names, cost_dims + + def _get_constr_data(self): + + constr_dict = self._ti.solver_rti.getConstraintsValues() + constr_names = list(constr_dict.keys()) + constr_dims = [-1] * len(constr_names) + i = 0 + for constr in constr_dict: + constr_val = constr_dict[constr] + constr_shape = constr_val.shape + constr_dims[i] = constr_shape[0] + i+=1 + return constr_names, constr_dims + + def _get_q_from_sol(self): + full_q=self._ti.solution['q'].astype(self._dtype) + if self._custom_opts["replace_continuous_joints"]: + return full_q + else: + cont_jnts=full_q[self._continuous_joints_idxs_qfull, :] + cos=cont_jnts[::2, :] + sin=cont_jnts[1::2, :] + # copy root + self._full_q_reduced[0:7, :]=full_q[0:7, :] + # copy rev joint vals + self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :] + # and continuous + angle=np.arctan2(sin, cos) + self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle + return self._full_q_reduced + + def _get_v_from_sol(self): + return self._ti.solution['v'].astype(self._dtype) + + def _get_a_from_sol(self): + return self._ti.solution['a'].astype(self._dtype) + + def _get_a_dot_from_sol(self): + return None + + def _get_f_from_sol(self): + # to be overridden by child class + contact_names =self._get_contacts() # we use controller-side names + try: + data=[] + for key in contact_names: + 
contact_f=self._ti.solution["f_" + key].astype(self._dtype) + np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False) + np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f) + data.append(contact_f) + return np.concatenate(data, axis=0) + except: + return None + + def _get_f_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_eff_from_sol(self): + # to be overridden by child class + return None + + def _get_cost_from_sol(self, + cost_name: str): + return self.rhc_costs[cost_name] + + def _get_constr_from_sol(self, + constr_name: str): + return self.rhc_constr[constr_name] \ No newline at end of file diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/hybrid_quad_rhc_refs.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/hybrid_quad_rhc_refs.py new file mode 100644 index 0000000000000000000000000000000000000000..1c751aa8a6e8d32c028f94d4224123863866a933 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/other/horizon_based/hybrid_quad_rhc_refs.py @@ -0,0 +1,381 @@ +from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager +from aug_mpc.controllers.rhc.horizon_based.utils.math_utils import hor2w_frame + +from mpc_hive.utilities.shared_data.rhc_data import RhcRefs + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal + +from typing import Union + +import numpy as np + +class HybridQuadRhcRefs(RhcRefs): + + def __init__(self, + gait_manager: GaitManager, + robot_index_shm: int, + robot_index_view: int, + namespace: str, # namespace used for shared mem + verbose: bool = True, + vlevel: bool = VLevel.V2, + safe: bool = True, + use_force_feedback: bool = False, + optimize_mem: bool = False): + + 
self.robot_index = robot_index_shm + self.robot_index_view = robot_index_view + self.robot_index_np_view = np.array(self.robot_index_view) + + self._step_idx = 0 + self._print_frequency = 100 + + self._verbose = verbose + + self._use_force_feedback=use_force_feedback + + if optimize_mem: + super().__init__( + is_server=False, + with_gpu_mirror=False, + namespace=namespace, + safe=safe, + verbose=verbose, + vlevel=vlevel, + optimize_mem=optimize_mem, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + else: + super().__init__( + is_server=False, + with_gpu_mirror=False, + namespace=namespace, + safe=safe, + verbose=verbose, + vlevel=vlevel) + + if not isinstance(gait_manager, GaitManager): + exception = f"Provided gait_manager argument should be of GaitManager type!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + + self.gait_manager = gait_manager + + self.timeline_names = self.gait_manager.timeline_names + + # task interfaces from horizon for setting commands to rhc + self._get_tasks() + + self._p_ref_default=np.zeros((1, 3)) + self._q_ref_default=np.zeros((1, 4)) + self._q_ref_default[0, 0]=1 + + def _get_tasks(self): + # can be overridden by child + # cartesian tasks are in LOCAL_WORLD_ALIGNED (frame centered at distal link, oriented as WORLD) + self.base_lin_velxy = self.gait_manager.task_interface.getTask('base_lin_velxy') + self.base_lin_velz = self.gait_manager.task_interface.getTask('base_lin_velz') + self.base_omega = self.gait_manager.task_interface.getTask('base_omega') + self.base_height = self.gait_manager.task_interface.getTask('base_height') + + def run(self): + + super().run() + if not (self.robot_index < self.rob_refs.n_robots()): + exception = f"Provided \(0-based\) robot index {self.robot_index} exceeds number of " + \ + " available robots {self.rob_refs.n_robots()}." 
+ Journal.log(self.__class__.__name__, + "run", + exception, + LogType.EXCEP, + throw_when_excep = True) + contact_names = list(self.gait_manager.task_interface.model.cmap.keys()) + if not (self.n_contacts() == len(contact_names)): + exception = f"N of contacts within problem {len(contact_names)} does not match n of contacts {self.n_contacts()}" + Journal.log(self.__class__.__name__, + "run", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # set some defaults from gait manager + for i in range(self.n_contacts()): + self.flight_settings_req.set(data=self.gait_manager.get_flight_duration(contact_name=contact_names[i]), + data_type="len_remain", + robot_idxs=self.robot_index_np_view, + contact_idx=i) + self.flight_settings_req.set(data=self.gait_manager.get_step_apexdh(contact_name=contact_names[i]), + data_type="apex_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i) + self.flight_settings_req.set(data=self.gait_manager.get_step_enddh(contact_name=contact_names[i]), + data_type="end_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i) + + self.flight_settings_req.synch_retry(row_index=self.robot_index, + col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, + n_cols=self.flight_settings_req.n_cols, + read=False) + + def step(self, qstate: np.ndarray = None, + vstate: np.ndarray = None, + force_norm: np.ndarray = None): + + if self.is_running(): + + # updates robot refs from shared mem + self.rob_refs.synch_from_shared_mem(robot_idx=self.robot_index, robot_idx_view=self.robot_index_view) + self.phase_id.synch_all(read=True, retry=True, + row_index=self.robot_index, + row_index_view=self.robot_index_view) + self.contact_flags.synch_all(read=True, retry=True, + row_index=self.robot_index, + row_index_view=self.robot_index_view) + self.flight_settings_req.synch_all(read=True, retry=True, + row_index=self.robot_index, + row_index_view=self.robot_index_view) + self._set_contact_phases(q_full=qstate) + + # updated internal 
references with latest available ones + q_base=qstate[3:7,0:1] # quaternion + self._apply_refs_to_tasks(q_base=q_base) + + # if self._use_force_feedback: + # self._set_force_feedback(force_norm=force_norm) + + self._step_idx +=1 + + else: + exception = f"{self.__class__.__name__} is not running" + Journal.log(self.__class__.__name__, + "step", + exception, + LogType.EXCEP, + throw_when_excep = True) + + def _set_contact_phases(self, + q_full: np.ndarray): + + # phase_id = self.phase_id.read_retry(row_index=self.robot_index, + # col_index=0)[0] + + contact_flags_refs = self.contact_flags.get_numpy_mirror()[self.robot_index_np_view, :] + target_n_limbs_in_contact=np.sum(contact_flags_refs).item() + if target_n_limbs_in_contact==0: + target_n_limbs_in_contact=4 + + is_contact = contact_flags_refs.flatten().tolist() + n_contacts=len(is_contact) + + for i in range(n_contacts): # loop through contact timelines + timeline_name = self.timeline_names[i] + + self.gait_manager.set_f_reg(contact_name=timeline_name, + scale=target_n_limbs_in_contact) + + if is_contact[i]==False: # release contact + + # flight parameters requests are set only when inserting a flight phase + len_req_now=int(self.flight_settings_req.get(data_type="len_remain", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item()) + apex_req_now=self.flight_settings_req.get(data_type="apex_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + end_req_now=self.flight_settings_req.get(data_type="end_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + landing_dx_req_now=self.flight_settings_req.get(data_type="land_dx", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + landing_dy_req_now=self.flight_settings_req.get(data_type="land_dy", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + + # set flight phase properties depending on last value on shared memory + self.gait_manager.set_flight_duration(contact_name=timeline_name, + val=len_req_now) 
+ self.gait_manager.set_step_apexdh(contact_name=timeline_name, + val=apex_req_now) + self.gait_manager.set_step_enddh(contact_name=timeline_name, + val=end_req_now) + self.gait_manager.set_step_landing_dx(contact_name=timeline_name, + val=landing_dx_req_now) + self.gait_manager.set_step_landing_dy(contact_name=timeline_name, + val=landing_dy_req_now) + # insert flight phase over the horizon + self.gait_manager.add_flight(contact_name=timeline_name, + robot_q=q_full) + + else: # contact phase + self.gait_manager.add_stand(contact_name=timeline_name) + + at_least_one_flight=self.gait_manager.update_flight_info(timeline_name) + # flight_info=self.gait_manager.get_flight_info(timeline_name) + + self.gait_manager.check_horizon_full(timeline_name=timeline_name) + + # write flight info to shared mem for all contacts in one shot (we follow same order as in flight_info shm) + all_flight_info=self.gait_manager.get_flight_info_all() + flight_info_shared=self.flight_info.get_numpy_mirror() + flight_info_shared[self.robot_index_np_view, :]=all_flight_info + self.flight_info.synch_retry(row_index=self.robot_index, + col_index=0, + row_index_view=self.robot_index_np_view, + n_rows=1, n_cols=self.flight_info.n_cols, + read=False) + + self.gait_manager.update() + + def _apply_refs_to_tasks(self, q_base = None): + # overrides parent + if q_base is not None: # rhc refs are assumed to be specified in the so called "horizontal" + # frame, i.e. 
a vertical frame, with the x axis aligned with the projection of the base x axis + # onto the plane + root_pose = self.rob_refs.root_state.get(data_type = "q_full", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) # this should also be + # rotated into the horizontal frame (however, for now only the z componet is used, so it's ok) + + root_twist_ref = self.rob_refs.root_state.get(data_type="twist", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) + + root_twist_ref_h = root_twist_ref.copy() + + hor2w_frame(root_twist_ref, q_base, root_twist_ref_h) # horizon works in local world aligned frame + + if self.base_lin_velxy is not None: + self.base_lin_velxy.setRef(root_twist_ref_h[0:2, :]) + if self.base_omega is not None: + self.base_omega.setRef(root_twist_ref_h[3:, :]) + if self.base_lin_velz is not None: + self.base_lin_velz.setRef(root_twist_ref_h[2:3, :]) + if self.base_height is not None: + self.base_height.setRef(root_pose) + else: + root_pose = self.rob_refs.root_state.get(data_type = "q_full", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) + root_twist_ref = self.rob_refs.root_state.get(data_type="twist", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) + + if self.base_lin_velxy is not None: + self.base_lin_velxy.setRef(root_twist_ref[0:2, :]) + if self.base_omega is not None: + self.base_omega.setRef(root_twist_ref[3:, :]) + if self.base_lin_velz is not None: + self.base_lin_velz.setRef(root_twist_ref[2:3, :]) + if self.base_height is not None: + self.base_height.setRef(root_pose) + + # def _set_force_feedback(self, + # force_norm: np.ndarray = None): + + # is_contact=force_norm>1.0 + + # for i in range(len(is_contact)): + # timeline_name = self._timeline_names[i] + # self.gait_manager.set_force_feedback(timeline_name=timeline_name, + # force_norm=force_norm[i]) + + # if not is_contact[i]: + + + def set_default_refs(self, + p_ref: np.ndarray, + q_ref: np.ndarray): + + self._p_ref_default[:, :]=p_ref + self._q_ref_default[:, :]=q_ref + + 
def set_alpha(self, alpha:float): + # set provided value + alpha_shared=self.alpha.get_numpy_mirror() + alpha_shared[self.robot_index_np_view, :] = alpha + self.alpha.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.alpha.n_cols, + read=False) + + def get_alpha(self): + self.alpha.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.alpha.n_cols, + read=True) + alpha=self.alpha.get_numpy_mirror()[self.robot_index_np_view, :].item() + return alpha + + def set_bound_relax(self, bound_relax:float): + # set provided value + bound_rel_shared=self.bound_rel.get_numpy_mirror() + bound_rel_shared[self.robot_index_np_view, :] = bound_relax + self.bound_rel.synch_retry(row_index=self.robot_index, col_index=0, n_rows=1, + row_index_view=self.robot_index_view, + n_cols=self.alpha.n_cols, + read=False) + + def reset(self): + + if self.is_running(): + + # resets shared mem + contact_flags_current = self.contact_flags.get_numpy_mirror() + phase_id_current = self.phase_id.get_numpy_mirror() + contact_flags_current[self.robot_index_np_view, :] = np.full((1, self.n_contacts()), dtype=np.bool_, fill_value=True) + phase_id_current[self.robot_index_np_view, :] = -1 # defaults to custom phase id + + contact_pos_current=self.rob_refs.contact_pos.get_numpy_mirror() + contact_pos_current[self.robot_index_np_view, :] = 0.0 + + flight_info_current=self.flight_info.get_numpy_mirror() + flight_info_current[self.robot_index_np_view, :] = 0.0 + + alpha=self.alpha.get_numpy_mirror() + alpha[self.robot_index_np_view, :] = 0.0 + + self.rob_refs.root_state.set(data_type="p", data=self._p_ref_default, robot_idxs=self.robot_index_np_view) + self.rob_refs.root_state.set(data_type="q", data=self._q_ref_default, robot_idxs=self.robot_index_np_view) + self.rob_refs.root_state.set(data_type="twist", data=np.zeros((1, 6)), robot_idxs=self.robot_index_np_view) + + 
self.contact_flags.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.contact_flags.n_cols, + read=False) + self.phase_id.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.phase_id.n_cols, + read=False) + self.rob_refs.root_state.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.rob_refs.root_state.n_cols, + read=False) + + self.rob_refs.contact_pos.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.rob_refs.contact_pos.n_cols, + read=False) + + self.flight_info.synch_retry(row_index=self.robot_index, + col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.flight_info.n_cols, + read=False) + + # should also empty the timeline for stepping phases + self._step_idx = 0 + + else: + exception = f"Cannot call reset() since run() was not called!" 
# ---------------------------------------------------------------------------
# NOTE(review): this chunk of the diff dump contains (a) the tail of
# HybridQuadRhcRefs.reset() continued from the previous chunk, (b) the full
# math_utils.py module, and (c) the head of hybrid_quad_client.py continued
# in the next chunk. Fragments belonging to definitions cut at the chunk
# edges are preserved below as comments; only the fully visible math_utils
# module is reconstructed as runnable code.
#
# --- tail of hybrid_quad_rhc_refs.py :: HybridQuadRhcRefs.reset() ---
#             Journal.log(self.__class__.__name__,
#                         "reset",
#                         exception,
#                         LogType.EXCEP,
#                         throw_when_excep = True)
#
# --- bundles/.../other/horizon_based/utils/__init__.py (empty file) ---
# --- bundles/.../other/horizon_based/utils/math_utils.py ---

import numpy as np


def _heading_projection(q_b: np.ndarray):
    """
    Unit vector (in world xy coordinates) of the base heading: the projection
    of the base x-axis onto the world horizontal plane.

    q_b stacks quaternion components row-wise as (x, y, z, w), one column per
    sample (w is read from row index 3, matching the callers in this module).
    Returns the pair (x_proj_x, x_proj_y).

    NOTE(review): divides by the projection norm, which is zero when the base
    x-axis is vertical — callers implicitly assume this never happens.
    """
    q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :]
    # xy components of the first column of the base->world rotation matrix
    R_11 = 1 - 2 * (q_j ** 2 + q_k ** 2)
    R_21 = 2 * (q_i * q_j + q_k * q_w)
    norm = np.sqrt(R_11 ** 2 + R_21 ** 2)
    return R_11 / norm, R_21 / norm


def _rotation_matrix_rows(q_b: np.ndarray):
    """
    Rows of the base->world rotation matrix built from quaternion q_b
    (components stacked row-wise as (x, y, z, w), one column per sample).
    Shared by base2world_frame / world2base_frame to avoid duplicating the
    element-wise quaternion-to-matrix expansion.
    """
    q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :]
    R_00 = 1 - 2 * (q_j ** 2 + q_k ** 2)
    R_01 = 2 * (q_i * q_j - q_k * q_w)
    R_02 = 2 * (q_i * q_k + q_j * q_w)
    R_10 = 2 * (q_i * q_j + q_k * q_w)
    R_11 = 1 - 2 * (q_i ** 2 + q_k ** 2)
    R_12 = 2 * (q_j * q_k - q_i * q_w)
    R_20 = 2 * (q_i * q_k - q_j * q_w)
    R_21 = 2 * (q_j * q_k + q_i * q_w)
    R_22 = 1 - 2 * (q_i ** 2 + q_j ** 2)
    return (R_00, R_01, R_02), (R_10, R_11, R_12), (R_20, R_21, R_22)


def w2hor_frame(t_w: np.ndarray,
                q_b: np.ndarray,
                t_out: np.ndarray):
    """
    Transforms a twist vector expressed in the WORLD frame to a "horizontal"
    frame (z aligned with the world z, x aligned with the projection of the
    base x-axis — described by quaternion q_b — onto the horizontal plane).
    Useful for specifying locomotion references in a "game"-like fashion.

    t_out (6 x n) will hold the result; t_w and q_b are read column-wise.
    """
    x_proj_x, x_proj_y = _heading_projection(q_b)
    y_proj_x = -x_proj_y
    y_proj_y = x_proj_x
    # linear part
    t_out[0, :] = t_w[0, :] * x_proj_x + t_w[1, :] * x_proj_y
    t_out[1, :] = t_w[0, :] * y_proj_x + t_w[1, :] * y_proj_y
    t_out[2, :] = t_w[2, :]  # z-component remains the same
    # angular part
    t_out[3, :] = t_w[3, :] * x_proj_x + t_w[4, :] * x_proj_y
    t_out[4, :] = t_w[3, :] * y_proj_x + t_w[4, :] * y_proj_y
    t_out[5, :] = t_w[5, :]  # z-component remains the same


def hor2w_frame(t_h: np.ndarray,
                q_b: np.ndarray,
                t_out: np.ndarray):
    """
    Transforms a twist vector expressed in the "horizontal" frame (see
    w2hor_frame) back to the WORLD frame. Inverse of w2hor_frame.

    t_out (6 x n) will hold the result.
    """
    x_proj_x, x_proj_y = _heading_projection(q_b)
    y_proj_x = -x_proj_y
    y_proj_y = x_proj_x
    # linear part (transpose of the w2hor projection)
    t_out[0, :] = t_h[0, :] * x_proj_x + t_h[1, :] * y_proj_x
    t_out[1, :] = t_h[0, :] * x_proj_y + t_h[1, :] * y_proj_y
    t_out[2, :] = t_h[2, :]  # z-component remains the same
    # angular part
    t_out[3, :] = t_h[3, :] * x_proj_x + t_h[4, :] * y_proj_x
    t_out[4, :] = t_h[3, :] * x_proj_y + t_h[4, :] * y_proj_y
    t_out[5, :] = t_h[5, :]  # z-component remains the same


def base2world_frame(t_b: np.ndarray,
                     q_b: np.ndarray,
                     t_out: np.ndarray):
    """
    Transforms a twist vector expressed in the base frame to the WORLD frame,
    using quaternion q_b (orientation of the base w.r.t. the world).
    The result is written into t_out (6 x n).
    """
    (R_00, R_01, R_02), (R_10, R_11, R_12), (R_20, R_21, R_22) = \
        _rotation_matrix_rows(q_b)
    # rotate linear and angular parts with R (base -> world)
    t_out[0, :] = t_b[0, :] * R_00 + t_b[1, :] * R_01 + t_b[2, :] * R_02
    t_out[1, :] = t_b[0, :] * R_10 + t_b[1, :] * R_11 + t_b[2, :] * R_12
    t_out[2, :] = t_b[0, :] * R_20 + t_b[1, :] * R_21 + t_b[2, :] * R_22
    t_out[3, :] = t_b[3, :] * R_00 + t_b[4, :] * R_01 + t_b[5, :] * R_02
    t_out[4, :] = t_b[3, :] * R_10 + t_b[4, :] * R_11 + t_b[5, :] * R_12
    t_out[5, :] = t_b[3, :] * R_20 + t_b[4, :] * R_21 + t_b[5, :] * R_22


def world2base_frame(t_w: np.ndarray,
                     q_b: np.ndarray,
                     t_out: np.ndarray):
    """
    Transforms a twist vector expressed in the WORLD frame to the base frame,
    using quaternion q_b (orientation of the base w.r.t. the world).
    The result is written into t_out (6 x n).
    """
    (R_00, R_01, R_02), (R_10, R_11, R_12), (R_20, R_21, R_22) = \
        _rotation_matrix_rows(q_b)
    # rotate with the transpose of R (world -> base)
    t_out[0, :] = t_w[0, :] * R_00 + t_w[1, :] * R_10 + t_w[2, :] * R_20
    t_out[1, :] = t_w[0, :] * R_01 + t_w[1, :] * R_11 + t_w[2, :] * R_21
    t_out[2, :] = t_w[0, :] * R_02 + t_w[1, :] * R_12 + t_w[2, :] * R_22
    t_out[3, :] = t_w[3, :] * R_00 + t_w[4, :] * R_10 + t_w[5, :] * R_20
    t_out[4, :] = t_w[3, :] * R_01 + t_w[4, :] * R_11 + t_w[5, :] * R_21
    t_out[5, :] = t_w[3, :] * R_02 + t_w[4, :] * R_12 + t_w[5, :] * R_22


if __name__ == "__main__":

    # self-test: every pair of transforms must round-trip to the input
    n_envs = 5000
    t_b = np.random.rand(6, n_envs)

    q_b = np.random.rand(4, n_envs)
    q_b_norm = q_b / np.linalg.norm(q_b, axis=0, keepdims=True)

    # base -> world -> base
    t_w = np.zeros_like(t_b)
    t_b_recovered = np.zeros_like(t_b)
    base2world_frame(t_b, q_b_norm, t_w)
    world2base_frame(t_w, q_b_norm, t_b_recovered)
    assert np.allclose(t_b, t_b_recovered, atol=1e-6), \
        "Test failed: base->world->base round trip does not match input"
    print("base->world->base round trip test passed")

    # world -> base -> world
    t_b2 = np.zeros_like(t_b)
    t_w_recovered = np.zeros_like(t_b)
    world2base_frame(t_b, q_b_norm, t_b2)
    base2world_frame(t_b2, q_b_norm, t_w_recovered)
    assert np.allclose(t_b, t_w_recovered, atol=1e-6), \
        "Test failed: world->base->world round trip does not match input"
    print("world->base->world round trip test passed")

    # world -> horizontal -> world
    v_h = np.zeros_like(t_b)
    v_recovered = np.zeros_like(t_b)
    w2hor_frame(t_b, q_b_norm, v_h)
    hor2w_frame(v_h, q_b_norm, v_recovered)
    assert np.allclose(t_b, v_recovered, atol=1e-6), \
        "Test failed: world->horizontal->world round trip does not match input"
    print("world->horizontal->world round trip test passed")

    # horizontal -> world -> horizontal
    t_w2 = np.zeros_like(t_b)
    v_h_recovered = np.zeros_like(t_b)
    hor2w_frame(t_b, q_b_norm, t_w2)
    w2hor_frame(t_w2, q_b_norm, v_h_recovered)
    assert np.allclose(t_b, v_h_recovered, atol=1e-6), \
        "Test failed: horizontal->world->horizontal round trip does not match input"
    print("horizontal->world->horizontal round trip test passed")

# --- bundles/.../other/hybrid_quad_client.py (continues in next chunk) ---
# from aug_mpc.controllers.rhc.augmpc_cluster_client import AugMpcClusterClient
# from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc
# from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds_horizon
# from aug_mpc.utils.sys_utils import PathsGetter
# from typing import List, Dict
#
# class HybridQuadrupedClusterClient(AugMpcClusterClient):
#
#     def _import_aux_libs(self):
#         super()._import_aux_libs()
#         # Import Horizon and related dependencies as global libs
#         from aug_mpc.controllers.rhc.horizon_based.horizon_imports_glob import import_horizon_global
#         import_horizon_global()
#
#     def __init__(self,
#                  namespace: str,
#                  urdf_xacro_path: str,
#                  srdf_xacro_path: str,
cluster_size: int, + set_affinity: bool = False, + use_mp_fork: bool = False, + isolated_cores_only: bool = False, + core_ids_override_list: List[int] = None, + verbose: bool = False, + debug: bool = False, + open_loop: bool = True, + base_dump_dir: str = "/tmp", + timeout_ms: int = 60000, + codegen_override: str = None, + custom_opts: Dict = {}): + + self._open_loop = open_loop + + self._paths = PathsGetter() + + self._codegen_dir_name = namespace + + self._timeout_ms = timeout_ms + + super().__init__(namespace = namespace, + urdf_xacro_path = urdf_xacro_path, + srdf_xacro_path = srdf_xacro_path, + cluster_size=cluster_size, + set_affinity = set_affinity, + use_mp_fork = use_mp_fork, + isolated_cores_only = isolated_cores_only, + core_ids_override_list = core_ids_override_list, + verbose = verbose, + debug = debug, + base_dump_dir=base_dump_dir, + codegen_override=codegen_override, + custom_opts=custom_opts) + + self._n_nodes = 31 if not ("n_nodes" in self._custom_opts) else self._custom_opts["n_nodes"] + self._dt = 0.05 if not ("cluster_dt" in self._custom_opts) else self._custom_opts["cluster_dt"] + + def _xrdf_cmds(self): + parts = self._urdf_path.split('/') + urdf_descr_root_path = '/'.join(parts[:-2]) + cmds = get_xrdf_cmds_horizon(urdf_descr_root_path=urdf_descr_root_path) + return cmds + + def _process_codegen_dir(self,idx:int): + + codegen_dir = self.codegen_dir() + f"/{self._codegen_dir_name}Rhc{idx}" + codegen_dir_ovveride = self.codegen_dir_override() + if not (codegen_dir_ovveride=="" or \ + codegen_dir_ovveride=="none" or \ + codegen_dir_ovveride=="None" or \ + (codegen_dir_ovveride is None)): # if overrde was provided + codegen_dir = f"{codegen_dir_ovveride}{idx}"# override + + return codegen_dir + + def _generate_controller(self, + idx: int): + + codegen_dir=self._process_codegen_dir(idx=idx) + + controller = HybridQuadRhc( + urdf_path=self._urdf_path, + srdf_path=self._srdf_path, + config_path = self._paths.CONFIGPATH, + robot_name=self._namespace, 
+ codegen_dir=codegen_dir, + n_nodes=self._n_nodes, + dt=self._dt, + max_solver_iter = 1, # rti + open_loop = self._open_loop, + verbose = self._verbose, + debug = self._debug) + + return controller \ No newline at end of file diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rhc.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..d456eb62a35fa08dc97a2e3e08f682828739bed9 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/rhc.py @@ -0,0 +1,1226 @@ +# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com) +# +# This file is part of MPCHive and distributed under the General Public License version 2 license. +# +# MPCHive is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# MPCHive is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with MPCHive. If not, see . 
+# +from abc import ABC, abstractmethod +# from perf_sleep.pyperfsleep import PerfSleep +# from mpc_hive.utilities.cpu_utils.core_utils import get_memory_usage + +import time + +import numpy as np + +from mpc_hive.utilities.shared_data.rhc_data import RobotState +from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred, RhcPredDelta +from mpc_hive.utilities.shared_data.rhc_data import RhcStatus +from mpc_hive.utilities.shared_data.rhc_data import RhcInternal +from mpc_hive.utilities.shared_data.cluster_profiling import RhcProfiling +from mpc_hive.utilities.remote_triggering import RemoteTriggererClnt + +from mpc_hive.utilities.homing import RobotHomer + +from mpc_hive.utilities.math_utils import world2base_frame + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType +from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper +from EigenIPC.PyEigenIPC import dtype + +from typing import List +# from typing import TypeVar, Union + +import signal +import os +import inspect + +class RHController(ABC): + + def __init__(self, + srdf_path: str, + n_nodes: int, + dt: float, + namespace: str, # shared mem namespace + dtype = np.float32, + verbose = False, + debug = False, + timeout_ms: int = 60000, + allow_less_jnts: bool = True): + + signal.signal(signal.SIGINT, self._handle_sigint) + + self._allow_less_jnts = allow_less_jnts # whether to allow less joints in rhc controller than the ones on the robot + # (e.g. some joints might not be desirable for control purposes) + + self.namespace = namespace + self._dtype = dtype + self._verbose = verbose + self._debug = debug + + # if not self._debug: + np.seterr(over='ignore') # ignoring overflows + + self._n_nodes = n_nodes + self._dt = dt + self._n_intervals = self._n_nodes - 1 + self._t_horizon = self._n_intervals * dt + self._set_rhc_pred_idx() # prection is by default written on last node + self._set_rhc_cmds_idx() # default to idx 2 (i.e. 
cmds to get to third node) + self.controller_index = None # will be assigned upon registration to a cluster + self.controller_index_np = None + self._robot_mass=1.0 + + self.srdf_path = srdf_path # using for parsing robot homing + + self._registered = False + self._closed = False + + self._allow_triggering_when_failed = True + + self._profiling_data_dict = {} + self._profiling_data_dict["full_solve_dt"] = np.nan + self._profiling_data_dict["rti_solve_dt"] = np.nan + self._profiling_data_dict["problem_update_dt"] = np.nan + self._profiling_data_dict["phases_shift_dt"] = np.nan + self._profiling_data_dict["task_ref_update"] = np.nan + + self.n_dofs = None + self.n_contacts = None + + # shared mem + self.robot_state = None + self.rhc_status = None + self.rhc_internal = None + self.cluster_stats = None + self.robot_cmds = None + self.robot_pred = None + self.rhc_pred_delta = None + self.rhc_refs = None + self._remote_triggerer = None + self._remote_triggerer_timeout = timeout_ms # [ms] + + # remote termination + self._remote_term = None + self._term_req_received = False + + # jnt names + self._env_side_jnt_names = [] + self._controller_side_jnt_names = [] + self._got_jnt_names_from_controllers = False + + # data maps + self._to_controller = [] + self._quat_remap = [0, 1, 2, 3] # defaults to no remap (to be overridden) + + self._got_contact_names = False + + self._received_trigger = False # used for proper termination + + self._n_resets = 0 + self._n_fails = 0 + self._fail_idx_thresh = 5e3 + self._contact_f_thresh = 1e5 + + self._failed = False + + self._start_time = time.perf_counter() # used for profiling when in debug mode + + self._homer = None # robot homing manager + self._homer_env = None # used for setting homing to jnts not contained in rhc prb + + self._class_name_base = f"{self.__class__.__name__}" + + self._contact_force_base_loc_aux=np.zeros((1,3),dtype=self._dtype) + self._norm_grav_vector_w=np.zeros((1,3),dtype=self._dtype) + self._norm_grav_vector_w[:, 
2]=-1.0 + self._norm_grav_vector_base_loc=np.zeros((1,3),dtype=self._dtype) + + self._init() # initialize controller + + if not hasattr(self, '_rhc_fpaths'): + self._rhc_fpaths = [] + self._rhc_fpaths.append(os.path.abspath(__file__)) + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + # Get the file path of the class being initialized and append it to the list + if not hasattr(cls, '_rhc_fpaths'): + cls._rhc_fpaths = [] + child_class_file_path = os.path.abspath(inspect.getfile(cls)) + cls._rhc_fpaths.append(child_class_file_path) + + def this_paths(self): + return self._rhc_fpaths + + def __del__(self): + self._close() + + def _handle_sigint(self, signum, frame): + Journal.log(self._class_name_base, + "_handle_sigint", + "SIGINT received", + LogType.WARN) + self._term_req_received = True + + def _set_rhc_pred_idx(self): + # default to last node + self._pred_node_idx=self._n_nodes-1 + + def _set_rhc_cmds_idx(self): + # use index 2 by default to compensate for + # the inevitable action delay + # (get_state, trigger sol -> +dt -> apply sol ) + # if we apply action from second node (idenx 1) + # that action will already be one dt old. 
We assume we were already + # applying the right action to get to the state of node idx 1 and get the + # cmds for reaching the state at idx 1 + self._rhc_cmds_node_idx=2 + + def _close(self): + if not self._closed: + self._unregister_from_cluster() + if self.robot_cmds is not None: + self.robot_cmds.close() + if self.robot_pred is not None: + self.robot_pred.close() + if self.rhc_pred_delta is not None: + self.rhc_pred_delta.close() + if self.robot_state is not None: + self.robot_state.close() + if self.rhc_status is not None: + self.rhc_status.close() + if self.rhc_internal is not None: + self.rhc_internal.close() + if self.cluster_stats is not None: + self.cluster_stats.close() + if self._remote_triggerer is not None: + self._remote_triggerer.close() + if self._remote_term is not None: + self._remote_term.close() + self._closed = True + + def close(self): + self._close() + + def get_file_paths(self): + # can be overriden by child + base_paths = [] + base_paths.append(self._this_path) + return base_paths + + def init_rhc_task_cmds(self): + + self.rhc_refs = self._init_rhc_task_cmds() + self.rhc_refs.reset() + + def _init_states(self): + + quat_remap = self._get_quat_remap() + self.robot_state = RobotState(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + self.robot_state.run() + self.robot_cmds = RhcCmds(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # 
got from server + n_contacts=None # got from server + ) + self.robot_cmds.run() + self.robot_pred = RhcPred(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + self.robot_pred.run() + self.rhc_pred_delta = RhcPredDelta(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + self.rhc_pred_delta.run() + + def _rhc(self): + if self._debug: + self._rhc_db() + else: + self._rhc_min() + + def _rhc_db(self): + # rhc with debug data + self._start_time = time.perf_counter() + + self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with + # latest data on shared mem + + self._compute_pred_delta() + + if not self.failed(): + # we can solve only if not in failure state + self._failed = not self._solve() # solve actual TO + if (self._failed): + # perform failure procedure + self._on_failure() + else: + if not self._allow_triggering_when_failed: + Journal.log(self._class_name_base, + "solve", + "Received solution req, but controller is in failure state. " + \ + " You should have reset() the controller!", + LogType.EXCEP, + throw_when_excep = True) + else: + if self._verbose: + Journal.log(self._class_name_base, + "solve", + "Received solution req, but controller is in failure state. No solution will be performed. 
" + \ + " Use the reset() method to continue solving!", + LogType.WARN) + + self._write_cmds_from_sol() # we update update the views of the cmds + # from the latest solution + + # in debug, rhc internal state is streamed over + # shared mem. + self._update_rhc_internal() + self._profiling_data_dict["full_solve_dt"] = time.perf_counter() - self._start_time + self._update_profiling_data() # updates all profiling data + if self._verbose: + Journal.log(self._class_name_base, + "solve", + f"RHC full solve loop execution time -> " + str(self._profiling_data_dict["full_solve_dt"]), + LogType.INFO, + throw_when_excep = True) + + def _rhc_min(self): + + self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with + # latest data on shared mem + + self._compute_pred_delta() + + if not self.failed(): + # we can solve only if not in failure state + self._failed = not self._solve() # solve actual TO + if (self._failed): + # perform failure procedure + self._on_failure() + else: + if not self._allow_triggering_when_failed: + Journal.log(self._class_name_base, + "solve", + "Received solution req, but controller is in failure state. " + \ + " You should have reset() the controller!", + LogType.EXCEP, + throw_when_excep = True) + else: + if self._verbose: + Journal.log(self._class_name_base, + "solve", + "Received solution req, but controller is in failure state. No solution will be performed. " + \ + " Use the reset() method to continue solving!", + LogType.WARN) + + self._write_cmds_from_sol() # we update the views of the cmds + # from the latest solution even if failed + + def solve(self): + + # run the solution loop and wait for trigger signals + # using cond. 
variables (efficient) + while not self._term_req_received: + # we are always listening for a trigger signal + if not self._remote_triggerer.wait(self._remote_triggerer_timeout): + Journal.log(self._class_name_base, + "solve", + "Didn't receive any remote trigger req within timeout!", + LogType.EXCEP, + throw_when_excep = False) + break + self._received_trigger = True + # signal received -> we process incoming requests + # perform reset, if required + if self.rhc_status.resets.read_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0)[0]: + self.reset() # rhc is reset + # check if a trigger request was received + if self.rhc_status.trigger.read_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0)[0]: + self._rhc() # run solution + self.rhc_status.trigger.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) # allow next solution trigger + + self._remote_triggerer.ack() # send ack signal to server + self._received_trigger = False + + self._term_req_received = self._term_req_received or self._remote_term.read_retry(row_index=0, + col_index=0, + row_index_view=0)[0] + self.close() # is not stricly necessary + + def reset(self): + + if not self._closed: + self.reset_rhc_data() + self._failed = False # allow triggering + self._n_resets += 1 + self.rhc_status.fails.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.resets.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + def _create_jnt_maps(self): + + # retrieve env-side joint names from shared mem + self._env_side_jnt_names = self.robot_state.jnt_names() + self._check_jnt_names_compatibility() # will raise exception if not self._allow_less_jnts + if not self._got_jnt_names_from_controllers: + exception = f"Cannot run the solve(). assign_env_side_jnt_names() was not called!" 
+ Journal.log(self._class_name_base, + "_create_jnt_maps", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # robot homer guarantees that _controller_side_jnt_names is at least contained in self._env_side_jnt_names (or equal) + self._to_controller = [self._env_side_jnt_names.index(element) for element in self._controller_side_jnt_names] + # set joint remappings for shared data from env data to controller + # all shared data is by convention specified according to the env (jnts are ordered that way) + # the remapping is used so that when data is returned, its a remapped view from env to controller, + # so that data can be assigned direclty from the rhc controller without any issues + self.robot_state.set_jnts_remapping(jnts_remapping=self._to_controller) + self.robot_cmds.set_jnts_remapping(jnts_remapping=self._to_controller) + self.robot_pred.set_jnts_remapping(jnts_remapping=self._to_controller) + self.rhc_pred_delta.set_jnts_remapping(jnts_remapping=self._to_controller) + + return True + + def reset_rhc_data(self): + + self._reset() # custom reset (e.g. it should set the current solution to some + # default solution, like a bootstrap) + + self.rhc_refs.reset() # reset rhc refs to default (has to be called after _reset) + + self._write_cmds_from_sol() # use latest solution (e.g. 
from bootstrap if called before running + # the first solve) as default state + + def failed(self): + return self._failed + + def robot_mass(self): + return self._robot_mass + + def _assign_cntrl_index(self, reg_state: np.ndarray): + state = reg_state.flatten() # ensure 1D tensor + free_spots = np.nonzero(~state.flatten())[0] + return free_spots[0].item() # just return the first free spot + + def _register_to_cluster(self): + + self.rhc_status = RhcStatus(is_server=False, + namespace=self.namespace, + verbose=self._verbose, + vlevel=VLevel.V2, + with_torch_view=False, + with_gpu_mirror=False, + optimize_mem=True, + cluster_size=1, # we just need the row corresponding to this controller + n_contacts=None, # we get this from server + n_nodes=None # we get this from server + ) + self.rhc_status.run() # rhc status (reg. flags, failure, tot cost, tot cnstrl viol, etc...) + + # acquire semaphores since we have to perform non-atomic operations + # on the whole memory views + self.rhc_status.registration.data_sem_acquire() + self.rhc_status.controllers_counter.data_sem_acquire() + self.rhc_status.controllers_counter.synch_all(retry = True, + read = True) + + available_spots = self.rhc_status.cluster_size + # from here on all pre registration ops can be done + + # incrementing cluster controllers counter + controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror() + if controllers_counter[0, 0] + 1 > available_spots: # no space left -> return + self.rhc_status.controllers_counter.data_sem_release() + self.rhc_status.registration.data_sem_release() + exception = "Cannot register to cluster. 
No space left " + \ + f"({controllers_counter[0, 0]} controllers already registered)" + Journal.log(self._class_name_base, + "_register_to_cluster", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # now we can register + + # increment controllers counter + controllers_counter += 1 + self.controller_index = controllers_counter.item() -1 + + # actually register to cluster + self.rhc_status.controllers_counter.synch_all(retry = True, + read = False) # writes to shared mem (just one for all controllers (i.e n_rows = 1)) + + # read current registration state + self.rhc_status.registration.synch_all(retry = True, + read = True, + row_index=self.controller_index, + row_index_view=0) + registrations = self.rhc_status.registration.get_numpy_mirror() + # self.controller_index = self._assign_cntrl_index(registrations) + + + self._class_name_base = self._class_name_base+str(self.controller_index) + # self.controller_index_np = np.array(self.controller_index) + self.controller_index_np = np.array(0) # given that we use optimize_mem, the shared mem copy has shape 1 x n_cols (we can write and read at [0, :]) + + registrations[self.controller_index_np, 0] = True + self.rhc_status.registration.synch_all(retry = True, + read = False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + # now all heavy stuff that would otherwise make the registration slow + self._remote_term = SharedTWrapper(namespace=self.namespace, + basename="RemoteTermination", + is_server=False, + verbose = self._verbose, + vlevel = VLevel.V2, + with_gpu_mirror=False, + with_torch_view=False, + dtype=dtype.Bool) + self._remote_term.run() + + # other initializations + + self.cluster_stats = RhcProfiling(is_server=False, + name=self.namespace, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=True, + optimize_mem=True, + cluster_size=1 # we just need the row corresponding to this controller + ) # profiling data + self.cluster_stats.run() + self.cluster_stats.synch_info() + + 
self._create_jnt_maps() + self.init_rhc_task_cmds() # initializes rhc interface to external commands (defined by child class) + self._consinstency_checks() # sanity checks + + if self._homer is None: + self._init_robot_homer() # call this in case it wasn't called by child + + self._robot_mass = self._get_robot_mass() # uses child class implemented method + self._contact_f_scale = self._get_robot_mass() * 9.81 + + # writing some static info about this controller + # self.rhc_status.rhc_static_info.synch_all(retry = True, + # read = True, + # row_index=self.controller_index, + # col_index=0) # first read current static info from other controllers (not necessary if optimize_mem==True) + self.rhc_status.rhc_static_info.set(data=np.array(self._dt), + data_type="dts", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self._t_horizon), + data_type="horizons", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self._n_nodes), + data_type="nnodes", + rhc_idxs=self.controller_index_np, + gpu=False) + # writing some static rhc info which is only available after problem init + self.rhc_status.rhc_static_info.set(data=np.array(len(self._get_contacts())), + data_type="ncontacts", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self.robot_mass()), + data_type="robot_mass", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self._pred_node_idx), + data_type="pred_node_idx", + rhc_idxs=self.controller_index_np, + gpu=False) + + self.rhc_status.rhc_static_info.synch_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.rhc_status.rhc_static_info.n_cols, + read=False) + + # we set homings also for joints which are not in the rhc homing map + # since this is usually required on env side + + homing_full = 
self._homer_env.get_homing().reshape(1, + self.robot_cmds.n_jnts()) + + null_action = np.zeros((1, self.robot_cmds.n_jnts()), + dtype=self._dtype) + + self.robot_cmds.jnts_state.set(data=homing_full, data_type="q", + robot_idxs=self.controller_index_np, + no_remap=True) + self.robot_cmds.jnts_state.set(data=null_action, data_type="v", + robot_idxs=self.controller_index_np, + no_remap=True) + self.robot_cmds.jnts_state.set(data=null_action, data_type="eff", + robot_idxs=self.controller_index_np, + no_remap=True) + + # write all joints (including homing for env-only ones) + self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) # only write data corresponding to this controller + + self.reset() # reset controller + self._n_resets=0 + + # for last we create the trigger client + self._remote_triggerer = RemoteTriggererClnt(namespace=self.namespace, + verbose=self._verbose, + vlevel=VLevel.V2) # remote triggering + self._remote_triggerer.run() + + if self._debug: + # internal solution is published on shared mem + # we assume the user has made available the cost + # and constraint data at this point (e.g. 
through + # the solution of a bootstrap) + cost_data = self._get_cost_data() + constr_data = self._get_constr_data() + config = RhcInternal.Config(is_server=True, + enable_q= True, + enable_v=True, + enable_a=True, + enable_a_dot=False, + enable_f=True, + enable_f_dot=False, + enable_eff=False, + cost_names=cost_data[0], + cost_dims=cost_data[1], + constr_names=constr_data[0], + constr_dims=constr_data[1], + ) + self.rhc_internal = RhcInternal(config=config, + namespace=self.namespace, + rhc_index = self.controller_index, + n_contacts=self.n_contacts, + n_jnts=self.n_dofs, + jnt_names=self._controller_side_jnt_names, + n_nodes=self._n_nodes, + verbose = self._verbose, + vlevel=VLevel.V2, + force_reconnection=True, + safe=True) + self.rhc_internal.run() + + Journal.log(self._class_name_base, + "_register_to_cluster", + f"controller {self.controller_index} registered", + LogType.STAT, + throw_when_excep = True) + + # we can now release everything so that other controllers can register + self.rhc_status.controllers_counter.data_sem_release() + self.rhc_status.registration.data_sem_release() + + self._registered = True + + def _unregister_from_cluster(self): + + if self._received_trigger: + # received interrupt during solution --> + # send ack signal to server anyway + self._remote_triggerer.ack() + if self._registered: + # acquire semaphores since we have to perform operations + # on the whole memory views + self.rhc_status.registration.data_sem_acquire() + self.rhc_status.controllers_counter.data_sem_acquire() + self.rhc_status.registration.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self._deactivate() + # decrementing controllers counter + self.rhc_status.controllers_counter.synch_all(retry = True, + read = True) + controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror() + controllers_counter -= 1 + self.rhc_status.controllers_counter.synch_all(retry = True, + read = False) + 
Journal.log(self._class_name_base, + "_unregister_from_cluster", + "Done", + LogType.STAT, + throw_when_excep = True) + # we can now release everything + self.rhc_status.registration.data_sem_release() + self.rhc_status.controllers_counter.data_sem_release() + self._registered = False + + def _get_quat_remap(self): + # to be overridden by child class if necessary + return [0, 1, 2, 3] + + def _consinstency_checks(self): + + # check controller dt + server_side_cluster_dt = self.cluster_stats.get_info(info_name="cluster_dt") + if not (abs(server_side_cluster_dt - self._dt) < 1e-4): + exception = f"Trying to initialize a controller with control dt {self._dt}, which" + \ + f"does not match the cluster control dt {server_side_cluster_dt}" + Journal.log(self._class_name_base, + "_consinstency_checks", + exception, + LogType.EXCEP, + throw_when_excep = True) + # check contact names + + server_side_contact_names = set(self.robot_state.contact_names()) + control_side_contact_names = set(self._get_contacts()) + + if not server_side_contact_names == control_side_contact_names: + warn = f"Controller-side contact names do not match server-side names!" + \ + f"\nServer: {self.robot_state.contact_names()}\n Controller: {self._get_contacts()}" + Journal.log(self._class_name_base, + "_consinstency_checks", + warn, + LogType.WARN, + throw_when_excep = True) + if not len(self.robot_state.contact_names()) == len(self._get_contacts()): + # at least, we need the n of contacts to match! + exception = f"Controller-side n contacts {self._get_contacts()} do not match " + \ + f"server-side n contacts {len(self.robot_state.contact_names())}!" 
+ Journal.log(self._class_name_base, + "_consinstency_checks", + exception, + LogType.EXCEP, + throw_when_excep = True) + + def _init(self): + + stat = f"Trying to initialize RHC controller " + \ + f"with dt: {self._dt} s, t_horizon: {self._t_horizon} s, n_intervals: {self._n_intervals}" + Journal.log(self._class_name_base, + "_init", + stat, + LogType.STAT, + throw_when_excep = True) + + self._init_states() # initializes shared mem. states + + self._init_problem() # we call the child's initialization method for the actual problem + self._post_problem_init() + + self._register_to_cluster() # registers the controller to the cluster + + Journal.log(self._class_name_base, + "_init", + f"RHC controller initialized with cluster index {self.controller_index} on process {os.getpid()}", + LogType.STAT, + throw_when_excep = True) + + def _deactivate(self): + # signal controller deactivation over shared mem + self.rhc_status.activation_state.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + # also set cmds to homing for safety + # self.reset_rhc_data() + + def _on_failure(self): + + self.rhc_status.fails.write_retry(True, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self._deactivate() + self._n_fails += 1 + self.rhc_status.controllers_fail_counter.write_retry(self._n_fails, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + def _init_robot_homer(self): + self._homer = RobotHomer(srdf_path=self.srdf_path, + jnt_names=self._controller_side_jnt_names) + + self._homer_env = RobotHomer(srdf_path=self.srdf_path, + jnt_names=self.robot_state.jnt_names()) + + def _update_profiling_data(self): + + # updated debug data on shared memory + # with the latest info available + self.cluster_stats.solve_loop_dt.write_retry(self._profiling_data_dict["full_solve_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + 
self.cluster_stats.rti_sol_time.write_retry(self._profiling_data_dict["rti_solve_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.cluster_stats.prb_update_dt.write_retry(self._profiling_data_dict["problem_update_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.cluster_stats.phase_shift_dt.write_retry(self._profiling_data_dict["phases_shift_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.cluster_stats.task_ref_update_dt.write_retry(self._profiling_data_dict["task_ref_update"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + def _write_cmds_from_sol(self): + + # gets data from the solution and updates the view on the shared data + + # cmds for robot + # jnts + self.robot_cmds.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q", robot_idxs=self.controller_index_np) + self.robot_cmds.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="v", robot_idxs=self.controller_index_np) + self.robot_cmds.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a", robot_idxs=self.controller_index_np) + self.robot_cmds.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np) + # root + self.robot_cmds.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q_full", robot_idxs=self.controller_index_np) + self.robot_cmds.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="twist", robot_idxs=self.controller_index_np) + self.robot_cmds.root_state.set(data=self._get_root_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np) + self.robot_cmds.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._rhc_cmds_node_idx-1), 
data_type="gn", robot_idxs=self.controller_index_np) + f_contact = self._get_f_from_sol() + if f_contact is not None: + contact_names = self.robot_state.contact_names() + node_idx_f_estimate=self._rhc_cmds_node_idx-1 # we always write the force to reach the desired state (prev node) + rhc_q_estimate=self._get_root_full_q_from_sol(node_idx=node_idx_f_estimate)[:, 3:7] + for i in range(len(contact_names)): + contact = contact_names[i] + contact_idx = i*3 + contact_force_rhc_world=f_contact[contact_idx:(contact_idx+3), node_idx_f_estimate:node_idx_f_estimate+1].T + world2base_frame(v_w=contact_force_rhc_world, + q_b=rhc_q_estimate, + v_out=self._contact_force_base_loc_aux, + is_q_wijk=False # horizon q is ijkw + ) + self.robot_cmds.contact_wrenches.set(data=self._contact_force_base_loc_aux, + data_type="f", + robot_idxs=self.controller_index_np, + contact_name=contact) + + # prediction data from MPC horizon + self.robot_pred.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._pred_node_idx), data_type="q", robot_idxs=self.controller_index_np) + self.robot_pred.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._pred_node_idx), data_type="v", robot_idxs=self.controller_index_np) + self.robot_pred.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a", robot_idxs=self.controller_index_np) + self.robot_pred.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._pred_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np) + self.robot_pred.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._pred_node_idx), data_type="q_full", robot_idxs=self.controller_index_np) + self.robot_pred.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._pred_node_idx), data_type="twist", robot_idxs=self.controller_index_np) + self.robot_pred.root_state.set(data=self._get_root_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np) + 
self.robot_pred.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._pred_node_idx-1), data_type="gn", robot_idxs=self.controller_index_np) + + # write robot commands + self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) # jnt state + self.robot_cmds.root_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.root_state.n_cols, + read=False) # root state, in case it was written + self.robot_cmds.contact_wrenches.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.contact_wrenches.n_cols, + read=False) # contact state + + # write robot pred + self.robot_pred.jnts_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) + self.robot_pred.root_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.root_state.n_cols, + read=False) + + # we also fill other data (cost, constr. violation, etc..) 
+ self.rhc_status.rhc_cost.write_retry(self._get_rhc_cost(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_constr_viol.write_retry(self._get_rhc_constr_viol(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_n_iter.write_retry(self._get_rhc_niter_to_sol(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_nodes_cost.write_retry(data=self._get_rhc_nodes_cost(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_nodes_constr_viol.write_retry(data=self._get_rhc_nodes_constr_viol(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_fail_idx.write_retry(self._get_failure_index(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) # write idx on shared mem + + def _compute_pred_delta(self): + + # measurements + q_full_root_meas = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np) + twist_root_meas = self.robot_state.root_state.get(data_type="twist", robot_idxs=self.controller_index_np) + a_root_meas = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np) + g_vec_root_meas = self.robot_state.root_state.get(data_type="gn", robot_idxs=self.controller_index_np) + + q_jnts_meas = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np) + v_jnts_meas = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np) + a_jnts_meas = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np) + eff_jnts_meas = self.robot_state.jnts_state.get(data_type="eff", robot_idxs=self.controller_index_np) + + # prediction from rhc + delta_root_q_full=self._get_root_full_q_from_sol(node_idx=1)-q_full_root_meas + delta_root_twist=self._get_root_twist_from_sol(node_idx=1)-twist_root_meas + 
delta_root_a=self._get_root_a_from_sol(node_idx=0)-a_root_meas + delta_g_vec=self._get_norm_grav_vector_from_sol(node_idx=0)-g_vec_root_meas + + delta_jnts_q=self._get_jnt_q_from_sol(node_idx=1)-q_jnts_meas + delta_jnts_v=self._get_jnt_v_from_sol(node_idx=1)-v_jnts_meas + delta_jnts_a=self._get_jnt_a_from_sol(node_idx=0)-a_jnts_meas + delta_jnts_eff=self._get_jnt_eff_from_sol(node_idx=0)-eff_jnts_meas + + # writing pred. errors + self.rhc_pred_delta.root_state.set(data=delta_root_q_full, data_type="q_full", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.root_state.set(data=delta_root_twist,data_type="twist", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.root_state.set(data=delta_root_a,data_type="a_full", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.root_state.set(data=delta_g_vec, data_type="gn", robot_idxs=self.controller_index_np) + + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_q,data_type="q", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_v,data_type="v", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_a,data_type="a", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_eff, data_type="eff", robot_idxs=self.controller_index_np) + + # write on shared memory + self.rhc_pred_delta.jnts_state.synch_retry(row_index=self.controller_index, + col_index=0, + n_rows=1, + row_index_view=0, + n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) # jnt state + self.rhc_pred_delta.root_state.synch_retry(row_index=self.controller_index, + col_index=0, + n_rows=1, + row_index_view=0, + n_cols=self.robot_cmds.root_state.n_cols, + read=False) # root state + + def _assign_controller_side_jnt_names(self, + jnt_names: List[str]): + + self._controller_side_jnt_names = jnt_names + self._got_jnt_names_from_controllers = True + + def _check_jnt_names_compatibility(self): + + set_rhc = 
set(self._controller_side_jnt_names) + set_env = set(self._env_side_jnt_names) + + if not set_rhc == set_env: + rhc_is_missing=set_env-set_rhc + env_is_missing=set_rhc-set_env + + msg_type=LogType.WARN + message="" + if not len(rhc_is_missing)==0: # allowed + message = "\nSome env-side joint names are missing on rhc-side!\n" + \ + "ENV-SIDE-> \n" + \ + " ".join(self._env_side_jnt_names) + "\n" +\ + "RHC-SIDE -> \n" + \ + " ".join(self._controller_side_jnt_names) + "\n" \ + "\MISSING -> \n" + \ + " ".join(list(rhc_is_missing)) + "\n" + if not self._allow_less_jnts: # raise exception + msg_type=LogType.EXCEP + + if not len(env_is_missing)==0: # not allowed + message = "\nSome rhc-side joint names are missing on env-side!\n" + \ + "ENV-SIDE-> \n" + \ + " ".join(self._env_side_jnt_names) + \ + "RHC-SIDE -> \n" + \ + " ".join(self._controller_side_jnt_names) + "\n" \ + "\nmissing -> \n" + \ + " ".join(list(env_is_missing)) + msg_type=LogType.EXCEP + + Journal.log(self._class_name_base, + "_check_jnt_names_compatibility", + message, + msg_type, + throw_when_excep = True) + + def _get_cost_data(self): + # to be overridden by child class + return None, None + + def _get_constr_data(self): + # to be overridden by child class + return None, None + + def _get_fail_idx(self): + # to be overriden by parent + return 0.0 + + def _get_failure_index(self): + fail_idx=self._get_fail_idx()/self._fail_idx_thresh + if fail_idx>1.0: + fail_idx=1.0 + return fail_idx + + def _check_rhc_failure(self): + # we use fail idx viol to detect failures + idx = self._get_failure_index() + return idx>=1.0 + + def _update_rhc_internal(self): + # data which is not enabled in the config is not actually + # written so overhead is minimal for non-enabled data + self.rhc_internal.write_q(data= self._get_q_from_sol(), + retry=True) + self.rhc_internal.write_v(data= self._get_v_from_sol(), + retry=True) + self.rhc_internal.write_a(data= self._get_a_from_sol(), + retry=True) + 
self.rhc_internal.write_a_dot(data= self._get_a_dot_from_sol(), + retry=True) + self.rhc_internal.write_f(data= self._get_f_from_sol(), + retry=True) + self.rhc_internal.write_f_dot(data= self._get_f_dot_from_sol(), + retry=True) + self.rhc_internal.write_eff(data= self._get_eff_from_sol(), + retry=True) + for cost_idx in range(self.rhc_internal.config.n_costs): + # iterate over all costs and update all values + cost_name = self.rhc_internal.config.cost_names[cost_idx] + self.rhc_internal.write_cost(data= self._get_cost_from_sol(cost_name = cost_name), + cost_name = cost_name, + retry=True) + for constr_idx in range(self.rhc_internal.config.n_constr): + # iterate over all constraints and update all values + constr_name = self.rhc_internal.config.constr_names[constr_idx] + self.rhc_internal.write_constr(data= self._get_constr_from_sol(constr_name=constr_name), + constr_name = constr_name, + retry=True) + + def _get_contacts(self): + contact_names = self._get_contact_names() + self._got_contact_names = True + return contact_names + + def _get_q_from_sol(self): + # to be overridden by child class + return None + + def _get_v_from_sol(self): + # to be overridden by child class + return None + + def _get_a_from_sol(self): + # to be overridden by child class + return None + + def _get_a_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_f_from_sol(self): + # to be overridden by child class + return None + + def _get_f_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_eff_from_sol(self): + # to be overridden by child class + return None + + def _get_cost_from_sol(self, + cost_name: str): + # to be overridden by child class + return None + + def _get_constr_from_sol(self, + constr_name: str): + # to be overridden by child class + return None + + @abstractmethod + def _reset(self): + pass + + @abstractmethod + def _init_rhc_task_cmds(self): + pass + + @abstractmethod + def _get_robot_jnt_names(self): + pass + 
+ @abstractmethod + def _get_contact_names(self): + pass + + @abstractmethod + def _get_jnt_q_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_jnt_v_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_jnt_a_from_sol(self, node_idx=0) -> np.ndarray: + pass + + @abstractmethod + def _get_jnt_eff_from_sol(self, node_idx=0) -> np.ndarray: + pass + + @abstractmethod + def _get_root_full_q_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_full_q_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_root_twist_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_root_a_from_sol(self, node_idx=0) -> np.ndarray: + pass + + def _get_norm_grav_vector_from_sol(self, node_idx=1) -> np.ndarray: + rhc_q=self._get_root_full_q_from_sol(node_idx=node_idx)[:, 3:7] + world2base_frame(v_w=self._norm_grav_vector_w,q_b=rhc_q,v_out=self._norm_grav_vector_base_loc, + is_q_wijk=False) + return self._norm_grav_vector_base_loc + + def _get_rhc_cost(self): + # to be overridden + return np.nan + + def _get_rhc_constr_viol(self): + # to be overridden + return np.nan + + def _get_rhc_nodes_cost(self): + # to be overridden + return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype) + + def _get_rhc_nodes_constr_viol(self): + # to be overridden + return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype) + + def _get_rhc_niter_to_sol(self) -> np.ndarray: + # to be overridden + return np.nan + + @abstractmethod + def _update_open_loop(self): + # updates rhc controller + # using the internal state + pass + + @abstractmethod + def _update_closed_loop(self): + # uses meas. 
from robot + pass + + @abstractmethod + def _solve(self) -> bool: + pass + + @abstractmethod + def _get_ndofs(self): + pass + + abstractmethod + def _get_robot_mass(self): + pass + + @abstractmethod + def _init_problem(self): + # initialized horizon's TO problem + pass + + @abstractmethod + def _post_problem_init(self): + pass diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/sac.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/sac.py new file mode 100644 index 0000000000000000000000000000000000000000..fd40a13287ccf6ccda694c218d114ede8d4b9d4b --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/sac.py @@ -0,0 +1,680 @@ +import torch +import torch.nn as nn +from torch.distributions.normal import Normal +import math + +from aug_mpc.utils.nn.normalization_utils import RunningNormalizer +from aug_mpc.utils.nn.layer_utils import llayer_init + +from typing import List + +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal +from EigenIPC.PyEigenIPC import VLevel + +class SACAgent(nn.Module): + def __init__(self, + obs_dim: int, + actions_dim: int, + obs_ub: List[float] = None, + obs_lb: List[float] = None, + actions_ub: List[float] = None, + actions_lb: List[float] = None, + rescale_obs: bool = False, + norm_obs: bool = True, + use_action_rescale_for_critic: bool = True, + device:str="cuda", + dtype=torch.float32, + is_eval:bool=False, + load_qf:bool=False, + epsilon:float=1e-8, + debug:bool=False, + compression_ratio:float=-1.0, # > 0; if [0, 1] compression, >1 "expansion" + layer_width_actor:int=256, + n_hidden_layers_actor:int=2, + layer_width_critic:int=512, + n_hidden_layers_critic:int=4, + torch_compile: bool = False, + add_weight_norm: bool = False, + add_layer_norm: bool = False, + add_batch_norm: bool = False): + + super().__init__() + + 
self._use_torch_compile=torch_compile + + self._layer_width_actor=layer_width_actor + self._layer_width_critic=layer_width_critic + self._n_hidden_layers_actor=n_hidden_layers_actor + self._n_hidden_layers_critic=n_hidden_layers_critic + + self._obs_dim=obs_dim + self._actions_dim=actions_dim + self._actions_ub=actions_ub + self._actions_lb=actions_lb + + self._add_weight_norm=add_weight_norm + self._add_layer_norm=add_layer_norm + self._add_batch_norm=add_batch_norm + + self._is_eval=is_eval + self._load_qf=load_qf + + self._epsilon=epsilon + + if compression_ratio > 0.0: + self._layer_width_actor=int(compression_ratio*obs_dim) + self._layer_width_critic=int(compression_ratio*(obs_dim+actions_dim)) + + self._normalize_obs = norm_obs + self._rescale_obs=rescale_obs + if self._rescale_obs and self._normalize_obs: + Journal.log(self.__class__.__name__, + "__init__", + f"Both running normalization and obs rescaling is enabled. Was this intentional?", + LogType.WARN, + throw_when_excep = True) + + self._use_action_rescale_for_critic=use_action_rescale_for_critic + + self._rescaling_epsi=1e-9 + + self._debug = debug + + self._torch_device = device + self._torch_dtype = dtype + + # obs scale and bias + if obs_ub is None: + obs_ub = [1] * obs_dim + if obs_lb is None: + obs_lb = [-1] * obs_dim + if (len(obs_ub) != obs_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Observations ub list length should be equal to {obs_dim}, but got {len(obs_ub)}", + LogType.EXCEP, + throw_when_excep = True) + if (len(obs_lb) != obs_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Observations lb list length should be equal to {obs_dim}, but got {len(obs_lb)}", + LogType.EXCEP, + throw_when_excep = True) + + self._obs_ub = torch.tensor(obs_ub, dtype=self._torch_dtype, + device=self._torch_device) + self._obs_lb = torch.tensor(obs_lb, dtype=self._torch_dtype, + device=self._torch_device) + obs_scale = torch.full((obs_dim, ), + fill_value=0.0, + 
dtype=self._torch_dtype, + device=self._torch_device) + obs_scale[:] = (self._obs_ub-self._obs_lb)/2.0 + self.register_buffer( + "obs_scale", obs_scale + ) + obs_bias = torch.full((obs_dim, ), + fill_value=0.0, + dtype=self._torch_dtype, + device=self._torch_device) + obs_bias[:] = (self._obs_ub+self._obs_lb)/2.0 + self.register_buffer( + "obs_bias", obs_bias) + + self._build_nets() + + self._init_obs_norm() + + msg=f"Created SAC agent with actor [{self._layer_width_actor}, {self._n_hidden_layers_actor}]\ + and critic [{self._layer_width_critic}, {self._n_hidden_layers_critic}] sizes.\ + \n Runningobs normalizer: {type(self.obs_running_norm)} \ + \n Batch normalization: {self._add_batch_norm} \ + \n Layer normalization: {self._add_layer_norm} \ + \n Weight normalization: {self._add_weight_norm} \ + \n Critic input actions are descaled: {self._use_action_rescale_for_critic}" + Journal.log(self.__class__.__name__, + "__init__", + msg, + LogType.INFO) + + def _init_obs_norm(self): + + self.obs_running_norm=None + if self._normalize_obs: + self.obs_running_norm = RunningNormalizer((self._obs_dim,), + epsilon=self._epsilon, + device=self._torch_device, dtype=self._torch_dtype, + freeze_stats=True, # always start with freezed stats + debug=self._debug) + self.obs_running_norm.type(self._torch_dtype) # ensuring correct dtype for whole module + + def _build_nets(self): + + if self._add_weight_norm: + Journal.log(self.__class__.__name__, + "__init__", + f"Will use weight normalization reparametrization\n", + LogType.INFO) + + self.actor=None + self.qf1=None + self.qf2=None + self.qf1_target=None + self.qf2_target=None + + self.actor = Actor(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + actions_ub=self._actions_ub, + actions_lb=self._actions_lb, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_actor, + n_hidden_layers=self._n_hidden_layers_actor, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, 
+ add_batch_norm=self._add_batch_norm, + ) + + if (not self._is_eval) or self._load_qf: # just needed for training or during eval + # for debug, if enabled + self.qf1 = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + self.qf1_target = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + + self.qf2 = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + self.qf2_target = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + + self.qf1_target.load_state_dict(self.qf1.state_dict()) + self.qf2_target.load_state_dict(self.qf2.state_dict()) + + if self._use_torch_compile: + self.obs_running_norm=torch.compile(self.obs_running_norm) + self.actor = torch.compile(self.actor) + if (not self._is_eval) or self._load_qf: + self.qf1 = torch.compile(self.qf1) + self.qf2 = torch.compile(self.qf2) + self.qf1_target = torch.compile(self.qf1_target) + self.qf2_target = 
torch.compile(self.qf2_target) + + def reset(self, reset_stats: bool = False): + # we should just reinitialize the parameters, but for easiness + # we recreate the networks + + # force deallocation of objects + import gc + del self.actor + del self.qf1 + del self.qf2 + del self.qf1_target + del self.qf2_target + gc.collect() + + self._build_nets() + + if reset_stats: # we also reinitialize obs norm + self._init_obs_norm() + + # self.obs_running_norm.reset() + + def layer_width_actor(self): + return self._layer_width_actor + + def n_hidden_layers_actor(self): + return self._n_hidden_layers_actor + + def layer_width_critic(self): + return self._layer_width_critic + + def n_hidden_layers_critic(self): + return self._n_hidden_layers_critic + + def get_impl_path(self): + import os + return os.path.abspath(__file__) + + def update_obs_bnorm(self, x): + self.obs_running_norm.unfreeze() + self.obs_running_norm.manual_stat_update(x) + self.obs_running_norm.freeze() + + def _obs_scaling_layer(self, x): + x=(x-self.obs_bias) + x=x/(self.obs_scale+self._rescaling_epsi) + return x + + def _preprocess_obs(self, x): + if self._rescale_obs: + x = self._obs_scaling_layer(x) + if self.obs_running_norm is not None: + x = self.obs_running_norm(x) + return x + + def _preprocess_actions(self, a): + if self._use_action_rescale_for_critic: + a=self.actor.remove_scaling(a=a) # rescale to be in range [-1, 1] + return a + + def get_action(self, x): + x = self._preprocess_obs(x) + return self.actor.get_action(x) + + def get_qf1_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return self.qf1(x, a) + + def get_qf2_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return self.qf2(x, a) + + def get_qf1t_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return self.qf1_target(x, a) + + def get_qf2t_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return 
self.qf2_target(x, a) + + def load_state_dict(self, param_dict): + + missing, unexpected = super().load_state_dict(param_dict, + strict=False) + if not len(missing)==0: + Journal.log(self.__class__.__name__, + "load_state_dict", + f"These parameters are missing from the provided state dictionary: {str(missing)}\n", + LogType.EXCEP, + throw_when_excep = True) + if not len(unexpected)==0: + Journal.log(self.__class__.__name__, + "load_state_dict", + f"These parameters present in the provided state dictionary are not needed: {str(unexpected)}\n", + LogType.WARN) + + # sanity check on running normalizer + import re + running_norm_pattern = r"running_norm" + error=f"Found some keys in model state dictionary associated with a running normalizer. Are you running the agent with norm_obs=True?\n" + if any(re.match(running_norm_pattern, key) for key in unexpected): + Journal.log(self.__class__.__name__, + "load_state_dict", + error, + LogType.EXCEP, + throw_when_excep=True) + +class CriticQ(nn.Module): + def __init__(self, + obs_dim: int, + actions_dim: int, + device: str = "cuda", + dtype = torch.float32, + layer_width: int = 512, + n_hidden_layers: int = 4, + add_weight_norm: bool = False, + add_layer_norm: bool = False, + add_batch_norm: bool = False): + + super().__init__() + + self._lrelu_slope=0.01 + + self._torch_device = device + self._torch_dtype = dtype + + self._obs_dim = obs_dim + self._actions_dim = actions_dim + self._q_net_dim = self._obs_dim + self._actions_dim + + self._first_hidden_layer_width=self._q_net_dim # fist layer fully connected and of same dim + # as input + + init_type="kaiming_uniform" # maintains the variance of activations throughout the network + nonlinearity="leaky_relu" # suited for kaiming + + # Input layer + layers=llayer_init( + layer=nn.Linear(self._q_net_dim, self._first_hidden_layer_width), + init_type=init_type, + nonlinearity=nonlinearity, + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + 
add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Hidden layers + layers.extend( + llayer_init( + layer=nn.Linear(self._first_hidden_layer_width, layer_width), + init_type=init_type, + nonlinearity=nonlinearity, + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + for _ in range(n_hidden_layers - 1): + layers.extend( + llayer_init( + layer=nn.Linear(layer_width, layer_width), + init_type=init_type, + nonlinearity=nonlinearity, + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Output layer + layers.extend( + llayer_init( + layer=nn.Linear(layer_width, 1), + init_type="uniform", + uniform_biases=False, # contact biases + bias_const=-0.1, # negative to prevent overestimation + scale_weight=1e-2, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=False, + add_layer_norm=False, + add_batch_norm=False + ) + ) + + # Creating the full sequential network + self._q_net = nn.Sequential(*layers) + self._q_net.to(self._torch_device).type(self._torch_dtype) + + print("Critic architecture") + print(self._q_net) + + def get_n_params(self): + return sum(p.numel() for p in self.parameters()) + + def forward(self, x, a): + x = torch.cat([x, a], dim=1) + return self._q_net(x) + +class Actor(nn.Module): + def __init__(self, + 
obs_dim: int, + actions_dim: int, + actions_ub: List[float] = None, + actions_lb: List[float] = None, + device: str = "cuda", + dtype = torch.float32, + layer_width: int = 256, + n_hidden_layers: int = 2, + add_weight_norm: bool = False, + add_layer_norm: bool = False, + add_batch_norm: bool = False): + + super().__init__() + + self._lrelu_slope=0.01 + + self._torch_device = device + self._torch_dtype = dtype + + self._obs_dim = obs_dim + self._actions_dim = actions_dim + + self._first_hidden_layer_width=self._obs_dim # fist layer fully connected and of same dim + + # Action scale and bias + if actions_ub is None: + actions_ub = [1] * actions_dim + if actions_lb is None: + actions_lb = [-1] * actions_dim + if (len(actions_ub) != actions_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Actions ub list length should be equal to {actions_dim}, but got {len(actions_ub)}", + LogType.EXCEP, + throw_when_excep = True) + if (len(actions_lb) != actions_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Actions lb list length should be equal to {actions_dim}, but got {len(actions_lb)}", + LogType.EXCEP, + throw_when_excep = True) + + self._actions_ub = torch.tensor(actions_ub, dtype=self._torch_dtype, + device=self._torch_device) + self._actions_lb = torch.tensor(actions_lb, dtype=self._torch_dtype, + device=self._torch_device) + action_scale = torch.full((actions_dim, ), + fill_value=0.0, + dtype=self._torch_dtype, + device=self._torch_device) + action_scale[:] = (self._actions_ub-self._actions_lb)/2.0 + self.register_buffer( + "action_scale", action_scale + ) + actions_bias = torch.full((actions_dim, ), + fill_value=0.0, + dtype=self._torch_dtype, + device=self._torch_device) + actions_bias[:] = (self._actions_ub+self._actions_lb)/2.0 + self.register_buffer( + "action_bias", actions_bias) + + # Network configuration + self.LOG_STD_MAX = 2 + self.LOG_STD_MIN = -5 + + # Input layer followed by hidden layers + 
layers=llayer_init(nn.Linear(self._obs_dim, self._first_hidden_layer_width), + init_type="kaiming_uniform", + nonlinearity="leaky_relu", + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Hidden layers + layers.extend( + llayer_init(nn.Linear(self._first_hidden_layer_width, layer_width), + init_type="kaiming_uniform", + nonlinearity="leaky_relu", + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + for _ in range(n_hidden_layers - 1): + layers.extend( + llayer_init(nn.Linear(layer_width, layer_width), + init_type="kaiming_uniform", + nonlinearity="leaky_relu", + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Sequential layers for the feature extractor + self._fc12 = nn.Sequential(*layers) + + # Mean and log_std layers + out_fc_mean=llayer_init(nn.Linear(layer_width, self._actions_dim), + init_type="uniform", + uniform_biases=False, # constant bias init + bias_const=0.0, + scale_weight=1e-3, # scaling (output layer) + scale_bias=1.0, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=False, + add_layer_norm=False, + add_batch_norm=False + ) + self.fc_mean = nn.Sequential(*out_fc_mean) + out_fc_logstd= 
llayer_init(nn.Linear(layer_width, self._actions_dim), + init_type="uniform", + uniform_biases=False, + bias_const=math.log(0.5), + scale_weight=1e-3, # scaling (output layer) + scale_bias=1.0, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=False, + add_layer_norm=False, + add_batch_norm=False, + ) + self.fc_logstd = nn.Sequential(*out_fc_logstd) + + # Move all components to the specified device and dtype + self._fc12.to(device=self._torch_device, dtype=self._torch_dtype) + self.fc_mean.to(device=self._torch_device, dtype=self._torch_dtype) + self.fc_logstd.to(device=self._torch_device, dtype=self._torch_dtype) + + print("Actor architecture") + print(self._fc12) + print(self.fc_mean) + print(self.fc_logstd) + + def get_n_params(self): + return sum(p.numel() for p in self.parameters()) + + def forward(self, x): + x = self._fc12(x) + mean = self.fc_mean(x) + log_std = self.fc_logstd(x) + log_std = torch.tanh(log_std) + log_std = self.LOG_STD_MIN + 0.5 * (self.LOG_STD_MAX - self.LOG_STD_MIN) * (log_std + 1) # From SpinUp / Denis Yarats + return mean, log_std + + def get_action(self, x): + mean, log_std = self(x) + std = log_std.exp() + normal = torch.distributions.Normal(mean, std) + x_t = normal.rsample() # Reparameterization trick (for SAC we neex action + # to be differentible since we use Q nets. Using sample() would break the + # comp. 
graph and not allow gradients to flow) + y_t = torch.tanh(x_t) + action = y_t * self.action_scale + self.action_bias + log_prob_vec = normal.log_prob(x_t) # per-dimension log prob before tanh + log_prob_vec = log_prob_vec - torch.log(self.action_scale * (1 - y_t.pow(2)) + 1e-6) # tanh Jacobian + scaling + log_prob_sum = log_prob_vec.sum(1, keepdim=True) + mean = torch.tanh(mean) * self.action_scale + self.action_bias + return action, (log_prob_sum, log_prob_vec), mean + + def remove_scaling(self, a): + return (a - self.action_bias)/self.action_scale + +if __name__ == "__main__": + device = "cpu" # or "cpu" + import time + obs_dim = 273 + agent = SACAgent( + obs_dim=obs_dim, + actions_dim=10, + actions_lb=None, + actions_ub=None, + obs_lb=None, + obs_ub=None, + rescale_obs=False, + norm_obs=True, + use_action_rescale_for_critic=True, + is_eval=True, + compression_ratio=0.6, + layer_width_actor=128, + layer_width_critic=256, + n_hidden_layers_actor=3, + n_hidden_layers_critic=3, + device=device, + dtype=torch.float32, + add_weight_norm=True, + add_layer_norm=False, + add_batch_norm=False + ) + + n_samples = 10000 + random_obs = torch.rand((1, obs_dim), dtype=torch.float32, device=device) + + if device == "cuda": + torch.cuda.synchronize() + start = time.time() + + for i in range(n_samples): + actions, _, mean = agent.get_action(x=random_obs) + actions = actions.detach() + actions[:, :] = mean.detach() + + if device == "cuda": + torch.cuda.synchronize() + end = time.time() + + avrg_eval_time = (end - start) / n_samples + print(f"Average policy evaluation time on {device}: {avrg_eval_time:.6f} s") diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/sactor_critic_algo.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/sactor_critic_algo.py new file mode 100644 index 0000000000000000000000000000000000000000..58d452b7c550a693626c062c2810ad1446d1aafc --- 
/dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/sactor_critic_algo.py @@ -0,0 +1,2391 @@ +from aug_mpc.agents.sactor_critic.sac import SACAgent +from aug_mpc.agents.dummies.dummy import DummyAgent + +from aug_mpc.utils.shared_data.algo_infos import SharedRLAlgorithmInfo, QfVal, QfTrgt +from aug_mpc.utils.shared_data.training_env import SubReturns, TotReturns +from aug_mpc.utils.nn.rnd import RNDFull + +import torch +import torch.optim as optim +import torch.nn as nn + +import random +import math +from typing import Dict + +import os +import shutil + +import time + +import wandb +import h5py +import numpy as np + +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal +from EigenIPC.PyEigenIPC import VLevel + +from abc import ABC, abstractmethod + +class SActorCriticAlgoBase(ABC): + + # base class for actor-critic RL algorithms + + def __init__(self, + env, + debug = False, + remote_db = False, + seed: int = 1): + + self._env = env + self._seed = seed + + self._eval = False + self._det_eval = True + + self._full_env_db=False + + self._agent = None + + self._debug = debug + self._remote_db = remote_db + + self._writer = None + + self._run_name = None + self._drop_dir = None + self._dbinfo_drop_fname = None + self._model_path = None + + self._policy_update_db_data_dict = {} + self._custom_env_data_db_dict = {} + self._rnd_db_data_dict = {} + self._hyperparameters = {} + self._wandb_d={} + + # get params from env + self._get_params_from_env() + + self._torch_device = torch.device("cpu") # defaults to cpu + + self._setup_done = False + + self._verbose = False + + self._is_done = False + + self._shared_algo_data = None + + self._this_child_path = None + self._this_basepath = os.path.abspath(__file__) + + def __del__(self): + + self.done() + + def _get_params_from_env(self): + + self._env_name = self._env.name() + self._episodic_reward_metrics = self._env.ep_rewards_metrics() 
+ self._use_gpu = self._env.using_gpu() + self._dtype = self._env.dtype() + self._env_opts=self._env.env_opts() + self._num_envs = self._env.n_envs() + self._obs_dim = self._env.obs_dim() + self._actions_dim = self._env.actions_dim() + self._episode_timeout_lb, self._episode_timeout_ub = self._env.episode_timeout_bounds() + self._task_rand_timeout_lb, self._task_rand_timeout_ub = self._env.task_rand_timeout_bounds() + self._env_n_action_reps = self._env.n_action_reps() + self._is_continuous_actions_bool=self._env.is_action_continuous() + self._is_continuous_actions=torch.where(self._is_continuous_actions_bool)[0] + self._is_discrete_actions_bool=self._env.is_action_discrete() + self._is_discrete_actions=torch.where(self._is_discrete_actions_bool)[0] + + # default to all debug envs + self._db_env_selector=torch.tensor(list(range(0, self._num_envs)), dtype=torch.int) + self._db_env_selector_bool=torch.full((self._num_envs, ), + dtype=torch.bool, device="cpu", + fill_value=True) + # default to no expl envs + self._expl_env_selector=None + self._expl_env_selector_bool=torch.full((self._num_envs, ), dtype=torch.bool, device="cpu", + fill_value=False) + self._pert_counter=0.0 + # demo envs + self._demo_stop_thresh=None # performance metrics above which demo envs are deactivated + # (can be overridden thorugh the provided options) + self._demo_env_selector=self._env.demo_env_idxs() + self._demo_env_selector_bool=self._env.demo_env_idxs(get_bool=True) + + def learn(self): + + if not self._setup_done: + self._should_have_called_setup() + + self._start_time = time.perf_counter() + + # experience collection + with torch.no_grad(): # don't need grad computation here + for i in range(self._collection_freq): + if not self._collect_transition(): + return False + self._vec_transition_counter+=1 + + self._collection_t = time.perf_counter() + + if self._vec_transition_counter % self._bnorm_vecfreq == 0: + with torch.no_grad(): # don't need grad computation here + 
self._update_batch_norm(bsize=self._bnorm_bsize) + + # policy update + self._policy_update_t_start = time.perf_counter() + for i in range(self._update_freq): + self._update_policy() + self._update_counter+=1 + + self._policy_update_t = time.perf_counter() + + with torch.no_grad(): + if self._validate and (self._vec_transition_counter % self._validation_db_vecstep_freq == 0): + # validation + self._update_validation_losses() + self._validation_t = time.perf_counter() + self._post_step() + + if self._use_period_resets: + # periodic policy resets + if not self._adaptive_resets: + self._periodic_resets_on=(self._vec_transition_counter >= self._reset_vecstep_start) and \ + (self._vec_transition_counter < self._reset_vecstep_end) + + if self._periodic_resets_on and \ + (self._vec_transition_counter-self._reset_vecstep_start) % self._periodic_resets_vecfreq == 0: + self._reset_agent() + else: # trigger reset based on overfit metric + if self._overfit_idx > self._overfit_idx_thresh: + self._reset_agent() + + return not self.is_done() + + def eval(self): + + if not self._setup_done: + self._should_have_called_setup() + + self._start_time = time.perf_counter() + + if not self._collect_eval_transition(): + return False + self._vec_transition_counter+=1 + + self._collection_t = time.perf_counter() + + self._post_step() + + return not self.is_done() + + @abstractmethod + def _collect_transition(self)->bool: + pass + + @abstractmethod + def _collect_eval_transition(self)->bool: + pass + + @abstractmethod + def _update_policy(self): + pass + + @abstractmethod + def _update_validation_losses(self): + pass + + def _update_overfit_idx(self, loss, val_loss): + overfit_now=(val_loss-loss)/loss + self._overfit_idx=self._overfit_idx_alpha*overfit_now+\ + (1-self._overfit_idx_alpha)*self._overfit_idx + + def setup(self, + run_name: str, + ns: str, + custom_args: Dict = {}, + verbose: bool = False, + drop_dir_name: str = None, + eval: bool = False, + resume: bool = False, + model_path: 
str = None, + n_eval_timesteps: int = None, + comment: str = "", + dump_checkpoints: bool = False, + norm_obs: bool = True, + rescale_obs: bool = False): + + tot_tsteps=int(100e6) + if "tot_tsteps" in custom_args: + tot_tsteps=custom_args["tot_tsteps"] + + self._verbose = verbose + + self._ns=ns # only used for shared mem stuff + + self._dump_checkpoints = dump_checkpoints + + self._init_algo_shared_data(static_params=self._hyperparameters) # can only handle dicts with + # numeric values + + if "full_env_db" in custom_args: + self._full_env_db=custom_args["full_env_db"] + + self._eval = eval + self._resume=resume + if self._eval and self._resume: + Journal.log(self.__class__.__name__, + "setup", + f"Cannot set both eval and resume to true. Exiting.", + LogType.EXCEP, + throw_when_excep = True) + + self._load_qf=False + if self._eval: + if "load_qf" in custom_args: + self._load_qf=custom_args["load_qf"] + if self._resume: + self._load_qf=True # must load qf when resuming + self._eval=False + try: + self._det_eval=custom_args["det_eval"] + except: + pass + + self._override_agent_actions=False + if "override_agent_actions" in custom_args: + self._override_agent_actions=custom_args["override_agent_actions"] + + if self._override_agent_actions: # force evaluation mode + Journal.log(self.__class__.__name__, + "setup", + "will force evaluation mode since override_agent_actions was set to true", + LogType.INFO, + throw_when_excep = True) + self._eval=True + self._validate=False + self._load_qf=False + self._det_eval=False + self._resume=False + + self._run_name = run_name + from datetime import datetime + self._time_id = datetime.now().strftime('d%Y_%m_%d_h%H_m%M_s%S') + self._unique_id = self._time_id + "-" + self._run_name + + self._hyperparameters["unique_run_id"]=self._unique_id + self._hyperparameters.update(custom_args) + + self._torch_device = torch.device("cuda" if torch.cuda.is_available() and self._use_gpu else "cpu") + + try: + 
layer_width_actor=self._hyperparameters["actor_lwidth"] + layer_width_critic=self._hyperparameters["critic_lwidth"] + n_hidden_layers_actor=self._hyperparameters["actor_n_hlayers"] + n_hidden_layers_critic=self._hyperparameters["critic_n_hlayers"] + except: + layer_width_actor=256 + layer_width_critic=512 + n_hidden_layers_actor=2 + n_hidden_layers_critic=4 + pass + + use_torch_compile=False + add_weight_norm=False + add_layer_norm=False + add_batch_norm=False + compression_ratio=-1.0 + if "use_torch_compile" in self._hyperparameters and \ + self._hyperparameters["use_torch_compile"]: + use_torch_compile=True + if "add_weight_norm" in self._hyperparameters and \ + self._hyperparameters["add_weight_norm"]: + add_weight_norm=True + if "add_layer_norm" in self._hyperparameters and \ + self._hyperparameters["add_layer_norm"]: + add_layer_norm=True + if "add_batch_norm" in self._hyperparameters and \ + self._hyperparameters["add_batch_norm"]: + add_batch_norm=True + if "compression_ratio" in self._hyperparameters: + compression_ratio=self._hyperparameters["compression_ratio"] + + act_rescale_critic=False + if "act_rescale_critic" in self._hyperparameters: + act_rescale_critic=self._hyperparameters["act_rescale_critic"] + if not self._override_agent_actions: + self._agent = SACAgent(obs_dim=self._env.obs_dim(), + obs_ub=self._env.get_obs_ub().flatten().tolist(), + obs_lb=self._env.get_obs_lb().flatten().tolist(), + actions_dim=self._env.actions_dim(), + actions_ub=None, # agent will assume actions are properly normalized in [-1, 1] by the env + actions_lb=None, + rescale_obs=rescale_obs, + norm_obs=norm_obs, + use_action_rescale_for_critic=act_rescale_critic, + compression_ratio=compression_ratio, + device=self._torch_device, + dtype=self._dtype, + is_eval=self._eval, + load_qf=self._load_qf, + debug=self._debug, + layer_width_actor=layer_width_actor, + layer_width_critic=layer_width_critic, + n_hidden_layers_actor=n_hidden_layers_actor, + 
n_hidden_layers_critic=n_hidden_layers_critic, + torch_compile=use_torch_compile, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm) + else: # we use a fake agent + self._agent = DummyAgent(obs_dim=self._env.obs_dim(), + actions_dim=self._env.actions_dim(), + actions_ub=None, + actions_lb=None, + device=self._torch_device, + dtype=self._dtype, + debug=self._debug) + + # loging actual widths and layers in case they were override inside agent init + self._hyperparameters["actor_lwidth_actual"]=self._agent.layer_width_actor() + self._hyperparameters["actor_n_hlayers_actual"]=self._agent.n_hidden_layers_actor() + self._hyperparameters["critic_lwidth_actual"]=self._agent.layer_width_critic() + self._hyperparameters["critic_n_hlayers_actual"]=self._agent.n_hidden_layers_critic() + + # load model if necessary + if self._eval and (not self._override_agent_actions): # load pretrained model + if model_path is None: + msg = f"No model path provided in eval mode! Was this intentional? \ + No jnt remapping will be available and a randomly init agent will be used." + Journal.log(self.__class__.__name__, + "setup", + msg, + LogType.WARN, + throw_when_excep = True) + if n_eval_timesteps is None: + Journal.log(self.__class__.__name__, + "setup", + f"When eval is True, n_eval_timesteps should be provided!!", + LogType.EXCEP, + throw_when_excep = True) + # everything is ok + self._model_path = model_path + if self._model_path is not None: + self._load_model(self._model_path) + + # overwrite init params + self._init_params(tot_tsteps=n_eval_timesteps, + custom_args=custom_args) + else: + if self._resume: + if model_path is None: + msg = f"No model path provided in resume mode! Please provide a valid checkpoint path." 
+ Journal.log(self.__class__.__name__, + "setup", + msg, + LogType.EXCEP, + throw_when_excep = True) + self._model_path = model_path + if self._model_path is not None: + self._load_model(self._model_path) # load model from checkpoint (including q functions and running normalizers) + self._init_params(tot_tsteps=tot_tsteps, + custom_args=custom_args) + + # adding additional db info + self._hyperparameters["obs_names"]=self._env.obs_names() + self._hyperparameters["action_names"]=self._env.action_names() + self._hyperparameters["sub_reward_names"]=self._env.sub_rew_names() + self._hyperparameters["sub_trunc_names"]=self._env.sub_trunc_names() + self._hyperparameters["sub_term_names"]=self._env.sub_term_names() + + self._allow_expl_during_eval=False + if "allow_expl_during_eval" in self._hyperparameters: + self._allow_expl_during_eval=self._hyperparameters["allow_expl_during_eval"] + + # reset environment + self._env.reset() + if self._eval: + self._env.switch_random_reset(on=False) + + if self._debug and (not self._override_agent_actions): + with torch.no_grad(): + init_obs = self._env.get_obs(clone=True) + _, init_log_pi, _ = self._agent.get_action(init_obs) + init_policy_entropy = (-init_log_pi[0]).mean().item() + init_policy_entropy_per_action = init_policy_entropy / float(self._actions_dim) + Journal.log(self.__class__.__name__, + "setup", + f"Initial policy entropy per action: {init_policy_entropy_per_action:.4f})", + LogType.INFO, + throw_when_excep = True) + + # create dump directory + copy important files for debug + self._init_drop_dir(drop_dir_name) + self._hyperparameters["drop_dir"]=self._drop_dir + + # add env options to hyperparameters + self._hyperparameters.update(self._env_opts) + + if not self._eval: + self._init_agent_optimizers() + + self._init_replay_buffers() # only needed when training + if self._validate: + self._init_validation_buffers() + + if self._autotune: + self._init_alpha_autotuning() + + if self._use_rnd: + self._rnd_net = 
RNDFull(input_dim=self._rnd_indim, output_dim=self._rnd_outdim, + layer_width=self._rnd_lwidth, n_hidden_layers=self._rnd_hlayers, + device=self._torch_device, + dtype=self._dtype, + normalize=norm_obs # normalize if also used for SAC agent + ) + self._rnd_optimizer = torch.optim.Adam(self._rnd_net.rnd_predictor_net.parameters(), + lr=self._rnd_lr) + + self._rnd_input = torch.full(size=(self._batch_size, self._rnd_net.input_dim()), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._rnd_bnorm_input = torch.full(size=(self._bnorm_bsize, self._rnd_net.input_dim()), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + + self._proc_exp_bonus_all = torch.full(size=(self._batch_size, 1), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._raw_exp_bonus_all = torch.full(size=(self._batch_size, 1), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + # if self._autotune_rnd_scale: + # self._reward_normalizer=RunningNormalizer((1,), epsilon=1e-8, + # device=self._torch_device, dtype=self._dtype, + # freeze_stats=False, + # debug=self._debug) + + self._init_dbdata() + + if (self._debug): + if self._remote_db: + job_type = "evaluation" if self._eval else "training" + project="IBRIDO-ablations" + wandb.init( + project=project, + group=self._run_name, + name=self._unique_id, + id=self._unique_id, + job_type=job_type, + # tags=None, + notes=comment, + resume="never", # do not allow runs with the same unique id + mode="online", # "online", "offline" or "disabled" + entity=None, + sync_tensorboard=True, + config=self._hyperparameters, + monitor_gym=True, + save_code=True, + dir=self._drop_dir + ) + wandb.watch((self._agent), log="all", log_freq=1000, log_graph=False) + + if "demo_stop_thresh" in self._hyperparameters: + self._demo_stop_thresh=self._hyperparameters["demo_stop_thresh"] + + actions = 
self._env.get_actions() + self._random_uniform = torch.full_like(actions, fill_value=0.0) # used for sampling random actions (preallocated + # for efficiency) + self._random_normal = torch.full_like(self._random_uniform,fill_value=0.0) + # for efficiency) + + self._actions_override=None + if self._override_agent_actions: + from aug_mpc.utils.shared_data.training_env import Actions + self._actions_override = Actions(namespace=ns+"_override", + n_envs=self._num_envs, + action_dim=actions.shape[1], + action_names=self._env.action_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + self._actions_override.run() + + self._start_time_tot = time.perf_counter() + + self._start_time = time.perf_counter() + + self._replay_bf_full = False + self._validation_bf_full = False + self._bpos=0 + self._bpos_val=0 + + self._is_done = False + self._setup_done = True + + def is_done(self): + + return self._is_done + + def model_path(self): + + return self._model_path + + def _init_params(self, + tot_tsteps: int, + custom_args: Dict = {}): + + self._collection_freq=1 + self._update_freq=4 + + self._replay_buffer_size_vec=10*self._task_rand_timeout_ub # cover at least a number of eps + self._replay_buffer_size = self._replay_buffer_size_vec*self._num_envs + if self._replay_buffer_size_vec < 0: # in case env did not properly define _task_rand_timeout_ub + self._replay_buffer_size = int(1e6) + self._replay_buffer_size_vec = self._replay_buffer_size//self._num_envs + self._replay_buffer_size=self._replay_buffer_size_vec*self._num_envs + + self._batch_size = 16394 + + new_transitions_per_batch=self._collection_freq*self._num_envs/self._replay_buffer_size # assumes uniform sampling + self._utd_ratio=self._update_freq/(new_transitions_per_batch*self._batch_size) + + self._lr_policy = 1e-3 + self._lr_q = 5e-4 + + self._discount_factor = 0.99 + if "discount_factor" in 
custom_args: + self._discount_factor=custom_args["discount_factor"] + + self._smoothing_coeff = 0.01 + + self._policy_freq = 2 + self._trgt_net_freq = 1 + self._rnd_freq = 1 + + # exploration + + # entropy regularization (separate "discrete" and "continuous" actions) + self._entropy_metric_high = 0.5 + self._entropy_metric_low = 0.0 + + # self._entropy_disc_start = -0.05 + # self._entropy_disc_end = -0.5 + + # self._entropy_cont_start = -0.05 + # self._entropy_cont_end = -2.0 + + self._entropy_disc_start = -0.2 + self._entropy_disc_end = -0.2 + + self._entropy_cont_start = -0.5 + self._entropy_cont_end = -0.5 + + # enable/disable entropy annealing (default: enabled) + self._anneal_entropy = False + + self._trgt_avrg_entropy_per_action_disc = self._entropy_disc_start + self._trgt_avrg_entropy_per_action_cont = self._entropy_cont_start + + self._disc_idxs = self._is_discrete_actions.clone().to(torch.long) + self._cont_idxs = self._is_continuous_actions.clone().to(torch.long) + + self._target_entropy_disc = float(self._disc_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_disc) + self._target_entropy_cont = float(self._cont_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_cont) + self._target_entropy = self._target_entropy_disc + self._target_entropy_cont + self._trgt_avrg_entropy_per_action = self._target_entropy / float(max(self._actions_dim, 1)) + self._hyperparameters["anneal_entropy"] = self._anneal_entropy + + self._autotune = True + self._alpha_disc = 0.2 # initial values + self._alpha_cont = 0.2 + self._alpha = 0.5*(self._alpha_disc + self._alpha_cont) + self._log_alpha_disc = math.log(self._alpha_disc) + self._log_alpha_cont = math.log(self._alpha_cont) + self._a_optimizer_disc = None + self._a_optimizer_cont = None + + # random expl ens + self._expl_envs_perc=0.0 # [0, 1] + if "expl_envs_perc" in custom_args: + self._expl_envs_perc=custom_args["expl_envs_perc"] + self._n_expl_envs = int(self._num_envs*self._expl_envs_perc) # n of random envs 
on which noisy actions will be applied + self._noise_freq_vec = 100 # substeps + self._noise_duration_vec = 5 # should be less than _noise_freq + # correct with env substepping + self._noise_freq_vec=self._noise_freq_vec//self._env_n_action_reps + self._noise_duration_vec=self._noise_duration_vec//self._env_n_action_reps + + self._continuous_act_expl_noise_std=0.3 # wrt actions scale + self._discrete_act_expl_noise_std=1.2 # setting it a bit > 1 helps in ensuring discr. actions range is explored + + # rnd + self._use_rnd=True + self._rnd_net=None + self._rnd_optimizer = None + self._rnd_lr = 1e-3 + if "use_rnd" in custom_args and (not self._eval): + self._use_rnd=custom_args["use_rnd"] + self._rnd_weight=1.0 + self._alpha_rnd=0.0 + self._novelty_scaler=None + if self._use_rnd: + from adarl.utils.NoveltyScaler import NoveltyScaler + + self._novelty_scaler=NoveltyScaler(th_device=self._torch_device, + bonus_weight=self._rnd_weight, + avg_alpha=self._alpha_rnd) + + self._rnd_lwidth=512 + self._rnd_hlayers=3 + self._rnd_outdim=16 + self._rnd_indim=self._obs_dim+self._actions_dim + + # batch normalization + self._bnorm_bsize = 4096 + self._bnorm_vecfreq_nom = 5 # wrt vec steps + # make sure _bnorm_vecfreq is a multiple of _collection_freq + self._bnorm_vecfreq = (self._bnorm_vecfreq_nom//self._collection_freq)*self._collection_freq + if self._bnorm_vecfreq == 0: + self._bnorm_vecfreq=self._collection_freq + self._reward_normalizer=None + + self._total_timesteps = int(tot_tsteps) + # self._total_timesteps = self._total_timesteps//self._env_n_action_reps # correct with n of action reps + self._total_timesteps_vec = self._total_timesteps // self._num_envs + self._total_steps = self._total_timesteps_vec//self._collection_freq + self._total_timesteps_vec = self._total_steps*self._collection_freq # correct to be a multiple of self._total_steps + self._total_timesteps = self._total_timesteps_vec*self._num_envs # actual n transitions + + # self._warmstart_timesteps = int(5e3) + 
warmstart_length_single_env=min(self._episode_timeout_lb, self._episode_timeout_ub, + self._task_rand_timeout_lb, self._task_rand_timeout_ub) + self._warmstart_timesteps=warmstart_length_single_env*self._num_envs + if self._warmstart_timesteps < self._batch_size: # ensure we collect sufficient experience before + # starting training + self._warmstart_timesteps=4*self._batch_size + self._warmstart_vectimesteps = self._warmstart_timesteps//self._num_envs + # ensuring multiple of collection_freq + self._warmstart_timesteps = self._num_envs*self._warmstart_vectimesteps # actual + + # period nets resets (for tackling the primacy bias) + self._use_period_resets=False + if "use_period_resets" in custom_args: + self._use_period_resets=custom_args["use_period_resets"] + self._adaptive_resets=True # trigger reset based on overfit metric + self._just_one_reset=False + self._periodic_resets_freq=int(4e6) + self._periodic_resets_start=int(1.5e6) + self._periodic_resets_end=int(0.8*self._total_timesteps) + + self._periodic_resets_vecfreq=self._periodic_resets_freq//self._num_envs + self._periodic_resets_vecfreq = (self._periodic_resets_vecfreq//self._collection_freq)*self._collection_freq + self._reset_vecstep_start=self._periodic_resets_start//self._num_envs + self._reset_vecstep_end=self._periodic_resets_end//self._num_envs + + if self._just_one_reset: + # we set the end as the fist reset + a fraction of the reset frequency (this way only one reset will happen) + self._reset_vecstep_end=int(self._reset_vecstep_start+0.8*self._periodic_resets_vecfreq) + + self._periodic_resets_on=False + + # debug + self._m_checkpoint_freq_nom = 1e6 # n totoal timesteps after which a checkpoint model is dumped + self._m_checkpoint_freq= self._m_checkpoint_freq_nom//self._num_envs + + # expl envs + if self._n_expl_envs>0 and ((self._num_envs-self._n_expl_envs)>0): # log data only from envs which are not altered (e.g. 
by exploration noise) + # computing expl env selector + self._expl_env_selector = torch.randperm(self._num_envs, device="cpu")[:self._n_expl_envs] + self._expl_env_selector_bool[self._expl_env_selector]=True + + # demo envs + if self._demo_env_selector_bool is None: + self._db_env_selector_bool[:]=~self._expl_env_selector_bool + else: # we log db data separately for env which are neither for demo nor for random exploration + self._demo_env_selector_bool=self._demo_env_selector_bool.cpu() + self._demo_env_selector=self._demo_env_selector.cpu() + self._db_env_selector_bool[:]=torch.logical_and(~self._expl_env_selector_bool, ~self._demo_env_selector_bool) + + self._n_expl_envs = self._expl_env_selector_bool.count_nonzero() + self._num_db_envs = self._db_env_selector_bool.count_nonzero() + + if not self._num_db_envs>0: + Journal.log(self.__class__.__name__, + "_init_params", + "No indipendent db env can be computed (check your demo and expl settings)! Will use all envs.", + LogType.EXCEP, + throw_when_excep = False) + self._num_db_envs=self._num_envs + self._db_env_selector_bool[:]=True + self._db_env_selector=torch.nonzero(self._db_env_selector_bool).flatten() + + self._transition_noise_freq=float(self._noise_duration_vec)/float(self._noise_freq_vec) + self._env_noise_freq=float(self._n_expl_envs)/float(self._num_envs) + self._noise_buff_freq=self._transition_noise_freq*self._env_noise_freq + + self._db_vecstep_frequency = 32 # log db data every n (vectorized) SUB timesteps + self._db_vecstep_frequency=round(self._db_vecstep_frequency/self._env_n_action_reps) # correcting with actions reps + # correct db vecstep frequency to ensure it's a multiple of self._collection_freq + self._db_vecstep_frequency=(self._db_vecstep_frequency//self._collection_freq)*self._collection_freq + if self._db_vecstep_frequency == 0: + self._db_vecstep_frequency=self._collection_freq + + self._env_db_checkpoints_vecfreq=150*self._db_vecstep_frequency # detailed db data from envs + + 
self._validate=True + self._validation_collection_vecfreq=50 # add vec transitions to val buffer with some vec freq + self._validation_ratio=1.0/self._validation_collection_vecfreq # [0, 1], 0.1 10% size of training buffer + self._validation_buffer_size_vec = int(self._replay_buffer_size*self._validation_ratio)//self._num_envs + self._validation_buffer_size = self._validation_buffer_size_vec*self._num_envs + self._validation_batch_size = int(self._batch_size*self._validation_ratio) + self._validation_db_vecstep_freq=self._db_vecstep_frequency + if self._eval: # no need for validation transitions during evaluation + self._validate=False + self._overfit_idx=0.0 + self._overfit_idx_alpha=0.03 # exponential MA + self._overfit_idx_thresh=2.0 + + self._n_policy_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq #TD3 delayed update + self._n_qf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq # qf updated at each vec timesteps + self._n_tqf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq//self._trgt_net_freq + + self._exp_to_policy_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_policy_updates_to_be_done) + self._exp_to_qf_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_qf_updates_to_be_done) + self._exp_to_qft_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_tqf_updates_to_be_done) + + self._db_data_size = round(self._total_timesteps_vec/self._db_vecstep_frequency)+self._db_vecstep_frequency + + # write them to hyperparam dictionary for debugging + self._hyperparameters["n_envs"] = self._num_envs + self._hyperparameters["obs_dim"] = self._obs_dim + self._hyperparameters["actions_dim"] = self._actions_dim + + self._hyperparameters["seed"] = self._seed + self._hyperparameters["using_gpu"] = self._use_gpu + self._hyperparameters["total_timesteps_vec"] = 
self._total_timesteps_vec + + self._hyperparameters["collection_freq"]=self._collection_freq + self._hyperparameters["update_freq"]=self._update_freq + self._hyperparameters["total_steps"]=self._total_steps + + self._hyperparameters["utd_ratio"] = self._utd_ratio + + self._hyperparameters["n_policy_updates_when_done"] = self._n_policy_updates_to_be_done + self._hyperparameters["n_qf_updates_when_done"] = self._n_qf_updates_to_be_done + self._hyperparameters["n_tqf_updates_when_done"] = self._n_tqf_updates_to_be_done + self._hyperparameters["experience_to_policy_grad_steps_ratio"] = self._exp_to_policy_grad_ratio + self._hyperparameters["experience_to_quality_fun_grad_steps_ratio"] = self._exp_to_qf_grad_ratio + self._hyperparameters["experience_to_trgt_quality_fun_grad_steps_ratio"] = self._exp_to_qft_grad_ratio + + self._hyperparameters["episodes timeout lb"] = self._episode_timeout_lb + self._hyperparameters["episodes timeout ub"] = self._episode_timeout_ub + self._hyperparameters["task rand timeout lb"] = self._task_rand_timeout_lb + self._hyperparameters["task rand timeout ub"] = self._task_rand_timeout_ub + + self._hyperparameters["warmstart_timesteps"] = self._warmstart_timesteps + self._hyperparameters["warmstart_vectimesteps"] = self._warmstart_vectimesteps + self._hyperparameters["replay_buffer_size"] = self._replay_buffer_size + self._hyperparameters["batch_size"] = self._batch_size + self._hyperparameters["total_timesteps"] = self._total_timesteps + self._hyperparameters["lr_policy"] = self._lr_policy + self._hyperparameters["lr_q"] = self._lr_q + self._hyperparameters["discount_factor"] = self._discount_factor + self._hyperparameters["smoothing_coeff"] = self._smoothing_coeff + self._hyperparameters["policy_freq"] = self._policy_freq + self._hyperparameters["trgt_net_freq"] = self._trgt_net_freq + self._hyperparameters["autotune"] = self._autotune + self._hyperparameters["target_entropy"] = self._target_entropy + 
self._hyperparameters["target_entropy_disc"] = self._target_entropy_disc + self._hyperparameters["target_entropy_cont"] = self._target_entropy_cont + self._hyperparameters["disc_entropy_idxs"] = self._disc_idxs.tolist() + self._hyperparameters["cont_entropy_idxs"] = self._cont_idxs.tolist() + self._hyperparameters["log_alpha_disc"] = None if self._log_alpha_disc is None else self._log_alpha_disc + self._hyperparameters["log_alpha_cont"] = None if self._log_alpha_cont is None else self._log_alpha_cont + self._hyperparameters["alpha"] = self._alpha + self._hyperparameters["alpha_disc"] = self._alpha_disc + self._hyperparameters["alpha_cont"] = self._alpha_cont + self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq + self._hyperparameters["db_vecstep_frequency"] = self._db_vecstep_frequency + self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq + + self._hyperparameters["use_period_resets"]= self._use_period_resets + self._hyperparameters["just_one_reset"]= self._just_one_reset + self._hyperparameters["period_resets_vecfreq"]= self._periodic_resets_vecfreq + self._hyperparameters["period_resets_vecstart"]= self._reset_vecstep_start + self._hyperparameters["period_resets_vecend"]= self._reset_vecstep_end + self._hyperparameters["period_resets_freq"]= self._periodic_resets_freq + self._hyperparameters["period_resets_start"]= self._periodic_resets_start + self._hyperparameters["period_resets_end"]= self._periodic_resets_end + + self._hyperparameters["use_rnd"] = self._use_rnd + self._hyperparameters["rnd_lwidth"] = self._rnd_lwidth + self._hyperparameters["rnd_hlayers"] = self._rnd_hlayers + self._hyperparameters["rnd_outdim"] = self._rnd_outdim + self._hyperparameters["rnd_indim"] = self._rnd_indim + + self._hyperparameters["n_db_envs"] = self._num_db_envs + self._hyperparameters["n_expl_envs"] = self._n_expl_envs + self._hyperparameters["noise_freq"] = self._noise_freq_vec + self._hyperparameters["noise_duration_vec"] = 
self._noise_duration_vec + self._hyperparameters["noise_buff_freq"] = self._noise_buff_freq + self._hyperparameters["continuous_act_expl_noise_std"] = self._continuous_act_expl_noise_std + self._hyperparameters["discrete_act_expl_noise_std"] = self._discrete_act_expl_noise_std + + self._hyperparameters["n_demo_envs"] = self._env.n_demo_envs() + + self._hyperparameters["bnorm_bsize"] = self._bnorm_bsize + self._hyperparameters["bnorm_vecfreq"] = self._bnorm_vecfreq + + self._hyperparameters["validate"] = self._validate + self._hyperparameters["validation_ratio"] = self._validation_ratio + self._hyperparameters["validation_buffer_size_vec"] = self._validation_buffer_size_vec + self._hyperparameters["validation_buffer_size"] = self._validation_buffer_size + self._hyperparameters["validation_batch_size"] = self._validation_batch_size + self._hyperparameters["validation_collection_vecfreq"] = self._validation_collection_vecfreq + + # small debug log + info = f"\nUsing \n" + \ + f"total (vectorized) timesteps to be simulated {self._total_timesteps_vec}\n" + \ + f"total timesteps to be simulated {self._total_timesteps}\n" + \ + f"warmstart timesteps {self._warmstart_timesteps}\n" + \ + f"training replay buffer size {self._replay_buffer_size}\n" + \ + f"training replay buffer vec size {self._replay_buffer_size_vec}\n" + \ + f"training batch size {self._batch_size}\n" + \ + f"validation enabled {self._validate}\n" + \ + f"validation buffer size {self._validation_buffer_size}\n" + \ + f"validation buffer vec size {self._validation_buffer_size_vec}\n" + \ + f"validation collection freq {self._validation_collection_vecfreq}\n" + \ + f"validation update freq {self._validation_db_vecstep_freq}\n" + \ + f"validation batch size {self._validation_batch_size}\n" + \ + f"policy update freq {self._policy_freq}\n" + \ + f"target networks freq {self._trgt_net_freq}\n" + \ + f"episode timeout max steps {self._episode_timeout_ub}\n" + \ + f"episode timeout min steps 
{self._episode_timeout_lb}\n" + \ + f"task rand. max n steps {self._task_rand_timeout_ub}\n" + \ + f"task rand. min n steps {self._task_rand_timeout_lb}\n" + \ + f"number of action reps {self._env_n_action_reps}\n" + \ + f"total policy updates to be performed: {self._n_policy_updates_to_be_done}\n" + \ + f"total q fun updates to be performed: {self._n_qf_updates_to_be_done}\n" + \ + f"total trgt q fun updates to be performed: {self._n_tqf_updates_to_be_done}\n" + \ + f"experience to policy grad ratio: {self._exp_to_policy_grad_ratio}\n" + \ + f"experience to q fun grad ratio: {self._exp_to_qf_grad_ratio}\n" + \ + f"experience to trgt q fun grad ratio: {self._exp_to_qft_grad_ratio}\n" + \ + f"amount of noisy transitions in replay buffer: {self._noise_buff_freq*100}% \n" + db_env_idxs=", ".join(map(str, self._db_env_selector.tolist())) + n_db_envs_str=f"db envs {self._num_db_envs}/{self._num_envs} \n" + info=info + n_db_envs_str + "Debug env. indexes are [" + db_env_idxs+"]\n" + if self._env.demo_env_idxs() is not None: + demo_idxs_str=", ".join(map(str, self._env.demo_env_idxs().tolist())) + n_demo_envs_str=f"demo envs {self._env.n_demo_envs()}/{self._num_envs} \n" + info=info + n_demo_envs_str + "Demo env. indexes are [" + demo_idxs_str+"]\n" + if self._expl_env_selector is not None: + random_expl_idxs=", ".join(map(str, self._expl_env_selector.tolist())) + n_expl_envs_str=f"expl envs {self._n_expl_envs}/{self._num_envs} \n" + info=info + n_expl_envs_str + "Random exploration env. 
indexes are [" + random_expl_idxs+"]\n" + + Journal.log(self.__class__.__name__, + "_init_params", + info, + LogType.INFO, + throw_when_excep = True) + + # init counters + self._step_counter = 0 + self._vec_transition_counter = 0 + self._update_counter = 0 + self._log_it_counter = 0 + + def _init_dbdata(self): + + # initalize some debug data + self._collection_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._collection_t = -1.0 + + self._env_step_fps = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._env_step_rt_factor = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._batch_norm_update_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._policy_update_t_start = -1.0 + self._policy_update_t = -1.0 + self._policy_update_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._policy_update_fps = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._validation_t = -1.0 + self._validation_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._n_of_played_episodes = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_timesteps_done = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_policy_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_qfun_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_tqfun_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._elapsed_min = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._ep_tsteps_env_distribution = 
torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.int32, fill_value=-1, device="cpu") + + self._reward_names = self._episodic_reward_metrics.reward_names() + self._reward_names_str = "[" + ', '.join(self._reward_names) + "]" + self._n_rewards = self._episodic_reward_metrics.n_rewards() + + # db environments + self._tot_rew_max = torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_min = torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_max_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_min_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_avrg_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_std_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._sub_rew_max = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_max_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") 
+ self._sub_rew_avrg_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_std_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # custom data from env # (log data just from db envs for simplicity) + self._custom_env_data = {} + db_data_names = list(self._env.custom_db_data.keys()) + for dbdatan in db_data_names: # loop thorugh custom data + + self._custom_env_data[dbdatan] = {} + + max = self._env.custom_db_data[dbdatan].get_max(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1) + avrg = self._env.custom_db_data[dbdatan].get_avrg(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1) + min = self._env.custom_db_data[dbdatan].get_min(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1) + max_over_envs = self._env.custom_db_data[dbdatan].get_max_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + min_over_envs = self._env.custom_db_data[dbdatan].get_min_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + avrg_over_envs = self._env.custom_db_data[dbdatan].get_avrg_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + std_over_envs = self._env.custom_db_data[dbdatan].get_std_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + + self._custom_env_data[dbdatan]["max"] =torch.full((self._db_data_size, + max.shape[0], + max.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["avrg"] =torch.full((self._db_data_size, + avrg.shape[0], + avrg.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["min"] =torch.full((self._db_data_size, + min.shape[0], + min.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["max_over_envs"] =torch.full((self._db_data_size, + max_over_envs.shape[0], + 
max_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["min_over_envs"] =torch.full((self._db_data_size, + min_over_envs.shape[0], + min_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["avrg_over_envs"] =torch.full((self._db_data_size, + avrg_over_envs.shape[0], + avrg_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["std_over_envs"] =torch.full((self._db_data_size, + std_over_envs.shape[0], + std_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # exploration envs + if self._n_expl_envs > 0: + self._ep_tsteps_expl_env_distribution = torch.full((self._db_data_size, self._n_expl_envs, 1), + dtype=torch.int32, fill_value=-1, device="cpu") + + # also log sub rewards metrics for exploration envs + self._sub_rew_max_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_max_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_std_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # demo 
environments + self._demo_envs_active = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._demo_perf_metric = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + if self._env.demo_env_idxs() is not None: + n_demo_envs=self._env.demo_env_idxs().shape[0] + + self._ep_tsteps_demo_env_distribution = torch.full((self._db_data_size, n_demo_envs, 1), + dtype=torch.int32, fill_value=-1, device="cpu") + + # also log sub rewards metrics for exploration envs + self._sub_rew_max_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_max_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_std_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # algorithm-specific db info + self._qf1_vals_mean = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_mean = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._min_qft_vals_mean = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + 
self._qf1_vals_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._min_qft_vals_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf1_vals_max = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf1_vals_min = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_max = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_min = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._qf1_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._actor_loss= torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_disc = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_cont = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + if self._validate: # add db data for validation losses + self._overfit_index = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf1_loss_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_loss_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._actor_loss_validation= 
torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_disc_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_cont_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._alphas = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alphas_disc = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alphas_cont = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._policy_entropy_mean=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_std=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_max=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_min=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_mean=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_std=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_max=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_min=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_cont_mean=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + 
self._policy_entropy_cont_std=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_cont_max=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_cont_min=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._running_mean_obs=None + self._running_std_obs=None + if self._agent.obs_running_norm is not None and not self._eval: + # some db data for the agent + self._running_mean_obs = torch.full((self._db_data_size, self._env.obs_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._running_std_obs = torch.full((self._db_data_size, self._env.obs_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + + # RND + self._rnd_loss=None + if self._use_rnd: + self._rnd_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._expl_bonus_raw_avrg = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._expl_bonus_raw_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_raw_min = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_raw_max = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._expl_bonus_proc_avrg = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._expl_bonus_proc_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_proc_min = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_proc_max = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + + 
self._n_rnd_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._running_mean_rnd_input = None + self._running_std_rnd_input = None + if self._rnd_net.obs_running_norm is not None: + self._running_mean_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._running_std_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + + def _init_agent_optimizers(self): + self._qf_optimizer = optim.Adam(list(self._agent.qf1.parameters()) + list(self._agent.qf2.parameters()), + lr=self._lr_q) + self._actor_optimizer = optim.Adam(list(self._agent.actor.parameters()), + lr=self._lr_policy) + + def _init_alpha_autotuning(self): + self._log_alpha_disc = torch.full((1,), fill_value=math.log(self._alpha_disc), requires_grad=True, device=self._torch_device) + self._log_alpha_cont = torch.full((1,), fill_value=math.log(self._alpha_cont), requires_grad=True, device=self._torch_device) + self._alpha_disc = self._log_alpha_disc.exp().item() + self._alpha_cont = self._log_alpha_cont.exp().item() + self._alpha = 0.5*(self._alpha_disc + self._alpha_cont) + self._a_optimizer_disc = optim.Adam([self._log_alpha_disc], lr=self._lr_q) + self._a_optimizer_cont = optim.Adam([self._log_alpha_cont], lr=self._lr_q) + + def _init_replay_buffers(self): + + self._bpos = 0 + + self._obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._actions = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._actions_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._rewards = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1), + fill_value=torch.nan, + dtype=self._dtype, + 
device=self._torch_device, + requires_grad=False) + self._next_obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._next_terminal = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1), + fill_value=False, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + + def _init_validation_buffers(self): + + self._bpos_val = 0 + + self._obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._actions_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._actions_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._rewards_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._next_obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._next_terminal_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1), + fill_value=False, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + + def _save_model(self, + is_checkpoint: bool = False): + + path = self._model_path + if is_checkpoint: # use iteration as id + path = path + "_checkpoint" + str(self._log_it_counter) + info = f"Saving model to {path}" + Journal.log(self.__class__.__name__, + "_save_model", + info, + LogType.INFO, + throw_when_excep = True) + agent_state_dict=self._agent.state_dict() + if not self._eval: # training + # we log the joints which were observed during training + observed_joints=self._env.get_observed_joints() + if 
observed_joints is not None: + agent_state_dict["observed_jnts"]=self._env.get_observed_joints() + + torch.save(agent_state_dict, path) # saves whole agent state + # torch.save(self._agent.parameters(), path) # only save agent parameters + info = f"Done." + Journal.log(self.__class__.__name__, + "_save_model", + info, + LogType.INFO, + throw_when_excep = True) + + def _dump_env_checkpoints(self): + + path = self._env_db_checkpoints_fname+str(self._log_it_counter) + + if path is not None: + info = f"Saving env db checkpoint data to {path}" + Journal.log(self.__class__.__name__, + "_dump_env_checkpoints", + info, + LogType.INFO, + throw_when_excep = True) + + with h5py.File(path+".hdf5", 'w') as hf: + + for key, value in self._hyperparameters.items(): + if value is None: + value = "None" + hf.attrs[key] = value + + # full training envs + sub_rew_full=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._db_env_selector) + tot_rew_full=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._db_env_selector) + + if self._n_expl_envs > 0: + sub_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._expl_env_selector) + tot_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._expl_env_selector) + if self._env.n_demo_envs() > 0: + sub_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._demo_env_selector) + tot_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._demo_env_selector) + + ep_vec_freq=self._episodic_reward_metrics.ep_vec_freq() # assuming all db data was collected with the same ep_vec_freq + + hf.attrs['sub_reward_names'] = self._reward_names + hf.attrs['log_iteration'] = self._log_it_counter + hf.attrs['n_timesteps_done'] = self._n_timesteps_done[self._log_it_counter] + hf.attrs['n_policy_updates'] = self._n_policy_updates[self._log_it_counter] + hf.attrs['elapsed_min'] = 
self._elapsed_min[self._log_it_counter] + hf.attrs['ep_vec_freq'] = ep_vec_freq + hf.attrs['n_expl_envs'] = self._n_expl_envs + hf.attrs['n_demo_envs'] = self._env.n_demo_envs() + + # first dump custom db data names + db_data_names = list(self._env.custom_db_data.keys()) + for db_dname in db_data_names: + episodic_data_names = self._env.custom_db_data[db_dname].data_names() + var_name = db_dname + hf.attrs[var_name+"_data_names"] = episodic_data_names + + for ep_idx in range(ep_vec_freq): # create separate datasets for each episode + ep_prefix=f'ep_{ep_idx}_' + + # rewards + hf.create_dataset(ep_prefix+'sub_rew', + data=sub_rew_full[ep_idx, :, :, :]) + hf.create_dataset(ep_prefix+'tot_rew', + data=tot_rew_full[ep_idx, :, :, :]) + if self._n_expl_envs > 0: + hf.create_dataset(ep_prefix+'sub_rew_expl', + data=sub_rew_full_expl[ep_idx, :, :, :]) + hf.create_dataset(ep_prefix+'tot_rew_expl', + data=tot_rew_full_expl[ep_idx, :, :, :]) + hf.create_dataset('expl_env_selector', data=self._expl_env_selector.cpu().numpy()) + if self._env.n_demo_envs() > 0: + hf.create_dataset(ep_prefix+'sub_rew_demo', + data=sub_rew_full_demo) + hf.create_dataset(ep_prefix+'tot_rew_demo', + data=tot_rew_full_demo[ep_idx, :, :, :]) + hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().cpu().numpy()) + + # dump all custom env data + db_data_names = list(self._env.custom_db_data.keys()) + for db_dname in db_data_names: + episodic_data=self._env.custom_db_data[db_dname] + var_name = db_dname + hf.create_dataset(ep_prefix+var_name, + data=episodic_data.get_full_episodic_data(env_selector=self._db_env_selector)[ep_idx, :, :, :]) + if self._n_expl_envs > 0: + hf.create_dataset(ep_prefix+var_name+"_expl", + data=episodic_data.get_full_episodic_data(env_selector=self._expl_env_selector)[ep_idx, :, :, :]) + if self._env.n_demo_envs() > 0: + hf.create_dataset(ep_prefix+var_name+"_demo", + data=episodic_data.get_full_episodic_data(env_selector=self._demo_env_selector)[ep_idx, :, :, :]) + + 
Journal.log(self.__class__.__name__, + "_dump_env_checkpoints", + "done.", + LogType.INFO, + throw_when_excep = True) + + def done(self): + + if not self._is_done: + + if not self._eval: + self._save_model() + + self._dump_dbinfo_to_file() + + if self._full_env_db: + self._dump_env_checkpoints() + + if self._shared_algo_data is not None: + self._shared_algo_data.write(dyn_info_name=["is_done"], + val=[1.0]) + self._shared_algo_data.close() # close shared memory + + self._env.close() + + self._is_done = True + + def _dump_dbinfo_to_file(self): + + info = f"Dumping debug info at {self._dbinfo_drop_fname}" + Journal.log(self.__class__.__name__, + "_dump_dbinfo_to_file", + info, + LogType.INFO, + throw_when_excep = True) + + with h5py.File(self._dbinfo_drop_fname+".hdf5", 'w') as hf: + n_valid = int(max(0, min(self._log_it_counter, self._db_data_size))) + + def _slice_valid(arr): + if isinstance(arr, torch.Tensor): + return arr[:n_valid] + if isinstance(arr, np.ndarray): + return arr[:n_valid] + return arr + + def _ds(name, arr): + data = _slice_valid(arr) + hf.create_dataset(name, data=data.numpy() if isinstance(data, torch.Tensor) else data) + + # hf.create_dataset('numpy_data', data=numpy_data) + # Write dictionaries to HDF5 as attributes + for key, value in self._hyperparameters.items(): + if value is None: + value = "None" + hf.attrs[key] = value + + # rewards + hf.create_dataset('sub_reward_names', data=self._reward_names, + dtype='S40') + _ds('sub_rew_max', self._sub_rew_max) + _ds('sub_rew_avrg', self._sub_rew_avrg) + _ds('sub_rew_min', self._sub_rew_min) + _ds('sub_rew_max_over_envs', self._sub_rew_max_over_envs) + _ds('sub_rew_min_over_envs', self._sub_rew_min_over_envs) + _ds('sub_rew_avrg_over_envs', self._sub_rew_avrg_over_envs) + _ds('sub_rew_std_over_envs', self._sub_rew_std_over_envs) + + _ds('tot_rew_max', self._tot_rew_max) + _ds('tot_rew_avrg', self._tot_rew_avrg) + _ds('tot_rew_min', self._tot_rew_min) + _ds('tot_rew_max_over_envs', 
self._tot_rew_max_over_envs) + _ds('tot_rew_min_over_envs', self._tot_rew_min_over_envs) + _ds('tot_rew_avrg_over_envs', self._tot_rew_avrg_over_envs) + _ds('tot_rew_std_over_envs', self._tot_rew_std_over_envs) + + _ds('ep_tsteps_env_distr', self._ep_tsteps_env_distribution) + + if self._n_expl_envs > 0: + # expl envs + _ds('sub_rew_max_expl', self._sub_rew_max_expl) + _ds('sub_rew_avrg_expl', self._sub_rew_avrg_expl) + _ds('sub_rew_min_expl', self._sub_rew_min_expl) + _ds('sub_rew_max_over_envs_expl', self._sub_rew_max_over_envs_expl) + _ds('sub_rew_min_over_envs_expl', self._sub_rew_min_over_envs_expl) + _ds('sub_rew_avrg_over_envs_expl', self._sub_rew_avrg_over_envs_expl) + _ds('sub_rew_std_over_envs_expl', self._sub_rew_std_over_envs_expl) + + _ds('ep_timesteps_expl_env_distr', self._ep_tsteps_expl_env_distribution) + + hf.create_dataset('expl_env_selector', data=self._expl_env_selector.numpy()) + + _ds('demo_envs_active', self._demo_envs_active) + _ds('demo_perf_metric', self._demo_perf_metric) + + if self._env.n_demo_envs() > 0: + # demo envs + _ds('sub_rew_max_demo', self._sub_rew_max_demo) + _ds('sub_rew_avrg_demo', self._sub_rew_avrg_demo) + _ds('sub_rew_min_demo', self._sub_rew_min_demo) + _ds('sub_rew_max_over_envs_demo', self._sub_rew_max_over_envs_demo) + _ds('sub_rew_min_over_envs_demo', self._sub_rew_min_over_envs_demo) + _ds('sub_rew_avrg_over_envs_demo', self._sub_rew_avrg_over_envs_demo) + _ds('sub_rew_std_over_envs_demo', self._sub_rew_std_over_envs_demo) + + _ds('ep_timesteps_demo_env_distr', self._ep_tsteps_demo_env_distribution) + + hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().numpy()) + + # profiling data + _ds('env_step_fps', self._env_step_fps) + _ds('env_step_rt_factor', self._env_step_rt_factor) + _ds('collection_dt', self._collection_dt) + _ds('batch_norm_update_dt', self._batch_norm_update_dt) + _ds('policy_update_dt', self._policy_update_dt) + _ds('policy_update_fps', self._policy_update_fps) + _ds('validation_dt', 
self._validation_dt) + + _ds('n_of_played_episodes', self._n_of_played_episodes) + _ds('n_timesteps_done', self._n_timesteps_done) + _ds('n_policy_updates', self._n_policy_updates) + _ds('n_qfun_updates', self._n_qfun_updates) + _ds('n_tqfun_updates', self._n_tqfun_updates) + + _ds('elapsed_min', self._elapsed_min) + + # algo data + _ds('qf1_vals_mean', self._qf1_vals_mean) + _ds('qf2_vals_mean', self._qf2_vals_mean) + _ds('qf1_vals_std', self._qf1_vals_std) + _ds('qf2_vals_std', self._qf2_vals_std) + _ds('qf1_vals_max', self._qf1_vals_max) + _ds('qf1_vals_min', self._qf1_vals_min) + _ds('qf2_vals_max', self._qf2_vals_max) + _ds('qf2_vals_min', self._qf1_vals_min) + + _ds('min_qft_vals_mean', self._min_qft_vals_mean) + _ds('min_qft_vals_std', self._min_qft_vals_std) + + _ds('qf1_loss', self._qf1_loss) + _ds('qf2_loss', self._qf2_loss) + _ds('actor_loss', self._actor_loss) + _ds('alpha_loss', self._alpha_loss) + _ds('alpha_loss_disc', self._alpha_loss_disc) + _ds('alpha_loss_cont', self._alpha_loss_cont) + if self._validate: + _ds('qf1_loss_validation', self._qf1_loss_validation) + _ds('qf2_loss_validation', self._qf2_loss_validation) + _ds('actor_loss_validation', self._actor_loss_validation) + _ds('alpha_loss_validation', self._alpha_loss_validation) + _ds('alpha_loss_disc_validation', self._alpha_loss_disc_validation) + _ds('alpha_loss_cont_validation', self._alpha_loss_cont_validation) + _ds('overfit_index', self._overfit_index) + + _ds('alphas', self._alphas) + _ds('alphas_disc', self._alphas_disc) + _ds('alphas_cont', self._alphas_cont) + + _ds('policy_entropy_mean', self._policy_entropy_mean) + _ds('policy_entropy_std', self._policy_entropy_std) + _ds('policy_entropy_max', self._policy_entropy_max) + _ds('policy_entropy_min', self._policy_entropy_min) + _ds('policy_entropy_disc_mean', self._policy_entropy_disc_mean) + _ds('policy_entropy_disc_std', self._policy_entropy_disc_std) + _ds('policy_entropy_disc_max', self._policy_entropy_disc_max) + 
_ds('policy_entropy_disc_min', self._policy_entropy_disc_min) + _ds('policy_entropy_cont_mean', self._policy_entropy_cont_mean) + _ds('policy_entropy_cont_std', self._policy_entropy_cont_std) + _ds('policy_entropy_cont_max', self._policy_entropy_cont_max) + _ds('policy_entropy_cont_min', self._policy_entropy_cont_min) + hf.create_dataset('target_entropy', data=self._target_entropy) + hf.create_dataset('target_entropy_disc', data=self._target_entropy_disc) + hf.create_dataset('target_entropy_cont', data=self._target_entropy_cont) + + if self._use_rnd: + _ds('n_rnd_updates', self._n_rnd_updates) + _ds('expl_bonus_raw_avrg', self._expl_bonus_raw_avrg) + _ds('expl_bonus_raw_std', self._expl_bonus_raw_std) + _ds('expl_bonus_proc_avrg', self._expl_bonus_proc_avrg) + _ds('expl_bonus_proc_std', self._expl_bonus_proc_std) + + if self._rnd_net.obs_running_norm is not None: + if self._running_mean_rnd_input is not None: + _ds('running_mean_rnd_input', self._running_mean_rnd_input) + if self._running_std_rnd_input is not None: + _ds('running_std_rnd_input', self._running_std_rnd_input) + + # dump all custom env data + db_data_names = list(self._env.custom_db_data.keys()) + for db_dname in db_data_names: + data=self._custom_env_data[db_dname] + subnames = list(data.keys()) + for subname in subnames: + var_name = db_dname + "_" + subname + _ds(var_name, data[subname]) + + # other data + if self._agent.obs_running_norm is not None: + if self._running_mean_obs is not None: + _ds('running_mean_obs', self._running_mean_obs) + if self._running_std_obs is not None: + _ds('running_std_obs', self._running_std_obs) + + info = f"done." 
+ Journal.log(self.__class__.__name__, + "_dump_dbinfo_to_file", + info, + LogType.INFO, + throw_when_excep = True) + + def _load_model(self, + model_path: str): + + info = f"Loading model at {model_path}" + + Journal.log(self.__class__.__name__, + "_load_model", + info, + LogType.INFO, + throw_when_excep = True) + model_dict=torch.load(model_path, + map_location=self._torch_device, + weights_only=False) + + observed_joints=self._env.get_observed_joints() + if not ("observed_jnts" in model_dict): + Journal.log(self.__class__.__name__, + "_load_model", + "No observed joints key found in loaded model dictionary! Let's hope joints are ordered in the same way.", + LogType.WARN) + else: + required_joints=model_dict["observed_jnts"] + self._check_observed_joints(observed_joints,required_joints) + + self._agent.load_state_dict(model_dict) + + if self._eval: + self._switch_training_mode(False) + + def _check_observed_joints(self, + observed_joints, + required_joints): + + observed=set(observed_joints) + required=set(required_joints) + + all_required_joints_avail = required.issubset(observed) + if not all_required_joints_avail: + missing=[item for item in required if item not in observed] + missing_str=', '.join(missing) + Journal.log(self.__class__.__name__, + "_check_observed_joints", + f"not all required joints are available. 
Missing {missing_str}", + LogType.EXCEP, + throw_when_excep = True) + exceeding=observed-required + if not len(exceeding)==0: + # do not support having more joints than the required + exc_jnts=" ".join(list(exceeding)) + Journal.log(self.__class__.__name__, + "_check_observed_joints", + f"more than the required joints found in the observed joint: {exc_jnts}", + LogType.EXCEP, + throw_when_excep = True) + + # here we are sure that required and observed sets match + self._to_agent_jnt_remap=None + if not required_joints==observed_joints: + Journal.log(self.__class__.__name__, + "_check_observed_joints", + f"required jnt obs from agent have different ordering from observed ones. Will compute a remapping.", + LogType.WARN, + throw_when_excep = True) + self._to_agent_jnt_remap = [observed_joints.index(element) for element in required_joints] + + self._env.set_jnts_remapping(remapping= self._to_agent_jnt_remap) + + def drop_dir(self): + return self._drop_dir + + def _init_drop_dir(self, + drop_dir_name: str = None): + + # main drop directory + if drop_dir_name is None: + # drop to current directory + self._drop_dir = "./" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id + else: + self._drop_dir = drop_dir_name + "/" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id + os.makedirs(self._drop_dir) + + self._env_db_checkpoints_dropdir=None + self._env_db_checkpoints_fname=None + if self._full_env_db>0: + self._env_db_checkpoints_dropdir=self._drop_dir+"/env_db_checkpoints" + self._env_db_checkpoints_fname = self._env_db_checkpoints_dropdir + \ + "/" + self._unique_id + "_env_db_checkpoint" + os.makedirs(self._env_db_checkpoints_dropdir) + # model + if not self._eval or (self._model_path is None): + self._model_path = self._drop_dir + "/" + self._unique_id + "_model" + else: # we copy the model under evaluation to the drop dir + shutil.copy(self._model_path, self._drop_dir) + + # debug info + self._dbinfo_drop_fname = 
self._drop_dir + "/" + self._unique_id + "db_info" # extension added later + + # other auxiliary db files + aux_drop_dir = self._drop_dir + "/other" + os.makedirs(aux_drop_dir) + filepaths = self._env.get_file_paths() # envs implementation + filepaths.append(self._this_basepath) # algorithm implementation + filepaths.append(self._this_child_path) + filepaths.append(self._agent.get_impl_path()) # agent implementation + for file in filepaths: + shutil.copy(file, self._drop_dir) + aux_dirs = self._env.get_aux_dir() + for aux_dir in aux_dirs: + shutil.copytree(aux_dir, aux_drop_dir, dirs_exist_ok=True) + + def _get_performance_metric(self): + # to be overridden + return 0.0 + + def _post_step(self): + + self._collection_dt[self._log_it_counter] += \ + (self._collection_t-self._start_time) + self._batch_norm_update_dt[self._log_it_counter] += \ + (self._policy_update_t_start-self._collection_t) + self._policy_update_dt[self._log_it_counter] += \ + (self._policy_update_t - self._policy_update_t_start) + if self._validate: + self._validation_dt[self._log_it_counter] += \ + (self._validation_t - self._policy_update_t) + + self._step_counter+=1 # counts algo steps + + self._demo_envs_active[self._log_it_counter]=self._env.demo_active() + self._demo_perf_metric[self._log_it_counter]=self._get_performance_metric() + if self._env.n_demo_envs() > 0 and (self._demo_stop_thresh is not None): + # check if deactivation condition applies + self._env.switch_demo(active=self._demo_perf_metric[self._log_it_counter] 0: + self._ep_tsteps_expl_env_distribution[self._log_it_counter, :]=\ + self._episodic_reward_metrics.step_counters(env_selector=self._expl_env_selector)*self._env_n_action_reps + + self._sub_rew_max_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._expl_env_selector) + self._sub_rew_avrg_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._expl_env_selector) + 
self._sub_rew_min_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._expl_env_selector) + self._sub_rew_max_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._expl_env_selector) + self._sub_rew_min_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._expl_env_selector) + self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._expl_env_selector) + self._sub_rew_std_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._expl_env_selector) + + # demo envs + if self._env.n_demo_envs() > 0 and self._env.demo_active(): + # only log if demo envs are active (db data will remaing to nan if that case) + self._ep_tsteps_demo_env_distribution[self._log_it_counter, :]=\ + self._episodic_reward_metrics.step_counters(env_selector=self._demo_env_selector)*self._env_n_action_reps + + self._sub_rew_max_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._demo_env_selector) + self._sub_rew_avrg_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._demo_env_selector) + self._sub_rew_min_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._demo_env_selector) + self._sub_rew_max_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._demo_env_selector) + self._sub_rew_min_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._demo_env_selector) + self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, :] = 
self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._demo_env_selector) + self._sub_rew_std_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._demo_env_selector) + + # other data + if self._agent.obs_running_norm is not None: + if self._running_mean_obs is not None: + self._running_mean_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_mean() + if self._running_std_obs is not None: + self._running_std_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_std() + + if self._use_rnd: + if self._running_mean_rnd_input is not None: + self._running_mean_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_mean() + if self._running_std_rnd_input is not None: + self._running_std_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_std() + + # write some episodic db info on shared mem + sub_returns=self._sub_returns.get_torch_mirror(gpu=False) + sub_returns[:, :]=self._episodic_reward_metrics.get_sub_rew_avrg() + tot_returns=self._tot_returns.get_torch_mirror(gpu=False) + tot_returns[:, :]=self._episodic_reward_metrics.get_tot_rew_avrg() + self._sub_returns.synch_all(read=False) + self._tot_returns.synch_all(read=False) + + self._log_info() + + self._log_it_counter+=1 + + if self._dump_checkpoints and \ + (self._vec_transition_counter % self._m_checkpoint_freq == 0): + self._save_model(is_checkpoint=True) + + if self._full_env_db and \ + (self._vec_transition_counter % self._env_db_checkpoints_vecfreq == 0): + self._dump_env_checkpoints() + + if self._vec_transition_counter == self._total_timesteps_vec: + self.done() + + def _should_have_called_setup(self): + + exception = f"setup() was not called!" 
+ + Journal.log(self.__class__.__name__, + "_should_have_called_setup", + exception, + LogType.EXCEP, + throw_when_excep = True) + + def _log_info(self): + + if self._debug or self._verbose: + elapsed_h = self._elapsed_min[self._log_it_counter].item()/60.0 + est_remaining_time_h = elapsed_h * 1/(self._vec_transition_counter) * (self._total_timesteps_vec-self._vec_transition_counter) + is_done=self._vec_transition_counter==self._total_timesteps_vec + + actual_tsteps_with_updates=-1 + experience_to_policy_grad_ratio=-1 + experience_to_qfun_grad_ratio=-1 + experience_to_tqfun_grad_ratio=-1 + if not self._eval: + actual_tsteps_with_updates=(self._n_timesteps_done[self._log_it_counter].item()-self._warmstart_timesteps) + epsi=1e-6 # to avoid div by 0 + experience_to_policy_grad_ratio=actual_tsteps_with_updates/(self._n_policy_updates[self._log_it_counter].item()-epsi) + experience_to_qfun_grad_ratio=actual_tsteps_with_updates/(self._n_qfun_updates[self._log_it_counter].item()-epsi) + experience_to_tqfun_grad_ratio=actual_tsteps_with_updates/(self._n_tqfun_updates[self._log_it_counter].item()-epsi) + + if self._debug: + + if self._remote_db: + # write general algo debug info to shared memory + info_names=self._shared_algo_data.dynamic_info.get() + info_data = [ + self._n_timesteps_done[self._log_it_counter].item(), + self._n_policy_updates[self._log_it_counter].item(), + experience_to_policy_grad_ratio, + elapsed_h, + est_remaining_time_h, + self._env_step_fps[self._log_it_counter].item(), + self._env_step_rt_factor[self._log_it_counter].item(), + self._collection_dt[self._log_it_counter].item(), + self._policy_update_fps[self._log_it_counter].item(), + self._policy_update_dt[self._log_it_counter].item(), + is_done, + self._n_of_played_episodes[self._log_it_counter].item(), + self._batch_norm_update_dt[self._log_it_counter].item(), + ] + self._shared_algo_data.write(dyn_info_name=info_names, + val=info_data) + + # write debug info to remote wandb server + db_data_names = 
list(self._env.custom_db_data.keys()) + for dbdatan in db_data_names: + data = self._custom_env_data[dbdatan] + data_names = self._env.custom_db_data[dbdatan].data_names() + + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_max": + wandb.Histogram(data["max"][self._log_it_counter, :, :].numpy())}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_avrg": + wandb.Histogram(data["avrg"][self._log_it_counter, :, :].numpy())}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_min": + wandb.Histogram(data["min"][self._log_it_counter, :, :].numpy())}) + + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_max_over_envs": + data["max_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_min_over_envs": + data["min_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_avrg_over_envs": + data["avrg_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_std_over_envs": + data["std_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))}) + + self._wandb_d.update({'log_iteration' : self._log_it_counter}) + self._wandb_d.update(dict(zip(info_names, info_data))) + + # debug environments + self._wandb_d.update({'correlation_db/ep_timesteps_env_distr': + wandb.Histogram(self._ep_tsteps_env_distribution[self._log_it_counter, :, :].numpy())}) + + self._wandb_d.update({'tot_reward/tot_rew_max': wandb.Histogram(self._tot_rew_max[self._log_it_counter, :, :].numpy()), + 'tot_reward/tot_rew_avrg': wandb.Histogram(self._tot_rew_avrg[self._log_it_counter, :, :].numpy()), + 'tot_reward/tot_rew_min': wandb.Histogram(self._tot_rew_min[self._log_it_counter, :, :].numpy()), 
+ 'tot_reward/tot_rew_max_over_envs': self._tot_rew_max_over_envs[self._log_it_counter, :, :].item(), + 'tot_reward/tot_rew_min_over_envs': self._tot_rew_min_over_envs[self._log_it_counter, :, :].item(), + 'tot_reward/tot_rew_avrg_over_envs': self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item(), + 'tot_reward/tot_rew_std_over_envs': self._tot_rew_std_over_envs[self._log_it_counter, :, :].item(), + }) + # sub rewards from db envs + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max": + wandb.Histogram(self._sub_rew_max.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min": + wandb.Histogram(self._sub_rew_min.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg": + wandb.Histogram(self._sub_rew_avrg.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + + + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max_over_envs": + self._sub_rew_max_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min_over_envs": + self._sub_rew_min_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg_over_envs": + self._sub_rew_avrg_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_std_over_envs": + self._sub_rew_std_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + + # exploration envs + if self._n_expl_envs > 0: + self._wandb_d.update({'correlation_db/ep_timesteps_expl_env_distr': + wandb.Histogram(self._ep_tsteps_expl_env_distribution[self._log_it_counter, :, 
:].numpy())}) + + # sub reward from expl envs + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_expl": + wandb.Histogram(self._sub_rew_max_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_expl": + wandb.Histogram(self._sub_rew_avrg_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_expl": + wandb.Histogram(self._sub_rew_min_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_over_envs_expl": + self._sub_rew_max_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_over_envs_expl": + self._sub_rew_min_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_over_envs_expl": + self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_std_over_envs_expl": + self._sub_rew_std_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + + # demo envs (only log if active, otherwise we log nans which wandb doesn't like) + if self._env.n_demo_envs() > 0: + if self._env.demo_active(): + # log hystograms only if there are no nan in the data + self._wandb_d.update({'correlation_db/ep_timesteps_demo_env_distr': + wandb.Histogram(self._ep_tsteps_demo_env_distribution[self._log_it_counter, :, :].numpy())}) + + # sub reward from expl envs + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_demo": + 
wandb.Histogram(self._sub_rew_max_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_demo": + wandb.Histogram(self._sub_rew_avrg_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_demo": + wandb.Histogram(self._sub_rew_min_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_over_envs_demo": + self._sub_rew_max_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_over_envs_demo": + self._sub_rew_min_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_over_envs_demo": + self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_std_over_envs_demo": + self._sub_rew_std_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + + if self._vec_transition_counter > (self._warmstart_vectimesteps-1): + # algo info + self._policy_update_db_data_dict.update({ + "sac_q_info/qf1_vals_mean": self._qf1_vals_mean[self._log_it_counter, 0], + "sac_q_info/qf2_vals_mean": self._qf2_vals_mean[self._log_it_counter, 0], + "sac_q_info/min_qft_vals_mean": self._min_qft_vals_mean[self._log_it_counter, 0], + "sac_q_info/qf1_vals_std": self._qf1_vals_std[self._log_it_counter, 0], + "sac_q_info/qf2_vals_std": self._qf2_vals_std[self._log_it_counter, 0], + "sac_q_info/min_qft_vals_std": self._min_qft_vals_std[self._log_it_counter, 0], + "sac_q_info/qf1_vals_max": 
self._qf1_vals_max[self._log_it_counter, 0], + "sac_q_info/qf2_vals_max": self._qf2_vals_max[self._log_it_counter, 0], + "sac_q_info/qf1_vals_min": self._qf1_vals_min[self._log_it_counter, 0], + "sac_q_info/qf2_vals_min": self._qf2_vals_min[self._log_it_counter, 0], + + "sac_actor_info/policy_entropy_mean": self._policy_entropy_mean[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_std": self._policy_entropy_std[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_max": self._policy_entropy_max[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_min": self._policy_entropy_min[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_disc_mean": self._policy_entropy_disc_mean[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_disc_std": self._policy_entropy_disc_std[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_disc_max": self._policy_entropy_disc_max[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_disc_min": self._policy_entropy_disc_min[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_cont_mean": self._policy_entropy_cont_mean[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_cont_std": self._policy_entropy_cont_std[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_cont_max": self._policy_entropy_cont_max[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_cont_min": self._policy_entropy_cont_min[self._log_it_counter, 0], + + "sac_q_info/qf1_loss": self._qf1_loss[self._log_it_counter, 0], + "sac_q_info/qf2_loss": self._qf2_loss[self._log_it_counter, 0], + "sac_actor_info/actor_loss": self._actor_loss[self._log_it_counter, 0]}) + alpha_logs = { + "sac_alpha_info/alpha": self._alphas[self._log_it_counter, 0], + "sac_alpha_info/alpha_disc": self._alphas_disc[self._log_it_counter, 0], + "sac_alpha_info/alpha_cont": self._alphas_cont[self._log_it_counter, 0], + "sac_alpha_info/target_entropy": self._target_entropy, + "sac_alpha_info/target_entropy_disc": self._target_entropy_disc, 
+ "sac_alpha_info/target_entropy_cont": self._target_entropy_cont + } + if self._autotune: + alpha_logs.update({ + "sac_alpha_info/alpha_loss": self._alpha_loss[self._log_it_counter, 0], + "sac_alpha_info/alpha_loss_disc": self._alpha_loss_disc[self._log_it_counter, 0], + "sac_alpha_info/alpha_loss_cont": self._alpha_loss_cont[self._log_it_counter, 0], + }) + self._policy_update_db_data_dict.update(alpha_logs) + + if self._validate: + self._policy_update_db_data_dict.update({ + "sac_q_info/qf1_loss_validation": self._qf1_loss_validation[self._log_it_counter, 0], + "sac_q_info/qf2_loss_validation": self._qf2_loss_validation[self._log_it_counter, 0], + "sac_q_info/overfit_index": self._overfit_index[self._log_it_counter, 0], + "sac_actor_info/actor_loss_validation": self._actor_loss_validation[self._log_it_counter, 0]}) + if self._autotune: + self._policy_update_db_data_dict.update({ + "sac_alpha_info/alpha_loss_validation": self._alpha_loss_validation[self._log_it_counter, 0], + "sac_alpha_info/alpha_loss_disc_validation": self._alpha_loss_disc_validation[self._log_it_counter, 0], + "sac_alpha_info/alpha_loss_cont_validation": self._alpha_loss_cont_validation[self._log_it_counter, 0]}) + + self._wandb_d.update(self._policy_update_db_data_dict) + + if self._use_rnd: + self._rnd_db_data_dict.update({ + "rnd_info/expl_bonus_raw_avrg": self._expl_bonus_raw_avrg[self._log_it_counter, 0], + "rnd_info/expl_bonus_raw_std": self._expl_bonus_raw_std[self._log_it_counter, 0], + "rnd_info/expl_bonus_proc_avrg": self._expl_bonus_proc_avrg[self._log_it_counter, 0], + "rnd_info/expl_bonus_proc_std": self._expl_bonus_proc_std[self._log_it_counter, 0], + "rnd_info/rnd_loss": self._rnd_loss[self._log_it_counter, 0], + }) + self._wandb_d.update(self._rnd_db_data_dict) + + if self._rnd_net.obs_running_norm is not None: + # adding info on running normalizer if used + if self._running_mean_rnd_input is not None: + self._wandb_d.update({f"rnd_info/running_mean_rhc_input": 
self._running_mean_rnd_input[self._log_it_counter, :]}) + if self._running_std_rnd_input is not None: + self._wandb_d.update({f"rnd_info/running_std_rhc_input": self._running_std_rnd_input[self._log_it_counter, :]}) + + if self._agent.obs_running_norm is not None: + # adding info on running normalizer if used + if self._running_mean_obs is not None: + self._wandb_d.update({f"running_norm/mean": self._running_mean_obs[self._log_it_counter, :]}) + if self._running_std_obs is not None: + self._wandb_d.update({f"running_norm/std": self._running_std_obs[self._log_it_counter, :]}) + + self._wandb_d.update(self._custom_env_data_db_dict) + + wandb.log(self._wandb_d) + + if self._verbose: + + info =f"\nTotal n. timesteps simulated: {self._n_timesteps_done[self._log_it_counter].item()}/{self._total_timesteps}\n" + \ + f"N. policy updates performed: {self._n_policy_updates[self._log_it_counter].item()}/{self._n_policy_updates_to_be_done}\n" + \ + f"N. q fun updates performed: {self._n_qfun_updates[self._log_it_counter].item()}/{self._n_qf_updates_to_be_done}\n" + \ + f"N. 
trgt q fun updates performed: {self._n_tqfun_updates[self._log_it_counter].item()}/{self._n_tqf_updates_to_be_done}\n" + \ + f"experience to policy grad ratio: {experience_to_policy_grad_ratio}\n" + \ + f"experience to q fun grad ratio: {experience_to_qfun_grad_ratio}\n" + \ + f"experience to trgt q fun grad ratio: {experience_to_tqfun_grad_ratio}\n"+ \ + f"Warmstart completed: {self._vec_transition_counter > self._warmstart_vectimesteps or self._eval} ; ({self._vec_transition_counter}/{self._warmstart_vectimesteps})\n" +\ + f"Replay buffer full: {self._replay_bf_full}; current position {self._bpos}/{self._replay_buffer_size_vec}\n" +\ + f"Validation buffer full: {self._validation_bf_full}; current position {self._bpos_val}/{self._validation_buffer_size_vec}\n" +\ + f"Elapsed time: {self._elapsed_min[self._log_it_counter].item()/60.0} h\n" + \ + f"Estimated remaining training time: " + \ + f"{est_remaining_time_h} h\n" + \ + f"Total reward episodic data --> \n" + \ + f"max: {self._tot_rew_max_over_envs[self._log_it_counter, :, :].item()}\n" + \ + f"avg: {self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item()}\n" + \ + f"min: {self._tot_rew_min_over_envs[self._log_it_counter, :, :].item()}\n" + \ + f"Episodic sub-rewards episodic data --> \nsub rewards names: {self._reward_names_str}\n" + \ + f"max: {self._sub_rew_max_over_envs[self._log_it_counter, :]}\n" + \ + f"min: {self._sub_rew_min_over_envs[self._log_it_counter, :]}\n" + \ + f"avg: {self._sub_rew_avrg_over_envs[self._log_it_counter, :]}\n" + \ + f"std: {self._sub_rew_std_over_envs[self._log_it_counter, :]}\n" + \ + f"N. of episodes on which episodic rew stats are computed: {self._n_of_played_episodes[self._log_it_counter].item()}\n" + \ + f"Current env. 
step sps: {self._env_step_fps[self._log_it_counter].item()}, time for experience collection {self._collection_dt[self._log_it_counter].item()} s\n" + \ + f"Current env (sub-stepping) rt factor: {self._env_step_rt_factor[self._log_it_counter].item()}\n" + \ + f"Current policy update fps: {self._policy_update_fps[self._log_it_counter].item()}, time for policy updates {self._policy_update_dt[self._log_it_counter].item()} s\n" + \ + f"Time spent updating batch normalizations {self._batch_norm_update_dt[self._log_it_counter].item()} s\n" + \ + f"Time spent for computing validation {self._validation_dt[self._log_it_counter].item()} s\n" + \ + f"Demo envs are active: {self._demo_envs_active[self._log_it_counter].item()}. N demo envs if active {self._env.n_demo_envs()}\n" + \ + f"Performance metric now: {self._demo_perf_metric[self._log_it_counter].item()}\n" + \ + f"Entropy (disc): current {float(self._policy_entropy_disc_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_disc:.4f}\n" + \ + f"Entropy (cont): current {float(self._policy_entropy_cont_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_cont:.4f}\n" + if self._use_rnd: + info = info + f"N. 
rnd updates performed: {self._n_rnd_updates[self._log_it_counter].item()}\n" + + Journal.log(self.__class__.__name__, + "_post_step", + info, + LogType.INFO, + throw_when_excep = True) + + def _add_experience(self, + obs: torch.Tensor, actions: torch.Tensor, rewards: torch.Tensor, + next_obs: torch.Tensor, + next_terminal: torch.Tensor) -> None: + + if self._validate and \ + (self._vec_transition_counter % self._validation_collection_vecfreq == 0): + # fill validation buffer + + self._obs_val[self._bpos_val] = obs + self._next_obs_val[self._bpos_val] = next_obs + self._actions_val[self._bpos_val] = actions + self._rewards_val[self._bpos_val] = rewards + self._next_terminal_val[self._bpos_val] = next_terminal + + self._bpos_val += 1 + if self._bpos_val == self._validation_buffer_size_vec: + self._validation_bf_full = True + self._bpos_val = 0 + + else: # fill normal replay buffer + self._obs[self._bpos] = obs + self._next_obs[self._bpos] = next_obs + self._actions[self._bpos] = actions + self._rewards[self._bpos] = rewards + self._next_terminal[self._bpos] = next_terminal + + self._bpos += 1 + if self._bpos == self._replay_buffer_size_vec: + self._replay_bf_full = True + self._bpos = 0 + + def _sample(self, size: int = None): + + if size is None: + size=self._batch_size + + batched_obs = self._obs.view((-1, self._env.obs_dim())) + batched_next_obs = self._next_obs.view((-1, self._env.obs_dim())) + batched_actions = self._actions.view((-1, self._env.actions_dim())) + batched_rewards = self._rewards.view(-1) + batched_terminal = self._next_terminal.view(-1) + + # sampling from the batched buffer + up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs + shuffled_buffer_idxs = torch.randint(0, up_to, + (size,)) + + sampled_obs = batched_obs[shuffled_buffer_idxs] + sampled_next_obs = batched_next_obs[shuffled_buffer_idxs] + sampled_actions = batched_actions[shuffled_buffer_idxs] + sampled_rewards = batched_rewards[shuffled_buffer_idxs] + 
sampled_terminal = batched_terminal[shuffled_buffer_idxs] + + return sampled_obs, sampled_actions,\ + sampled_next_obs,\ + sampled_rewards, \ + sampled_terminal + + def _sample_validation(self, size: int = None): + + if size is None: + size=self._validation_batch_size + + batched_obs = self._obs_val.view((-1, self._env.obs_dim())) + batched_next_obs = self._next_obs_val.view((-1, self._env.obs_dim())) + batched_actions = self._actions_val.view((-1, self._env.actions_dim())) + batched_rewards = self._rewards_val.view(-1) + batched_terminal = self._next_terminal_val.view(-1) + + # sampling from the batched buffer + up_to = self._validation_buffer_size if self._validation_bf_full else self._bpos_val*self._num_envs + shuffled_buffer_idxs = torch.randint(0, up_to, + (size,)) + + sampled_obs = batched_obs[shuffled_buffer_idxs] + sampled_next_obs = batched_next_obs[shuffled_buffer_idxs] + sampled_actions = batched_actions[shuffled_buffer_idxs] + sampled_rewards = batched_rewards[shuffled_buffer_idxs] + sampled_terminal = batched_terminal[shuffled_buffer_idxs] + + return sampled_obs, sampled_actions,\ + sampled_next_obs,\ + sampled_rewards, \ + sampled_terminal + + def _sample_random_actions(self): + + self._random_uniform.uniform_(-1,1) + random_actions = self._random_uniform + + return random_actions + + def _perturb_some_actions(self, + actions: torch.Tensor): + + if self._is_continuous_actions_bool.any(): # if there are any continuous actions + self._perturb_actions(actions, + action_idxs=self._is_continuous_actions, + env_idxs=self._expl_env_selector.to(actions.device), + normal=True, # use normal for continuous + scaling=self._continuous_act_expl_noise_std) + if self._is_discrete_actions_bool.any(): # actions to be treated as discrete + self._perturb_actions(actions, + action_idxs=self._is_discrete_actions, + env_idxs=self._expl_env_selector.to(actions.device), + normal=False, # use uniform distr for discrete + scaling=self._discrete_act_expl_noise_std) + 
self._pert_counter+=1 + if self._pert_counter >= self._noise_duration_vec: + self._pert_counter=0 + + def _perturb_actions(self, + actions: torch.Tensor, + action_idxs: torch.Tensor, + env_idxs: torch.Tensor, + normal: bool = True, + scaling: float = 1.0): + if normal: # gaussian + # not super efficient (in theory random_normal can be made smaller in size) + self._random_normal.normal_(mean=0, std=1) + noise=self._random_normal + else: # uniform + self._random_uniform.uniform_(-1,1) + noise=self._random_uniform + + env_indices = env_idxs.reshape(-1,1) # Get indices of True environments + action_indices = action_idxs.reshape(1,-1) # Get indices of True actions + action_indices_flat=action_indices.flatten() + + perturbation=noise[env_indices, action_indices]*scaling + perturbed_actions=actions[env_indices, action_indices]+perturbation + perturbed_actions.clamp_(-1.0, 1.0) # enforce normalized bounds + + actions[env_indices, action_indices]=\ + perturbed_actions + + def _update_batch_norm(self, bsize: int = None): + + if bsize is None: + bsize=self._batch_size # same used for training + + up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs + shuffled_buffer_idxs = torch.randint(0, up_to, + (bsize,)) + + # update obs normalization + # (we should sample also next obs, but if most of the transitions are not terminal, + # this is not an issue and is more efficient) + if (self._agent.obs_running_norm is not None) and \ + (not self._eval): + batched_obs = self._obs.view((-1, self._obs_dim)) + sampled_obs = batched_obs[shuffled_buffer_idxs] + self._agent.update_obs_bnorm(x=sampled_obs) + + if self._use_rnd: # update running norm for RND + batched_obs = self._obs.view((-1, self._obs_dim)) + batched_actions = self._actions.view((-1, self._actions_dim)) + sampled_obs = batched_obs[shuffled_buffer_idxs] + sampled_actions = batched_actions[shuffled_buffer_idxs] + torch.cat(tensors=(sampled_obs, sampled_actions), dim=1, + 
out=self._rnd_bnorm_input) + self._rnd_net.update_input_bnorm(x=self._rnd_bnorm_input) + + # update running norm on rewards also + # if self._reward_normalizer is not None: + # batched_rew = self._rewards.view(-1) + # sampled_rew = batched_rew[shuffled_buffer_idxs] + # self._reward_normalizer.manual_stat_update(x=sampled_rew) + + def _reset_agent(self): + # not super efficient, but effective -> + # brand new agent, brand new optimizers + self._agent.reset() + # forcing deallocation of previous optimizers + import gc + del self._qf_optimizer + del self._actor_optimizer + if self._autotune: + del self._a_optimizer_disc + del self._a_optimizer_cont + del self._log_alpha_disc + del self._log_alpha_cont + gc.collect() + self._init_agent_optimizers() + if self._autotune: # also reinitialize alpha optimization + self._init_alpha_autotuning() + + self._overfit_idx=0.0 + + def _switch_training_mode(self, + train: bool = True): + + self._agent.train(train) + + def _init_algo_shared_data(self, + static_params: Dict): + + self._shared_algo_data = SharedRLAlgorithmInfo(namespace=self._ns, + is_server=True, + static_params=static_params, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + + self._shared_algo_data.run() + + # write some initializations + self._shared_algo_data.write(dyn_info_name=["is_done"], + val=[0.0]) + + # only written to if flags where enabled + self._qf_vals=QfVal(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._qf_vals.run() + self._qf_trgt=QfTrgt(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._qf_trgt.run() + + # episodic returns + reward_names=self._episodic_reward_metrics.data_names() + self._sub_returns=SubReturns(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + 
n_rewards=len(reward_names), + reward_names=reward_names, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._sub_returns.run() + + self._tot_returns=TotReturns(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._tot_returns.run() diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/training_env_base.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/training_env_base.py new file mode 100644 index 0000000000000000000000000000000000000000..fce03b3698fad4c0ffd9513342446e3d33b14008 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/training_env_base.py @@ -0,0 +1,2000 @@ +import torch +import math +from aug_mpc.utils.math_utils import quaternion_to_angular_velocity, quaternion_difference + +from mpc_hive.utilities.shared_data.rhc_data import RobotState +from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred +from mpc_hive.utilities.shared_data.rhc_data import RhcRefs +from mpc_hive.utilities.shared_data.rhc_data import RhcStatus +from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo + +from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperSrvr +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetSrvr +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest + +from aug_mpc.utils.shared_data.agent_refs import AgentRefs +from aug_mpc.utils.shared_data.training_env import SharedTrainingEnvInfo + +from aug_mpc.utils.shared_data.training_env import Observations, NextObservations +from aug_mpc.utils.shared_data.training_env import TotRewards +from aug_mpc.utils.shared_data.training_env import SubRewards +from aug_mpc.utils.shared_data.training_env import Actions +from 
aug_mpc.utils.shared_data.training_env import Terminations, SubTerminations +from aug_mpc.utils.shared_data.training_env import Truncations, SubTruncations +from aug_mpc.utils.shared_data.training_env import EpisodesCounter,TaskRandCounter,SafetyRandResetsCounter,RandomTruncCounter,SubStepAbsCounter + +from aug_mpc.utils.episodic_rewards import EpisodicRewards +from aug_mpc.utils.episodic_data import EpisodicData +from aug_mpc.utils.episodic_data import MemBuffer +from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother +from aug_mpc.utils.math_utils import check_capsize + +from mpc_hive.utilities.math_utils_torch import world2base_frame + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal +from EigenIPC.PyEigenIPC import StringTensorClient + +from perf_sleep.pyperfsleep import PerfSleep + +from abc import abstractmethod, ABC + +import os +from typing import List, Dict + +class AugMPCTrainingEnvBase(ABC): + + """Base class for a remote training environment tailored to Learning-based Receding Horizon Control""" + + def __init__(self, + namespace: str, + obs_dim: int, + actions_dim: int, + env_name: str = "", + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + debug: bool = True, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + self._this_path = os.path.abspath(__file__) + + self.custom_db_data = None + + self._random_reset_active=False + + self._action_smoother_continuous=None + self._action_smoother_discrete=None + + self._closed = False + self._ready=False + + self._namespace = namespace + self._with_gpu_mirror = True + self._safe_shared_mem = False + + self._obs_dim = obs_dim + self._actions_dim = actions_dim + + self._use_gpu = use_gpu + if self._use_gpu: + self._device="cuda" + else: + self._device="cpu" + + self._dtype = dtype + + self._verbose = verbose + self._vlevel = 
vlevel + + self._is_debug = debug + + self._env_name = env_name + + self._override_agent_refs = override_agent_refs + + self._substep_dt=1.0 # dt [s] between each substep + + self._env_opts={} + self._env_opts.update(env_opts) + self._process_env_opts() + + self._robot_state = None + self._rhc_cmds = None + self._rhc_pred = None + self._rhc_refs = None + self._rhc_status = None + + self._remote_stepper = None + self._remote_resetter = None + self._remote_reset_req = None + + self._agent_refs = None + + self._n_envs = 0 + + self._ep_timeout_counter = None + self._task_rand_counter = None + self._rand_safety_reset_counter = None + self._rand_trunc_counter = None + + self._actions_map={} # to be used to hold info like action idxs + self._obs_map={} + + self._obs = None + self._obs_ub = None + self._obs_lb = None + self._next_obs = None + self._actions = None + self._actual_actions = None + self._actions_ub = None + self._actions_lb = None + self._tot_rewards = None + self._sub_rewards = None + self._sub_terminations = None + self._sub_truncations = None + self._terminations = None + self._truncations = None + self._act_mem_buffer = None + + self._episodic_rewards_metrics = None + + self._timeout = timeout_ms + + self._height_grid_size = None + self._height_flat_dim = 0 + + self._attach_to_shared_mem() + + self._init_obs(obs_dim) + self._init_actions(actions_dim) + self._init_rewards() + self._init_terminations() + self._init_truncations() + self._init_custom_db_data() + + self._demo_setup() # setup for demo envs + + # to ensure maps are properly initialized + _ = self._get_action_names() + _ = self._get_obs_names() + _ = self._get_sub_trunc_names() + _ = self._get_sub_term_names() + + self._set_substep_rew() + self._set_substep_obs() + + self._custom_post_init() + + # update actions scale and offset in case it was modified in _custom_post_init + self._actions_scale = (self._actions_ub - self._actions_lb)/2.0 + self._actions_offset = (self._actions_ub + 
self._actions_lb)/2.0 + + if self._env_opts["use_action_smoothing"]: + self._init_action_smoothing() + + self._ready=self._init_step() + + def _add_env_opt(self, + opts: Dict, + name: str, + default): + + if not name in opts: + opts[name]=default + + def _process_env_opts(self, ): + + self._check_for_env_opts("episode_timeout_lb", int) + self._check_for_env_opts("episode_timeout_ub", int) + self._check_for_env_opts("n_steps_task_rand_lb", int) + self._check_for_env_opts("n_steps_task_rand_ub", int) + self._check_for_env_opts("use_random_trunc", bool) + self._check_for_env_opts("random_trunc_freq", int) + self._check_for_env_opts("random_trunc_freq_delta", int) + self._check_for_env_opts("use_random_safety_reset", bool) + self._check_for_env_opts("random_reset_freq", int) + + self._check_for_env_opts("action_repeat", int) + + self._check_for_env_opts("n_preinit_steps", int) + + self._check_for_env_opts("demo_envs_perc", float) + + self._check_for_env_opts("vec_ep_freq_metrics_db", int) + + self._check_for_env_opts("srew_drescaling", bool) + + self._check_for_env_opts("use_action_history", bool) + self._check_for_env_opts("actions_history_size", int) + + self._check_for_env_opts("use_action_smoothing", bool) + self._check_for_env_opts("smoothing_horizon_c", float) + self._check_for_env_opts("smoothing_horizon_d", float) + + self._check_for_env_opts("add_heightmap_obs", bool) + + # parse action repeat opt + get some sim information + if self._env_opts["action_repeat"] <=0: + self._env_opts["action_repeat"] = 1 + self._action_repeat=self._env_opts["action_repeat"] + # parse remote sim info + sim_info = {} + sim_info_shared = SharedEnvInfo(namespace=self._namespace, + is_server=False, + safe=False, + verbose=self._verbose, + vlevel=self._vlevel) + sim_info_shared.run() + sim_info_keys = sim_info_shared.param_keys + sim_info_data = sim_info_shared.get().flatten() + for i in range(len(sim_info_keys)): + sim_info[sim_info_keys[i]] = sim_info_data[i] + if "substepping_dt" 
in sim_info_keys: + self._substep_dt=sim_info["substepping_dt"] + self._env_opts.update(sim_info) + + self._env_opts["substep_dt"]=self._substep_dt + + self._env_opts["override_agent_refs"]=self._override_agent_refs + + self._env_opts["episode_timeout_lb"] = round(self._env_opts["episode_timeout_lb"]/self._action_repeat) + self._env_opts["episode_timeout_ub"] = round(self._env_opts["episode_timeout_ub"]/self._action_repeat) + + self._env_opts["n_steps_task_rand_lb"] = round(self._env_opts["n_steps_task_rand_lb"]/self._action_repeat) + self._env_opts["n_steps_task_rand_ub"] = round(self._env_opts["n_steps_task_rand_ub"]/self._action_repeat) + + if self._env_opts["random_reset_freq"] <=0: + self._env_opts["use_random_safety_reset"]=False + self._env_opts["random_reset_freq"]=-1 + self._random_reset_active=self._env_opts["use_random_safety_reset"] + + self._env_opts["random_trunc_freq"] = round(self._env_opts["random_trunc_freq"]/self._action_repeat) + self._env_opts["random_trunc_freq_delta"] = round(self._env_opts["random_trunc_freq_delta"]/self._action_repeat) + + if self._env_opts["random_trunc_freq"] <=0: + self._env_opts["use_random_trunc"]=False + self._env_opts["random_trunc_freq"]=-1 + + self._full_db=False + if "full_env_db" in self._env_opts: + self._full_db=self._env_opts["full_env_db"] + + def _check_for_env_opts(self, + name: str, + expected_type): + if not (name in self._env_opts): + Journal.log(self.__class__.__name__, + "_check_for_env_opts", + f"Required option {name} missing for env opts!", + LogType.EXCEP, + throw_when_excep=True) + if not isinstance(self._env_opts[name], expected_type): + Journal.log(self.__class__.__name__, + "_check_for_env_opts", + f"Option {name} in env opts is not of type {expected_type} (got {type(self._env_opts[name])})!", + LogType.EXCEP, + throw_when_excep=True) + + def __del__(self): + + self.close() + + def _demo_setup(self): + + self._demo_envs_idxs=None + self._demo_envs_idxs_bool=None + 
self._n_demo_envs=round(self._env_opts["demo_envs_perc"]*self._n_envs) + self._add_demos=False + if not self._n_demo_envs >0: + Journal.log(self.__class__.__name__, + "__init__", + "will not use demo environments", + LogType.INFO, + throw_when_excep=False) + else: + Journal.log(self.__class__.__name__, + "__init__", + f"Will run with {self._n_demo_envs} demonstration envs.", + LogType.INFO) + self._demo_envs_idxs = torch.randperm(self._n_envs, device=self._device)[:self._n_demo_envs] + self._demo_envs_idxs_bool=torch.full((self._n_envs, ), dtype=torch.bool, device=self._device, + fill_value=False) + self._demo_envs_idxs_bool[self._demo_envs_idxs]=True + + self._init_demo_envs() # custom logic + + demo_idxs_str=", ".join(map(str, self._demo_envs_idxs.tolist())) + Journal.log(self.__class__.__name__, + "__init__", + f"Demo env. indexes are [{demo_idxs_str}]", + LogType.INFO) + + def env_opts(self): + return self._env_opts + + def demo_env_idxs(self, get_bool: bool=False): + if get_bool: + return self._demo_envs_idxs_bool + else: + return self._demo_envs_idxs + + def _init_demo_envs(self): + pass + + def n_demo_envs(self): + return self._n_demo_envs + + def demo_active(self): + return self._add_demos + + def switch_demo(self, active: bool = False): + if self._demo_envs_idxs is not None: + self._add_demos=active + else: + Journal.log(self.__class__.__name__, + "switch_demo", + f"Cannot switch demostrations on. 
No demo envs available!", + LogType.EXCEP, + throw_when_excep=True) + + def _get_this_file_path(self): + return self._this_path + + def episode_timeout_bounds(self): + return self._env_opts["episode_timeout_lb"], self._env_opts["episode_timeout_ub"] + + def task_rand_timeout_bounds(self): + return self._env_opts["n_steps_task_rand_lb"], self._env_opts["n_steps_task_rand_ub"] + + def n_action_reps(self): + return self._action_repeat + + def get_file_paths(self): + from aug_mpc.utils.sys_utils import PathsGetter + path_getter = PathsGetter() + base_paths = [] + base_paths.append(self._get_this_file_path()) + base_paths.append(path_getter.REMOTENVPATH) + for script_path in path_getter.SCRIPTSPATHS: + base_paths.append(script_path) + + # rhc files + from EigenIPC.PyEigenIPC import StringTensorClient + from perf_sleep.pyperfsleep import PerfSleep + shared_rhc_shared_files = StringTensorClient( + basename="SharedRhcFilesDropDir", + name_space=self._namespace, + verbose=self._verbose, + vlevel=VLevel.V2) + shared_rhc_shared_files.run() + shared_rhc_files_vals=[""]*shared_rhc_shared_files.length() + while not shared_rhc_shared_files.read_vec(shared_rhc_files_vals, 0): + nsecs = 1000000000 # 1 sec + PerfSleep.thread_sleep(nsecs) # we just keep it alive + rhc_list=[] + for rhc_files in shared_rhc_files_vals: + file_list = rhc_files.split(", ") + rhc_list.extend(file_list) + rhc_list = list(set(rhc_list)) # removing duplicates + base_paths.extend(rhc_list) + + # world interface files + get_world_interface_paths = self.get_world_interface_paths() + base_paths.extend(get_world_interface_paths) + return base_paths + + def get_world_interface_paths(self): + paths = [] + shared_world_iface_files = StringTensorClient( + basename="SharedWorldInterfaceFilesDropDir", + name_space=self._namespace, + verbose=self._verbose, + vlevel=VLevel.V2) + shared_world_iface_files.run() + world_iface_vals=[""]*shared_world_iface_files.length() + while not 
shared_world_iface_files.read_vec(world_iface_vals, 0): + nsecs = 1000000000 # 1 sec + PerfSleep.thread_sleep(nsecs) # keep alive while waiting + shared_world_iface_files.close() + for files in world_iface_vals: + if files == "": + continue + file_list = files.split(", ") + for f in file_list: + if f not in paths: + paths.append(f) + return paths + + def get_aux_dir(self): + empty_list = [] + return empty_list + + def _init_step(self): + + self._check_controllers_registered(retry=True) + self._activate_rhc_controllers() + + # just an auxiliary tensor + initial_reset_aux = self._terminations.get_torch_mirror(gpu=self._use_gpu).clone() + initial_reset_aux[:, :] = True # we reset all sim envs first + init_step_ok=True + init_step_ok=self._remote_sim_step() and init_step_ok + if not init_step_ok: + return False + init_step_ok=self._remote_reset(reset_mask=initial_reset_aux) and init_step_ok + if not init_step_ok: + return False + + for i in range(self._env_opts["n_preinit_steps"]): # perform some + # dummy remote env stepping to make sure to have meaningful + # initializations (doesn't increment step counter) + init_step_ok=self._remote_sim_step() and init_step_ok # 1 remote sim. 
step + if not init_step_ok: + return False + init_step_ok=self._send_remote_reset_req() and init_step_ok # fake reset request + if not init_step_ok: + return False + + self.reset() + + return init_step_ok + + def _debug(self): + + if self._use_gpu: + # using non_blocking which is not safe when GPU->CPU + self._obs.synch_mirror(from_gpu=True,non_blocking=True) # copy data from gpu to cpu view + self._next_obs.synch_mirror(from_gpu=True,non_blocking=True) + self._actions.synch_mirror(from_gpu=True,non_blocking=True) + self._truncations.synch_mirror(from_gpu=True,non_blocking=True) + self._sub_truncations.synch_mirror(from_gpu=True,non_blocking=True) + self._terminations.synch_mirror(from_gpu=True,non_blocking=True) + self._sub_terminations.synch_mirror(from_gpu=True,non_blocking=True) + self._tot_rewards.synch_mirror(from_gpu=True,non_blocking=True) + self._sub_rewards.synch_mirror(from_gpu=True,non_blocking=True) + # if we want reliable db data then we should synchronize data streams + torch.cuda.synchronize() + + # copy CPU view on shared memory + self._obs.synch_all(read=False, retry=True) + self._next_obs.synch_all(read=False, retry=True) + self._actions.synch_all(read=False, retry=True) + self._tot_rewards.synch_all(read=False, retry=True) + self._sub_rewards.synch_all(read=False, retry=True) + self._truncations.synch_all(read=False, retry = True) + self._sub_truncations.synch_all(read=False, retry = True) + self._terminations.synch_all(read=False, retry = True) + self._sub_terminations.synch_all(read=False, retry = True) + + self._debug_agent_refs() + + def _debug_agent_refs(self): + if self._use_gpu: + if not self._override_agent_refs: + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) + if not self._override_agent_refs: + self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True) + + def _remote_sim_step(self): + + self._remote_stepper.trigger() # triggers simulation + RHC + if not 
self._remote_stepper.wait_ack_from(1, self._timeout): + Journal.log(self.__class__.__name__, + "_remote_sim_step", + "Remote sim. env step ack not received within timeout", + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def _remote_reset(self, + reset_mask: torch.Tensor = None): + + reset_reqs = self._remote_reset_req.get_torch_mirror() + if reset_mask is None: # just send the signal to allow stepping, but do not reset any of + # the remote envs + reset_reqs[:, :] = False + else: + reset_reqs[:, :] = reset_mask # remotely reset envs corresponding to + # the mask (True--> to be reset) + self._remote_reset_req.synch_all(read=False, retry=True) # write on shared buffer + remote_reset_ok = self._send_remote_reset_req() # process remote request + + if reset_mask is not None: + self._synch_state(gpu=self._use_gpu) # if some env was reset, we use _obs + # to hold the states, including resets, while _next_obs will always hold the + # state right after stepping the sim env + # (could be a bit more efficient, since in theory we only need to read the envs + # corresponding to the reset_mask) + + + return remote_reset_ok + + def _send_remote_reset_req(self): + + self._remote_resetter.trigger() + if not self._remote_resetter.wait_ack_from(1, self._timeout): # remote reset completed + Journal.log(self.__class__.__name__, + "_post_step", + "Remote reset did not complete within the prescribed timeout!", + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def step(self, + action): + + actions_norm = action.detach() # IMPORTANT: assumes actions are already normalized in [-1, 1] + + actions = self._actions.get_torch_mirror(gpu=self._use_gpu) # will hold agent actions (real range) + + # scale normalized actions to physical space before interfacing with controllers + actions[:, :] = actions_norm*self._actions_scale + self._actions_offset + + self._override_actions_with_demo() # if necessary override some actions with expert 
demonstrations + # (getting actions with get_actions will return the modified actions tensor) + + actions.clamp_(self._actions_lb, self._actions_ub) # just to be safe + + if self._act_mem_buffer is not None: # store norm actions in memory buffer + self._act_mem_buffer.update(new_data=actions_norm) + + if self._env_opts["use_action_smoothing"]: + self._apply_actions_smoothing() # smooth actions if enabled (the tensor returned by + # get_actions does not contain smoothing and can be safely employed for experience collection) + + self._apply_actions_to_rhc() # apply last agent actions to rhc controller + + stepping_ok = True + tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu) + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu) + tot_rewards.zero_() + sub_rewards.zero_() + self._substep_rewards.zero_() + next_obs.zero_() # necessary for substep obs + + for i in range(0, self._action_repeat): + + self._pre_substep() # custom logic @ substep freq + + stepping_ok = stepping_ok and self._check_controllers_registered(retry=False) # does not make sense to run training + # if we lost some controllers + stepping_ok = stepping_ok and self._remote_sim_step() # blocking, + + # no sim substepping is allowed to fail + self._synch_state(gpu=self._use_gpu) # read state from shared mem (done in substeps also, + # since substeps rewards will need updated substep obs) + + self._custom_post_substp_pre_rew() # custom substepping logic + self._compute_substep_rewards() + self._assemble_substep_rewards() # includes rewards clipping + self._custom_post_substp_post_rew() # custom substepping logic + + # fill substep obs + self._fill_substep_obs(self._substep_obs) + self._assemble_substep_obs() + if not i==(self._action_repeat-1): + # sends reset signal to complete remote step sequence, + # but does not reset any remote env + stepping_ok = stepping_ok and self._remote_reset(reset_mask=None) + 
else: # last substep + + self._fill_step_obs(next_obs) # update next obs + self._clamp_obs(next_obs) # good practice + obs = self._obs.get_torch_mirror(gpu=self._use_gpu) + obs[:, :] = next_obs # start from next observation, unless reset (handled in post_step()) + + self._compute_step_rewards() # implemented by child + + tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu) + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + self._clamp_rewards(sub_rewards) # clamp all sub rewards + + tot_rewards[:, :] = torch.sum(sub_rewards, dim=1, keepdim=True) + + scale=1 # scale tot rew by the number of action repeats + if self._env_opts["srew_drescaling"]: # scale rewards depending on the n of subrewards + scale*=sub_rewards.shape[1] # n. dims rescaling + tot_rewards.mul_(1/scale) + + self._substep_abs_counter.increment() # @ substep freq + + if not stepping_ok: + return False + + stepping_ok = stepping_ok and self._post_step() # post sub-stepping operations + # (if action_repeat > 1, then just the db data at the last substep is logged) + # also, if a reset of an env occurs, obs will hold the reset state + + return stepping_ok + + def _post_step(self): + + # first increment counters + self._ep_timeout_counter.increment() # episode timeout + self._task_rand_counter.increment() # task randomization + if self._rand_trunc_counter is not None: # random truncations (for removing temp. 
correlations) + self._rand_trunc_counter.increment() + + # check truncation and termination conditions + self._check_truncations() # defined in child env + self._check_terminations() + terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu) + truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu) + ignore_ep_end=None + if self._rand_trunc_counter is not None: + ignore_ep_end=self._rand_trunc_counter.time_limits_reached() + if self._use_gpu: + ignore_ep_end=ignore_ep_end.cuda() + + truncated = torch.logical_or(truncated, + ignore_ep_end) # add truncation (sub truncations defined in child env + # remain untouched) + + episode_finished = torch.logical_or(terminated, + truncated) + episode_finished_cpu = episode_finished.cpu() + + if self._rand_safety_reset_counter is not None and self._random_reset_active: + self._rand_safety_reset_counter.increment(to_be_incremented=episode_finished_cpu.flatten()) + # truncated[:,:] = torch.logical_or(truncated, + # self._rand_safety_reset_counter.time_limits_reached().cuda()) + + if self._act_mem_buffer is not None: + self._act_mem_buffer.reset(to_be_reset=episode_finished.flatten(), + init_data=self._normalize_actions(self.default_action)) + + if self._action_smoother_continuous is not None: + self._action_smoother_continuous.reset(to_be_reset=episode_finished.flatten(), + reset_val=self.default_action[:, self._is_continuous_actions]) + if self._action_smoother_discrete is not None: + self._action_smoother_discrete.reset(to_be_reset=episode_finished.flatten(), + reset_val=self.default_action[:, ~self._is_continuous_actions]) + + # debug step if required (IMPORTANT: must be before remote reset so that we always db + # actual data from the step and not after reset) + if self._is_debug: + self._debug() # copies db data on shared memory + ignore_ep_end_cpu=ignore_ep_end if not self._use_gpu else ignore_ep_end.cpu() + self._update_custom_db_data(episode_finished=episode_finished_cpu, + 
ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc + ) + self._episodic_rewards_metrics.update(rewards = self._sub_rewards.get_torch_mirror(gpu=False), + ep_finished=episode_finished_cpu, + ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc + ) + + # remotely reset envs + to_be_reset=self._to_be_reset() + to_be_reset_custom=self._custom_reset() + if to_be_reset_custom is not None: + to_be_reset[:, :] = torch.logical_or(to_be_reset,to_be_reset_custom) + rm_reset_ok = self._remote_reset(reset_mask=to_be_reset) + + self._custom_post_step(episode_finished=episode_finished) # any additional logic from child env + # here, before actual reset taskes place (at this point the state is the reset one) + + # updating also prev pos and orientation in case some env was reset + self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + obs = self._obs.get_torch_mirror(gpu=self._use_gpu) + self._fill_step_obs(obs) + self._clamp_obs(obs) + + # updating prev step quantities + self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + # synchronize and reset counters for finished episodes + self._ep_timeout_counter.reset(to_be_reset=episode_finished) + self._task_rand_counter.reset(to_be_reset=episode_finished) + self._substep_abs_counter.reset(to_be_reset=torch.logical_or(terminated,to_be_reset), + randomize_offsets=True # otherwise timers across envs would be strongly correlated + ) # reset only if resetting environment or if terminal + + if self._rand_trunc_counter is not None: + # only reset when safety truncation was is triggered + self._rand_trunc_counter.reset(to_be_reset=self._rand_trunc_counter.time_limits_reached(), + randomize_limits=True, # we need to randomize otherwise the 
other counters will synchronize + # with the episode counters + randomize_offsets=False # always restart at 0 + ) + # safety reset counter is only when it reches its reset interval (just to keep + # the counter bounded) + if self._rand_safety_reset_counter is not None and self._random_reset_active: + self._rand_safety_reset_counter.reset(to_be_reset=self._rand_safety_reset_counter.time_limits_reached()) + + return rm_reset_ok + + def _to_be_reset(self): + # always reset if a termination occurred or if there's a random safety reset + # request + terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu) + to_be_reset=terminated.clone() + if (self._rand_safety_reset_counter is not None) and self._random_reset_active: + to_be_reset=torch.logical_or(to_be_reset, + self._rand_safety_reset_counter.time_limits_reached()) + + return to_be_reset + + def _custom_reset(self): + # can be overridden by child + return None + + def _apply_actions_smoothing(self): + + actions = self._actions.get_torch_mirror(gpu=self._use_gpu) + actual_actions=self.get_actual_actions() # will write smoothed actions here + if self._action_smoother_continuous is not None: + self._action_smoother_continuous.update(new_signal= + actions[:, self._is_continuous_actions]) + actual_actions[:, self._is_continuous_actions]=self._action_smoother_continuous.get() + if self._action_smoother_discrete is not None: + self._action_smoother_discrete.update(new_signal= + actions[:, ~self._is_continuous_actions]) + actual_actions[:, ~self._is_continuous_actions]=self._action_smoother_discrete.get() + + def _update_custom_db_data(self, + episode_finished, + ignore_ep_end): + + # update defaults + self.custom_db_data["RhcRefsFlag"].update(new_data=self._rhc_refs.contact_flags.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) # before potentially resetting the flags, get data + self.custom_db_data["Actions"].update(new_data=self._actions.get_torch_mirror(gpu=False), + 
ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["Obs"].update(new_data=self._obs.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + self.custom_db_data["SubTerminations"].update(new_data=self._sub_terminations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["SubTruncations"].update(new_data=self._sub_truncations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + self.custom_db_data["Terminations"].update(new_data=self._terminations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["Truncations"].update(new_data=self._truncations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + + self._get_custom_db_data(episode_finished=episode_finished, ignore_ep_end=ignore_ep_end) + + def reset_custom_db_data(self, keep_track: bool = False): + # to be called periodically to reset custom db data stat. 
collection + for custom_db_data in self.custom_db_data.values(): + custom_db_data.reset(keep_track=keep_track) + + def _assemble_substep_rewards(self): + # by default assemble substep rewards by averaging + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + + # average over substeps depending on scale + # sub_rewards[:, self._is_substep_rew] = sub_rewards[:, self._is_substep_rew] + \ + # self._substep_rewards[:, self._is_substep_rew]/self._action_repeat + sub_rewards[:, self._is_substep_rew] += self._substep_rewards[:, self._is_substep_rew]/self._action_repeat + + def _assemble_substep_obs(self): + next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu) + next_obs[:, self._is_substep_obs] += self._substep_obs[:, self._is_substep_obs]/self._action_repeat + + def randomize_task_refs(self, + env_indxs: torch.Tensor = None): + + if self._override_agent_refs: + self._override_refs(env_indxs=env_indxs) + else: + self._randomize_task_refs(env_indxs=env_indxs) + + def reset(self): + + self.randomize_task_refs(env_indxs=None) # randomize all refs across envs + + self._obs.reset() + self._actions.reset() + self._next_obs.reset() + self._sub_rewards.reset() + self._tot_rewards.reset() + self._terminations.reset() + self._sub_terminations.reset() + self._truncations.reset() + self._sub_truncations.reset() + + self._ep_timeout_counter.reset(randomize_offsets=True) + self._task_rand_counter.reset() + self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter) + if self._rand_safety_reset_counter is not None: + self._rand_safety_reset_counter.reset() + self._substep_abs_counter.reset() + + if self._act_mem_buffer is not None: + self._act_mem_buffer.reset_all(init_data=self._normalize_actions(self.default_action)) + + if self._action_smoother_continuous is not None: + self._action_smoother_continuous.reset(reset_val=self.default_action[:, self._is_continuous_actions]) + if self._action_smoother_discrete is not None: + 
self._action_smoother_discrete.reset(reset_val=self.default_action[:, ~self._is_continuous_actions]) + + self._synch_state(gpu=self._use_gpu) # read obs from shared mem + + # just calling custom post step to ensure tak refs are updated + terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu) + truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu) + episode_finished = torch.logical_or(terminated, + truncated) + self._custom_post_step(episode_finished=episode_finished) + + obs = self._obs.get_torch_mirror(gpu=self._use_gpu) + next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu) + self._fill_step_obs(obs) # initialize observations + self._clamp_obs(obs) # to avoid bad things + self._fill_step_obs(next_obs) # and next obs + self._clamp_obs(next_obs) + + self.reset_custom_db_data(keep_track=False) + self._episodic_rewards_metrics.reset(keep_track=False) + + self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + def is_ready(self): + return self._ready + + def close(self): + + if not self._closed: + + # close all shared mem. 
clients + self._robot_state.close() + self._rhc_cmds.close() + self._rhc_pred.close() + self._rhc_refs.close() + self._rhc_status.close() + + self._remote_stepper.close() + + self._ep_timeout_counter.close() + self._task_rand_counter.close() + if self._rand_safety_reset_counter is not None: + self._rand_safety_reset_counter.close() + + # closing env.-specific shared data + self._obs.close() + self._next_obs.close() + self._actions.close() + if self._actual_actions is not None: + self._actual_actions.close() + self._sub_rewards.close() + self._tot_rewards.close() + + self._terminations.close() + self._sub_terminations.close() + self._truncations.close() + self._sub_truncations.close() + + self._closed = True + + def get_obs(self, clone:bool=False): + if clone: + return self._obs.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._obs.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_next_obs(self, clone:bool=False): + if clone: + return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_actions(self, clone:bool=False, normalized: bool = False): + actions = self._actions.get_torch_mirror(gpu=self._use_gpu).detach() + if normalized: + normalized_actions = self._normalize_actions(actions) + return normalized_actions.clone() if clone else normalized_actions + return actions.clone() if clone else actions + + def get_actual_actions(self, clone:bool=False, normalized: bool = False): + if self._env_opts["use_action_smoothing"]: + actions = self._actual_actions.get_torch_mirror(gpu=self._use_gpu).detach() + else: # actual action coincides with the one from the agent + possible modif. 
+ actions = self.get_actions(clone=False, normalized=False) + if normalized: + normalized_actions = self._normalize_actions(actions) + return normalized_actions.clone() if clone else normalized_actions + return actions.clone() if clone else actions + + def _normalize_actions(self, actions: torch.Tensor): + scale = torch.where(self._actions_scale == 0.0, + torch.ones_like(self._actions_scale), + self._actions_scale) + normalized = (actions - self._actions_offset)/scale + zero_scale_mask = torch.eq(self._actions_scale, 0.0).squeeze(0) + if torch.any(zero_scale_mask): + normalized[:, zero_scale_mask] = 0.0 + return normalized + + def get_rewards(self, clone:bool=False): + if clone: + return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_terminations(self, clone:bool=False): + if clone: + return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_truncations(self, clone:bool=False): + if clone: + return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach() + + def obs_dim(self): + + return self._obs_dim + + def actions_dim(self): + + return self._actions_dim + + def ep_rewards_metrics(self): + + return self._episodic_rewards_metrics + + def using_gpu(self): + + return self._use_gpu + + def name(self): + + return self._env_name + + def n_envs(self): + + return self._n_envs + + def dtype(self): + + return self._dtype + + def obs_names(self): + return self._get_obs_names() + + def action_names(self): + return self._get_action_names() + + def sub_rew_names(self): + return self._get_rewards_names() + + def sub_term_names(self): + return self._get_sub_term_names() + + def sub_trunc_names(self): + return self._get_sub_trunc_names() + + def 
_get_obs_names(self): + # to be overridden by child class + return None + + def get_robot_jnt_names(self): + return self._robot_state.jnt_names() + + def _get_action_names(self): + # to be overridden by child class + return None + + def _get_rewards_names(self): + # to be overridden by child class + return None + + def _get_sub_term_names(self): + # to be overridden by child class + sub_term_names = [] + sub_term_names.append("rhc_failure") + sub_term_names.append("robot_capsize") + sub_term_names.append("rhc_capsize") + + return sub_term_names + + def _get_sub_trunc_names(self): + # to be overridden by child class + sub_trunc_names = [] + sub_trunc_names.append("ep_timeout") + + return sub_trunc_names + + def _get_custom_db_data(self, episode_finished): + # to be overridden by child class + pass + + def set_observed_joints(self): + # ny default observe all joints available + return self._robot_state.jnt_names() + + def _set_jnts_blacklist_pattern(self): + self._jnt_q_blacklist_patterns=[] + + def get_observed_joints(self): + return self._observed_jnt_names + + def _init_obs(self, obs_dim: int): + + device = "cuda" if self._use_gpu else "cpu" + + obs_threshold_default = 1e3 + self._obs_threshold_lb = -obs_threshold_default # used for clipping observations + self._obs_threshold_ub = obs_threshold_default + + self._obs_ub = torch.full((1, obs_dim), dtype=self._dtype, device=device, + fill_value=1.0) + self._obs_lb = torch.full((1, obs_dim), dtype=self._dtype, device=device, + fill_value=-1.0) + self._obs_scale = (self._obs_ub - self._obs_lb)/2.0 + self._obs_offset = (self._obs_ub + self._obs_lb)/2.0 + + if not self._obs_dim==len(self._get_obs_names()): + error=f"obs dim {self._obs_dim} does not match obs names length {len(self._get_obs_names())}!!" 
+ Journal.log(self.__class__.__name__, + "_init_obs", + error, + LogType.EXCEP, + throw_when_excep = True) + + self._obs = Observations(namespace=self._namespace, + n_envs=self._n_envs, + obs_dim=self._obs_dim, + obs_names=self._get_obs_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._next_obs = NextObservations(namespace=self._namespace, + n_envs=self._n_envs, + obs_dim=self._obs_dim, + obs_names=self._get_obs_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._obs.run() + self._next_obs.run() + + self._is_substep_obs = torch.zeros((self.obs_dim(),), dtype=torch.bool, device=device) + self._is_substep_obs.fill_(False) # default to all step obs + + # not super memory efficient + self._substep_obs=torch.full_like(self._obs.get_torch_mirror(gpu=self._use_gpu), fill_value=0.0) + + def _init_actions(self, actions_dim: int): + + device = "cuda" if self._use_gpu else "cpu" + # action scalings to be applied to agent's output + self._actions_ub = torch.full((1, actions_dim), dtype=self._dtype, device=device, + fill_value=1.0) + self._actions_lb = torch.full((1, actions_dim), dtype=self._dtype, device=device, + fill_value=-1.0) + self._actions_scale = (self._actions_ub - self._actions_lb)/2.0 + self._actions_offset = (self._actions_ub + self._actions_lb)/2.0 + + if not self._actions_dim==len(self._get_action_names()): + error=f"action dim {self._actions_dim} does not match action names length {len(self._get_action_names())}!!" 
+ Journal.log(self.__class__.__name__, + "_init_actions", + error, + LogType.EXCEP, + throw_when_excep = True) + self._actions = Actions(namespace=self._namespace, + n_envs=self._n_envs, + action_dim=self._actions_dim, + action_names=self._get_action_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._actions.run() + + self.default_action = torch.full_like(input=self.get_actions(),fill_value=0.0) + self.safe_action = torch.full_like(input=self.get_actions(),fill_value=0.0) + + if self._env_opts["use_action_history"]: + self._act_mem_buffer=MemBuffer(name="ActionMemBuf", + data_tensor=self._actions.get_torch_mirror(), + data_names=self._get_action_names(), + debug=self._debug, + horizon=self._env_opts["actions_history_size"], + dtype=self._dtype, + use_gpu=self._use_gpu) + + # default to all continuous actions (changes the way noise is added) + self._is_continuous_actions=torch.full((actions_dim, ), + dtype=torch.bool, device=device, + fill_value=True) + + def _init_action_smoothing(self): + + continuous_actions=self.get_actions()[:, self._is_continuous_actions] + discrete_actions=self.get_actions()[:, ~self._is_continuous_actions] + self._action_smoother_continuous=ExponentialSignalSmoother(signal=continuous_actions, + update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent + smoothing_horizon=self._env_opts["smoothing_horizon_c"], + target_smoothing=0.5, + debug=self._debug, + dtype=self._dtype, + use_gpu=self._use_gpu, + name="ActionSmootherContinuous") + self._action_smoother_discrete=ExponentialSignalSmoother(signal=discrete_actions, + update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent + smoothing_horizon=self._env_opts["smoothing_horizon_d"], + target_smoothing=0.5, + debug=self._debug, + dtype=self._dtype, + use_gpu=self._use_gpu, + 
name="ActionSmootherDiscrete") + + # we also need somewhere to keep the actual actions after smoothing + self._actual_actions = Actions(namespace=self._namespace+"_actual", + n_envs=self._n_envs, + action_dim=self._actions_dim, + action_names=self._get_action_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + self._actual_actions.run() + + def _init_rewards(self): + + reward_thresh_default = 1.0 + n_sub_rewards = len(self._get_rewards_names()) + device = "cuda" if self._use_gpu else "cpu" + self._reward_thresh_lb = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=-reward_thresh_default, device=device) # used for clipping rewards + self._reward_thresh_ub = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=reward_thresh_default, device=device) + + self._sub_rewards = SubRewards(namespace=self._namespace, + n_envs=self._n_envs, + n_rewards=n_sub_rewards, + reward_names=self._get_rewards_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._tot_rewards = TotRewards(namespace=self._namespace, + n_envs=self._n_envs, + reward_names=["total_reward"], + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._sub_rewards.run() + self._tot_rewards.run() + + self._substep_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone() + # used to hold substep rewards (not super mem. 
efficient) + self._is_substep_rew = torch.zeros((self._substep_rewards.shape[1],),dtype=torch.bool,device=device) + self._is_substep_rew.fill_(True) # default to all substep rewards + + self._episodic_rewards_metrics = EpisodicRewards(reward_tensor=self._sub_rewards.get_torch_mirror(), + reward_names=self._get_rewards_names(), + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._episodic_rewards_metrics.set_constant_data_scaling(scaling=self._get_reward_scaling()) + + def _get_reward_scaling(self): + # to be overridden by child (default to no scaling) + return 1 + + def _max_ep_length(self): + #.should be overriden by child + return self._env_opts["episode_timeout_ub"] + + def _init_custom_db_data(self): + + self.custom_db_data = {} + # by default always log this contact data + rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror() + contact_names = self._rhc_refs.rob_refs.contact_names() + stepping_data = EpisodicData("RhcRefsFlag", rhc_latest_contact_ref, contact_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._add_custom_db_data(db_data=stepping_data) + + # log also action data + actions = self._actions.get_torch_mirror() + action_names = self._get_action_names() + action_data = EpisodicData("Actions", actions, action_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._add_custom_db_data(db_data=action_data) + + # and observations + observations = self._obs.get_torch_mirror() + observations_names = self._get_obs_names() + obs_data = EpisodicData("Obs", observations, observations_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._add_custom_db_data(db_data=obs_data) + + # log 
sub-term and sub-truncations data + t_scaling=1 # 1 so that we log an interpretable data in terms of why the episode finished + data_scaling = torch.full((self._n_envs, 1), + fill_value=t_scaling, + dtype=torch.int32,device="cpu") + sub_term = self._sub_terminations.get_torch_mirror() + term = self._terminations.get_torch_mirror() + sub_termination_names = self.sub_term_names() + + sub_term_data = EpisodicData("SubTerminations", sub_term, sub_termination_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + sub_term_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=sub_term_data) + term_data = EpisodicData("Terminations", term, ["terminations"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + term_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=term_data) + + sub_trunc = self._sub_truncations.get_torch_mirror() + trunc = self._truncations.get_torch_mirror() + sub_truncations_names = self.sub_trunc_names() + sub_trunc_data = EpisodicData("SubTruncations", sub_trunc, sub_truncations_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + sub_trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=sub_trunc_data) + trunc_data = EpisodicData("Truncations", trunc, ["truncations"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=trunc_data) + + def _add_custom_db_data(self, db_data: EpisodicData): + self.custom_db_data[db_data.name()] = db_data + + def _init_terminations(self): + 
+ # Boolean array indicating whether each environment episode has terminated after + # the current step. An episode termination could occur based on predefined conditions + # in the environment, such as reaching a goal or exceeding a time limit. + + self._terminations = Terminations(namespace=self._namespace, + n_envs=self._n_envs, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + self._terminations.run() + + sub_t_names = self.sub_term_names() + self._sub_terminations = SubTerminations(namespace=self._namespace, + n_envs=self._n_envs, + n_term=len(sub_t_names), + term_names=sub_t_names, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + self._sub_terminations.run() + + device = "cuda" if self._use_gpu else "cpu" + self._is_capsized=torch.zeros((self._n_envs,1), + dtype=torch.bool, device=device) + self._is_rhc_capsized=torch.zeros((self._n_envs,1), + dtype=torch.bool, device=device) + self._max_pitch_angle=60.0*math.pi/180.0 + + def _init_truncations(self): + + self._truncations = Truncations(namespace=self._namespace, + n_envs=self._n_envs, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + + self._truncations.run() + + sub_trc_names = self.sub_trunc_names() + self._sub_truncations = SubTruncations(namespace=self._namespace, + n_envs=self._n_envs, + n_trunc=len(sub_trc_names), + truc_names=sub_trc_names, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + self._sub_truncations.run() + + def _update_jnt_blacklist(self): + device = "cuda" if self._use_gpu else "cpu" + all_available_jnts=self.get_observed_joints() + blacklist=[] + for 
i in range(len(all_available_jnts)): + for pattern in self._jnt_q_blacklist_patterns: + if pattern in all_available_jnts[i]: + # stop at first pattern match + blacklist.append(i) + break + if not len(blacklist)==0: + self._jnt_q_blacklist_idxs=torch.tensor(blacklist, dtype=torch.int, device=device) + + def _attach_to_shared_mem(self): + + # runs shared mem clients for getting observation and setting RHC commands + + # remote stepping data + self._remote_stepper = RemoteStepperSrvr(namespace=self._namespace, + verbose=self._verbose, + vlevel=self._vlevel, + force_reconnection=True) + self._remote_stepper.run() + self._remote_resetter = RemoteResetSrvr(namespace=self._namespace, + verbose=self._verbose, + vlevel=self._vlevel, + force_reconnection=True) + self._remote_resetter.run() + self._remote_reset_req = RemoteResetRequest(namespace=self._namespace, + is_server=False, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True) + self._remote_reset_req.run() + + self._jnts_remapping=None + self._jnt_q_blacklist_idxs=None + + self._robot_state = RobotState(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True, + enable_height_sensor=self._env_opts["add_heightmap_obs"]) + + self._rhc_cmds = RhcCmds(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._rhc_pred = RhcPred(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._rhc_refs = RhcRefs(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._rhc_status = 
RhcStatus(namespace=self._namespace, + is_server=False, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._robot_state.run() + self._n_envs = self._robot_state.n_robots() + self._n_jnts = self._robot_state.n_jnts() + self._n_contacts = self._robot_state.n_contacts() # we assume same n contacts for all rhcs for now + + self._rhc_cmds.run() + self._rhc_pred.run() + self._rhc_refs.run() + self._rhc_status.run() + # we read rhc info now and just this time, since it's assumed to be static + self._check_controllers_registered(retry=True) # blocking + # (we need controllers to be connected to read meaningful data) + + self._rhc_status.rhc_static_info.synch_all(read=True,retry=True) + if self._use_gpu: + self._rhc_status.rhc_static_info.synch_mirror(from_gpu=False,non_blocking=False) + rhc_horizons=self._rhc_status.rhc_static_info.get("horizons",gpu=self._use_gpu) + rhc_nnodes=self._rhc_status.rhc_static_info.get("nnodes",gpu=self._use_gpu) + rhc_dts=self._rhc_status.rhc_static_info.get("dts",gpu=self._use_gpu) + + # height sensor metadata (client side) + if self._env_opts["add_heightmap_obs"]: + self._height_grid_size = self._robot_state.height_sensor.grid_size + self._height_flat_dim = self._robot_state.height_sensor.n_cols + rhc_ncontacts=self._rhc_status.rhc_static_info.get("ncontacts",gpu=self._use_gpu) + robot_mass=self._rhc_status.rhc_static_info.get("robot_mass",gpu=self._use_gpu) + pred_node_idxs_rhc=self._rhc_status.rhc_static_info.get("pred_node_idx",gpu=self._use_gpu) + + self._n_nodes_rhc=torch.round(rhc_nnodes) # we assume nodes are static during an env lifetime + self._rhc_horizons=rhc_horizons + self._rhc_dts=rhc_dts + self._n_contacts_rhc=rhc_ncontacts + self._rhc_robot_masses=robot_mass + if (self._rhc_robot_masses == 0).any(): + zero_indices = torch.nonzero(self._rhc_robot_masses == 0, as_tuple=True) + print(zero_indices) # This will print the indices of zero elements + 
Journal.log(self.__class__.__name__, + "_attach_to_shared_mem", + "Found at least one robot with 0 mass from RHC static info!!", + LogType.EXCEP, + throw_when_excep=True) + + self._rhc_robot_weight=robot_mass*9.81 + self._pred_node_idxs_rhc=pred_node_idxs_rhc + self._pred_horizon_rhc=self._pred_node_idxs_rhc*self._rhc_dts + + # run server for agent commands + self._agent_refs = AgentRefs(namespace=self._namespace, + is_server=True, + n_robots=self._n_envs, + n_jnts=self._robot_state.n_jnts(), + n_contacts=self._robot_state.n_contacts(), + contact_names=self._robot_state.contact_names(), + q_remapping=None, + with_gpu_mirror=self._use_gpu, + force_reconnection=True, + safe=False, + verbose=self._verbose, + vlevel=self._vlevel, + fill_value=0) + self._agent_refs.run() + q_init_agent_refs=torch.full_like(self._robot_state.root_state.get(data_type="q", gpu=self._use_gpu),fill_value=0.0) + q_init_agent_refs[:, 0]=1.0 + self._agent_refs.rob_refs.root_state.set(data_type="q", data=q_init_agent_refs, + gpu=self._use_gpu) + if self._use_gpu: + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=True) + self._agent_refs.rob_refs.root_state.synch_all(read=False, retry=True) + # episode steps counters (for detecting episode truncations for + # time limits) + self._ep_timeout_counter = EpisodesCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["episode_timeout_lb"], + n_steps_ub=self._env_opts["episode_timeout_ub"], + randomize_offsets_at_startup=True, # this has to be randomized + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=self._debug) # handles step counter through episodes and through envs + self._ep_timeout_counter.run() + self._task_rand_counter = TaskRandCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["n_steps_task_rand_lb"], + 
n_steps_ub=self._env_opts["n_steps_task_rand_ub"], + randomize_offsets_at_startup=False, # not necessary since it will be synched with the timeout counter + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=self._debug) # handles step counter through episodes and through envs + self._task_rand_counter.run() + self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter) + if self._env_opts["use_random_trunc"]: + self._rand_trunc_counter=RandomTruncCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["random_trunc_freq"]-self._env_opts["random_trunc_freq_delta"], + n_steps_ub=self._env_opts["random_trunc_freq"], + randomize_offsets_at_startup=True, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=False) + self._rand_trunc_counter.run() + # self._rand_trunc_counter.sync_counters(other_counter=self._ep_timeout_counter) + if self._env_opts["use_random_safety_reset"]: + self._rand_safety_reset_counter=SafetyRandResetsCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["random_reset_freq"], + n_steps_ub=self._env_opts["random_reset_freq"], + randomize_offsets_at_startup=True, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=False) + self._rand_safety_reset_counter.run() + # self._rand_safety_reset_counter.sync_counters(other_counter=self._ep_timeout_counter) + + # timer to track abs time in each env (reset logic to be implemented in child) + self._substep_abs_counter = SubStepAbsCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=1e9, + n_steps_ub=1e9, + randomize_offsets_at_startup=True, # randomizing startup offsets + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + 
safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=self._debug) + self._substep_abs_counter.run() + + # debug data servers + traing_env_param_dict = {} + traing_env_param_dict["use_gpu"] = self._use_gpu + traing_env_param_dict["debug"] = self._is_debug + traing_env_param_dict["n_preinit_steps"] = self._env_opts["n_preinit_steps"] + traing_env_param_dict["n_preinit_steps"] = self._n_envs + + self._training_sim_info = SharedTrainingEnvInfo(namespace=self._namespace, + is_server=True, + training_env_params_dict=traing_env_param_dict, + safe=False, + force_reconnection=True, + verbose=self._verbose, + vlevel=self._vlevel) + self._training_sim_info.run() + + self._observed_jnt_names=self.set_observed_joints() + self._set_jnts_blacklist_pattern() + self._update_jnt_blacklist() + + self._prev_root_p_substep=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone() + self._prev_root_q_substep=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone() + self._prev_root_p_step=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone() + self._prev_root_q_step=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone() + + def _activate_rhc_controllers(self): + self._rhc_status.activation_state.get_torch_mirror()[:, :] = True + self._rhc_status.activation_state.synch_all(read=False, retry=True) # activates all controllers + + def _synch_state(self, + gpu: bool = True): + + # read from shared memory on CPU + # robot state + self._robot_state.root_state.synch_all(read = True, retry = True) + self._robot_state.jnts_state.synch_all(read = True, retry = True) + # rhc cmds + self._rhc_cmds.root_state.synch_all(read = True, retry = True) + self._rhc_cmds.jnts_state.synch_all(read = True, retry = True) + self._rhc_cmds.contact_wrenches.synch_all(read = True, retry = True) + # rhc pred + self._rhc_pred.root_state.synch_all(read = True, retry = True) + # self._rhc_pred.jnts_state.synch_all(read = 
True, retry = True) + # self._rhc_pred.contact_wrenches.synch_all(read = True, retry = True) + # refs for root link and contacts + self._rhc_refs.rob_refs.root_state.synch_all(read = True, retry = True) + self._rhc_refs.contact_flags.synch_all(read = True, retry = True) + self._rhc_refs.flight_info.synch_all(read = True, retry = True) + self._rhc_refs.flight_settings_req.synch_all(read = True, retry = True) + self._rhc_refs.rob_refs.contact_pos.synch_all(read = True, retry = True) + # rhc cost + self._rhc_status.rhc_cost.synch_all(read = True, retry = True) + # rhc constr. violations + self._rhc_status.rhc_constr_viol.synch_all(read = True, retry = True) + # failure states + self._rhc_status.fails.synch_all(read = True, retry = True) + # tot cost and cnstr viol on nodes + step variable + self._rhc_status.rhc_nodes_cost.synch_all(read = True, retry = True) + self._rhc_status.rhc_nodes_constr_viol.synch_all(read = True, retry = True) + self._rhc_status.rhc_fcn.synch_all(read = True, retry = True) + self._rhc_status.rhc_fail_idx.synch_all(read = True, retry = True) + if self._env_opts["add_heightmap_obs"]: + self._robot_state.height_sensor.synch_all(read=True, retry=True) + if gpu: + # copies data to "mirror" on GPU --> we can do it non-blocking since + # in this direction it should be safe + self._robot_state.root_state.synch_mirror(from_gpu=False,non_blocking=True) # copies shared data on GPU + self._robot_state.jnts_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_cmds.root_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_cmds.jnts_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_cmds.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_pred.root_state.synch_mirror(from_gpu=False,non_blocking=True) + # self._rhc_pred.jnts_state.synch_mirror(from_gpu=False,non_blocking=True) + # self._rhc_pred.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True) + 
self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.contact_flags.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.flight_info.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.flight_settings_req.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_cost.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_constr_viol.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.fails.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_nodes_cost.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_nodes_constr_viol.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_fcn.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_fail_idx.synch_mirror(from_gpu=False,non_blocking=True) + if self._env_opts["add_heightmap_obs"]: + self._robot_state.height_sensor.synch_mirror(from_gpu=False, non_blocking=True) + torch.cuda.synchronize() # ensuring that all the streams on the GPU are completed \ + # before the CPU continues execution + + def _override_refs(self, + env_indxs: torch.Tensor = None): + + # just used for setting agent refs externally (i.e. 
from shared mem on CPU) + self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem + if self._use_gpu: + # copies latest refs to GPU + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False) + + def _clamp_obs(self, + obs: torch.Tensor): + if self._is_debug: + self._check_finite(obs, "observations", False) + torch.nan_to_num(input=obs, out=obs, nan=self._obs_threshold_ub, + posinf=self._obs_threshold_ub, + neginf=self._obs_threshold_lb) # prevent nans + + obs.clamp_(self._obs_threshold_lb, self._obs_threshold_ub) + + def _clamp_rewards(self, + rewards: torch.Tensor): + if self._is_debug: + self._check_finite(rewards, "rewards", False) + torch.nan_to_num(input=rewards, out=rewards, nan=0.0, + posinf=None, + neginf=None) # prevent nans + rewards.clamp_(self._reward_thresh_lb, self._reward_thresh_ub) + + def get_actions_lb(self): + return self._actions_lb + + def get_actions_ub(self): + return self._actions_ub + + def get_actions_scale(self): + return self._actions_scale + + def get_actions_offset(self): + return self._actions_offset + + def get_obs_lb(self): + return self._obs_lb + + def get_obs_ub(self): + return self._obs_ub + + def get_obs_scale(self): + self._obs_scale = (self._obs_ub - self._obs_lb)/2.0 + return self._obs_scale + + def get_obs_offset(self): + self._obs_offset = (self._obs_ub + self._obs_lb)/2.0 + return self._obs_offset + + def switch_random_reset(self, on: bool = True): + self._random_reset_active=on + + def set_jnts_remapping(self, + remapping: List = None): + + self._jnts_remapping=remapping + if self._jnts_remapping is not None: + self._robot_state.set_jnts_remapping(jnts_remapping=self._jnts_remapping) + self._rhc_cmds.set_jnts_remapping(jnts_remapping=self._jnts_remapping) + self._rhc_pred.set_jnts_remapping(jnts_remapping=self._jnts_remapping) + # we need to also update the list of observed joints to match + available_joints=self._robot_state.jnt_names() + # the remapping 
ordering + self._observed_jnt_names=[] + for i in range(len(available_joints)): + self._observed_jnt_names.append(available_joints[self._jnts_remapping[i]]) + + self._update_jnt_blacklist() + + updated_obs_names=self._get_obs_names() # get updated obs names (should use get_observed_joints + # internally, so that jnt names are updated) + + # also update jnt obs names on shared memory + names_old=self._obs.get_obs_names() + names_old_next=self._next_obs.get_obs_names() + names_old[:]=updated_obs_names + names_old_next[:]=updated_obs_names + self._obs.update_names() + self._next_obs.update_names() + + # also update + if "Obs" in self.custom_db_data: + db_obs_names=self.custom_db_data["Obs"].data_names() + db_obs_names[:]=updated_obs_names + + def _check_finite(self, + tensor: torch.Tensor, + name: str, + throw: bool = False): + if not torch.isfinite(tensor).all().item(): + exception = f"Found nonfinite elements in {name} tensor!!" + non_finite_idxs=torch.nonzero(~torch.isfinite(tensor)) + n_nonf_elems=non_finite_idxs.shape[0] + + if name=="observations": + for i in range(n_nonf_elems): + db_msg=f"{self.obs_names()[non_finite_idxs[i,1]]} (env. {non_finite_idxs[i,0]}):" + \ + f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}" + print(db_msg) + if name=="rewards": + for i in range(n_nonf_elems): + db_msg=f"{self.sub_rew_names()[non_finite_idxs[i,1]]} (env. 
{non_finite_idxs[i,0]}):" + \ + f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}" + print(db_msg) + print(tensor) + Journal.log(self.__class__.__name__, + "_check_finite", + exception, + LogType.EXCEP, + throw_when_excep = throw) + + def _check_controllers_registered(self, + retry: bool = False): + + if retry: + self._rhc_status.controllers_counter.synch_all(read=True, retry=True) + n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item() + while not (n_connected_controllers == self._n_envs): + warn = f"Expected {self._n_envs} controllers to be connected during training, " + \ + f"but got {n_connected_controllers}. Will wait for all to be connected..." + Journal.log(self.__class__.__name__, + "_check_controllers_registered", + warn, + LogType.WARN, + throw_when_excep = False) + nsecs = int(2 * 1000000000) + PerfSleep.thread_sleep(nsecs) + self._rhc_status.controllers_counter.synch_all(read=True, retry=True) + n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item() + info = f"All {n_connected_controllers} controllers connected!" + Journal.log(self.__class__.__name__, + "_check_controllers_registered", + info, + LogType.INFO, + throw_when_excep = False) + return True + else: + self._rhc_status.controllers_counter.synch_all(read=True, retry=True) + n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item() + if not (n_connected_controllers == self._n_envs): + exception = f"Expected {self._n_envs} controllers to be connected during training, " + \ + f"but got {n_connected_controllers}. Aborting..." 
+ Journal.log(self.__class__.__name__, + "_check_controllers_registered", + exception, + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def _check_truncations(self): + + self._check_sub_truncations() + sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu) + truncations = self._truncations.get_torch_mirror(gpu=self._use_gpu) + truncations[:, :] = torch.any(sub_truncations,dim=1,keepdim=True) + + def _check_terminations(self): + + self._check_sub_terminations() + sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu) + terminations = self._terminations.get_torch_mirror(gpu=self._use_gpu) + terminations[:, :] = torch.any(sub_terminations,dim=1,keepdim=True) + + def _check_sub_truncations(self): + # default behaviour-> to be overriden by child + sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu) + sub_truncations[:, 0:1]=self._ep_timeout_counter.time_limits_reached() + + def _check_sub_terminations(self): + # default behaviour-> to be overriden by child + sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu) + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + robot_q_pred = self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) + + # terminate when either the real robot or the prediction from the MPC are capsized + check_capsize(quat=robot_q_meas,max_angle=self._max_pitch_angle, + output_t=self._is_capsized) + check_capsize(quat=robot_q_pred,max_angle=self._max_pitch_angle, + output_t=self._is_rhc_capsized) + + sub_terminations[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=self._use_gpu) + sub_terminations[:, 1:2] = self._is_capsized + sub_terminations[:, 2:3] = self._is_rhc_capsized + + def is_action_continuous(self): + return self._is_continuous_actions + + def is_action_discrete(self): + return ~self._is_continuous_actions + + @abstractmethod + def _pre_substep(self): + pass + + @abstractmethod + 
def _custom_post_step(self,episode_finished): + pass + + @abstractmethod + def _custom_post_substp_post_rew(self): + pass + + @abstractmethod + def _custom_post_substp_pre_rew(self): + pass + + @abstractmethod + def _apply_actions_to_rhc(self): + pass + + def _override_actions_with_demo(self): + pass + + @abstractmethod + def _compute_substep_rewards(self): + pass + + @abstractmethod + def _set_substep_rew(self): + pass + + @abstractmethod + def _set_substep_obs(self): + pass + + @abstractmethod + def _compute_step_rewards(self): + pass + + @abstractmethod + def _fill_substep_obs(self, + obs: torch.Tensor): + pass + + @abstractmethod + def _fill_step_obs(self, + obs: torch.Tensor): + pass + + @abstractmethod + def _randomize_task_refs(self, + env_indxs: torch.Tensor = None): + pass + + def _custom_post_init(self): + pass + + def _get_avrg_substep_root_twist(self, + out: torch.Tensor, + base_loc: bool = True): + # to be called at each substep + robot_p_meas = self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + root_v_avrg_w=(robot_p_meas-self._prev_root_p_substep)/self._substep_dt + root_omega_avrg_w=quaternion_to_angular_velocity(q_diff=quaternion_difference(self._prev_root_q_substep,robot_q_meas),\ + dt=self._substep_dt) + twist_w=torch.cat((root_v_avrg_w, + root_omega_avrg_w), + dim=1) + if not base_loc: + self._prev_root_p_substep[:, :]=robot_p_meas + self._prev_root_q_substep[:, :]=robot_q_meas + out[:, :]=twist_w + # rotate using the current (end-of-substep) orientation for consistency with other signals + world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out) + self._prev_root_p_substep[:, :]=robot_p_meas + self._prev_root_q_substep[:, :]=robot_q_meas + + def _get_avrg_step_root_twist(self, + out: torch.Tensor, + base_loc: bool = True): + # to be called after substeps of actions repeats + robot_p_meas = 
self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + dt=self._substep_dt*self._action_repeat # accounting for frame skipping + root_v_avrg_w=(robot_p_meas-self._prev_root_p_step)/(dt) + root_omega_avrg_w=quaternion_to_angular_velocity(q_diff=quaternion_difference(self._prev_root_q_step,robot_q_meas),\ + dt=dt) + twist_w=torch.cat((root_v_avrg_w, + root_omega_avrg_w), + dim=1) + if not base_loc: + out[:, :]=twist_w + # rotate using the current (end-of-step) orientation for consistency with other signals + world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out) + + def _get_avrg_rhc_root_twist(self, + out: torch.Tensor, + base_loc: bool = True): + + rhc_root_p =self._rhc_cmds.root_state.get(data_type="p",gpu=self._use_gpu) + rhc_root_q =self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) + rhc_root_p_pred =self._rhc_pred.root_state.get(data_type="p",gpu=self._use_gpu) + rhc_root_q_pred =self._rhc_pred.root_state.get(data_type="q",gpu=self._use_gpu) + + rhc_root_v_avrg_rhc_w=(rhc_root_p_pred-rhc_root_p)/self._pred_horizon_rhc + rhc_root_omega_avrg_rhc_w=quaternion_to_angular_velocity(q_diff=quaternion_difference(rhc_root_q,rhc_root_q_pred),\ + dt=self._pred_horizon_rhc) + + rhc_pred_avrg_twist_rhc_w = torch.cat((rhc_root_v_avrg_rhc_w, + rhc_root_omega_avrg_rhc_w), + dim=1) + if not base_loc: + out[:, :]=rhc_pred_avrg_twist_rhc_w + # to rhc base frame (using first node as reference) + world2base_frame(t_w=rhc_pred_avrg_twist_rhc_w, q_b=rhc_root_q, t_out=out) diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/twist_tracking_env.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/twist_tracking_env.py new file mode 100644 index 0000000000000000000000000000000000000000..fb10234614dfba256e6935e68d7be4e759c28147 --- /dev/null +++ 
b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/twist_tracking_env.py @@ -0,0 +1,1396 @@ +from typing import Dict + +import os + +import torch + +from EigenIPC.PyEigenIPC import VLevel, LogType, Journal + +from mpc_hive.utilities.shared_data.rhc_data import RobotState, RhcStatus, RhcRefs +from mpc_hive.utilities.math_utils_torch import world2base_frame, base2world_frame, w2hor_frame + +from aug_mpc.utils.sys_utils import PathsGetter +from aug_mpc.utils.timers import PeriodicTimer +from aug_mpc.utils.episodic_data import EpisodicData +from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother +from aug_mpc.utils.math_utils import check_capsize +from aug_mpc.training_envs.training_env_base import AugMPCTrainingEnvBase + +class TwistTrackingEnv(AugMPCTrainingEnvBase): + """Base AugMPC training env that tracks commanded twists by pushing velocity and contact targets into the RHC controller while handling locomotion rewards/resets.""" + + def __init__(self, + namespace: str, + actions_dim: int = 10, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + debug: bool = True, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + env_name = "LinVelTrack" + device = "cuda" if use_gpu else "cpu" + + self._add_env_opt(env_opts, "srew_drescaling", + False) + + self._add_env_opt(env_opts, "step_thresh", 0.) 
# when step action < thresh, a step is requested + + # counters settings + self._add_env_opt(env_opts, "single_task_ref_per_episode", + True # if True, the task ref is constant over the episode (ie + # episodes are truncated when task is changed) + ) + self._add_env_opt(env_opts, "add_angvel_ref_rand", default=True) # randomize also agular vel ref (just z component) + + self._add_env_opt(env_opts, "episode_timeout_lb", + 1024) + self._add_env_opt(env_opts, "episode_timeout_ub", + 1024) + self._add_env_opt(env_opts, "n_steps_task_rand_lb", + 512) + self._add_env_opt(env_opts, "n_steps_task_rand_ub", + 512) + self._add_env_opt(env_opts, "use_random_safety_reset", + True) + self._add_env_opt(env_opts, "random_reset_freq", + 10) # a random reset once every n-episodes (per env) + self._add_env_opt(env_opts, "use_random_trunc", + True) + self._add_env_opt(env_opts, "random_trunc_freq", + env_opts["episode_timeout_ub"]*5) # to remove temporal correlations between envs + self._add_env_opt(env_opts, "random_trunc_freq_delta", + env_opts["episode_timeout_ub"]*2) # to randomize trunc frequency between envs + + if not env_opts["single_task_ref_per_episode"]: + env_opts["random_reset_freq"]=int(env_opts["random_reset_freq"]/\ + (env_opts["episode_timeout_lb"]/float(env_opts["n_steps_task_rand_lb"]))) + + self._add_env_opt(env_opts, "action_repeat", 1) # frame skipping (different agent action every action_repeat + # env substeps) + + self._add_env_opt(env_opts, "n_preinit_steps", 1) # n steps of the controllers to properly initialize everything + + self._add_env_opt(env_opts, "vec_ep_freq_metrics_db", 1) # n eps over which debug metrics are reported + self._add_env_opt(env_opts, "demo_envs_perc", 0.0) + self._add_env_opt(env_opts, "max_cmd_v", 1.5) # maximum cmd v for lin v actions (single component) + self._add_env_opt(env_opts, "max_cmd_omega", 1.0) # maximum cmd v for omega v actions (single component) + + # action smoothing + self._add_env_opt(env_opts, 
"use_action_smoothing", False) + self._add_env_opt(env_opts, "smoothing_horizon_c", 0.01) + self._add_env_opt(env_opts, "smoothing_horizon_d", 0.03) + + # whether to smooth vel error signal + self._add_env_opt(env_opts, "use_track_reward_smoother", False) + self._add_env_opt(env_opts, "smoothing_horizon_vel_err", 0.08) + self._add_env_opt(env_opts, "track_rew_smoother", None) + + # rewards + self._reward_map={} + self._reward_lb_map={} + + self._add_env_opt(env_opts, "reward_lb_default", -0.5) + self._add_env_opt(env_opts, "reward_ub_default", 1e6) + + self._add_env_opt(env_opts, "task_error_reward_lb", -0.5) + self._add_env_opt(env_opts, "CoT_reward_lb", -0.5) + self._add_env_opt(env_opts, "power_reward_lb", -0.5) + self._add_env_opt(env_opts, "action_rate_reward_lb", -0.5) + self._add_env_opt(env_opts, "jnt_vel_reward_lb", -0.5) + self._add_env_opt(env_opts, "rhc_avrg_vel_reward_lb", -0.5) + + self._add_env_opt(env_opts, "add_power_reward", False) + self._add_env_opt(env_opts, "add_CoT_reward", True) + self._add_env_opt(env_opts, "use_CoT_wrt_ref", False) + self._add_env_opt(env_opts, "add_action_rate_reward", True) + self._add_env_opt(env_opts, "add_jnt_v_reward", False) + + self._add_env_opt(env_opts, "use_rhc_avrg_vel_tracking", False) + + # task tracking + self._add_env_opt(env_opts, "use_relative_error", default=False) # use relative vel error (wrt current task norm) + self._add_env_opt(env_opts, "directional_tracking", default=True) # whether to compute tracking error based on reference direction + # if env_opts["add_angvel_ref_rand"]: + # env_opts["directional_tracking"]=False + + self._add_env_opt(env_opts, "use_L1_norm", default=True) # whether to use L1 norm for the error (otherwise L2) + self._add_env_opt(env_opts, "use_exp_track_rew", default=True) # whether to use a reward of the form A*e^(B*x), + # otherwise A*(1-B*x) + + self._add_env_opt(env_opts, "use_fail_idx_weight", default=False) + self._add_env_opt(env_opts, "task_track_offset_exp", 
default=1.0) + self._add_env_opt(env_opts, "task_track_scale_exp", default=5.0) + self._add_env_opt(env_opts, "task_track_offset", default=1.0) + self._add_env_opt(env_opts, "task_track_scale", default=1.5) + self._add_env_opt(env_opts, "task_track_front_weight", default=1.0) + self._add_env_opt(env_opts, "task_track_lat_weight", default=0.05) + self._add_env_opt(env_opts, "task_track_vert_weight", default=0.05) + self._add_env_opt(env_opts, "task_track_omega_z_weight", default=0.2) + self._add_env_opt(env_opts, "task_track_omega_x_weight", default=0.05) + self._add_env_opt(env_opts, "task_track_omega_y_weight", default=0.05) + # if env_opts["add_angvel_ref_rand"]: + # env_opts["task_track_omega_x_weight"]=0.0 + # env_opts["task_track_omega_y_weight"]=0.0 + # env_opts["task_track_omega_z_weight"]=1.0 + + # task pred tracking + self._add_env_opt(env_opts, "task_pred_track_offset", default=1.0) + self._add_env_opt(env_opts, "task_pred_track_scale", default=3.0) + + # energy penalties + self._add_env_opt(env_opts, "CoT_offset", default=0.3) + self._add_env_opt(env_opts, "CoT_scale", default=0.3) + self._add_env_opt(env_opts, "power_offset", default=0.1) + self._add_env_opt(env_opts, "power_scale", default=8e-4) + + # action rate penalty + self._add_env_opt(env_opts, "action_rate_offset", default=0.1) + self._add_env_opt(env_opts, "action_rate_scale", default=2.0) + self._add_env_opt(env_opts, "action_rate_rew_d_weight", default=0.1) + self._add_env_opt(env_opts, "action_rate_rew_c_weight", default=1.0) + + # jnt vel penalty + self._add_env_opt(env_opts, "jnt_vel_offset", default=0.1) + self._add_env_opt(env_opts, "jnt_vel_scale", default=2.0) + + # terminations + self._add_env_opt(env_opts, "add_term_mpc_capsize", default=False) # add termination based on mpc capsizing prediction + + # observations + self._add_env_opt(env_opts, "rhc_fail_idx_scale", default=1.0) + self._add_env_opt(env_opts, "use_action_history", default=True) # whether to add information on past 
actions to obs + self._add_env_opt(env_opts, "add_prev_actions_stats_to_obs", default=False) # add actions std, mean + last action over a horizon to obs (if self._use_action_history True) + self._add_env_opt(env_opts, "actions_history_size", default=3) + + self._add_env_opt(env_opts, "add_mpc_contact_f_to_obs", default=True) # add estimate vertical contact f to obs + self._add_env_opt(env_opts, "add_fail_idx_to_obs", default=True) # we need to obserse mpc failure idx to correlate it with terminations + + self._add_env_opt(env_opts, "use_linvel_from_rhc", default=True) # no lin vel meas available, we use est. from mpc + self._add_env_opt(env_opts, "add_flight_info", default=True) # add feedback info on pos, remamining duration, length, + # apex and landing height of flight phases + self._add_env_opt(env_opts, "add_flight_settings", default=False) # add feedback info on current flight requests for mpc + + self._add_env_opt(env_opts, "use_prob_based_stepping", default=False) # interpret actions as stepping prob (never worked) + + self._add_env_opt(env_opts, "add_rhc_cmds_to_obs", default=True) # add the rhc cmds which are being applied now to the robot + + if not "add_periodic_clock_to_obs" in env_opts: + # add a sin/cos clock to obs (useful if task is explicitly + # time-dependent) + self._add_env_opt(env_opts, "add_periodic_clock_to_obs", default=False) + + self._add_env_opt(env_opts, "add_heightmap_obs", default=True) + + # temporarily creating robot state client to get some data + robot_state_tmp = RobotState(namespace=namespace, + is_server=False, + safe=False, + verbose=verbose, + vlevel=vlevel, + with_gpu_mirror=False, + with_torch_view=False, + enable_height_sensor=env_opts["add_heightmap_obs"]) + robot_state_tmp.run() + rhc_status_tmp = RhcStatus(is_server=False, + namespace=namespace, + verbose=verbose, + vlevel=vlevel, + with_torch_view=False, + with_gpu_mirror=False) + rhc_status_tmp.run() + rhc_refs_tmp = RhcRefs(namespace=namespace, + is_server=False, + 
safe=False, + verbose=verbose, + vlevel=vlevel, + with_gpu_mirror=False, + with_torch_view=False) + rhc_refs_tmp.run() + n_jnts = robot_state_tmp.n_jnts() + self._contact_names = robot_state_tmp.contact_names() + self._n_contacts = len(self._contact_names) + self._flight_info_size=rhc_refs_tmp.flight_info.n_cols + self._flight_setting_size=rhc_refs_tmp.flight_settings_req.n_cols + # height sensor metadata (if present) + self._height_grid_size = None + self._height_flat_dim = 0 + if env_opts["add_heightmap_obs"]: + self._height_grid_size = robot_state_tmp.height_sensor.grid_size + self._height_flat_dim = robot_state_tmp.height_sensor.n_cols + + robot_state_tmp.close() + rhc_status_tmp.close() + rhc_refs_tmp.close() + + # defining obs dimension + obs_dim=3 # normalized gravity vector in base frame + obs_dim+=6 # meas twist in base frame + obs_dim+=2*n_jnts # joint pos + vel + if env_opts["add_mpc_contact_f_to_obs"]: + obs_dim+=3*self._n_contacts + obs_dim+=6 # twist reference in base frame frame + if env_opts["add_fail_idx_to_obs"]: + obs_dim+=1 # rhc controller failure index + if env_opts["add_term_mpc_capsize"]: + obs_dim+=3 # gravity vec from mpc + if env_opts["use_rhc_avrg_vel_tracking"]: + obs_dim+=6 # mpc avrg twist + if env_opts["add_flight_info"]: # contact pos, remaining duration, length, apex, landing height, landing dx, dy + obs_dim+=self._flight_info_size + if env_opts["add_flight_settings"]: + obs_dim+=self._flight_setting_size + if env_opts["add_rhc_cmds_to_obs"]: + obs_dim+=3*n_jnts + if env_opts["use_action_history"]: + if env_opts["add_prev_actions_stats_to_obs"]: + obs_dim+=3*actions_dim # previous agent actions statistics (mean, std + last action) + else: # full action history + obs_dim+=env_opts["actions_history_size"]*actions_dim + if env_opts["use_action_smoothing"]: + obs_dim+=actions_dim # it's better to also add the smoothed actions as obs + if env_opts["add_periodic_clock_to_obs"]: + obs_dim+=2 + if env_opts["add_heightmap_obs"]: + 
obs_dim+=self._height_flat_dim + # Agent task reference + self._add_env_opt(env_opts, "use_pof0", default=True) # with some prob, references will be null + self._add_env_opt(env_opts, "pof0_linvel", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr) + self._add_env_opt(env_opts, "pof0_omega", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr) + self._add_env_opt(env_opts, "max_linvel_ref", default=0.3) # m/s + self._add_env_opt(env_opts, "max_angvel_ref", default=0.0) # rad/s + if env_opts["add_angvel_ref_rand"]: + env_opts["max_angvel_ref"]=0.4 + + # ready to init base class + self._this_child_path = os.path.abspath(__file__) + AugMPCTrainingEnvBase.__init__(self, + namespace=namespace, + obs_dim=obs_dim, + actions_dim=actions_dim, + env_name=env_name, + verbose=verbose, + vlevel=vlevel, + use_gpu=use_gpu, + dtype=dtype, + debug=debug, + override_agent_refs=override_agent_refs, + timeout_ms=timeout_ms, + env_opts=env_opts) + + def _custom_post_init(self): + + device = "cuda" if self._use_gpu else "cpu" + + self._update_jnt_blacklist() # update blacklist for joints + + # constant base-frame unit vectors (reuse to avoid per-call allocations) + self._base_x_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device) + self._base_x_dir[:, 0] = 1.0 + self._base_y_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device) + self._base_y_dir[:, 1] = 1.0 + + self._twist_ref_lb = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=-1.5) + self._twist_ref_ub = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=1.5) + + # task reference parameters (world frame) + # lin vel + self._twist_ref_lb[0, 0] = -self._env_opts["max_linvel_ref"] + self._twist_ref_lb[0, 1] = -self._env_opts["max_linvel_ref"] + self._twist_ref_lb[0, 2] = 0.0 + self._twist_ref_ub[0, 0] = self._env_opts["max_linvel_ref"] + self._twist_ref_ub[0, 1] = 
self._env_opts["max_linvel_ref"] + self._twist_ref_ub[0, 2] = 0.0 + # angular vel + self._twist_ref_lb[0, 3] = 0.0 + self._twist_ref_lb[0, 4] = 0.0 + self._twist_ref_lb[0, 5] = -self._env_opts["max_angvel_ref"] + self._twist_ref_ub[0, 3] = 0.0 + self._twist_ref_ub[0, 4] = 0.0 + self._twist_ref_ub[0, 5] = self._env_opts["max_angvel_ref"] + + self._twist_ref_offset = (self._twist_ref_ub + self._twist_ref_lb)/2.0 + self._twist_ref_scale = (self._twist_ref_ub - self._twist_ref_lb)/2.0 + + # adding some custom db info + agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=False) + agent_twist_ref_data = EpisodicData("AgentTwistRefs", agent_twist_ref, + ["v_x", "v_y", "v_z", "omega_x", "omega_y", "omega_z"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + rhc_fail_idx = EpisodicData("RhcFailIdx", self._rhc_fail_idx(gpu=False), ["rhc_fail_idx"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + f_names=[] + for contact in self._contact_names: + f_names.append(f"fc_{contact}_x_base_loc") + f_names.append(f"fc_{contact}_y_base_loc") + f_names.append(f"fc_{contact}_z_base_loc") + rhc_contact_f = EpisodicData("RhcContactForces", + self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False), + f_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + self._pow_db_data=torch.full(size=(self._n_envs,2), + dtype=self._dtype, device="cpu", + fill_value=-1.0) + power_db = EpisodicData("Power", + self._pow_db_data, + ["CoT", "W"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + self._track_error_db=torch.full_like(agent_twist_ref, fill_value=0.0) + task_err_db = EpisodicData("TrackingError", + agent_twist_ref, + 
["e_vx", "e_vy", "e_vz", "e_omegax", "e_omegay", "e_omegaz"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + self._add_custom_db_data(db_data=agent_twist_ref_data) + self._add_custom_db_data(db_data=rhc_fail_idx) + self._add_custom_db_data(db_data=rhc_contact_f) + self._add_custom_db_data(db_data=power_db) + self._add_custom_db_data(db_data=task_err_db) + + # rewards + self._task_err_weights = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=0.0) + if self._env_opts["directional_tracking"]: + self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"] # frontal + self._task_err_weights[0, 1] = self._env_opts["task_track_lat_weight"] # lateral + self._task_err_weights[0, 2] = self._env_opts["task_track_vert_weight"] # vertical + self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"] + self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + else: + self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"] + self._task_err_weights[0, 1] = self._env_opts["task_track_front_weight"] + self._task_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"] + self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"] + self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + + self._task_pred_err_weights = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=0.0) + if self._env_opts["directional_tracking"]: + self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 1] = self._env_opts["task_track_lat_weight"] + self._task_pred_err_weights[0, 2] = self._env_opts["task_track_vert_weight"] + self._task_pred_err_weights[0, 3] = 
self._env_opts["task_track_omega_x_weight"] + self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + else: + self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 1] = self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"] + self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + + self._power_penalty_weights = torch.full((1, self._n_jnts), dtype=self._dtype, device=device, + fill_value=1.0) + self._power_penalty_weights_sum = torch.sum(self._power_penalty_weights).item() + subr_names=self._get_rewards_names() # initializes + + # reward clipping + self._reward_thresh_lb[:, :] = self._env_opts["reward_lb_default"] + self._reward_thresh_ub[:, :]= self._env_opts["reward_ub_default"] + + for reward_name, env_opt_key in self._reward_lb_map.items(): + if reward_name in self._reward_map: + self._reward_thresh_lb[:, self._reward_map[reward_name]] = self._env_opts[env_opt_key] + + # obs bounds + self._obs_threshold_lb = -1e3 # used for clipping observations + self._obs_threshold_ub = 1e3 + + # actions + if not self._env_opts["use_prob_based_stepping"]: + self._is_continuous_actions[6:10]=False + + v_cmd_max = self._env_opts["max_cmd_v"] + omega_cmd_max = self._env_opts["max_cmd_omega"] + self._actions_lb[:, 0:3] = -v_cmd_max + self._actions_ub[:, 0:3] = v_cmd_max + self._actions_lb[:, 3:6] = -omega_cmd_max # twist cmds + self._actions_ub[:, 3:6] = omega_cmd_max + if "contact_flag_start" in self._actions_map: + idx=self._actions_map["contact_flag_start"] + if self._env_opts["use_prob_based_stepping"]: + self._actions_lb[:, idx:idx+self._n_contacts] = 0.0 # 
contact flags + self._actions_ub[:, idx:idx+self._n_contacts] = 1.0 + else: + self._actions_lb[:, idx:idx+self._n_contacts] = -1.0 + self._actions_ub[:, idx:idx+self._n_contacts] = 1.0 + + self.default_action[:, :] = (self._actions_ub+self._actions_lb)/2.0 + # self.default_action[:, ~self._is_continuous_actions] = 1.0 + self.safe_action[:, :] = self.default_action + if "contact_flag_start" in self._actions_map: # safe actions for contacts is 1 (keep contact) + idx=self._actions_map["contact_flag_start"] + self.safe_action[:, idx:idx+self._n_contacts] = 1.0 + + # assign obs bounds (useful if not using automatic obs normalization) + obs_names=self._get_obs_names() + obs_patterns=["gn", + "linvel", + "omega", + "q_jnt", + "v_jnt", + "fc", + "rhc_fail", + "rhc_cmd_q", + "rhc_cmd_v", + "rhc_cmd_eff", + "flight_pos" + ] + obs_ubs=[1.0, + 5*v_cmd_max, + 5*omega_cmd_max, + 2*torch.pi, + 30.0, + 2.0, + 1.0, + 2*torch.pi, + 30.0, + 200.0, + self._n_nodes_rhc.mean().item()] + obs_lbs=[-1.0, + -5*v_cmd_max, + -5*omega_cmd_max, + -2*torch.pi, + -30.0, + -2.0, + 0.0, + -2*torch.pi, + -30.0, + -200.0, + 0.0] + obs_bounds = {name: (lb, ub) for name, lb, ub in zip(obs_patterns, obs_lbs, obs_ubs)} + + for i in range(len(obs_names)): + obs_name=obs_names[i] + for pattern in obs_patterns: + if pattern in obs_name: + lb=obs_bounds[pattern][0] + ub=obs_bounds[pattern][1] + self._obs_lb[:, i]=lb + self._obs_ub[:, i]=ub + break + + # handle action memory buffer in obs + if self._env_opts["use_action_history"]: # just history stats + if self._env_opts["add_prev_actions_stats_to_obs"]: + i=0 + prev_actions_idx = next((i for i, s in enumerate(obs_names) if "_prev_act" in s), None) + prev_actions_mean_idx=next((i for i, s in enumerate(obs_names) if "_avrg_act" in s), None) + prev_actions_std_idx=next((i for i, s in enumerate(obs_names) if "_std_act" in s), None) + + # assume actions are always normalized in [-1, 1] by agent + if prev_actions_idx is not None: + self._obs_lb[:, 
prev_actions_idx:prev_actions_idx+self.actions_dim()]=-1.0 + self._obs_ub[:, prev_actions_idx:prev_actions_idx+self.actions_dim()]=1.0 + if prev_actions_mean_idx is not None: + self._obs_lb[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=-1.0 + self._obs_ub[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=1.0 + if prev_actions_std_idx is not None: + self._obs_lb[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=0 + self._obs_ub[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=1.0 + + else: # full history + i=0 + first_action_mem_buffer_idx = next((i for i, s in enumerate(obs_names) if "_m1_act" in s), None) + if first_action_mem_buffer_idx is not None: + action_idx_start_idx_counter=first_action_mem_buffer_idx + for j in range(self._env_opts["actions_history_size"]): + self._obs_lb[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=-1.0 + self._obs_ub[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=1.0 + action_idx_start_idx_counter+=self.actions_dim() + + # some aux data to avoid allocations at training runtime + self._rhc_twist_cmd_rhc_world=self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu).detach().clone() + self._rhc_twist_cmd_rhc_h=self._rhc_twist_cmd_rhc_world.detach().clone() + self._agent_twist_ref_current_w=self._rhc_twist_cmd_rhc_world.detach().clone() + self._agent_twist_ref_current_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._substep_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._step_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._root_twist_avrg_rhc_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._root_twist_avrg_rhc_base_loc_next=self._rhc_twist_cmd_rhc_world.detach().clone() + + self._random_thresh_contacts=torch.rand((self._n_envs,self._n_contacts), device=device) + # aux data + self._task_err_scaling = 
torch.zeros((self._n_envs, 1),dtype=self._dtype,device=device) + + self._pof1_b_linvel= torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_linvel"]) + self._pof1_b_omega = torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_omega"]) + self._bernoulli_coeffs_linvel = self._pof1_b_linvel.clone() + self._bernoulli_coeffs_linvel[:, :] = 1.0 + self._bernoulli_coeffs_omega = self._pof1_b_omega.clone() + self._bernoulli_coeffs_omega[:, :] = 1.0 + + # smoothing + self._track_rew_smoother=None + if self._env_opts["use_track_reward_smoother"]: + sub_reward_proxy=self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)[:, 0:1] + smoothing_dt=self._substep_dt + if not self._is_substep_rew[self._reward_map["task_error"]]: # assuming first reward is tracking + smoothing_dt=self._substep_dt*self._action_repeat + self._track_rew_smoother=ExponentialSignalSmoother( + name=self.__class__.__name__+"VelErrorSmoother", + signal=sub_reward_proxy, # same dimension of vel error + update_dt=smoothing_dt, + smoothing_horizon=self._env_opts["smoothing_horizon_vel_err"], + target_smoothing=0.5, + debug=self._is_debug, + dtype=self._dtype, + use_gpu=self._use_gpu) + + # if we need the action rate, we also need the action history + if self._env_opts["add_action_rate_reward"]: + if not self._env_opts["use_action_history"]: + Journal.log(self.__class__.__name__, + "_custom_post_init", + "add_action_rate_reward is True, but ", + LogType.EXCEP, + throw_when_excep=True) + + history_size=self._env_opts["actions_history_size"] + if history_size < 2: + Journal.log(self.__class__.__name__, + "_custom_post_init", + f"add_action_rate_reward requires actions history ({history_size}) to be >=2!", + LogType.EXCEP, + throw_when_excep=True) + + # add periodic timer if required + self._periodic_clock=None + if self._env_opts["add_periodic_clock_to_obs"]: + self._add_env_opt(self._env_opts, "clock_period", + 
default=int(1.5*self._action_repeat*self.task_rand_timeout_bounds()[1])) # correcting with n substeps + # (we are using the _substep_abs_counter counter) + self._periodic_clock=PeriodicTimer(counter=self._substep_abs_counter, + period=self._env_opts["clock_period"], + dtype=self._dtype, + device=self._device) + + def get_file_paths(self): + paths=AugMPCTrainingEnvBase.get_file_paths(self) + paths.append(self._this_child_path) + return paths + + def get_aux_dir(self): + aux_dirs = [] + path_getter = PathsGetter() + aux_dirs.append(path_getter.RHCDIR) + return aux_dirs + + def _get_reward_scaling(self): + if self._env_opts["single_task_ref_per_episode"]: + return self._env_opts["n_steps_task_rand_ub"] + else: + return self._env_opts["episode_timeout_ub"] + + def _max_ep_length(self): + if self._env_opts["single_task_ref_per_episode"]: + return self._env_opts["n_steps_task_rand_ub"] + else: + return self._env_opts["episode_timeout_ub"] + + def _check_sub_truncations(self): + # overrides parent + sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu) + sub_truncations[:, 0:1] = self._ep_timeout_counter.time_limits_reached() + if self._env_opts["single_task_ref_per_episode"]: + sub_truncations[:, 1:2] = self._task_rand_counter.time_limits_reached() + + def _check_sub_terminations(self): + # default behaviour-> to be overriden by child + sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu) + + # terminate if mpc just failed + sub_terminations[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=self._use_gpu) + + # check if robot is capsizing + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + check_capsize(quat=robot_q_meas,max_angle=self._max_pitch_angle, + output_t=self._is_capsized) + sub_terminations[:, 1:2] = self._is_capsized + + if self._env_opts["add_term_mpc_capsize"]: + # check if robot is about to capsize accordin to MPC + robot_q_pred = 
self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) + check_capsize(quat=robot_q_pred,max_angle=self._max_pitch_angle, + output_t=self._is_rhc_capsized) + sub_terminations[:, 2:3] = self._is_rhc_capsized + + def _custom_reset(self): + return None + + def reset(self): + AugMPCTrainingEnvBase.reset(self) + + def _pre_substep(self): + pass + + def _custom_post_step(self,episode_finished): + # executed after checking truncations and terminations and remote env reset + if self._use_gpu: + time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached().cuda(),episode_finished) + self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten()) + else: + time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached(),episode_finished) + self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten()) + # task refs are randomized in world frame -> we rotate them in base local + # (not super efficient, we should do it just for the finished envs) + self._update_loc_twist_refs() + + if self._track_rew_smoother is not None: # reset smoother + self._track_rew_smoother.reset_all(to_be_reset=episode_finished.flatten(), + value=0.0) + + def _custom_post_substp_pre_rew(self): + self._update_loc_twist_refs() + + def _custom_post_substp_post_rew(self): + pass + + def _update_loc_twist_refs(self): + # get fresh robot orientation + if not self._override_agent_refs: + robot_q = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + # rotate agent ref from world to robot base + world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q, + t_out=self._agent_twist_ref_current_base_loc) + # write it to agent refs tensors + self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc, + gpu=self._use_gpu) + + def _apply_actions_to_rhc(self): + + self._set_rhc_refs() + + self._write_rhc_refs() + + def _set_rhc_refs(self): + + action_to_be_applied 
= self.get_actual_actions() # see _get_action_names() to get + # the meaning of each component of this tensor + + rhc_latest_twist_cmd = self._rhc_refs.rob_refs.root_state.get(data_type="twist", gpu=self._use_gpu) + rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror(gpu=self._use_gpu) + rhc_latest_pos_ref = self._rhc_refs.rob_refs.contact_pos.get(data_type="p_z", gpu=self._use_gpu) + rhc_q=self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) # this is always + # avaialble + + # reference twist for MPC is assumed to always be specified in MPC's + # horizontal frame, while agent actions are interpreted as in MPC's + # base frame -> we need to rotate the actions into the horizontal frame + base2world_frame(t_b=action_to_be_applied[:, 0:6],q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_world) + w2hor_frame(t_w=self._rhc_twist_cmd_rhc_world,q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_h) + + rhc_latest_twist_cmd[:, 0:6] = self._rhc_twist_cmd_rhc_h + + # self._rhc_refs.rob_refs.root_state.set(data_type="p", data=rhc_latest_p_ref, + # gpu=self._use_gpu) + self._rhc_refs.rob_refs.root_state.set(data_type="twist", data=rhc_latest_twist_cmd, + gpu=self._use_gpu) + + # contact flags + idx=self._actions_map["contact_flag_start"] + if self._env_opts["use_prob_based_stepping"]: + # encode actions as probs + self._random_thresh_contacts.uniform_() # random values in-place between 0 and 1 + rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] >= self._random_thresh_contacts # keep contact with + # probability action_to_be_applied[:, 6:10] + else: # just use a threshold + rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] > self._env_opts["step_thresh"] + # actually apply actions to controller + + def _write_rhc_refs(self): + + if self._use_gpu: + # GPU->CPU --> we cannot use asynchronous data transfer since it's unsafe + self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) # 
write from gpu to cpu mirror + self._rhc_refs.contact_flags.synch_mirror(from_gpu=True,non_blocking=False) + self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=True,non_blocking=False) + + self._rhc_refs.rob_refs.root_state.synch_all(read=False, retry=True) # write mirror to shared mem + self._rhc_refs.contact_flags.synch_all(read=False, retry=True) + self._rhc_refs.rob_refs.contact_pos.synch_all(read=False, retry=True) + + def _override_refs(self, + env_indxs: torch.Tensor = None): + + # runs at every post_step + self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem + if self._use_gpu: + # copies latest refs to GPU + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False) + + agent_linvel_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="v", + gpu=self._use_gpu) + + agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega", + gpu=self._use_gpu) + + # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + \ + # agent_p_ref_current[:, 0:2] + self._agent_twist_ref_current_w[:, 0:3]=agent_linvel_ref_current # set linvel target + + self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. 
vel target from shared mem + + def _fill_substep_obs(self, + obs: torch.Tensor): + + # measured stuff + robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu) + robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + + if self._env_opts["use_linvel_from_rhc"]: + # twist estimate from mpc + robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu) + obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_rhc_base_loc_next[:, 0:3] + else: + obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_meas_base_loc[:, 0:3] + obs[:, self._obs_map["omega_meas"]:(self._obs_map["omega_meas"]+3)] = robot_twist_meas_base_loc[:, 3:6] + + obs[:, self._obs_map["v_jnt"]:(self._obs_map["v_jnt"]+self._n_jnts)] = robot_jnt_v_meas + + def _fill_step_obs(self, + obs: torch.Tensor): + + # measured stuff + robot_gravity_norm_base_loc = self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu) + robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu) + robot_jnt_q_meas = self._robot_state.jnts_state.get(data_type="q",gpu=self._use_gpu) + if self._jnt_q_blacklist_idxs is not None: # we don't want to read joint pos from blacklist + robot_jnt_q_meas[:, self._jnt_q_blacklist_idxs]=0.0 + robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + + # twist estimate from mpc + robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu) + # cmds for jnt imp to be applied next + robot_jnt_q_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="q",gpu=self._use_gpu) + robot_jnt_v_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="v",gpu=self._use_gpu) + robot_jnt_eff_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="eff",gpu=self._use_gpu) + + flight_info_now = 
self._rhc_refs.flight_info.get(data_type="all",gpu=self._use_gpu) + flight_settings_now = self._rhc_refs.flight_settings_req.get(data_type="all",gpu=self._use_gpu) + + # refs + agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) + + obs[:, self._obs_map["gn_base"]:(self._obs_map["gn_base"]+3)] = robot_gravity_norm_base_loc # norm. gravity vector in base frame + + obs[:, self._obs_map["q_jnt"]:(self._obs_map["q_jnt"]+self._n_jnts)] = robot_jnt_q_meas # meas jnt pos + obs[:, self._obs_map["twist_ref"]:(self._obs_map["twist_ref"]+6)] = agent_twist_ref # high lev agent refs to be tracked + + if self._env_opts["add_mpc_contact_f_to_obs"]: + n_forces=3*len(self._contact_names) + obs[:, self._obs_map["contact_f_mpc"]:(self._obs_map["contact_f_mpc"]+n_forces)] = self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=self._use_gpu) + if self._env_opts["add_fail_idx_to_obs"]: + obs[:, self._obs_map["rhc_fail_idx"]:(self._obs_map["rhc_fail_idx"]+1)] = self._rhc_fail_idx(gpu=self._use_gpu) + if self._env_opts["add_term_mpc_capsize"]: + obs[:, self._obs_map["gn_base_mpc"]:(self._obs_map["gn_base_mpc"]+3)] = self._rhc_cmds.root_state.get(data_type="gn",gpu=self._use_gpu) + if self._env_opts["use_rhc_avrg_vel_tracking"]: + self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc, base_loc=True) + obs[:, self._obs_map["avrg_twist_mpc"]:(self._obs_map["avrg_twist_mpc"]+6)] = self._root_twist_avrg_rhc_base_loc + if self._env_opts["add_flight_info"]: + obs[:, self._obs_map["flight_info"]:(self._obs_map["flight_info"]+self._flight_info_size)] = flight_info_now + if self._env_opts["add_flight_settings"]: + obs[:, self._obs_map["flight_settings_req"]:(self._obs_map["flight_settings_req"]+self._flight_setting_size)] = \ + flight_settings_now + + if self._env_opts["add_rhc_cmds_to_obs"]: + obs[:, self._obs_map["rhc_cmds_q"]:(self._obs_map["rhc_cmds_q"]+self._n_jnts)] = robot_jnt_q_rhc_applied_next + obs[:, 
self._obs_map["rhc_cmds_v"]:(self._obs_map["rhc_cmds_v"]+self._n_jnts)] = robot_jnt_v_rhc_applied_next + obs[:, self._obs_map["rhc_cmds_eff"]:(self._obs_map["rhc_cmds_eff"]+self._n_jnts)] = robot_jnt_eff_rhc_applied_next + if self._env_opts["use_action_history"]: + if self._env_opts["add_prev_actions_stats_to_obs"]: # just add last, std and mean to obs + obs[:, self._obs_map["action_history_prev"]:(self._obs_map["action_history_prev"]+self.actions_dim())]=self._act_mem_buffer.get(idx=0) + obs[:, self._obs_map["action_history_avrg"]:(self._obs_map["action_history_avrg"]+self.actions_dim())]=self._act_mem_buffer.mean(clone=False) + obs[:, self._obs_map["action_history_std"]:(self._obs_map["action_history_std"]+self.actions_dim())]=self._act_mem_buffer.std(clone=False) + else: # add whole memory buffer to obs + next_idx=self._obs_map["action_history"] + for i in range(self._env_opts["actions_history_size"]): + obs[:, next_idx:(next_idx+self.actions_dim())]=self._act_mem_buffer.get(idx=i) # get all (n_envs x (obs_dim x horizon)) + next_idx+=self.actions_dim() + + if self._env_opts["use_action_smoothing"]: # adding smoothed actions + obs[:, self._obs_map["action_smoothing"]:(self._obs_map["action_smoothing"]+self.actions_dim())]=self.get_actual_actions(normalized=True) + next_idx+=self.actions_dim() + + if self._env_opts["add_periodic_clock_to_obs"]: + obs[:, next_idx:(next_idx+2)]=self._periodic_clock.get() + next_idx+=2 + if self._env_opts["add_heightmap_obs"]: + hm = self._robot_state.height_sensor.get(gpu=self._use_gpu) + obs[:, self._obs_map["heightmap"]:(self._obs_map["heightmap"]+self._height_flat_dim)] = hm + + def _get_custom_db_data(self, + episode_finished, + ignore_ep_end): + episode_finished = episode_finished.cpu() + self.custom_db_data["AgentTwistRefs"].update( + new_data=self._agent_refs.rob_refs.root_state.get(data_type="twist", gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + 
self.custom_db_data["RhcFailIdx"].update(new_data=self._rhc_fail_idx(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["RhcContactForces"].update( + new_data=self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["Power"].update( + new_data=self._pow_db_data, + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["TrackingError"].update( + new_data=self._track_error_db, + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + # reward functions + def _action_rate(self): + continuous_actions=self._is_continuous_actions + discrete_actions=~self._is_continuous_actions + n_c_actions=continuous_actions.sum().item() + n_d_actions=discrete_actions.sum().item() + actions_prev=self._act_mem_buffer.get(idx=1) + actions_now=self._act_mem_buffer.get(idx=0) + actions_rate=(actions_now-actions_prev) # actions already normalized + actions_rate_c=actions_rate[:, continuous_actions] + actions_rate_d=actions_rate[:, discrete_actions] + + actions_rate_sqrd=None # assuming n_c_actions > 0 always + actions_rate_sqrd=self._env_opts["action_rate_rew_c_weight"]*torch.sum(actions_rate_c*actions_rate_c, dim=1, keepdim=True)/n_c_actions + if discrete_actions.any(): + actions_rate_sqrd+=self._env_opts["action_rate_rew_d_weight"]*torch.sum(actions_rate_d*actions_rate_d, dim=1, keepdim=True)/n_d_actions + return actions_rate_sqrd + + def _mech_pow(self, jnts_vel, jnts_effort, autoscaled: bool = False, drained: bool = True): + mech_pow_jnts=(jnts_effort*jnts_vel)*self._power_penalty_weights + if drained: + mech_pow_jnts.clamp_(0.0,torch.inf) # do not account for regenerative power + mech_pow_tot = torch.sum(mech_pow_jnts, dim=1, keepdim=True) + self._pow_db_data[:, 1:2]=mech_pow_tot.cpu() + if autoscaled: + mech_pow_tot=mech_pow_tot/self._power_penalty_weights_sum + return mech_pow_tot + + def _cost_of_transport(self, 
jnts_vel, jnts_effort, v_norm, mass_weight: bool = False): + drained_mech_pow=self._mech_pow(jnts_vel=jnts_vel, + jnts_effort=jnts_effort, + drained=True) + CoT=drained_mech_pow/(v_norm+1e-2) + if mass_weight: + robot_weight=self._rhc_robot_weight + CoT=CoT/robot_weight + + # add to db metrics + self._pow_db_data[:, 0:1]=CoT.cpu() + self._pow_db_data[:, 1:2]=drained_mech_pow.cpu() + + return CoT + + def _jnt_vel_penalty(self, jnts_vel): + weighted_jnt_vel = torch.sum(jnts_vel*jnts_vel, dim=1, keepdim=True)/self._n_jnts + return weighted_jnt_vel + + def _rhc_fail_idx(self, gpu: bool): + rhc_fail_idx = self._rhc_status.rhc_fail_idx.get_torch_mirror(gpu=gpu) + return self._env_opts["rhc_fail_idx_scale"]*rhc_fail_idx + + # basic L1 and L2 error functions + def _track_err_wmse(self, task_ref, task_meas, scaling, weights): + # weighted mean-squared error computation + task_error = (task_meas-task_ref) + # add to db metrics + self._track_error_db[:, :]=torch.abs(task_error) + scaled_error=task_error/scaling + + task_wmse = torch.sum(scaled_error*scaled_error*weights, dim=1, keepdim=True)/torch.sum(weights).item() + return task_wmse # weighted mean square error (along task dimension) + + def _track_err_dir_wmse(self, task_ref, task_meas, scaling, weights): + # weighted DIRECTIONAL mean-squared error computation + task_error = (task_meas-task_ref) + # add to db metrics + self._track_error_db[:, :]=torch.abs(task_error) + task_error=task_error/scaling + + # projection along commanded direction and gravity, matching paper formulation + v_ref=task_ref[:, 0:3] + delta_v=task_error[:, 0:3] + + v_ref_norm=torch.norm(v_ref, dim=1, keepdim=True) + cmd_dir=v_ref/(v_ref_norm+1e-8) + # fallback to measured direction if command is (near) zero to avoid degenerate projection + meas_dir=task_meas[:, 0:3] + meas_dir=meas_dir/(torch.norm(meas_dir, dim=1, keepdim=True)+1e-8) + cmd_dir=torch.where((v_ref_norm>1e-6), cmd_dir, meas_dir) + + gravity_dir = 
self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu) # normalized gravity in base frame + gravity_dir = gravity_dir/(torch.norm(gravity_dir, dim=1, keepdim=True)+1e-8) + + forward_error=torch.sum(delta_v*cmd_dir, dim=1, keepdim=True) + vertical_error=torch.sum(delta_v*gravity_dir, dim=1, keepdim=True) + lateral_vec=delta_v - vertical_error*gravity_dir - forward_error*cmd_dir + lateral_error=torch.norm(lateral_vec, dim=1, keepdim=True) + + # angular directional components: use gravity as vertical, project base x onto the world xy plane for roll, and close the triad with pitch + base_x = self._base_x_dir + base_y = self._base_y_dir + + roll_dir = base_x - torch.sum(base_x*gravity_dir, dim=1, keepdim=True)*gravity_dir + roll_norm = torch.norm(roll_dir, dim=1, keepdim=True) + roll_dir_alt = base_y - torch.sum(base_y*gravity_dir, dim=1, keepdim=True)*gravity_dir # fallback if base x is almost aligned with gravity + roll_norm_alt = torch.norm(roll_dir_alt, dim=1, keepdim=True) + use_alt_roll = roll_norm < 1e-6 + roll_dir = torch.where(use_alt_roll, roll_dir_alt, roll_dir) + roll_norm = torch.where(use_alt_roll, roll_norm_alt, roll_norm) + roll_dir = roll_dir/(roll_norm+1e-8) + + pitch_dir = torch.cross(gravity_dir, roll_dir, dim=1) + pitch_dir = pitch_dir/(torch.norm(pitch_dir, dim=1, keepdim=True)+1e-8) + + delta_omega = task_error[:, 3:6] + omega_roll_error = torch.sum(delta_omega*roll_dir, dim=1, keepdim=True) + omega_pitch_error = torch.sum(delta_omega*pitch_dir, dim=1, keepdim=True) + omega_vertical_error = torch.sum(delta_omega*gravity_dir, dim=1, keepdim=True) + + full_error=torch.cat((forward_error, lateral_error, vertical_error, omega_roll_error, omega_pitch_error, omega_vertical_error), dim=1) + task_wmse_dir = torch.sum(full_error*full_error*weights, dim=1, keepdim=True)/torch.sum(weights).item() + return task_wmse_dir # weighted mean square error (along task dimension) + + # L2 errors + def _tracking_err_rel_wmse(self, task_ref, task_meas, 
weights, directional: bool = False): + ref_norm = task_ref.norm(dim=1,keepdim=True) # norm of the full twist reference + self._task_err_scaling[:, :] = ref_norm+1e-2 + if directional: + task_rel_err_wmse=self._track_err_dir_wmse(task_ref=task_ref, task_meas=task_meas, + scaling=self._task_err_scaling, weights=weights) + else: + task_rel_err_wmse=self._track_err_wmse(task_ref=task_ref, task_meas=task_meas, + scaling=self._task_err_scaling, weights=weights) + return task_rel_err_wmse + + def _tracking_err_wmse(self, task_ref, task_meas, weights, directional: bool = False): + self._task_err_scaling[:, :] = 1 + if directional: + task_err_wmse = self._track_err_dir_wmse(task_ref=task_ref, + task_meas=task_meas, scaling=self._task_err_scaling, weights=weights) + else: + task_err_wmse = self._track_err_wmse(task_ref=task_ref, + task_meas=task_meas, scaling=self._task_err_scaling, weights=weights) + return task_err_wmse + + # L1 errors + def _tracking_err_rel_lin(self, task_ref, task_meas, weights, directional): + task_rel_err_wmse = self._tracking_err_rel_wmse(task_ref=task_ref, + task_meas=task_meas, weights=weights, directional=directional) + return task_rel_err_wmse.sqrt() + + def _tracking_err_lin(self, task_ref, task_meas, weights, directional: bool = False): + self._task_err_scaling[:, :] = 1 + task_err_wmse=self._tracking_err_wmse(task_ref=task_ref, + task_meas=task_meas, weights=weights, directional=directional) + return task_err_wmse.sqrt() + + # reward computation over steps/substeps + def _compute_step_rewards(self): + + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + + # tracking reward + if self._env_opts["use_L1_norm"]: # linear errors + task_error_fun = self._tracking_err_lin + if self._env_opts["use_relative_error"]: + task_error_fun = self._tracking_err_rel_lin + else: # quadratic error + task_error_fun = self._tracking_err_wmse + if self._env_opts["use_relative_error"]: + task_error_fun = self._tracking_err_rel_wmse + + 
agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) # high level agent refs (hybrid twist) + self._get_avrg_step_root_twist(out=self._step_avrg_root_twist_base_loc, base_loc=True) + task_error = task_error_fun(task_meas=self._step_avrg_root_twist_base_loc, + task_ref=agent_task_ref_base_loc, + weights=self._task_err_weights, + directional=self._env_opts["directional_tracking"]) + + idx=self._reward_map["task_error"] + if self._env_opts["use_exp_track_rew"]: + sub_rewards[:, idx:(idx+1)] = \ + self._env_opts["task_track_offset_exp"]*torch.exp(-self._env_opts["task_track_scale_exp"]*task_error) + else: # simple linear reward + sub_rewards[:, idx:(idx+1)] = \ + self._env_opts["task_track_offset"]*(1.0-self._env_opts["task_track_scale"]*task_error) + + if self._env_opts["use_fail_idx_weight"]: # add weight based on fail idx + fail_idx=self._rhc_fail_idx(gpu=self._use_gpu) + sub_rewards[:, idx:(idx+1)]=(1-fail_idx)*sub_rewards[:, idx:(idx+1)] + if self._track_rew_smoother is not None: # smooth reward if required + self._track_rew_smoother.update(new_signal=sub_rewards[:, 0:1]) + sub_rewards[:, idx:(idx+1)]=self._track_rew_smoother.get() + + # action rate + if self._env_opts["add_action_rate_reward"]: + action_rate=self._action_rate() + idx=self._reward_map["action_rate"] + sub_rewards[:, idx:(idx+1)] = self._env_opts["action_rate_offset"]*(1.0-self._env_opts["action_rate_scale"]*action_rate) + + # mpc vel tracking + if self._env_opts["use_rhc_avrg_vel_tracking"]: + self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc_next,base_loc=True) # get estimated avrg vel + # from MPC after stepping + task_pred_error=task_error_fun(task_meas=self._root_twist_avrg_rhc_base_loc_next, + task_ref=agent_task_ref_base_loc, + weights=self._task_pred_err_weights, + directional=self._env_opts["directional_tracking"]) + idx=self._reward_map["rhc_avrg_vel_error"] + sub_rewards[:, idx:(idx+1)] = 
self._env_opts["task_pred_track_offset"]*torch.exp(-self._env_opts["task_pred_track_scale"]*task_pred_error) + + def _compute_substep_rewards(self): + + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + + if self._env_opts["add_CoT_reward"] or self._env_opts["add_power_reward"]: + jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + jnts_effort = self._robot_state.jnts_state.get(data_type="eff",gpu=self._use_gpu) + + if self._env_opts["add_CoT_reward"]: + if self._env_opts["use_CoT_wrt_ref"]: # uses v ref norm for computing cot + agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) + v_norm=torch.norm(agent_task_ref_base_loc, dim=1, keepdim=True) + else: # uses measured velocity + robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu) + v_norm=torch.norm(robot_twist_meas_base_loc[:,0:3], dim=1, keepdim=True) + CoT=self._cost_of_transport(jnts_vel=jnts_vel,jnts_effort=jnts_effort,v_norm=v_norm, + mass_weight=True + ) + idx=self._reward_map["CoT"] + sub_rewards[:, idx:(idx+1)] = self._env_opts["CoT_offset"]*(1-self._env_opts["CoT_scale"]*CoT) + if self._env_opts["add_power_reward"]: + weighted_mech_power=self._mech_pow(jnts_vel=jnts_vel,jnts_effort=jnts_effort, drained=True) + idx=self._reward_map["mech_pow"] + sub_rewards[:, idx:(idx+1)] = self._env_opts["power_offset"]*(1-self._env_opts["power_scale"]*weighted_mech_power) + + if self._env_opts["add_jnt_v_reward"]: + jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + jnt_v=self._jnt_vel_penalty(jnts_vel=jnts_vel) + idx=self._reward_map["jnt_v"] + sub_rewards[:, idx:(idx+1)] = self._env_opts["jnt_vel_offset"]*(1-self._env_opts["jnt_vel_scale"]*jnt_v) + + def _randomize_task_refs(self, + env_indxs: torch.Tensor = None): + + # we randomize the reference in world frame, since it's much more intuitive + # (it will be rotated in base frame when provided to 
the agent and used for rew + # computation) + + if self._env_opts["use_pof0"]: # sample from bernoulli distribution + torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"] + torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega) + if env_indxs is None: + random_uniform=torch.full_like(self._agent_twist_ref_current_w, fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[:, :] = random_uniform*self._twist_ref_scale + self._twist_ref_offset + self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel + self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega + else: + random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, :], fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[env_indxs, :] = random_uniform * self._twist_ref_scale + self._twist_ref_offset + self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :] + self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega + + def _get_obs_names(self): + + obs_names = [""] * self.obs_dim() + + # proprioceptive stream of obs + next_idx=0 + + self._obs_map["gn_base"]=next_idx + obs_names[0] = "gn_x_base_loc" + obs_names[1] = "gn_y_base_loc" + obs_names[2] = "gn_z_base_loc" + next_idx+=3 + + self._obs_map["linvel_meas"]=next_idx + obs_names[next_idx] = "linvel_x_base_loc" + obs_names[next_idx+1] = "linvel_y_base_loc" + obs_names[next_idx+2] = "linvel_z_base_loc" + next_idx+=3 + + self._obs_map["omega_meas"]=next_idx + obs_names[next_idx] = "omega_x_base_loc" + obs_names[next_idx+1] = 
"omega_y_base_loc" + obs_names[next_idx+2] = "omega_z_base_loc" + next_idx+=3 + + jnt_names=self.get_observed_joints() + self._obs_map["q_jnt"]=next_idx + for i in range(self._n_jnts): # jnt obs (pos): + obs_names[next_idx+i] = f"q_jnt_{jnt_names[i]}" + next_idx+=self._n_jnts + + self._obs_map["v_jnt"]=next_idx + for i in range(self._n_jnts): # jnt obs (v): + obs_names[next_idx+i] = f"v_jnt_{jnt_names[i]}" + next_idx+=self._n_jnts + + # references + self._obs_map["twist_ref"]=next_idx + obs_names[next_idx] = "linvel_x_ref_base_loc" + obs_names[next_idx+1] = "linvel_y_ref_base_loc" + obs_names[next_idx+2] = "linvel_z_ref_base_loc" + obs_names[next_idx+3] = "omega_x_ref_base_loc" + obs_names[next_idx+4] = "omega_y_ref_base_loc" + obs_names[next_idx+5] = "omega_z_ref_base_loc" + next_idx+=6 + + # contact forces + if self._env_opts["add_mpc_contact_f_to_obs"]: + i = 0 + self._obs_map["contact_f_mpc"]=next_idx + for contact in self._contact_names: + obs_names[next_idx+i] = f"fc_{contact}_x_base_loc" + obs_names[next_idx+i+1] = f"fc_{contact}_y_base_loc" + obs_names[next_idx+i+2] = f"fc_{contact}_z_base_loc" + i+=3 + next_idx+=3*len(self._contact_names) + + # data directly from MPC + if self._env_opts["add_fail_idx_to_obs"]: + self._obs_map["rhc_fail_idx"]=next_idx + obs_names[next_idx] = "rhc_fail_idx" + next_idx+=1 + if self._env_opts["add_term_mpc_capsize"]: + self._obs_map["gn_base_mpc"]=next_idx + obs_names[next_idx] = "gn_x_rhc_base_loc" + obs_names[next_idx+1] = "gn_y_rhc_base_loc" + obs_names[next_idx+2] = "gn_z_rhc_base_loc" + next_idx+=3 + if self._env_opts["use_rhc_avrg_vel_tracking"]: + self._obs_map["avrg_twist_mpc"]=next_idx + obs_names[next_idx] = "linvel_x_avrg_rhc" + obs_names[next_idx+1] = "linvel_y_avrg_rhc" + obs_names[next_idx+2] = "linvel_z_avrg_rhc" + obs_names[next_idx+3] = "omega_x_avrg_rhc" + obs_names[next_idx+4] = "omega_y_avrg_rhc" + obs_names[next_idx+5] = "omega_z_avrg_rhc" + next_idx+=6 + + if self._env_opts["add_flight_info"]: + 
self._obs_map["flight_info"]=next_idx + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_pos_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_len_remaining_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_len_nominal_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_apex_nominal_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_end_nominal_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + + if self._env_opts["add_flight_settings"]: + self._obs_map["flight_settings_req"]=next_idx + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_len_req_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_apex_req_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_end_req_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + + if self._env_opts["add_rhc_cmds_to_obs"]: + self._obs_map["rhc_cmds_q"]=next_idx + for i in range(self._n_jnts): # jnt obs (pos): + obs_names[next_idx+i] = f"rhc_cmd_q_{jnt_names[i]}" + next_idx+=self._n_jnts + self._obs_map["rhc_cmds_v"]=next_idx + for i in range(self._n_jnts): # jnt obs (pos): + obs_names[next_idx+i] = f"rhc_cmd_v_{jnt_names[i]}" + next_idx+=self._n_jnts + self._obs_map["rhc_cmds_eff"]=next_idx + for i in range(self._n_jnts): # jnt obs (pos): + obs_names[next_idx+i] = f"rhc_cmd_eff_{jnt_names[i]}" + next_idx+=self._n_jnts + + # previous actions info + if self._env_opts["use_action_history"]: + 
self._obs_map["action_history"]=next_idx + action_names = self._get_action_names() + if self._env_opts["add_prev_actions_stats_to_obs"]: + self._obs_map["action_history_prev"]=next_idx + for act_idx in range(self.actions_dim()): + obs_names[next_idx+act_idx] = action_names[act_idx]+f"_prev_act" + next_idx+=self.actions_dim() + self._obs_map["action_history_avrg"]=next_idx + for act_idx in range(self.actions_dim()): + obs_names[next_idx+act_idx] = action_names[act_idx]+f"_avrg_act" + next_idx+=self.actions_dim() + self._obs_map["action_history_std"]=next_idx + for act_idx in range(self.actions_dim()): + obs_names[next_idx+act_idx] = action_names[act_idx]+f"_std_act" + next_idx+=self.actions_dim() + else: + for i in range(self._env_opts["actions_history_size"]): + for act_idx in range(self.actions_dim()): + obs_names[next_idx+act_idx] = action_names[act_idx]+f"_m{i+1}_act" + next_idx+=self.actions_dim() + + if self._env_opts["use_action_smoothing"]: + self._obs_map["action_smoothing"]=next_idx + for smoothed_action in range(self.actions_dim()): + obs_names[next_idx+smoothed_action] = action_names[smoothed_action]+f"_smoothed" + next_idx+=self.actions_dim() + + if self._env_opts["add_periodic_clock_to_obs"]: + self._obs_map["clock"]=next_idx + obs_names[next_idx] = "clock_cos" + obs_names[next_idx+1] = "clock_sin" + next_idx+=2 + if self._env_opts["add_heightmap_obs"] and self._height_grid_size is not None: + self._obs_map["heightmap"]=next_idx + gs = self._height_grid_size + for r in range(gs): + for c in range(gs): + obs_names[next_idx] = f"height_r{r}_c{c}" + next_idx += 1 + + return obs_names + + def _set_substep_obs(self): + # which obs are to be averaged over substeps? 
+ + self._is_substep_obs[self._obs_map["linvel_meas"]:self._obs_map["linvel_meas"]+3]=True + self._is_substep_obs[self._obs_map["omega_meas"]:self._obs_map["omega_meas"]+3]=True + self._is_substep_obs[self._obs_map["v_jnt"]:self._obs_map["v_jnt"]+self._n_jnts]=True # also good for noise + + # self._is_substep_obs[self._obs_map["contact_f_mpc"]:self._obs_map["contact_f_mpc"]+3*len(self._contact_names)]=True + + def _get_action_names(self): + + action_names = [""] * self.actions_dim() + action_names[0] = "vx_cmd" # twist commands from agent to RHC controller + action_names[1] = "vy_cmd" + action_names[2] = "vz_cmd" + action_names[3] = "roll_omega_cmd" + action_names[4] = "pitch_omega_cmd" + action_names[5] = "yaw_omega_cmd" + + next_idx=6 + + self._actions_map["contact_flag_start"]=next_idx + for i in range(len(self._contact_names)): + contact=self._contact_names[i] + action_names[next_idx] = f"contact_flag_{contact}" + next_idx+=1 + + return action_names + + def _set_substep_rew(self): + + # which rewards are to be computed at substeps frequency? 
+ self._is_substep_rew[self._reward_map["task_error"]]=False + if self._env_opts["add_CoT_reward"]: + self._is_substep_rew[self._reward_map["CoT"]]=True + if self._env_opts["add_power_reward"]: + self._is_substep_rew[self._reward_map["mech_pow"]]=True + if self._env_opts["add_action_rate_reward"]: + self._is_substep_rew[self._reward_map["action_rate"]]=False + if self._env_opts["add_jnt_v_reward"]: + self._is_substep_rew[self._reward_map["jnt_v"]]=True + + if self._env_opts["use_rhc_avrg_vel_tracking"]: + self._is_substep_rew[self._reward_map["rhc_avrg_vel_error"]]=False + + def _get_rewards_names(self): + + counter=0 + reward_names = [] + + # adding rewards + reward_names.append("task_error") + self._reward_map["task_error"]=counter + self._reward_lb_map["task_error"]="task_error_reward_lb" + counter+=1 + if self._env_opts["add_power_reward"] and self._env_opts["add_CoT_reward"]: + Journal.log(self.__class__.__name__, + "__init__", + "Only one between CoT and power reward can be used!", + LogType.EXCEP, + throw_when_excep=True) + if self._env_opts["add_CoT_reward"]: + reward_names.append("CoT") + self._reward_map["CoT"]=counter + self._reward_lb_map["CoT"]="CoT_reward_lb" + counter+=1 + if self._env_opts["add_power_reward"]: + reward_names.append("mech_pow") + self._reward_map["mech_pow"]=counter + self._reward_lb_map["mech_pow"]="power_reward_lb" + counter+=1 + if self._env_opts["add_action_rate_reward"]: + reward_names.append("action_rate") + self._reward_map["action_rate"]=counter + self._reward_lb_map["action_rate"]="action_rate_reward_lb" + counter+=1 + if self._env_opts["add_jnt_v_reward"]: + reward_names.append("jnt_v") + self._reward_map["jnt_v"]=counter + self._reward_lb_map["jnt_v"]="jnt_vel_reward_lb" + counter+=1 + if self._env_opts["use_rhc_avrg_vel_tracking"]: + reward_names.append("rhc_avrg_vel_error") + self._reward_map["rhc_avrg_vel_error"]=counter + self._reward_lb_map["rhc_avrg_vel_error"]="rhc_avrg_vel_reward_lb" + counter+=1 + + return 
reward_names + + def _get_sub_trunc_names(self): + sub_trunc_names = [] + sub_trunc_names.append("ep_timeout") + if self._env_opts["single_task_ref_per_episode"]: + sub_trunc_names.append("task_ref_rand") + return sub_trunc_names + + def _get_sub_term_names(self): + # to be overridden by child class + sub_term_names = [] + sub_term_names.append("rhc_failure") + sub_term_names.append("robot_capsize") + if self._env_opts["add_term_mpc_capsize"]: + sub_term_names.append("rhc_capsize") + + return sub_term_names + + def _set_jnts_blacklist_pattern(self): + # used to exclude pos measurement from wheels + self._jnt_q_blacklist_patterns=["wheel"] diff --git a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/world_interface_base.py b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/world_interface_base.py new file mode 100644 index 0000000000000000000000000000000000000000..747888cecf6d6846d6e0dad3b83059ec47cfa191 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/world_interface_base.py @@ -0,0 +1,1486 @@ +from aug_mpc.controllers.rhc.augmpc_cluster_server import AugMpcClusterServer +from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperClnt +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetClnt +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest +from aug_mpc.utils.jnt_imp_control_base import JntImpCntrlBase +from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds +from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf +from aug_mpc.utils.math_utils import quaternion_difference +from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds + +from aug_mpc.utils.filtering import FirstOrderFilter + +from mpc_hive.utilities.homing import RobotHomer +from mpc_hive.utilities.shared_data.jnt_imp_control import 
JntImpCntrlData + +from EigenIPC.PyEigenIPC import VLevel, Journal, LogType, dtype +from EigenIPC.PyEigenIPC import StringTensorServer +from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper + +from typing import List, Union, Dict, TypeVar + +import os +import inspect +import signal +import time + +import numpy as np +import torch + +from abc import ABC, abstractmethod + +JntImpCntrlChild = TypeVar('JntImpCntrlChild', bound='JntImpCntrlBase') + +class AugMPCWorldInterfaceBase(ABC): + + def __init__(self, + robot_names: List[str], + robot_urdf_paths: List[str], + robot_srdf_paths: List[str], + jnt_imp_config_paths: List[str], + n_contacts: List[int], + cluster_dt: List[float], + use_remote_stepping: List[bool], + name: str = "AugMPCWorldInterfaceBase", + num_envs: int = 1, + debug = False, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + n_init_step: int = 0, + timeout_ms: int = 60000, + env_opts: Dict = None, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + dump_basepath: str = "/tmp", + override_low_lev_controller: bool = False): + + # checks on input args + # type checks + if not isinstance(robot_names, List): + exception = "robot_names must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(robot_urdf_paths, List): + exception = "robot_urdf_paths must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(robot_srdf_paths, List): + exception = "robot_srdf_paths must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(cluster_dt, List): + exception = "cluster_dt must be a list!" 
+ Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(use_remote_stepping, List): + exception = "use_remote_stepping must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(n_contacts, List): + exception = "n_contacts must be a list (of integers)!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(jnt_imp_config_paths, List): + exception = "jnt_imp_config_paths must be a list paths!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # dim checks + if not len(robot_urdf_paths) == len(robot_names): + exception = f"robot_urdf_paths has len {len(robot_urdf_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(robot_srdf_paths) == len(robot_names): + exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(cluster_dt) == len(robot_names): + exception = f"cluster_dt has len {len(cluster_dt)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(use_remote_stepping) == len(robot_names): + exception = f"use_remote_stepping has len {len(use_remote_stepping)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(robot_srdf_paths) == len(robot_names): + exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \ + f" while robot_names 
{len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(jnt_imp_config_paths) == len(robot_names): + exception = f"jnt_imp_config_paths has len {len(jnt_imp_config_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + + self._remote_exit_flag=None + + self._name=name + self._num_envs=num_envs + self._debug=debug + self._verbose=verbose + self._vlevel=vlevel + self._force_reconnection=True + self._timeout_ms=timeout_ms + self._use_gpu=use_gpu + self._device = "cuda" if self._use_gpu else "cpu" + self._dtype=dtype + self._robot_names=robot_names + self._env_opts={} + self._env_opts["deact_when_failure"]=True + self._env_opts["filter_jnt_vel"]=False + self._env_opts["filter_cutoff_freq"]=10.0 # [Hz] + self._env_opts["filter_sampling_rate"]=100 # rate at which state is filtered [Hz] + self._env_opts["add_remote_exit_flag"]=False # add shared data server to trigger a remote exit + + self._env_opts["enable_height_sensor"]=False + self._env_opts["height_sensor_resolution"]=0.16 + self._env_opts["height_sensor_pixels"]=10 + self._env_opts["height_sensor_lateral_offset"]=0.0 + self._env_opts["height_sensor_forward_offset"]=0.0 + + self._filter_step_ssteps_freq=None + + self._env_opts.update(env_opts) + + self.step_counter = 0 # global step counter + self._n_init_steps = n_init_step # n steps to be performed before applying solutions from control clusters + self._srdf_dump_paths = robot_srdf_paths + self._homers = {} + self._homing = None + self._jnt_imp_cntrl_shared_data = {} + self._jnt_imp_controllers = {} + self._jnt_imp_config_paths = {} + + # control cluster data + self.cluster_sim_step_counters = {} + self.cluster_servers = {} + self._trigger_sol = {} + self._wait_sol = {} + self._cluster_dt = {} + self._robot_urdf_paths={} + self._robot_srdf_paths={} + 
self._contact_names={} + self._num_contacts={} + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + self._cluster_dt[robot_name]=cluster_dt[i] + self._robot_urdf_paths[robot_name]=robot_urdf_paths[i] + self._robot_srdf_paths[robot_name]=robot_srdf_paths[i] + self._contact_names[robot_name]=None + self._num_contacts[robot_name]=n_contacts[i] + self._jnt_imp_config_paths[robot_name]=jnt_imp_config_paths[i] + # db data + self.debug_data = {} + self.debug_data["time_to_step_world"] = np.nan + self.debug_data["time_to_get_states_from_env"] = np.nan + self.debug_data["cluster_sol_time"] = {} + self.debug_data["cluster_state_update_dt"] = {} + self.debug_data["sim_time"] = {} + self.debug_data["cluster_time"] = {} + + self._env_timer = time.perf_counter() + + # remote sim stepping options + self._timeout = timeout_ms # timeout for remote stepping + self._use_remote_stepping = use_remote_stepping + # should use remote stepping + self._remote_steppers = {} + self._remote_resetters = {} + self._remote_reset_requests = {} + self._is_first_trigger = {} + + self._closed = False + + self._this_child_path=os.path.abspath(inspect.getfile(self.__class__)) + self._descr_dump_path=dump_basepath+"/"+f"{self.__class__.__name__}" + self._urdf_dump_paths = {} + self._srdf_dump_paths = {} + self.xrdf_cmd_vals = [] # by default empty, needs to be overriden by + # child class + self._world_iface_files_server=None + + self._override_low_lev_controller=override_low_lev_controller + + self._root_p = {} + self._root_q = {} + self._jnts_q = {} + self._root_p_prev = {} # used for num differentiation + self._root_q_prev = {} # used for num differentiation + self._jnts_q_prev = {} # used for num differentiation + self._root_v_prev = {} # used for num differentiation + self._root_omega_prev = {} # used for num differentiation + self._root_p_default = {} + self._root_q_default = {} + self._jnts_q_default = {} + + self._gravity_normalized = {} + 
self._gravity_normalized_base_loc = {} + + self._root_v = {} + self._root_v_base_loc = {} + self._root_v_default = {} + self._root_omega = {} + self._root_omega_base_loc = {} + self._root_omega_default = {} + self._root_a = {} + self._root_a_base_loc = {} + self._root_alpha = {} + self._root_alpha_base_loc = {} + + self._jnts_v = {} + self._jnt_vel_filter = {} + self._jnts_v_default = {} + self._jnts_eff = {} + self._jnts_eff_default = {} + + self._root_pos_offsets = {} + self._root_q_offsets = {} + + self._parse_env_opts() + + self._enable_height_shared = self._env_opts["enable_height_sensor"] + self._height_sensor_resolution = self._env_opts["height_sensor_resolution"] + self._height_sensor_pixels = self._env_opts["height_sensor_pixels"] + + self._pre_setup() # child's method + + self._init_world() # after this point all info from sim or robot is + # available + + self._publish_world_interface_files() + + self._setup() + + self._exit_request=False + signal.signal(signal.SIGINT, self.signal_handler) + + def signal_handler(self, sig, frame): + Journal.log(self.__class__.__name__, + "signal_handler", + "received SIGINT -> cleaning up", + LogType.WARN) + self._exit_request=True + + def __del__(self): + self.close() + + def is_closed(self): + return self._closed + + def close(self) -> None: + if not self._closed: + for i in range(len(self._robot_names)): + if self._robot_names[i] in self.cluster_servers: + self.cluster_servers[self._robot_names[i]].close() + if self._use_remote_stepping[i]: # remote signaling + if self._robot_names[i] in self._remote_reset_requests: + self._remote_reset_requests[self._robot_names[i]].close() + self._remote_resetters[self._robot_names[i]].close() + self._remote_steppers[self._robot_names[i]].close() + if self._robot_names[i] in self._jnt_imp_cntrl_shared_data: + jnt_imp_shared_data=self._jnt_imp_cntrl_shared_data[self._robot_names[i]] + if jnt_imp_shared_data is not None: + jnt_imp_shared_data.close() + if self._remote_exit_flag is not 
None: + self._remote_exit_flag.close() + if self._world_iface_files_server is not None: + self._world_iface_files_server.close() + self._close() + self._closed=True + + def _collect_world_interface_files(self): + files = [self._this_child_path] + # prefer generated URDF/SRDF if available, fallback to provided xacros + if len(self._urdf_dump_paths) > 0: + files.extend(list(self._urdf_dump_paths.values())) + else: + files.extend(list(self._robot_urdf_paths.values())) + if len(self._srdf_dump_paths) > 0: + files.extend(list(self._srdf_dump_paths.values())) + else: + files.extend(list(self._robot_srdf_paths.values())) + files.extend(list(self._jnt_imp_config_paths.values())) + # remove duplicates while preserving order + unique_files=[] + for f in files: + if f not in unique_files: + unique_files.append(f) + return unique_files + + def _publish_world_interface_files(self): + + if not any(self._use_remote_stepping): + return + self._world_iface_files_server=StringTensorServer(length=1, + basename="SharedWorldInterfaceFilesDropDir", + name_space=self._robot_names[0], + verbose=self._verbose, + vlevel=self._vlevel, + force_reconnection=True) + self._world_iface_files_server.run() + combined_paths=", ".join(self._collect_world_interface_files()) + while not self._world_iface_files_server.write_vec([combined_paths], 0): + Journal.log(self.__class__.__name__, + "_publish_world_interface_files", + f"Failed to pub world interface files. 
Retrying...", + LogType.WARN) + time.sleep(0.1) + Journal.log(self.__class__.__name__, + "_publish_world_interface_files", + f"World interface files advertised: {combined_paths}", + LogType.STAT) + + def _setup(self) -> None: + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + # normalized gravity vector + self._gravity_normalized[robot_name]=torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._gravity_normalized[robot_name][:, 2]=-1.0 + self._gravity_normalized_base_loc[robot_name]=self._gravity_normalized[robot_name].detach().clone() + + self.cluster_sim_step_counters[robot_name]=0 + self._is_first_trigger[robot_name] = True + if not isinstance(self._cluster_dt[robot_name], (float)): + exception = f"cluster_dt[{i}] should be a float!" + Journal.log(self.__class__.__name__, + "_setup", + exception, + LogType.EXCEP, + throw_when_excep = True) + self._cluster_dt[robot_name] = self._cluster_dt[robot_name] + self._trigger_sol[robot_name] = True # allow first trigger + self._wait_sol[robot_name] = False + + # initialize a lrhc cluster server for communicating with rhc controllers + self.cluster_servers[robot_name] = AugMpcClusterServer(cluster_size=self._num_envs, + cluster_dt=self._cluster_dt[robot_name], + control_dt=self.physics_dt(), + jnt_names=self._robot_jnt_names(robot_name=robot_name), + n_contacts=self._n_contacts(robot_name=robot_name), + contact_linknames=self._contact_names[robot_name], + verbose=self._verbose, + vlevel=self._vlevel, + debug=self._debug, + robot_name=robot_name, + use_gpu=self._use_gpu, + force_reconnection=self._force_reconnection, + timeout_ms=self._timeout, + enable_height_sensor=self._enable_height_shared, + height_grid_size=self._height_sensor_pixels, + height_grid_resolution=self._height_sensor_resolution) + self.cluster_servers[robot_name].run() + self.debug_data["cluster_sol_time"][robot_name] = np.nan + self.debug_data["cluster_state_update_dt"][robot_name] = np.nan + 
self.debug_data["sim_time"][robot_name] = np.nan + # remote sim stepping + if self._use_remote_stepping[i]: + self._remote_steppers[robot_name] = RemoteStepperClnt(namespace=robot_name, + verbose=self._debug, + vlevel=self._vlevel) + self._remote_resetters[robot_name] = RemoteResetClnt(namespace=robot_name, + verbose=self._debug, + vlevel=self._vlevel) + self._remote_reset_requests[robot_name] = RemoteResetRequest(namespace=robot_name, + n_env=self._num_envs, + is_server=True, + verbose=self._debug, + vlevel=self._vlevel, + force_reconnection=self._force_reconnection, + safe=False) + self._remote_steppers[robot_name].run() + self._remote_resetters[robot_name].run() + self._remote_reset_requests[robot_name].run() + else: + self._remote_steppers[robot_name] = None + self._remote_reset_requests[robot_name] = None + self._remote_resetters[robot_name] = None + + self._homers[robot_name] = RobotHomer(srdf_path=self._srdf_dump_paths[robot_name], + jnt_names=self._robot_jnt_names(robot_name=robot_name), + filter=True) + robot_homing=torch.from_numpy(self._homers[robot_name].get_homing().reshape(1,-1)) + if "cuda" in self._device: + robot_homing=robot_homing.cuda() + self._homing=robot_homing.repeat(self._num_envs, 1) + + self._jnts_q_default[robot_name] = self._homing + self._set_jnts_to_homing(robot_name=robot_name) + self._set_root_to_defconfig(robot_name=robot_name) + self._reset_sim() + + self._init_safe_cluster_actions(robot_name=robot_name) + + Journal.log(self.__class__.__name__, + "_setup", + f"Will use joint impedance config at {self._jnt_imp_config_paths[robot_name]} for {robot_name}", + LogType.STAT) + + self._jnt_imp_controllers[robot_name] = self._generate_jnt_imp_control(robot_name=robot_name) + self._jnt_imp_cntrl_shared_data[robot_name] = JntImpCntrlData(is_server=True, + n_envs=self._num_envs, + n_jnts=len(self._robot_jnt_names(robot_name=robot_name)), + jnt_names=self._robot_jnt_names(robot_name=robot_name), + namespace=robot_name, + 
verbose=self._verbose, + force_reconnection=self._force_reconnection, + vlevel=self._vlevel, + use_gpu=self._use_gpu, + safe=False) + self._jnt_imp_cntrl_shared_data[robot_name].run() + + self._jnt_vel_filter[robot_name]=None + if self._env_opts["filter_jnt_vel"]: + self._jnt_vel_filter[robot_name]=FirstOrderFilter(dt=1.0/self._env_opts["filter_sampling_rate"], + filter_BW=self._env_opts["filter_cutoff_freq"], + rows=self._num_envs, + cols=len(self._robot_jnt_names(robot_name=robot_name)), + device=self._device, + dtype=self._dtype) + + physics_rate=1.0/self.physics_dt() + self._filter_step_ssteps_freq=int(physics_rate/self._env_opts["filter_sampling_rate"]) + if self._filter_step_ssteps_freq <=0: + Journal.log(self.__class__.__name__, + "_setup", + f"The filter_sampling_rate should be smaller that the physics rate ({physics_rate} Hz)", + LogType.EXCEP, + throw_when_excep=True) + + for n in range(self._n_init_steps): # run some initialization steps + self._step_world() + if hasattr(self, "_zero_angular_velocities"): + self._zero_angular_velocities(robot_name=robot_name, env_indxs=None) + self._read_jnts_state_from_robot(robot_name=robot_name, + env_indxs=None) + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=None) + # allow child to perform additional warmup validations (e.g., terrain/tilt) + # retry_done = False + if hasattr(self, "_post_warmup_validation"): + failing = self._post_warmup_validation(robot_name=robot_name) + if failing is not None and failing.numel() > 0: + # retry: reset only failing envs, rerun warmup, revalidate once + failing = failing.to(self._device) + Journal.log(self.__class__.__name__, + "_setup", + f"Warmup validation failed for {robot_name}, envs indexes {failing.tolist()}", + LogType.EXCEP, + throw_when_excep=True) + else: + Journal.log(self.__class__.__name__, + "_setup", + f"Warmup validation passed for {robot_name}", + LogType.INFO) + + # write some inits for all robots + self._update_root_offsets(robot_name) + 
self._synch_default_root_states(robot_name=robot_name) + epsi=0.03 # adding a bit of height to avoid initial penetration + self._root_p_default[robot_name][:, 2]=self._root_p_default[robot_name][:, 2]+epsi + + self._reset(env_indxs=None, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + + control_cluster=self.cluster_servers[robot_name] + self._set_state_to_cluster(robot_name=robot_name) + control_cluster.write_robot_state() + control_cluster.pre_trigger() + to_be_activated=control_cluster.get_inactive_controllers() + if to_be_activated is not None: + control_cluster.activate_controllers( + idxs=to_be_activated) + + if self._use_remote_stepping[i]: + self._wait_for_remote_step_req(robot_name=robot_name) + + self._set_startup_jnt_imp_gains(robot_name=robot_name) # set gains to + # startup config (usually lower) + control_cluster.trigger_solution() + + if self._env_opts["add_remote_exit_flag"]: + self._remote_exit_flag=SharedTWrapper(namespace = self._robot_names[0],# use first robot as name + basename = "IbridoRemoteEnvExitFlag", + is_server = True, + n_rows = 1, + n_cols = 1, + verbose = True, + vlevel = self._vlevel, + safe = False, + dtype=dtype.Bool, + force_reconnection=True, + fill_value = False) + self._remote_exit_flag.run() + + self._setup_done=True + + def step(self) -> bool: + + success=False + + if self._remote_exit_flag is not None: + # check for exit request + self._remote_exit_flag.synch_all(read=True, retry = False) + self._exit_request=bool(self._remote_exit_flag.get_numpy_mirror()[0, 0].item()) + + if self._exit_request: + self.close() + + if self.is_running() and (not self.is_closed()): + if self._debug: + self._pre_step_db() + self._env_timer=time.perf_counter() + self._step_world() + self.debug_data["time_to_step_world"] = \ + time.perf_counter() - self._env_timer + self._post_world_step_db() + success=True + else: + self._pre_step() + self._step_world() + self._post_world_step() + success=True + + 
return success + + def render(self, mode:str="human") -> None: + self._render_sim(mode) + + def reset(self, + env_indxs: torch.Tensor = None, + reset_cluster: bool = False, + reset_cluster_counter = False, + randomize: bool = False, + reset_sim: bool = False) -> None: + + for i in range(len(self._robot_names)): + robot_name=self._robot_names[i] + self._reset(robot_name=robot_name, + env_indxs=env_indxs, + randomize=randomize, + reset_cluster=reset_cluster, + reset_cluster_counter=reset_cluster_counter) + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=env_indxs) + + if reset_sim: + self._reset_sim() + + def _reset_cluster(self, + robot_name: str, + env_indxs: torch.Tensor = None, + reset_cluster_counter: bool = False): + + control_cluster = self.cluster_servers[robot_name] + + control_cluster.reset_controllers(idxs=env_indxs) + + self._set_state_to_cluster(robot_name=robot_name, + env_indxs=env_indxs) + control_cluster.write_robot_state() # writes to shared memory + + if reset_cluster_counter: + self.cluster_sim_step_counters[robot_name] = 0 + + def _step_jnt_vel_filter(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + self._jnt_vel_filter[robot_name].update(refk=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs), + idxs=env_indxs) + + def _set_state_to_cluster(self, + robot_name: str, + env_indxs: torch.Tensor = None, + base_loc: bool = True): + + if self._debug: + if not isinstance(env_indxs, (torch.Tensor, type(None))): + msg = "Provided env_indxs should be a torch tensor of indexes!" 
+ raise Exception(f"[{self.__class__.__name__}]" + f"[{self.journal.exception}]: " + msg) + + control_cluster = self.cluster_servers[robot_name] + # floating base + rhc_state = control_cluster.get_state() + # configuration + rhc_state.root_state.set(data=self.root_p_rel(robot_name=robot_name, env_idxs=env_indxs), + data_type="p", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.root_state.set(data=self.root_q(robot_name=robot_name, env_idxs=env_indxs), + data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu) + + # twist + rhc_state.root_state.set(data=self.root_v(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.root_state.set(data=self.root_omega(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="omega", robot_idxs = env_indxs, gpu=self._use_gpu) + + # angular accc. + rhc_state.root_state.set(data=self.root_a(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="a", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.root_state.set(data=self.root_alpha(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="alpha", robot_idxs = env_indxs, gpu=self._use_gpu) + + # gravity vec + rhc_state.root_state.set(data=self.gravity(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="gn", robot_idxs = env_indxs, gpu=self._use_gpu) + + # joints + rhc_state.jnts_state.set(data=self.jnts_q(robot_name=robot_name, env_idxs=env_indxs), + data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu) + + v_jnts=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs) + if self._jnt_vel_filter[robot_name] is not None: # apply filtering + v_jnts=self._jnt_vel_filter[robot_name].get(idxs=env_indxs) + rhc_state.jnts_state.set(data=v_jnts, + data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.jnts_state.set(data=self.jnts_eff(robot_name=robot_name, env_idxs=env_indxs), + data_type="eff", robot_idxs 
= env_indxs, gpu=self._use_gpu) + + # height map + if self._enable_height_shared: + hdata = self._height_imgs[robot_name] + if env_indxs is not None: + hdata = hdata[env_indxs] + flat = hdata.reshape(hdata.shape[0], -1) + rhc_state.height_sensor.set(data=flat, data_type=None, robot_idxs=env_indxs, gpu=self._use_gpu) + + # Updating contact state for selected contact links + self._update_contact_state(robot_name=robot_name, env_indxs=env_indxs) + + def _update_contact_state(self, + robot_name: str, + env_indxs: torch.Tensor = None): + for i in range(0, self.cluster_servers[robot_name].n_contact_sensors()): + contact_link = self.cluster_servers[robot_name].contact_linknames()[i] + f_contact = self._get_contact_f(robot_name=robot_name, + contact_link=contact_link, + env_indxs=env_indxs) + if f_contact is not None: + self.cluster_servers[robot_name].get_state().contact_wrenches.set(data=f_contact, data_type="f", + contact_name=contact_link, + robot_idxs = env_indxs, + gpu=self._use_gpu) + + def _init_safe_cluster_actions(self, + robot_name: str): + + # this does not actually write on shared memory, + # but it's enough to get safe actions for the simulator before the + # cluster starts to receive data from the controllers + control_cluster = self.cluster_servers[robot_name] + rhc_cmds = control_cluster.get_actions() + n_jnts = rhc_cmds.n_jnts() + + null_action = torch.zeros((self._num_envs, n_jnts), + dtype=self._dtype, + device=self._device) + rhc_cmds.jnts_state.set(data=self._homing, data_type="q", gpu=self._use_gpu) + rhc_cmds.jnts_state.set(data=null_action, data_type="v", gpu=self._use_gpu) + rhc_cmds.jnts_state.set(data=null_action, data_type="eff", gpu=self._use_gpu) + + def _pre_step_db(self) -> None: + + # cluster step logic here + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + if self._override_low_lev_controller: + # if overriding low-lev jnt imp. 
this has to run at the highest + # freq possible + start=time.perf_counter() + self._read_jnts_state_from_robot(robot_name=robot_name) + self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start + + self._write_state_to_jnt_imp(robot_name=robot_name) + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) + + if self._jnt_vel_filter[robot_name] is not None and \ + (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0: + # filter joint vel at a fixed frequency wrt sim steps + if not self._override_low_lev_controller: + # we need a fresh sensor reading + self._read_jnts_state_from_robot(robot_name=robot_name) + self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None) + + control_cluster = self.cluster_servers[robot_name] + if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]): + control_cluster.wait_for_solution() # this is blocking + failed = control_cluster.get_failed_controllers(gpu=self._use_gpu) + self._set_cluster_actions(robot_name=robot_name) # write last cmds to low level control + if not self._override_low_lev_controller: + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot + # we can update the jnt state just at the rate at which the cluster needs it + start=time.perf_counter() + self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None) + else: + # read state necessary for cluster + start=time.perf_counter() + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=None) + self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start + start=time.perf_counter() + self._set_state_to_cluster(robot_name=robot_name, + env_indxs=None) + control_cluster.write_robot_state() + self.debug_data["cluster_state_update_dt"][robot_name] = time.perf_counter()-start + + self._update_jnt_imp_cntrl_shared_data() # only if debug_mode_jnt_imp is enabled + + if self._use_remote_stepping[i]: + 
self._remote_steppers[robot_name].ack() # signal cluster stepping is finished + if failed is not None and self._env_opts["deact_when_failure"]: # deactivate robot completely + self._deactivate(env_indxs=failed, + robot_name=robot_name) + + self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking) + self._wait_for_remote_step_req(robot_name=robot_name) + else: + if failed is not None: + self._reset(env_indxs=failed, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=failed) + + control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers()) + + control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving + # values of some rhc flags on shared memory + control_cluster.trigger_solution() # trigger only active controllers + + def _pre_step(self) -> None: + + # cluster step logic here + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + if self._override_low_lev_controller: + # if overriding low-lev jnt imp. 
this has to run at the highest + # freq possible + self._read_jnts_state_from_robot(robot_name=robot_name) + self._write_state_to_jnt_imp(robot_name=robot_name) + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) + + if self._jnt_vel_filter[robot_name] is not None and \ + (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0: + # filter joint vel at a fixed frequency wrt sim steps + if not self._override_low_lev_controller: + # we need a fresh sensor reading + self._read_jnts_state_from_robot(robot_name=robot_name) + self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None) + + control_cluster = self.cluster_servers[robot_name] + if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]): + control_cluster.wait_for_solution() # this is blocking + failed = control_cluster.get_failed_controllers(gpu=self._use_gpu) + self._set_cluster_actions(robot_name=robot_name) # set last cmds to low level control + if not self._override_low_lev_controller: + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot + # we can update the jnt state just at the rate at which the cluster needs it + self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None) + # read state necessary for cluster + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=None) + # write last robot state to the cluster of controllers + self._set_state_to_cluster(robot_name=robot_name, + env_indxs=None) + control_cluster.write_robot_state() # write on shared mem + + if self._use_remote_stepping[i]: + self._remote_steppers[robot_name].ack() # signal cluster stepping is finished + if failed is not None and self._env_opts["deact_when_failure"]: + self._deactivate(env_indxs=failed, + robot_name=robot_name) + self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking) + self._wait_for_remote_step_req(robot_name=robot_name) + else: + if failed is not None: + 
self._reset(env_indxs=failed, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=failed) + control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers()) + + control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving + # values of some rhc flags on shared memory + control_cluster.trigger_solution() # trigger only active controllers + + def _post_world_step_db(self) -> bool: + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + control_cluster = self.cluster_servers[robot_name] + self.cluster_sim_step_counters[robot_name]+=1 # this has to be update with sim freq + if self._debug: + self.debug_data["sim_time"][robot_name]=self._get_world_time(robot_name=robot_name) + self.debug_data["cluster_sol_time"][robot_name] = \ + control_cluster.solution_time() + + self.step_counter +=1 + + def _get_world_time(self, robot_name: str): + return self.cluster_sim_step_counters[robot_name]*self.physics_dt() + + def _post_world_step(self) -> bool: + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + self.cluster_sim_step_counters[robot_name]+=1 + self.step_counter +=1 + + def _reset(self, + robot_name: str, + env_indxs: torch.Tensor = None, + randomize: bool = False, + reset_cluster: bool = False, + reset_cluster_counter = False): + + # resets the state of target robot and env to the defaults + self._reset_state(env_indxs=env_indxs, + robot_name=robot_name, + randomize=randomize) + # and jnt imp. 
controllers + self._reset_jnt_imp_control(robot_name=robot_name, + env_indxs=env_indxs) + + self._read_jnts_state_from_robot(robot_name=robot_name, + env_indxs=env_indxs) + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=env_indxs) + + if self._jnt_vel_filter[robot_name] is not None: + self._jnt_vel_filter[robot_name].reset(idxs=env_indxs) + + if reset_cluster: # reset controllers remotely + self._reset_cluster(env_indxs=env_indxs, + robot_name=robot_name, + reset_cluster_counter=reset_cluster_counter) + + def _randomize_yaw(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + root_q_default = self._root_q_default[robot_name] + if env_indxs is None: + env_indxs = torch.arange(root_q_default.shape[0]) + + num_indices = env_indxs.shape[0] + yaw_angles = torch.rand((num_indices,), + device=root_q_default.device) * 2 * torch.pi # uniformly distributed random angles + + # Compute cos and sin once + cos_half = torch.cos(yaw_angles / 2) + root_q_default[env_indxs, :] = torch.stack((cos_half, + torch.zeros_like(cos_half), + torch.zeros_like(cos_half), + torch.sin(yaw_angles / 2)), dim=1).reshape(num_indices, 4) + + def _deactivate(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + # deactivate jnt imp controllers for given robots and envs (makes the robot fall) + self._jnt_imp_controllers[robot_name].deactivate(robot_indxs=env_indxs) + + def _n_contacts(self, robot_name: str) -> List[int]: + return self._num_contacts[robot_name] + + def root_p(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._root_p[robot_name] + else: + return self._root_p[robot_name][env_idxs, :] + + def root_p_rel(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + rel_pos = torch.sub(self.root_p(robot_name=robot_name, + env_idxs=env_idxs), + self._root_pos_offsets[robot_name][env_idxs, :]) + return rel_pos + + def root_q(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is 
None: + return self._root_q[robot_name] + else: + return self._root_q[robot_name][env_idxs, :] + + def root_q_rel(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + rel_q = quaternion_difference(self._root_q_offsets[robot_name][env_idxs, :], + self.root_q(robot_name=robot_name, + env_idxs=env_idxs)) + return rel_q + + def root_v(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_v=self._root_v[robot_name] + if base_loc: + root_v=self._root_v_base_loc[robot_name] + if env_idxs is None: + return root_v + else: + return root_v[env_idxs, :] + + def root_omega(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_omega=self._root_omega[robot_name] + if base_loc: + root_omega=self._root_omega_base_loc[robot_name] + if env_idxs is None: + return root_omega + else: + return root_omega[env_idxs, :] + + def root_a(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_a=self._root_a[robot_name] + if base_loc: + root_a=self._root_a_base_loc[robot_name] + if env_idxs is None: + return root_a + else: + return root_a[env_idxs, :] + + def root_alpha(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_alpha=self._root_alpha[robot_name] + if base_loc: + root_alpha=self._root_alpha_base_loc[robot_name] + if env_idxs is None: + return root_alpha + else: + return root_alpha[env_idxs, :] + + def gravity(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + gravity_loc=self._gravity_normalized[robot_name] + if base_loc: + gravity_loc=self._gravity_normalized_base_loc[robot_name] + if env_idxs is None: + return gravity_loc + else: + return gravity_loc[env_idxs, :] + + def jnts_q(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._jnts_q[robot_name] + else: + return self._jnts_q[robot_name][env_idxs, :] + + def jnts_v(self, + robot_name: 
str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._jnts_v[robot_name] + else: + return self._jnts_v[robot_name][env_idxs, :] + + def jnts_eff(self, + robot_name: str, + env_idxs: torch.Tensor = None): # (measured) efforts + + if env_idxs is None: + return self._jnts_eff[robot_name] + else: + return self._jnts_eff[robot_name][env_idxs, :] + + def _wait_for_remote_step_req(self, + robot_name: str): + if not self._remote_steppers[robot_name].wait(self._timeout): + self.close() + Journal.log(self.__class__.__name__, + "_wait_for_remote_step_req", + "Didn't receive any remote step req within timeout!", + LogType.EXCEP, + throw_when_excep = True) + + def _process_remote_reset_req(self, + robot_name: str): + + if not self._remote_resetters[robot_name].wait(self._timeout): + self.close() + Journal.log(self.__class__.__name__, + "_process_remote_reset_req", + "Didn't receive any remote reset req within timeout!", + LogType.EXCEP, + throw_when_excep = True) + + reset_requests = self._remote_reset_requests[robot_name] + reset_requests.synch_all(read=True, retry=True) # read reset requests from shared mem + to_be_reset = reset_requests.to_be_reset(gpu=self._use_gpu) + if to_be_reset is not None: + self._reset(env_indxs=to_be_reset, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=to_be_reset) + control_cluster = self.cluster_servers[robot_name] + control_cluster.activate_controllers(idxs=to_be_reset) # activate controllers + # (necessary if failed) + + self._remote_resetters[robot_name].ack() # signal reset performed + + def _update_jnt_imp_cntrl_shared_data(self): + if self._debug: + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + # updating all the jnt impedance data - > this may introduce some overhead + imp_data = self._jnt_imp_cntrl_shared_data[robot_name].imp_data_view + # set data + 
imp_data.set(data_type="pos_err", + data=self._jnt_imp_controllers[robot_name].pos_err(), + gpu=self._use_gpu) + imp_data.set(data_type="vel_err", + data=self._jnt_imp_controllers[robot_name].vel_err(), + gpu=self._use_gpu) + imp_data.set(data_type="pos_gains", + data=self._jnt_imp_controllers[robot_name].pos_gains(), + gpu=self._use_gpu) + imp_data.set(data_type="vel_gains", + data=self._jnt_imp_controllers[robot_name].vel_gains(), + gpu=self._use_gpu) + imp_data.set(data_type="eff_ff", + data=self._jnt_imp_controllers[robot_name].eff_ref(), + gpu=self._use_gpu) + imp_data.set(data_type="pos", + data=self._jnt_imp_controllers[robot_name].pos(), + gpu=self._use_gpu) + imp_data.set(data_type="pos_ref", + data=self._jnt_imp_controllers[robot_name].pos_ref(), + gpu=self._use_gpu) + imp_data.set(data_type="vel", + data=self._jnt_imp_controllers[robot_name].vel(), + gpu=self._use_gpu) + imp_data.set(data_type="vel_ref", + data=self._jnt_imp_controllers[robot_name].vel_ref(), + gpu=self._use_gpu) + imp_data.set(data_type="eff", + data=self._jnt_imp_controllers[robot_name].eff(), + gpu=self._use_gpu) + imp_data.set(data_type="imp_eff", + data=self._jnt_imp_controllers[robot_name].imp_eff(), + gpu=self._use_gpu) + # copy from GPU to CPU if using gpu + if self._use_gpu: + imp_data.synch_mirror(from_gpu=True,non_blocking=True) + # even if it's from GPU->CPu we can use non-blocking since it's just for db + # purposes + # write copies to shared memory + imp_data.synch_all(read=False, retry=False) + + def _set_startup_jnt_imp_gains(self, + robot_name:str, + env_indxs: torch.Tensor = None): + + startup_p_gains=self._jnt_imp_controllers[robot_name].startup_p_gains() + startup_d_gains=self._jnt_imp_controllers[robot_name].startup_d_gains() + + if env_indxs is not None: + self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs, + pos_gains=startup_p_gains[env_indxs, :], + vel_gains=startup_d_gains[env_indxs, :]) + else: + 
self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs, + pos_gains=startup_p_gains[:, :], + vel_gains=startup_d_gains[:, :]) + + def _write_state_to_jnt_imp(self, + robot_name: str): + + # always update ,imp. controller internal state (jnt imp control is supposed to be + # always running) + self._jnt_imp_controllers[robot_name].update_state(pos=self.jnts_q(robot_name=robot_name), + vel = self.jnts_v(robot_name=robot_name), + eff = self.jnts_eff(robot_name=robot_name)) + + def _set_cluster_actions(self, + robot_name): + control_cluster = self.cluster_servers[robot_name] + actions=control_cluster.get_actions() + active_controllers=control_cluster.get_active_controllers(gpu=self._use_gpu) + + if active_controllers is not None: + self._jnt_imp_controllers[robot_name].set_refs( + pos_ref=actions.jnts_state.get(data_type="q", gpu=self._use_gpu)[active_controllers, :], + vel_ref=actions.jnts_state.get(data_type="v", gpu=self._use_gpu)[active_controllers, :], + eff_ref=actions.jnts_state.get(data_type="eff", gpu=self._use_gpu)[active_controllers, :], + robot_indxs=active_controllers) + + def _jnt_imp_reset_overrride(self, robot_name:str): + # to be overriden + pass + + def _apply_cmds_to_jnt_imp_control(self, robot_name:str): + + self._jnt_imp_controllers[robot_name].apply_cmds() + + def _update_root_offsets(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + if self._debug: + for_robots = "" + if env_indxs is not None: + if not isinstance(env_indxs, torch.Tensor): + msg = "Provided env_indxs should be a torch tensor of indexes!" + Journal.log(self.__class__.__name__, + "update_root_offsets", + msg, + LogType.EXCEP, + throw_when_excep = True) + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" 
+ Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist()) + if self._verbose: + Journal.log(self.__class__.__name__, + "update_root_offsets", + f"updating root offsets " + for_robots, + LogType.STAT, + throw_when_excep = True) + + # only planar position used + if env_indxs is None: + self._root_pos_offsets[robot_name][:, 0:2] = self._root_p[robot_name][:, 0:2] + self._root_q_offsets[robot_name][:, :] = self._root_q[robot_name] + + else: + self._root_pos_offsets[robot_name][env_indxs, 0:2] = self._root_p[robot_name][env_indxs, 0:2] + self._root_q_offsets[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :] + + def _reset_jnt_imp_control(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + if self._debug: + for_robots = "" + if env_indxs is not None: + if not isinstance(env_indxs, torch.Tensor): + Journal.log(self.__class__.__name__, + "reset_jnt_imp_control", + "Provided env_indxs should be a torch tensor of indexes!", + LogType.EXCEP, + throw_when_excep = True) + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" 
+ Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs) + + if self._verbose: + Journal.log(self.__class__.__name__, + "reset_jnt_imp_control", + f"resetting joint impedances " + for_robots, + LogType.STAT, + throw_when_excep = True) + + # resets all internal data, refs to defaults + self._jnt_imp_controllers[robot_name].reset(robot_indxs=env_indxs) + + #restore jnt imp refs to homing + if env_indxs is None: + self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[:, :], + robot_indxs = None) + else: + self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[env_indxs, :], + robot_indxs = env_indxs) + + # self._write_state_to_jnt_imp(robot_name=robot_name) + # actually applies reset commands to the articulation + self._write_state_to_jnt_imp(robot_name=robot_name) + self._jnt_imp_reset_overrride(robot_name=robot_name) + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) + + def _synch_default_root_states(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + if self._debug: + for_robots = "" + if env_indxs is not None: + if not isinstance(env_indxs, torch.Tensor): + msg = "Provided env_indxs should be a torch tensor of indexes!" + Journal.log(self.__class__.__name__, + "synch_default_root_states", + msg, + LogType.EXCEP, + throw_when_excep = True) + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" 
+ Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist()) + if self._verbose: + Journal.log(self.__class__.__name__, + "synch_default_root_states", + f"updating default root states " + for_robots, + LogType.STAT, + throw_when_excep = True) + + if env_indxs is None: + self._root_p_default[robot_name][:, :] = self._root_p[robot_name] + self._root_q_default[robot_name][:, :] = self._root_q[robot_name] + else: + self._root_p_default[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :] + self._root_q_default[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :] + + def _generate_rob_descriptions(self, + robot_name: str, + urdf_path: str, + srdf_path: str): + + custom_xacro_args=extract_custom_xacro_args(self._env_opts) + Journal.log(self.__class__.__name__, + "_generate_rob_descriptions", + "generating URDF for robot "+ f"{robot_name}, from URDF {urdf_path}...", + LogType.STAT, + throw_when_excep = True) + xrdf_cmds=self._xrdf_cmds(robot_name=robot_name) + xrdf_cmds=merge_xacro_cmds(prev_cmds=xrdf_cmds, + new_cmds=custom_xacro_args) + self._urdf_dump_paths[robot_name]=generate_urdf(robot_name=robot_name, + xacro_path=urdf_path, + dump_path=self._descr_dump_path, + xrdf_cmds=xrdf_cmds) + Journal.log(self.__class__.__name__, + "_generate_rob_descriptions", + "generating SRDF for robot "+ f"{robot_name}, from SRDF {srdf_path}...", + LogType.STAT, + throw_when_excep = True) + # we also generate SRDF files, which are useful for control + self._srdf_dump_paths[robot_name]=generate_srdf(robot_name=robot_name, + xacro_path=srdf_path, + dump_path=self._descr_dump_path, + xrdf_cmds=xrdf_cmds) + + def _xrdf_cmds(self, robot_name:str): + urdfpath=self._robot_urdf_paths[robot_name] + # we assume directory tree of the robot package is like + # robot-ros-pkg/robot_urdf/urdf/robot.urdf.xacro + parts = urdfpath.split('/') + 
urdf_descr_root_path = '/'.join(parts[:-2]) + cmds = get_xrdf_cmds(urdf_descr_root_path=urdf_descr_root_path) + return cmds + + @abstractmethod + def current_tstep(self) -> int: + pass + + @abstractmethod + def current_time(self) -> float: + pass + + @abstractmethod + def is_running(self) -> bool: + pass + + @abstractmethod + def _get_contact_f(self, + robot_name: str, + contact_link: str, + env_indxs: torch.Tensor) -> torch.Tensor: + return None + + @abstractmethod + def physics_dt(self) -> float: + pass + + @abstractmethod + def rendering_dt(self) -> float: + pass + + @abstractmethod + def set_physics_dt(self, physics_dt:float): + pass + + @abstractmethod + def set_rendering_dt(self, rendering_dt:float): + pass + + @abstractmethod + def _robot_jnt_names(self, robot_name: str) -> List[str]: + pass + + @abstractmethod + def _read_root_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + pass + + @abstractmethod + def _read_jnts_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + pass + + @abstractmethod + def _init_robots_state(self): + pass + + @abstractmethod + def _reset_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + randomize: bool = False): + pass + + @abstractmethod + def _init_world(self): + pass + + @abstractmethod + def _reset_sim(self) -> None: + pass + + @abstractmethod + def _set_jnts_to_homing(self, robot_name: str): + pass + + @abstractmethod + def _set_root_to_defconfig(self, robot_name: str): + pass + + @abstractmethod + def _parse_env_opts(self): + pass + + @abstractmethod + def _pre_setup(self): + pass + + @abstractmethod + def _generate_jnt_imp_control(self) -> JntImpCntrlChild: + pass + + @abstractmethod + def _render_sim(self, mode:str="human") -> None: + pass + + @abstractmethod + def _close(self) -> None: + pass + + @abstractmethod + def _step_world(self) -> None: + pass diff --git 
a/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/xbot2_basic.yaml b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/xbot2_basic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3bd15da7038ad15240968fc21d975ef54c08df47 --- /dev/null +++ b/bundles/centauro/d2026_01_19_h14_m54_s32-CentauroCloopPartialNoYawUbPercep_FakePosTrackEnvPhaseControl/xbot2_basic.yaml @@ -0,0 +1,81 @@ +XBotInterface: + urdf_path: $PWD/centauro.urdf + srdf_path: $PWD/centauro.srdf + +ModelInterface: + model_type: RBDL + is_model_floating_base: true + +motor_pd: + "j_arm*_1": [500, 10] + "j_arm*_2": [500, 10] + "j_arm*_3": [500, 10] + "j_arm*_4": [500, 10] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [3000, 30] + "hip_pitch_*": [3000, 30] + "knee_pitch_*": [3000, 30] + "ankle_pitch_*": [1000, 10] + "ankle_yaw_*": [300, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + +startup_motor_pd: + "j_arm*_1": [500, 30] + "j_arm*_2": [500, 30] + "j_arm*_3": [500, 30] + "j_arm*_4": [500, 30] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [800, 80] + "hip_pitch_*": [800, 80] + "knee_pitch_*": [800, 80] + "ankle_pitch_*": [800, 80] + "ankle_yaw_*": [480, 50] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [800, 80] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + +motor_vel: + j_wheel_*: [1] + neck_velodyne: [1] + +# hal +xbotcore_device_configs: + sim: $PWD/hal/centauro_gz.yaml + dummy: $PWD/hal/centauro_dummy.yaml + +# threads +xbotcore_threads: + rt_main: {sched: fifo , prio: 60, period: 0.001} + nrt_main: {sched: other, prio: 0 , period: 0.005} + +# plugins +xbotcore_plugins: + + homing: + thread: rt_main + type: homing + + ros_io: {thread: nrt_main, type: ros_io} + + 
ros_control: {thread: nrt_main, type: ros_control, params: {autostart: {type: bool, value: true}}} + +# global parameters +xbotcore_param: + /xbot/hal/joint_safety/filter_autostart: {value: true, type: bool} + # /xbot/hal/joint_safety/filter_cutoff_hz: {value: 1.0, type: double} + /xbot/hal/joint_safety/filter_safe_cutoff_hz: {value: 5.0, type: double} + /xbot/hal/joint_safety/filter_medium_cutoff_hz: {value: 10.0, type: double} + /xbot/hal/joint_safety/filter_fast_cutoff_hz: {value: 15.0, type: double} + /xbot/hal/enable_safety: {value: false, type: bool} \ No newline at end of file diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/bundle.yaml b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/bundle.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0c904857c15141d65741be976bb5b1c7fccd1db8 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/bundle.yaml @@ -0,0 +1,107 @@ +bundle_format: augmpc_model_bundle_v1 +bundle_name: d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv +checkpoint_file: d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv_model +preserved_training_cfgs: + - ibrido_run__2026_02_21_13_59_20_ID/training_cfg_centauro_ub_cloop.sh +framework: + repos: + AugMPC: + commit: f2ff28085ea76d2b548841de6f363f7183480f86 + branch: ibrido + remote: git@github.com:AndrePatri/AugMPC.git + dirty: false + AugMPCEnvs: + commit: 46c5d4161cb4b249b3a2e50c93c6bc2aa087f027 + branch: ibrido + remote: git@github.com:AndrePatri/AugMPCEnvs.git + dirty: false + AugMPCModels: + commit: 8b15c800c0f5684c61fdaf4847156ff71df61ebc + branch: main + remote: https://huggingface.co/AndrePatri/AugMPCModels + dirty: true + CentauroHybridMPC: + commit: 640889d4c3b9c8d360b5a3ccde6fc2bd8f139891 + branch: ibrido + remote: git@github.com:ADVRHumanoids/CentauroHybridMPC.git 
+ dirty: false + EigenIPC: + commit: 7c1c3ecd08716e61ed164a6e0bb788aa716705ca + branch: devel + remote: git@github.com:AndrePatri/EigenIPC.git + dirty: false + IBRIDO: + commit: 0ceb5f3e0508b6ecdce12bcc8f0dcbcde8f29a93 + branch: main + remote: git@github.com:AndrePatri/IBRIDO.git + dirty: false + IsaacLab: + commit: a520a883ce996d855cc9d5255d71fd1c1307633f + branch: HEAD + remote: git@github.com:isaac-sim/IsaacLab.git + dirty: true + KyonRLStepping: + commit: 2bea2b8d70078974869df0549e90fc27ff31f851 + branch: ibrido + remote: git@github.com:ADVRHumanoids/KyonRLStepping.git + dirty: false + MPCHive: + commit: 45b4fc692850cef607020dda2e32fd708e7fff62 + branch: devel + remote: git@github.com:AndrePatri/MPCHive.git + dirty: false + MPCViz: + commit: 806d03efcc9d8ab1fb04991a159c19ba89bfb85b + branch: ros2_humble + remote: git@github.com:AndrePatri/MPCViz.git + dirty: false + adarl: + commit: f585499e49fa05fdd070a77f3211c0996599b87b + branch: ibrido + remote: git@github.com:c-rizz/adarl.git + dirty: false + casadi: + commit: 79cebe3b416dee22abc87de0076b80a5b97bd345 + branch: optional_float + remote: git@github.com:AndrePatri/casadi.git + dirty: true + horizon: + commit: 3b084317709ff9b88d4a07ac5436f5988b39eece + branch: ibrido + remote: git@github.com:AndrePatri/horizon.git + dirty: true + iit-centauro-ros-pkg: + commit: 6069807ebb37a6d7df39430a02685e09ed9b167a + branch: ibrido_ros2 + remote: git@github.com:AndrePatri/iit-centauro-ros-pkg.git + dirty: false + iit-dagana-ros-pkg: + commit: f7ecd6c84a0b7f3320c1b7de6449cbcd4445d2fe + branch: ibrido_ros2 + remote: git@github.com:AndrePatri/iit-dagana-ros-pkg.git + dirty: false + iit-kyon-description: + commit: 50fd7c8909b3ddfd1ebbe67c61d6b775b86df6b1 + branch: ibrido_ros2 + remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git + dirty: false + iit-kyon-description-mpc: + commit: 3a92bee28405172e8f6c628b4498703724d35bf5 + branch: ibrido_ros2 + remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git + dirty: false + 
iit-kyon-ros-pkg: + commit: 298917efffb63dbca540e0aedbd21b41bf393fbf + branch: ibrido_ros2_simple + remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git + dirty: false + phase_manager: + commit: 9925d18c0d7a55d013cbaa4998c61d85a3a8944f + branch: ibrido + remote: git@github.com:AndrePatri/phase_manager.git + dirty: false + unitree_ros: + commit: c75a622b8782d11dd6aa4c2ebd3b0f9c13a56aae + branch: ibrido + remote: git@github.com:AndrePatri/unitree_ros.git + dirty: true diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_big_wheels_ub_2026_02_21_13_59_20_ID.srdf b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_big_wheels_ub_2026_02_21_13_59_20_ID.srdf new file mode 100644 index 0000000000000000000000000000000000000000..adf957c2d948b8f62f760014fc1b9a95cb3e8cfb --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_big_wheels_ub_2026_02_21_13_59_20_ID.srdf @@ -0,0 +1,147 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_big_wheels_ub_2026_02_21_13_59_20_ID.urdf b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_big_wheels_ub_2026_02_21_13_59_20_ID.urdf new file mode 100644 index 0000000000000000000000000000000000000000..70256a67050070e647bdcf5cf3b142c14b5db50b --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_big_wheels_ub_2026_02_21_13_59_20_ID.urdf @@ -0,0 +1,1569 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_rhc.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..e2c5d1d97a8a91c01688ec97dd78a23ac8d25823 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_rhc.py @@ -0,0 +1,145 @@ +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc + +import numpy as np + +from typing import Dict + +from centaurohybridmpc.utils.sysutils import PathsGetter + +class CentauroRhc(HybridQuadRhc): + + def __init__(self, + srdf_path: str, + urdf_path: str, + robot_name: str, # used for shared memory namespaces + codegen_dir: str, + n_nodes: float = 31, + dt: float = 0.03, + injection_node: int = 10, + max_solver_iter = 1, # defaults to rt-iteration + open_loop: bool = True, + close_loop_all: bool = False, + dtype = np.float32, + verbose = False, + debug = False, + refs_in_hor_frame = True, + timeout_ms: int = 60000, + custom_opts: Dict = {} + ): + + paths = PathsGetter() + self._files_suffix="" + if open_loop: + self._files_suffix="_open" + + self._add_upper_body=False 
+ if ("add_upper_body" in custom_opts) and \ + (custom_opts["add_upper_body"]): + self._add_upper_body=True + self._files_suffix+="_ub" + + config_path=paths.RHCCONFIGPATH_NO_WHEELS+self._files_suffix+".yaml" + + super().__init__(srdf_path=srdf_path, + urdf_path=urdf_path, + config_path=config_path, + robot_name=robot_name, # used for shared memory namespaces + codegen_dir=codegen_dir, + n_nodes=n_nodes, + dt=dt, + injection_node=injection_node, + max_solver_iter=max_solver_iter, # defaults to rt-iteration + open_loop=open_loop, + close_loop_all=close_loop_all, + dtype=dtype, + verbose=verbose, + debug=debug, + refs_in_hor_frame=refs_in_hor_frame, + timeout_ms=timeout_ms, + custom_opts=custom_opts) + + self._fail_idx_scale=1e-9 + self._fail_idx_thresh_open_loop=1e0 + self._fail_idx_thresh_close_loop=10 + + if open_loop: + self._fail_idx_thresh=self._fail_idx_thresh_open_loop + else: + self._fail_idx_thresh=self._fail_idx_thresh_close_loop + + # adding some additional config files for jnt imp control + self._rhc_fpaths.append(paths.JNT_IMP_CONFIG_XBOT+".yaml") + self._rhc_fpaths.append(paths.JNT_IMP_CONFIG+".yaml") + + def _set_rhc_pred_idx(self): + self._pred_node_idx=round((self._n_nodes-1)*2/3) + + def _set_rhc_cmds_idx(self): + self._rhc_cmds_node_idx=2 + + def _config_override(self): + paths = PathsGetter() + if ("control_wheels" in self._custom_opts): + if self._custom_opts["control_wheels"]: + self.config_path = paths.RHCCONFIGPATH_WHEELS+self._files_suffix+".yaml" + if ("fix_yaw" in self._custom_opts) and \ + (self._custom_opts["fix_yaw"]): + self.config_path = paths.RHCCONFIGPATH_WHEELS_NO_YAW+self._files_suffix+".yaml" + if ("replace_continuous_joints" in self._custom_opts) and \ + (not self._custom_opts["replace_continuous_joints"]): + # use continuous joints -> different config + self.config_path = paths.RHCCONFIGPATH_WHEELS_CONTINUOUS+self._files_suffix+".yaml" + if ("fix_yaw" in self._custom_opts) and \ + (self._custom_opts["fix_yaw"]): + 
self.config_path = paths.RHCCONFIGPATH_WHEELS_CONTINUOUS_NO_YAW+self._files_suffix+".yaml" + + else: + self._custom_opts["control_wheels"]=False + + if not self._custom_opts["control_wheels"]: + self._fixed_jnt_patterns=self._fixed_jnt_patterns+\ + ["j_wheel", + "ankle_yaw"] + self._custom_opts["replace_continuous_joints"]=True + + def _init_problem(self): + + if not self._custom_opts["control_wheels"]: + self._yaw_vertical_weight=120.0 + else: + self._yaw_vertical_weight=50.0 + + fixed_jnts_patterns=[ + "d435_head", + "velodyne_joint", + "dagana"] + + if not self._add_upper_body: + fixed_jnts_patterns.append("j_arm") + fixed_jnts_patterns.append("torso") + + if ("fix_yaw" in self._custom_opts) and \ + (self._custom_opts["fix_yaw"]): + fixed_jnts_patterns.append("ankle_yaw") + + flight_duration_sec=0.5 # [s] + flight_duration=int(flight_duration_sec/self._dt) + post_flight_duration_sec=0.2 # [s] + post_flight_duration=int(post_flight_duration_sec/self._dt) + + step_height=0.1 + if ("step_height" in self._custom_opts): + step_height=self._custom_opts["step_height"] + + super()._init_problem(fixed_jnt_patterns=fixed_jnts_patterns, + wheels_patterns=["wheel_"], + foot_linkname="wheel_1", + flight_duration=flight_duration, + post_flight_stance=post_flight_duration, + step_height=step_height, + keep_yaw_vert=True, + yaw_vertical_weight=self._yaw_vertical_weight, + vertical_landing=True, + vertical_land_weight=10.0, + phase_force_reg=5e-2, + vel_bounds_weight=1.0) \ No newline at end of file diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_rhc_config_no_wheels_ub.yaml b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_rhc_config_no_wheels_ub.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9b71f97b5219e81763f8d3cb8fe4eb058ba349ec --- /dev/null +++ 
b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/centauro_rhc_config_no_wheels_ub.yaml @@ -0,0 +1,295 @@ +solver: + type: ilqr + ipopt.linear_solver: ma57 + ipopt.tol: 0.1 +# ilqr.merit_der_threshold: 1e-3 +# ilqr.defect_norm_threshold: 1e-3 + ipopt.constr_viol_tol: 0.01 + ilqr.constraint_violation_threshold: 1e-2 +# ipopt.hessian_approximation: exact + ipopt.print_level: 5 + ipopt.suppress_all_output: 'yes' + ipopt.sb: 'yes' + ilqr.suppress_all_output: 'yes' + ilqr.codegen_enabled: true + ilqr.codegen_workdir: /tmp/tyhio + ilqr.enable_gn: true + ilqr.hxx_reg_base: 0.0 + ilqr.n_threads: 0 + print_time: 0 + +constraints: + - contact_1 + - contact_2 + - contact_3 + - contact_4 + +costs: + - z_contact_1 + - z_contact_2 + - z_contact_3 + - z_contact_4 + # - vz_contact_1 + # - vz_contact_2 + # - vz_contact_3 + # - vz_contact_4 + - base_lin_velxy + - base_lin_velz + - base_omega + - base_capture + - joint_posture_capture + - v_regularization + - a_regularization + # - force_regularization + +#open loop +.define: + - &w_base_omega 15. + - &w_base_vxy 18. + - &w_base_vz 15. + - &w_base_z 15. + - &w_contact_z 200.0 + - &w_contact_vz 250.0 + - &w_base_capture 200. 
+ - &wheel_radius 0.124 + +base_lin_velxy: + type: Cartesian + distal_link: base_link + indices: [0, 1] + nodes: ${range(1, N-5)} + cartesian_type: velocity + weight: *w_base_vxy + +base_lin_velz: + type: Cartesian + distal_link: base_link + indices: [2] + nodes: ${range(1, N-5)} + cartesian_type: velocity + weight: *w_base_vz + +base_omega: + type: Cartesian + distal_link: base_link + indices: [3, 4, 5] + nodes: ${range(1, N-5)} + cartesian_type: velocity + weight: *w_base_omega + +base_capture: + type: Cartesian + distal_link: base_link + indices: [0, 1, 2, 3, 4, 5] + nodes: ${range(N-5, N+1)} + cartesian_type: velocity + weight: *w_base_capture + +# =============================== + +zero_velocity_contact_1: + type: Cartesian + base_link: "world" + distal_link: wheel_1 + indices: [0, 1, 2] + cartesian_type: velocity + # offset: *wheel_radius + +zero_velocity_contact_2: + type: Cartesian + base_link: "world" + distal_link: wheel_2 + indices: [0, 1, 2] + cartesian_type: velocity + # offset: *wheel_radius + +zero_velocity_contact_3: + type: Cartesian + base_link: "world" + distal_link: wheel_3 + indices: [0, 1, 2] + cartesian_type: velocity + # offset: *wheel_radius + +zero_velocity_contact_4: + type: Cartesian + base_link: "world" + distal_link: wheel_4 + indices: [0, 1, 2] + cartesian_type: velocity + # offset: *wheel_radius + +# ================================== + +interaction_contact_1: + type: VertexForce + frame: contact_1 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.2 + vertex_frames: + - wheel_1 + +interaction_contact_2: + type: VertexForce + frame: contact_2 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.2 + vertex_frames: + - wheel_2 + +interaction_contact_3: + type: VertexForce + frame: contact_3 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.2 + vertex_frames: + - wheel_3 + +interaction_contact_4: + type: VertexForce + frame: contact_4 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.2 + vertex_frames: + - wheel_4 + 
+contact_1: + type: Contact + subtask: [interaction_contact_1, zero_velocity_contact_1] + +contact_2: + type: Contact + subtask: [interaction_contact_2, zero_velocity_contact_2] + +contact_3: + type: Contact + subtask: [interaction_contact_3, zero_velocity_contact_3] + +contact_4: + type: Contact + subtask: [interaction_contact_4, zero_velocity_contact_4] + +joint_posture_capture: + type: Postural + weight: [25.0, 10.0, 10.0, 10.0, + 25.0, 10.0, 10.0, 10.0, + 25.0, 10.0, 10.0, 10.0, + 25.0, 10.0, 10.0, 10.0, + 55.0, + 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, + 25.0, 25.0, 25.0, 25.0, 25.0, 25.0] + indices: [0, 1, 2, 3, + 4, 5, 6, 7, + 8, 9, 10, 11, + 12, 13, 14, 15, + 16, + 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28] + nodes: ${range(N-5, N+1)} + +v_regularization: + type: Regularization + variable_name: v + nodes: ${range(0, N+1)} + indices: [0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, + 10, 11, 12, 13, + 14, 15, 16, 17, + 18, 19, 20, 21, + 22, + 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34] + weight: [2e-1, 2e-1, 2e-1, 2e-1, 2e-1, 2e-1, + 8e-1, 8e-1, 8e-1, 8e-1, + 8e-1, 8e-1, 8e-1, 8e-1, + 8e-1, 8e-1, 8e-1, 8e-1, + 8e-1, 8e-1, 8e-1, 8e-1, + 8e-1, + 1.2, 1.2, 1.2, 1.2, 1.2, 1.2, + 1.2, 1.2, 1.2, 1.2, 1.2, 1.2] + +a_regularization: + type: Regularization + variable_name: a + nodes: ${range(0, N+1)} + indices: [0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, + 10, 11, 12, 13, + 14, 15, 16, 17, + 18, 19, 20, 21, + 22, + 23, 24, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34] + weight: [1e-1, 1e-1, 1e-1, 1e-1, 1e-1, 1e-1, + 2e-1, 2e-1, 2e-1, 2e-1, + 2e-1, 2e-1, 2e-1, 2e-1, + 2e-1, 2e-1, 2e-1, 2e-1, + 2e-1, 2e-1, 2e-1, 2e-1, + 2e-1, + 2e-1, 2e-1, 2e-1, 2e-1, 2e-1, 2e-1, + 2e-1, 2e-1, 2e-1, 2e-1, 2e-1, 2e-1] + +# flight phase traj tracking +z_contact_1: + type: Cartesian + distal_link: contact_1 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +z_contact_2: + type: Cartesian + distal_link: contact_2 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +z_contact_3: + 
type: Cartesian + distal_link: contact_3 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +z_contact_4: + type: Cartesian + distal_link: contact_4 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +vz_contact_1: + type: Cartesian + base_link: base_link + distal_link: contact_1 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vz_contact_2: + type: Cartesian + base_link: base_link + distal_link: contact_2 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vz_contact_3: + type: Cartesian + base_link: base_link + distal_link: contact_3 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vz_contact_4: + type: Cartesian + base_link: base_link + distal_link: contact_4 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz \ No newline at end of file diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv_model b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv_model new file mode 100644 index 0000000000000000000000000000000000000000..81aca7f526ed12a4ce5b6af57e429a60bd1021bc --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv_model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3303cf6df03c15e74cc116dd689f2d1593719b4b24c67f22a3a6289b1cd2158a +size 4452447 diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/fake_pos_tracking_env.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/fake_pos_tracking_env.py new file mode 100644 index 
0000000000000000000000000000000000000000..d6cb165e2eeb89dd9dc87b5895e8628a0f6e5d8a --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/fake_pos_tracking_env.py @@ -0,0 +1,202 @@ +import os + +from typing import Dict + +import torch + +from EigenIPC.PyEigenIPC import VLevel + +from mpc_hive.utilities.math_utils_torch import world2base_frame + +from aug_mpc_envs.training_envs.twist_tracking_env import TwistTrackingEnv + +class FakePosTrackingEnv(TwistTrackingEnv): + """Converts random planar position goals into twist references so the agent learns to drive the robot toward targets while managing contact scheduling.""" + + def __init__(self, + namespace: str, + actions_dim: int = 10, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + debug: bool = True, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + self._add_env_opt(env_opts, "max_distance", default=5.0) # [m] + self._add_env_opt(env_opts, "min_distance", default=0.0) # [m] + self._add_env_opt(env_opts, "max_vref", default=1.0) # [m/s] + self._add_env_opt(env_opts, "max_dp", default=5.0) # [m] after this, v ref saturates + self._add_env_opt(env_opts, "max_dt", default=env_opts["max_dp"]/ env_opts["max_vref"]) + + TwistTrackingEnv.__init__(self, + namespace=namespace, + actions_dim=actions_dim, # twist + contact flags + verbose=verbose, + vlevel=vlevel, + use_gpu=use_gpu, + dtype=dtype, + debug=debug, + override_agent_refs=override_agent_refs, + timeout_ms=timeout_ms, + env_opts=env_opts) + + def get_file_paths(self): + paths=TwistTrackingEnv.get_file_paths(self) + paths.append(os.path.abspath(__file__)) + return paths + + def _custom_post_init(self): + TwistTrackingEnv._custom_post_init(self) + + # position targets to be reached (wrt robot's pos at ep start) + self._p_trgt_w=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 
0:2].detach().clone() + self._p_delta_w=self._p_trgt_w.detach().clone() + self._dp_norm=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device) + self._dp_versor=self._p_trgt_w.detach().clone() + + self._trgt_d=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device) + self._trgt_theta=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device) + + def _update_loc_twist_refs(self): + # this is called at each env substep + + self._compute_twist_ref_w() + + if not self._override_agent_refs: + agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p", + gpu=self._use_gpu) + agent_p_ref_current[:, 0:2]=self._p_trgt_w + + # then convert it to base ref local for the agent + robot_q = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + # rotate agent ref from world to robot base + world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q, + t_out=self._agent_twist_ref_current_base_loc) + # write it to agent refs tensors + self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc, + gpu=self._use_gpu) + + def _compute_twist_ref_w(self, env_indxs: torch.Tensor = None): + + # angular refs are not altered + if env_indxs is None: + # we update the position error using the current base position + self._p_delta_w[:, :]=self._p_trgt_w-\ + self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + + self._dp_norm[:, :]=self._p_delta_w.norm(dim=1,keepdim=True)+1e-6 + self._dp_versor[:, :]=self._p_delta_w/self._dp_norm + + # apply for vref saturation + to_be_saturated=self._dp_norm[:, :]>self._env_opts["max_dp"] + self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"] + + # we compute the twist refs for the agent depending of the position error + self._agent_twist_ref_current_w[:, 0:2]=self._dp_norm*self._dp_versor/self._env_opts["max_dt"] + self._agent_twist_ref_current_w[:, 2:3]=0 # no vertical vel + + # apply pof0 using last value 
of bernoully coeffs + self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel + self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega + else: + self._p_delta_w[env_indxs, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[env_indxs, 0:2] -\ + self._p_trgt_w[env_indxs, :] + + # apply for vref saturation + to_be_saturated=torch.logical_and((self._dp_norm[:, :]>self._env_opts["max_dp"]).flatten(),env_indxs) + self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"] + + self._dp_norm[env_indxs, :]=self._p_delta_w[env_indxs, :].norm(dim=1,keepdim=True)+1e-6 + self._dp_versor[env_indxs, :]=self._p_delta_w[env_indxs, :]/self._dp_norm[env_indxs, :] + + self._agent_twist_ref_current_w[env_indxs, 0:2]=self._dp_norm[env_indxs, :]*self._dp_versor[env_indxs, :]/self._env_opts["max_dt"] + self._agent_twist_ref_current_w[env_indxs, 2:3]=0 # no vertical vel + + # apply pof0 using last value of bernoully coeffs + self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :] + self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega + + def _override_refs(self, + env_indxs: torch.Tensor = None): + + # runs at every post_step + self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem + if self._use_gpu: + # copies latest refs to GPU + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False) + + agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p", + gpu=self._use_gpu) + + agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega", + gpu=self._use_gpu) + + # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 
0:2] + \ + # agent_p_ref_current[:, 0:2] + self._p_trgt_w[:, :]=agent_p_ref_current[:, 0:2] # set p target target from shared mem + + self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. vel target from shared mem + + def _debug_agent_refs(self): + if self._use_gpu: + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) + self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True) + + def _randomize_task_refs(self, + env_indxs: torch.Tensor = None): + + # we randomize the target position/omega in world frame + if env_indxs is None: + self._trgt_d.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"]) + self._trgt_theta.uniform_(0.0, 2*torch.pi) + + self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] +\ + torch.cat((self._trgt_d*torch.cos(self._trgt_theta) + ,self._trgt_d*torch.sin(self._trgt_theta)), dim=1) + + # randomize just omega + random_uniform=torch.full_like(self._agent_twist_ref_current_w[:, 3:6], fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[:, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6] + + # sample for all envs pof0 + if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients + torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"] + torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega) + + else: + + if env_indxs.any(): + integer_idxs=torch.nonzero(env_indxs).flatten() + + trgt_d_selected=self._trgt_d[integer_idxs, :] + trgt_d_selected.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"]) + self._trgt_d[integer_idxs, :]=trgt_d_selected + + trgt_theta_selected=self._trgt_theta[integer_idxs, :] + trgt_theta_selected.uniform_(0.0, 2*torch.pi) + 
self._trgt_theta[integer_idxs, :]=trgt_theta_selected + + self._p_trgt_w[integer_idxs, 0:1]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 0:1] +\ + self._trgt_d[integer_idxs, :]*torch.cos(self._trgt_theta[integer_idxs, :]) + self._p_trgt_w[integer_idxs, 1:2]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 1:2] +\ + self._trgt_d[integer_idxs, :]*torch.sin(self._trgt_theta[integer_idxs, :]) + + # randomize just omega + random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, 3:6], fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[env_indxs, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6] + + # sample for all envs pof0, then reset to 1 for envs which are not to be randomized + if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients + torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"] + torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega) + self._bernoulli_coeffs_linvel[~env_indxs, :]=1 + self._bernoulli_coeffs_omega[~env_indxs, :]=1 + + self._compute_twist_ref_w(env_indxs=env_indxs) # update linear vel twist refs based on pos error + + diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/hybrid_quad_rhc.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/hybrid_quad_rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..5cec4e610e97e3100c1468ee4e828fe2e64a28c4 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/hybrid_quad_rhc.py @@ -0,0 +1,1324 @@ +from mpc_hive.controllers.rhc import RHController + +from aug_mpc.controllers.rhc.horizon_based.horizon_imports import * + +from 
aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs +from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +import numpy as np + +import os + +import time + +from typing import Dict, List + +class HybridQuadRhc(RHController): + + def __init__(self, + srdf_path: str, + urdf_path: str, + config_path: str, + robot_name: str, # used for shared memory namespaces + codegen_dir: str, + n_nodes:float = 25, + injection_node:int = 10, + dt: float = 0.02, + max_solver_iter = 1, # defaults to rt-iteration + open_loop: bool = True, + close_loop_all: bool = False, + dtype = np.float32, + verbose = False, + debug = False, + refs_in_hor_frame = True, + timeout_ms: int = 60000, + custom_opts: Dict = {}): + + self._refs_in_hor_frame = refs_in_hor_frame + + self._injection_node = injection_node + + self._open_loop = open_loop + self._close_loop_all = close_loop_all + + self._codegen_dir = codegen_dir + if not os.path.exists(self._codegen_dir): + os.makedirs(self._codegen_dir) + # else: + # # Directory already exists, delete it and recreate + # shutil.rmtree(self._codegen_dir) + # os.makedirs(self._codegen_dir) + + self.step_counter = 0 + self.sol_counter = 0 + + self.max_solver_iter = max_solver_iter + + self._timer_start = time.perf_counter() + self._prb_update_time = time.perf_counter() + self._phase_shift_time = time.perf_counter() + self._task_ref_update_time = time.perf_counter() + self._rti_time = time.perf_counter() + + self.robot_name = robot_name + + self.config_path = config_path + + self.urdf_path = urdf_path + # read urdf and srdf files + with open(self.urdf_path, 'r') as file: + self.urdf = file.read() + self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + + self._c_timelines = dict() + self._f_reg_timelines = dict() + + self._custom_opts={"replace_continuous_joints": False, + "use_force_feedback": False, + 
"lin_a_feedback": False, + "is_open_loop": self._open_loop, # fully open (just for db) + "fully_closed": False, # closed loop with full feedback (just for db) + "closed_partial": False, # closed loop with partial feedback + "adaptive_is": True, # closed loop with adaptation + "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase + "alpha_from_outside": False, # alpha set ext. from shared memory + "alpha_half": 1.0, + "only_vel_wheels": True, # whether wheels (if present) are just vel controlled + "use_jnt_v_feedback": False + } + + self._custom_opts.update(custom_opts) + + self._alpha_half=self._custom_opts["alpha_half"] + + if self._open_loop: + self._custom_opts["fully_closed"]=False + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + else: + self._custom_opts["is_open_loop"]=False + if self._custom_opts["fully_closed"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["closed_partial"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["fully_closed"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["adaptive_is"]: + self._custom_opts["closed_partial"]=False + self._custom_opts["fully_closed"]=False + + super().__init__(srdf_path=srdf_path, + n_nodes=n_nodes, + dt=dt, + namespace=self.robot_name, + dtype=dtype, + verbose=verbose, + debug=debug, + timeout_ms=timeout_ms) + + self._rhc_fpaths.append(self.config_path) + + def _config_override(self): + pass + + def _post_problem_init(self): + + self.rhc_costs={} + self.rhc_constr={} + + self._fail_idx_scale=0.0 + self._expl_idx_window_size=int(1*self._n_nodes) + self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size)) + self._expl_idx_counter=0 + self._expl_idx_buffer_counter=0 + + self._pred_node_idx=self._n_nodes-1 + + self._nq=self.nq() + self._nq_jnts=self._nq-7# assuming floating base + self._nv=self.nv() + 
self._nv_jnts=self._nv-6 + + self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype) + self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype) + self._alphas_twist_root=np.zeros((6, 1), dtype=self._dtype) + self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype) + self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype) + self._alphas_q_root[:, :]=1.0 # default to all open + self._alphas_q_jnts[:, :]=1.0 + self._alphas_twist_root[:, :]=1.0 + self._alphas_v_jnts[:, :]=1.0 + self._alphas_a[:, :]=1.0 + + def _init_problem(self, + fixed_jnt_patterns: List[str] = None, + wheels_patterns: List[str] = None, + foot_linkname: str = None, + flight_duration: int = 10, + post_flight_stance: int = 3, + step_height: float = 0.12, + keep_yaw_vert: bool = False, + yaw_vertical_weight: float = 2.0, + vertical_landing: bool = False, + vertical_land_weight: float = 1.0, + phase_force_reg: float = 1e-2, + vel_bounds_weight: float = 1.0): + + self._fixed_jnt_patterns=fixed_jnt_patterns + + self._config_override() + + Journal.log(self.__class__.__name__, + "_init_problem", + f" Will use horizon config file at {self.config_path}", + LogType.INFO, + throw_when_excep=True) + + self._vel_bounds_weight=vel_bounds_weight + self._phase_force_reg=phase_force_reg + self._yaw_vertical_weight=yaw_vertical_weight + self._vertical_land_weight=vertical_land_weight + # overrides parent + self._prb = Problem(self._n_intervals, + receding=True, + casadi_type=cs.SX) + self._prb.setDt(self._dt) + + if "replace_continuous_joints" in self._custom_opts: + # continous joints are parametrized in So2 + if self._custom_opts["replace_continuous_joints"]: + self.urdf = self.urdf.replace('continuous', 'revolute') + else: + self.urdf = self.urdf.replace('continuous', 'revolute') + + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + + self._init_robot_homer() + + # handle 
fixed joints + fixed_joint_map={} + if self._fixed_jnt_patterns is not None: + for jnt_name in self._get_robot_jnt_names(): + for fixed_jnt_pattern in self._fixed_jnt_patterns: + if fixed_jnt_pattern in jnt_name: + fixed_joint_map.update({f"{jnt_name}": + self._homer.get_homing_val(jnt_name=jnt_name)}) + break # do not search for other pattern matches + + if not len(fixed_joint_map)==0: # we need to recreate kin dyn and homers + Journal.log(self.__class__.__name__, + "_init_problem", + f"Will fix following joints: \n{str(fixed_joint_map)}", + LogType.INFO, + throw_when_excep=True) + # with the fixed joint map + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map) + # assign again controlled joints names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + # updated robot homer for controlled joints + self._init_robot_homer() + + # handle continuous joints (need to change homing and retrieve + # cont jnts indexes) and homing + self._continuous_joints=self._get_continuous_jnt_names() + # reduced + self._continuous_joints_idxs=[] + self._continuous_joints_idxs_cos=[] + self._continuous_joints_idxs_sin=[] + self._continuous_joints_idxs_red=[] + self._rev_joints_idxs=[] + self._rev_joints_idxs_red=[] + # qfull + self._continuous_joints_idxs_qfull=[] + self._continuous_joints_idxs_cos_qfull=[] + self._continuous_joints_idxs_sin_qfull=[] + self._continuous_joints_idxs_red_qfull=[] + self._rev_joints_idxs_qfull=[] + self._rev_joints_idxs_red_qfull=[] + jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints)) + jnt_names=self._get_robot_jnt_names() + for i in range(len(jnt_names)): + jnt=jnt_names[i] + index=self._get_jnt_id(jnt)# accounting for floating joint + homing_idx=index-7 # homing is only for actuated joints + homing_value=self._homer.get_homing_val(jnt) + if jnt in self._continuous_joints: + jnt_homing[homing_idx]=np.cos(homing_value).item() + 
jnt_homing[homing_idx+1]=np.sin(homing_value).item() + # just actuated joints + self._continuous_joints_idxs.append(homing_idx) # cos + self._continuous_joints_idxs.append(homing_idx+1) # sin + self._continuous_joints_idxs_cos.append(homing_idx) + self._continuous_joints_idxs_sin.append(homing_idx+1) + self._continuous_joints_idxs_red.append(i) + # q full + self._continuous_joints_idxs_qfull.append(index) # cos + self._continuous_joints_idxs_qfull.append(index+1) # sin + self._continuous_joints_idxs_cos_qfull.append(index) + self._continuous_joints_idxs_sin_qfull.append(index+1) + self._continuous_joints_idxs_red_qfull.append(i+7) + else: + jnt_homing[homing_idx]=homing_value + # just actuated joints + self._rev_joints_idxs.append(homing_idx) + self._rev_joints_idxs_red.append(i) + # q full + self._rev_joints_idxs_qfull.append(index) + self._rev_joints_idxs_red_qfull.append(i+7) + + self._jnts_q_reduced=None + if not len(self._continuous_joints)==0: + cont_joints=", ".join(self._continuous_joints) + + Journal.log(self.__class__.__name__, + "_init_problem", + f"The following continuous joints were found: \n{cont_joints}", + LogType.INFO, + throw_when_excep=True) + # preallocating data + self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype) + self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype) + self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + else: + self._custom_opts["replace_continuous_joints"]=True + Journal.log(self.__class__.__name__, + "_init_problem", + f"No continuous joints were found.", + LogType.INFO, + throw_when_excep=True) + + # retrieve wheels indexes (not considering continuous joints, + # ok just for v, eff, etc..) 
+ self._wheel_patterns=wheels_patterns + self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns) + self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81] + + # we can create an init for the base + init = self._base_init.tolist() + jnt_homing + + if foot_linkname is not None: + FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height + ground_level = FK(q=init)['ee_pos'] + self._base_init[2] = -ground_level[2] # override init + + self._model = FullModelInverseDynamics(problem=self._prb, + kd=self._kin_dyn, + q_init=self._homer.get_homing_map(), + base_init=self._base_init) + + self._ti = TaskInterface(prb=self._prb, + model=self._model, + max_solver_iter=self.max_solver_iter, + debug = self._debug, + verbose = self._verbose, + codegen_workdir = self._codegen_dir) + self._ti.setTaskFromYaml(self.config_path) + + # setting initial base pos ref if exists + base_pos = self._ti.getTask('base_height') + if base_pos is not None: + base_pos.setRef(np.atleast_2d(self._base_init).T) + + self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes????? 
+ + self._gm = GaitManager(self._ti, + self._pm, + self._injection_node, + keep_yaw_vert=keep_yaw_vert, + yaw_vertical_weight=self._yaw_vertical_weight, + vertical_landing=vertical_landing, + landing_vert_weight=self._vertical_land_weight, + phase_force_reg=self._phase_force_reg, + custom_opts=self._custom_opts, + flight_duration=flight_duration, + post_flight_stance=post_flight_stance, + step_height=step_height, + dh=0.0) + + self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0) + self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0) + self._ti.model.q.setInitialGuess(self._ti.model.q0) + self._ti.model.v.setInitialGuess(self._ti.model.v0) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f=len(cforces) + for c in cforces: + c.setInitialGuess(np.array(self._f0)/n_contact_f) + + vel_lims = self._model.kd.velocityLimits() + import horizon.utils as utils + self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:])) + self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:])) + + self._meas_lin_a_par=None + # if self._custom_opts["lin_a_feedback"]: + # # acceleration feedback on first node + # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback", + # dim=3, nodes=0) + # base_lin_a_prb=self._prb.getInput().getVars()[0:3] + # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par, + # nodes=[0]) + + # if not self._open_loop: + # # we create a residual cost to be used as an attractor to the measured state on the first node + # # hard constraints injecting meas. states are pure EVIL! 
+ # prb_state=self._prb.getState() + # full_state=prb_state.getVars() + # state_dim=prb_state.getBounds()[0].shape[0] + # meas_state=self._prb.createParameter(name="measured_state", + # dim=state_dim, nodes=0) + # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state), + # nodes=[0]) + + self._ti.finalize() + self._ti.bootstrap() + + self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes + self._ti.load_initial_guess() + + self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we + # know n_dofs -> we assign it (by default = None) + + self.n_contacts = len(self._model.cmap.keys()) + + # remove variables bounds (before they were necessary to have a good + # quality bootstrap solution) + self._q_inf=np.zeros((self.nq(), 1)) + self._q_inf[:, :]=np.inf + self._v_inf=np.zeros((self.nv(), 1)) + self._v_inf[:, :]=np.inf + self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0) + self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0) + + # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb) + + def get_file_paths(self): + # can be overriden by child + paths = super().get_file_paths() + return paths + + def _get_quat_remap(self): + # overrides parent + return [1, 2, 3, 0] # mapping from robot quat. 
to Horizon's quaternion convention + + def _zmp(self, model): + + num = cs.SX([0, 0]) + den = cs.SX([0]) + pos_contact = dict() + force_val = dict() + + q = cs.SX.sym('q', model.nq) + v = cs.SX.sym('v', model.nv) + a = cs.SX.sym('a', model.nv) + + com = model.kd.centerOfMass()(q=q, v=v, a=a)['com'] + + n = cs.SX([0, 0, 1]) + for c in model.fmap.keys(): + pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos'] + force_val[c] = cs.SX.sym('force_val', 3) + num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n) + den += cs.dot(force_val[c], n) + + zmp = com[0:2] + (num / den) + input_list = [] + input_list.append(q) + input_list.append(v) + input_list.append(a) + + for elem in force_val.values(): + input_list.append(elem) + + f = cs.Function('zmp', input_list, [zmp]) + return f + + def _add_zmp(self): + + input_zmp = [] + + input_zmp.append(self._model.q) + input_zmp.append(self._model.v) + input_zmp.append(self._model.a) + + for f_var in self._model.fmap.values(): + input_zmp.append(f_var) + + c_mean = cs.SX([0, 0, 0]) + for c_name, f_var in self._model.fmap.items(): + fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos'] + c_mean += fk_c_pos + + c_mean /= len(self._model.cmap.keys()) + + zmp_nominal_weight = 10. 
+ zmp_fun = self._zmp(self._model)(*input_zmp) + + if 'wheel_joint_1' in self._model.kd.joint_names(): + zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2])) + + def _quaternion_multiply(self, + q1, q2): + x1, y1, z1, w1 = q1 + x2, y2, z2, w2 = q2 + w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 + x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 + y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 + z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 + return np.array([x, y, z, w]) + + def _get_continuous_jnt_names(self): + import xml.etree.ElementTree as ET + root = ET.fromstring(self.urdf) + continuous_joints = [] + for joint in root.findall('joint'): + joint_type = joint.get('type') + if joint_type == 'continuous': + joint_name = joint.get('name') + continuous_joints.append(joint_name) + return continuous_joints + + def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]): + jnt_names=self._get_robot_jnt_names() + wheels_idxs=[] + for i in range(len(jnt_names)): + jnt_name=jnt_names[i] + for wheel_pattern in wheel_patterns: + if wheel_pattern in jnt_name: + wheels_idxs.append(i) + break + return wheels_idxs + + def _get_jnt_id(self, jnt_name): + return self._kin_dyn.joint_iq(jnt_name) + + def _init_rhc_task_cmds(self): + + rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm, + robot_index_shm=self.controller_index, + robot_index_view=0, # when using optimize_mem the view if always of shape 1x... 
+ namespace=self.namespace, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + use_force_feedback=self._custom_opts["use_force_feedback"], + optimize_mem=True) + + rhc_refs.run() + + rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller) + rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap()) + + rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3], + q_ref=np.atleast_2d(self._base_init)[:, 3:7]) + + return rhc_refs + + def get_vertex_fnames_from_ti(self): + tasks=self._ti.task_list + contact_f_names=[] + for task in tasks: + if isinstance(task, ContactTask): + interaction_task=task.dynamics_tasks[0] + contact_f_names.append(interaction_task.vertex_frames[0]) + return contact_f_names + + def _get_contact_names(self): + # should get contact names from vertex frames + # list(self._ti.model.cmap.keys()) + return self.get_vertex_fnames_from_ti() + + def _get_robot_jnt_names(self): + + joints_names = self._kin_dyn.joint_names() + to_be_removed = ["universe", + "reference", + "world", + "floating", + "floating_base"] + for name in to_be_removed: + if name in joints_names: + joints_names.remove(name) + + return joints_names + + def _get_ndofs(self): + return len(self._model.joint_names) + + def nq(self): + return self._kin_dyn.nq() + + def nv(self): + return self._kin_dyn.nv() + + def _get_robot_mass(self): + + return self._kin_dyn.mass() + + def _get_root_full_q_from_sol(self, node_idx=1): + + root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype) + + np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) + np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full) + + return root_q_full + + def _get_full_q_from_sol(self, node_idx=1): + + return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype) + + def _get_root_twist_from_sol(self, node_idx=1): + # provided in world frame + twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1, 
6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3] + # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6] + return twist_base_local + + def _get_root_a_from_sol(self, node_idx=0): + # provided in world frame + a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3] + # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6] + return a_base_local + + def _get_jnt_q_from_sol(self, node_idx=0, + reduce: bool = True, + clamp: bool = True): + + full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype) + + np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place + np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place + + if self._custom_opts["replace_continuous_joints"] or (not reduce): + if clamp: + return np.fmod(full_jnts_q, 2*np.pi) + else: + return full_jnts_q + else: + cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2) + # copy rev joint vals + self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1) + # and continuous + self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1) + return self._jnts_q_reduced + + def _get_jnt_v_from_sol(self, node_idx=1): + + jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place + # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place + + return jnt_v_sol + + def 
_get_jnt_a_from_sol(self, node_idx=1): + + return self._get_a_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + + def _get_jnt_eff_from_sol(self, node_idx=0): + + efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx) + + # if self._custom_opts["only_vel_wheels"]: + + jnt_efforts=efforts_on_node[6:, 0] + + if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v: + jnt_efforts[self._wheels_idxs_v]=0.0 + + return jnt_efforts.reshape(1, + self._nv_jnts).astype(self._dtype) + + def _get_rhc_cost(self): + + return self._ti.solution["opt_cost"] + + def _get_rhc_constr_viol(self): + + return self._ti.solution["residual_norm"] + + def _get_rhc_nodes_cost(self): + + cost = self._ti.solver_rti.getCostValOnNodes() + return cost.reshape((1, -1)) + + def _get_rhc_nodes_constr_viol(self): + + constr_viol = self._ti.solver_rti.getConstrValOnNodes() + return constr_viol.reshape((1, -1)) + + def _get_rhc_niter_to_sol(self): + + return self._ti.solution["n_iter2sol"] + + def _set_ig_bootstrap(self, + q_state: np.ndarray = None, + v_state: np.ndarray = None): + + xig = self._ti.solution['x_opt'].copy() + uig = self._ti.solution['u_opt'].copy() + + # Normalize and keep quaternion in the same hemisphere as the previous + # solution to avoid artificial 180-deg jumps in the bootstrap warm start. 
+ q_state_boot = q_state.copy() + q_prev = xig[3:7, 0] + q_now = q_state_boot[3:7, 0] + + q_now_norm = np.linalg.norm(q_now) + if q_now_norm > 1e-9: + q_state_boot[3:7, :] /= q_now_norm + else: + q_state_boot[3:7, :] = np.array([[0.0], [0.0], [0.0], [1.0]], dtype=self._dtype) + + q_prev_norm = np.linalg.norm(q_prev) + if q_prev_norm > 1e-9: + q_prev = q_prev / q_prev_norm + + q_now = q_state_boot[3:7, 0] + if np.dot(q_prev, q_now) < 0.0: + q_state_boot[3:7, :] *= -1.0 + + xig[0:self._nq, :] = q_state_boot + xig[self._nq:self._nq + self._nv, :] = 0.0 # 0 velocity on first nodes + uig[0:self._nv, :]=0.0 # 0 acceleration + + # assigning ig + self._prb.getState().setInitialGuess(xig) + self._prb.getInput().setInitialGuess(uig) + # self._prb.getVariables("a").setInitialGuess(np.zeros((self._nv, 1), dtype=self._dtype)) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f = len(cforces) + if n_contact_f == 0: + continue + f_guess = np.array(self._f0, dtype=self._dtype) / n_contact_f + for c in cforces: + c.setInitialGuess(f_guess) + + # print("initial guesses") + # print(self._nq) + # print(self._nv) + # print("q") + # qig=self._ti.model.q.getInitialGuess() + # print(qig.shape) + # print(qig) + # print("v") + # print(self._ti.model.v.getInitialGuess()) + # print("a") + # print(self._ti.model.a.getInitialGuess()) + # for _, cforces in self._ti.model.cmap.items(): + # for c in cforces: + # print("force") + # print(c.getInitialGuess()) + + return xig, uig + + def _set_ig(self): + + shift_num = -1 # shift data by one node + + x_opt = self._ti.solution['x_opt'] + u_opt = self._ti.solution['u_opt'] + + # building ig for state + xig = np.roll(x_opt, + shift_num, axis=1) # rolling state sol. + for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + xig[:, -1 - i] = x_opt[:, -1] + # building ig for inputs + uig = np.roll(u_opt, + shift_num, axis=1) # rolling state sol. 
+ for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + uig[:, -1 - i] = u_opt[:, -1] + + # assigning ig + self._prb.getState().setInitialGuess(xig) + self._prb.getInput().setInitialGuess(uig) + + return xig, uig + + def _update_open_loop(self, + bootstrap: bool = False): + + q_state, v_state, a_state=self._set_is_open() + + if not bootstrap: + self._set_ig() + else: + self._set_ig_bootstrap(q_state=q_state, v_state=v_state) + + # robot_state=xig[:, 0] + # # open loop update: + # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0 + # # is node 1 in the last opt solution) + + return q_state, v_state, a_state + + def _update_closed_loop(self, + bootstrap: bool = False): + + # set initial state + q_state=None + v_state=None + a_state=None + if self._custom_opts["adaptive_is"]: + # adaptive closed loop + q_state, v_state, a_state=self._set_is_adaptive() + elif self._custom_opts["fully_closed"]: + q_state, v_state, a_state=self._set_is_full() + elif self._custom_opts["closed_partial"]: + q_state, v_state, a_state=self._set_is_partial() + else: + Journal.log(self.__class__.__name__, + "_update_closed_loop", + "Neither adaptive_is, fully_closed, or closed_partial.", + LogType.EXCEP, + throw_when_excep = False) + q_state, v_state, a_state=self._set_is() + + # set initial guess for controller + if not bootstrap: + self._set_ig() + else: + self._set_ig_bootstrap(q_state=q_state, v_state=v_state) + + return q_state, v_state, a_state + + def _set_is_open(self): + + # overriding states with rhc data + q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1) + + twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1) + v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_q_full_rhc=q[0:7] # 
root full q + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_twist_rhc=vel[0:6] # lin v. + jnts_v_rhc=vel[6:] # jnts v + + self.rhc_refs.set_alpha(alpha=1.0) # fully open + + # close state on known quantities + root_q_full_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_twist_rhc.setBounds(lb=twist_root, + ub=twist_root, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((twist_root, v_jnts), + axis=0) + + return (q_state, v_state, None) + + def _set_is_full(self): + + # measurements + q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_full_q_rhc=q[0:7] # root p + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_full_q_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) # leaving lin v of the base free (estimated from constraints) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_partial(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", 
robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # overriding states with rhc data (-> all overridden state are open looop) + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc # position is always open loop + + if not self._custom_opts["estimate_v_root"]: + v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1) + # override v jnts with the ones from controller + if not self._custom_opts["use_jnt_v_feedback"]: + v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # root_twist_from_rhc=self._get_root_twist_from_sol(node_idx=1) + # root_v_from_rhc=root_twist_from_rhc[:, 0:3].reshape(-1, 1) + # root_omega_from_rhc=root_twist_from_rhc[:, 3:6].reshape(-1, 1) + # jnt_q_from_rhc=self._get_jnt_q_from_sol(node_idx=1,reduce=False,clamp=False).reshape(-1, 1) + # jnt_v_from_rhc=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + 
q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root, + ub=q_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints) + else: # get it from controller + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_adaptive(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = 
self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + # getting prediction defects + root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # close state on known quantities, estimate some (e.g. lin vel) and + # open loop if thing start to explode + alpha_now=1.0 + delta=0.0 + if self._custom_opts["alpha_from_outside"]: + alpha_now=self.rhc_refs.get_alpha() + else: # "autotuned" alpha + if self._custom_opts["estimate_v_root"]: # we copmute delta based on jnt v (since we use meas.) 
+ delta=np.max(np.abs(jnt_v_delta)) + else: + delta=np.max(np.abs(omega_root_delta)) + # fail_idx=self._get_failure_index() + # fail_idx=self._get_explosion_idx()/self._fail_idx_thresh + alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0 + + bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1] + self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db + self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db + + self._alphas_q_root[:]=alpha_now # for now single alpha for everything + self._alphas_q_jnts[:]=alpha_now + self._alphas_twist_root[:]=alpha_now + self._alphas_v_jnts[:]=alpha_now + self._alphas_a[:]=alpha_now + if not self._custom_opts["estimate_v_root"]: + self._alphas_twist_root[0:3]=1.0 # open + self._alphas_v_jnts[:]=1.0 # open + + # position is always open loop + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc + + # expaning meas q if continuous joints + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + + # continous joints position is always open loop, but we need a delta vector of matching dimension + q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1) + + self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:] + + self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\ + np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.cos(q_jnts[self._continuous_joints_idxs_red, :]) + self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\ + np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.sin(q_jnts[self._continuous_joints_idxs_red, :]) + + q_jnts=self._jnts_q_expanded.reshape(-1,1) # overriting with expanded jnts + jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overriting with expanded jnts + + self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop + self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop + + # self._alphas_v_jnts[self._continuous_joints_idxs_red, :]=0.0 # open loop + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta, + ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta, + ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0) + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) + else: + root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, + ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0) + 
root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, + ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta, + ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0) + if self._custom_opts["lin_a_feedback"]: + lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _solve(self): + + if self._debug: + return self._db_solve(bootstrap=False) + else: + return self._min_solve(bootstrap=False) + + def _bootstrap(self): + + if self._debug: + return self._db_solve(bootstrap=True) + else: + return self._min_solve(bootstrap=True) + + def _min_solve(self, bootstrap: bool = False): + # minimal solve version -> no debug + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using robot measurements + + self._pm.shift() # shifts phases of one dt + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + try: + if not bootstrap: + converged = self._ti.rti() # RTI step + else: + converged = self._ti.bootstrap() # full solve (to convergence) + self.sol_counter = self.sol_counter + 1 + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + return False + + def _db_solve(self, bootstrap: bool = False): + + self._timer_start = time.perf_counter() + + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using robot measurements + + self._prb_update_time = time.perf_counter() + self._pm.shift() # shifts phases of one dt + self._phase_shift_time = time.perf_counter() + + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + self._task_ref_update_time = time.perf_counter() + + try: + if not bootstrap: + converged = self._ti.rti() # RTI step + else: + converged = self._ti.bootstrap() # full solve bootstrap + self._rti_time = time.perf_counter() + self.sol_counter = self.sol_counter + 1 + self._update_db_data() + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + if self._verbose: + solve_mode = "RTI" if not bootstrap else "Bootstrap" + exception = f"{solve_mode}() for controller {self.controller_index} failed" + \ + f" with exception {type(e).__name__}" + Journal.log(self.__class__.__name__, + "solve", + exception, + LogType.EXCEP, + throw_when_excep = False) + self._update_db_data() + return False + + def _get_fail_idx(self): + + self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx() + self._expl_idx_buffer_counter+=1 + self._expl_idx_counter+=1 + if self._expl_idx_counter%self._expl_idx_window_size==0: + self._expl_idx_buffer_counter=0 # restart from 0 + + running_avrg=np.mean(self._explosion_idx_buffer).item() + + return running_avrg + + def _get_explosion_idx(self): + explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale + return explosion_index + + def _update_db_data(self): + + self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start + self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time + self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time 
- self._phase_shift_time + self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time + self.rhc_costs.update(self._ti.solver_rti.getCostsValues()) + self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues()) + + def _reset(self): + + # reset task interface (ig, solvers, etc..) + + # phase manager and sets bootstap as solution + self._gm.reset() + self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution + self._expl_idx_counter=0.0 + self._expl_idx_buffer_counter=0 + + def _get_cost_info(self): + + cost_dict = self._ti.solver_rti.getCostsValues() + cost_names = list(cost_dict.keys()) + cost_dims = [1] * len(cost_names) # costs are always scalar + return cost_names, cost_dims + + def _get_constr_info(self): + + constr_dict = self._ti.solver_rti.getConstraintsValues() + constr_names = list(constr_dict.keys()) + constr_dims = [-1] * len(constr_names) + i = 0 + for constr in constr_dict: + constr_val = constr_dict[constr] + constr_shape = constr_val.shape + constr_dims[i] = constr_shape[0] + i+=1 + return constr_names, constr_dims + + def _get_q_from_sol(self): + full_q=self._ti.solution['q'].astype(self._dtype) + if self._custom_opts["replace_continuous_joints"]: + return full_q + else: + cont_jnts=full_q[self._continuous_joints_idxs_qfull, :] + cos=cont_jnts[::2, :] + sin=cont_jnts[1::2, :] + # copy root + self._full_q_reduced[0:7, :]=full_q[0:7, :] + # copy rev joint vals + self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :] + # and continuous + angle=np.arctan2(sin, cos) + self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle + return self._full_q_reduced + + def _get_v_from_sol(self): + return self._ti.solution['v'].astype(self._dtype) + + def _get_a_from_sol(self): + return self._ti.solution['a'].astype(self._dtype) + + def _get_a_dot_from_sol(self): + return None + + def _get_f_from_sol(self): + # to be overridden by child 
class + contact_names =self._get_contacts() # we use controller-side names + try: + data=[] + for key in contact_names: + contact_f=self._ti.solution["f_" + key].astype(self._dtype) + np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False) + np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f) + data.append(contact_f) + return np.concatenate(data, axis=0) + except: + return None + + def _get_f_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_eff_from_sol(self): + # to be overridden by child class + return None + + def _get_cost_from_sol(self, + cost_name: str): + return self.rhc_costs[cost_name] + + def _get_constr_from_sol(self, + constr_name: str): + return self.rhc_constr[constr_name] diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/isaac_world_interface.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/isaac_world_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..44d8c26f73cc9308ee4eb14a00d47a6488b73563 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/isaac_world_interface.py @@ -0,0 +1,2355 @@ +# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com) +# +# This file is part of AugMPCEnvs and distributed under the General Public License version 2 license. +# +# AugMPCEnvs is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# AugMPCEnvs is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with AugMPCEnvs. If not, see . +# +from isaacsim import SimulationApp + +import carb + +import os +import math + +import torch +import numpy as np + +from typing import Dict, List +from typing_extensions import override + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal + +from aug_mpc_envs.utils.math_utils import quat_to_omega +from aug_mpc_envs.utils.height_grid_visualizer import HeightGridVisualizer +from aug_mpc_envs.utils.height_sensor import HeightGridSensor + +from aug_mpc.world_interfaces.world_interface_base import AugMPCWorldInterfaceBase +from mpc_hive.utilities.math_utils_torch import world2base_frame,world2base_frame3D + +class IsaacSimEnv(AugMPCWorldInterfaceBase): + + def __init__(self, + robot_names: List[str], + robot_urdf_paths: List[str], + robot_srdf_paths: List[str], + jnt_imp_config_paths: List[str], + n_contacts: List[int], + cluster_dt: List[float], + use_remote_stepping: List[bool], + name: str = "IsaacSimEnv", + num_envs: int = 1, + debug = False, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + n_init_step: int = 0, + timeout_ms: int = 60000, + env_opts: Dict = None, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + override_low_lev_controller: bool = False): + + self._render_step_counter = 0 + + super().__init__(name=name, + robot_names=robot_names, + robot_urdf_paths=robot_urdf_paths, + robot_srdf_paths=robot_srdf_paths, + jnt_imp_config_paths=jnt_imp_config_paths, + n_contacts=n_contacts, + cluster_dt=cluster_dt, + use_remote_stepping=use_remote_stepping, + num_envs=num_envs, + debug=debug, + verbose=verbose, + vlevel=vlevel, + n_init_step=n_init_step, + timeout_ms=timeout_ms, + env_opts=env_opts, + use_gpu=use_gpu, + dtype=dtype, + override_low_lev_controller=override_low_lev_controller) + + def is_running(self): + return self._simulation_app.is_running() + + 
def _pre_setup(self): + self._backend="torch" + enable_livestream = self._env_opts["enable_livestream"] + enable_viewport = self._env_opts["enable_viewport"] + base_isaac_exp = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.kit' + base_isaac_exp_headless = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.headless.kit' + + experience=base_isaac_exp + if self._env_opts["headless"]: + info = f"Will run in headless mode." + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + if enable_livestream: + experience = "" + elif enable_viewport: + exception = f"Using viewport is not supported yet." + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + else: + experience=base_isaac_exp_headless + + self._simulation_app = SimulationApp({"headless": self._env_opts["headless"]}, + experience=experience) + # all imports depending on isaac sim kits have to be done after simulationapp + # from omni.isaac.core.tasks.base_task import BaseTask + self._import_isaac_pkgs() + info = "Using IsaacSim experience file @ " + experience + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + # carb.settings.get_settings().set("/persistent/omnihydra/useSceneGraphInstancing", True) + + if enable_livestream: + info = "Livestream enabled" + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + + self._simulation_app.set_setting("/app/livestream/enabled", True) + self._simulation_app.set_setting("/app/window/drawMouse", True) + self._simulation_app.set_setting("/app/livestream/proto", "ws") + self._simulation_app.set_setting("/app/livestream/websocket/framerate_limit", 120) + self._simulation_app.set_setting("/ngx/enabled", False) + enable_extension("omni.kit.livestream.native") + enable_extension("omni.services.streaming.manager") + self._render = ((not 
self._env_opts["headless"]) or self._env_opts["render_to_file"]) or enable_livestream or enable_viewport + + self._record = False + self._world = None + self._physics_context = None + self._scene = None + self._task = None + self._metadata = None + + self._robots_art_views = {} + self._blink_rigid_prim_views = {} + self._robots_articulations = {} + self._robots_geom_prim_views = {} + self.omni_contact_sensors = {} + + self._solver_position_iteration_count=self._env_opts["solver_position_iteration_count"] + self._solver_velocity_iteration_count=self._env_opts["solver_velocity_iteration_count"] + self._solver_stabilization_thresh=self._env_opts["stabilization_threshold"] + self._solver_position_iteration_counts={} + self._solver_velocity_iteration_counts={} + self._solver_stabilization_threshs={} + self._robot_bodynames={} + self._robot_n_links={} + self._robot_n_dofs={} + self._robot_dof_names={} + self._distr_offset={} # decribed how robots within each env are distributed + self._spawning_radius=self._env_opts["spawning_radius"] # [m] -> default distance between roots of robots in a single + self._height_sensors={} + self._height_imgs={} + self._height_vis={} + self._height_vis_step={} + self._height_vis_step={} + self._height_vis={} + self._terrain_hit_margin = 1.0 + self._terrain_hit_log_period = 1000 + self._terrain_hit_active = {} + self._terrain_hit_counts = {} + self._terrain_hit_counts_last_logged = {} + for robot_name in self._robot_names: + self._terrain_hit_active[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.bool) + self._terrain_hit_counts[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.int32) + self._terrain_hit_counts_last_logged[robot_name] = None + + def _import_isaac_pkgs(self): + # we use global, so that we can create the simulation app inside (and so + # access Isaac's kit) and also expose to all methods the imports + global World, omni_kit, get_context, UsdLux, Sdf, Gf, UsdPhysics, 
PhysicsSchemaTools, UsdShade, Vt + global enable_extension, set_camera_view, _urdf, move_prim, GridCloner, prim_utils + global get_current_stage, Scene, ArticulationView, RigidPrimView, rep + global OmniContactSensors, RlTerrains,OmniJntImpCntrl + global PhysxSchema, UsdGeom + global _sensor, _dynamic_control + global get_prim_at_path + + from pxr import PhysxSchema, UsdGeom, UsdShade, Vt + + from omni.isaac.core.world import World + from omni.usd import get_context + from pxr import UsdLux, Sdf, Gf, UsdPhysics, PhysicsSchemaTools + from omni.isaac.core.utils.extensions import enable_extension + from omni.isaac.core.utils.viewports import set_camera_view + import omni.kit as omni_kit + from omni.importer.urdf import _urdf + from omni.isaac.core.utils.prims import move_prim + from omni.isaac.cloner import GridCloner + import omni.isaac.core.utils.prims as prim_utils + from omni.isaac.core.utils.stage import get_current_stage + from omni.isaac.core.scenes.scene import Scene + from omni.isaac.core.articulations import ArticulationView + from omni.isaac.core.prims import RigidPrimView + + import omni.replicator.core as rep + + from omni.isaac.core.utils.prims import get_prim_at_path + + from omni.isaac.sensor import _sensor + + from omni.isaac.dynamic_control import _dynamic_control + + from aug_mpc_envs.utils.contact_sensor import OmniContactSensors + from aug_mpc_envs.utils.omni_jnt_imp_cntrl import OmniJntImpCntrl + from aug_mpc_envs.utils.terrains import RlTerrains + + def _parse_env_opts(self): + isaac_opts={} + isaac_opts["envs_ns"]="/World/envs" + isaac_opts["template_env_ns"]=isaac_opts["envs_ns"] + "/env_0" + isaac_opts["base_linkname"]="base_link" + isaac_opts["deduce_base_link"]=False + isaac_opts["ground_plane_prim_path"]="/World/terrain" + isaac_opts["physics_prim_path"]="/physicsScene" + isaac_opts["use_gpu"]=True + isaac_opts["use_gpu_pipeline"]=True + isaac_opts["device"]="cuda" + isaac_opts["is_fixed_base"]=False + isaac_opts["merge_fixed_jnts"]=True + 
isaac_opts["self_collide"]=True + isaac_opts["sim_device"]="cuda" if isaac_opts["use_gpu"] else "cpu" + isaac_opts["physics_dt"]=1e-3 + isaac_opts["gravity"] = np.array([0.0, 0.0, -9.81]) + isaac_opts["use_fabric"]=True# Enable/disable reading of physics buffers directly. Default is True. + isaac_opts["replicate_physics"]=True + # isaac_opts["worker_thread_count"]=4 + isaac_opts["solver_type"]=1 # 0: PGS, 1:TGS, defaults to TGS. PGS faster but TGS more stable + isaac_opts["enable_stabilization"]=True + # isaac_opts["bounce_threshold_velocity"] = 0.2 + # isaac_opts["friction_offset_threshold"] = 0.04 + # isaac_opts["friction_correlation_distance"] = 0.025 + # isaac_opts["enable_sleeping"] = True + # Per-actor settings ( can override in actor_options ) + isaac_opts["solver_position_iteration_count"] = 4 # defaults to 4 + isaac_opts["solver_velocity_iteration_count"] = 3 # defaults to 1 + isaac_opts["sleep_threshold"] = 1e-5 # Mass-normalized kinetic energy threshold below which an actor may go to sleep. + # Allowed range [0, max_float). 
+ isaac_opts["stabilization_threshold"] = 1e-4 + # Per-body settings ( can override in actor_options ) + # isaac_opts["enable_gyroscopic_forces"] = True + # isaac_opts["density"] = 1000 # density to be used for bodies that do not specify mass or density + # isaac_opts["max_depenetration_velocity"] = 100.0 + # isaac_opts["solver_velocity_iteration_count"] = 1 + # GPU buffers settings + isaac_opts["gpu_max_rigid_contact_count"] = 512 * 2048 + isaac_opts["gpu_max_rigid_patch_count"] = 80 * 2048 + isaac_opts["gpu_found_lost_pairs_capacity"] = 102400 + isaac_opts["gpu_found_lost_aggregate_pairs_capacity"] = 102400 + isaac_opts["gpu_total_aggregate_pairs_capacity"] = 102400 + # isaac_opts["gpu_max_soft_body_contacts"] = 1024 * 1024 + # isaac_opts["gpu_max_particle_contacts"] = 1024 * 1024 + # isaac_opts["gpu_heap_capacity"] = 64 * 1024 * 1024 + # isaac_opts["gpu_temp_buffer_capacity"] = 16 * 1024 * 1024 + # isaac_opts["gpu_max_num_partitions"] = 8 + + isaac_opts["env_spacing"]=15.0 + isaac_opts["spawning_height"]=1.0 + isaac_opts["spawning_radius"]=1.0 + isaac_opts["spawn_height_check_half_extent"]=0.2 + isaac_opts["spawn_height_cushion"]=0.03 + + isaac_opts["enable_height_vis"]=False + isaac_opts["height_vis_radius"]=0.03 + isaac_opts["height_vis_update_period"]=1 + isaac_opts["collision_refinement_level"]=3 # increase cylinder tesselation for smoother wheel contacts + + # rendering helpers + isaac_opts["render_to_file"]=False + isaac_opts["use_follow_camera"]=False # if True, follow robot during rendering in human mode + isaac_opts["render_follow_env_idx"]=0 + isaac_opts["render_follow_robot_idx"]=0 + isaac_opts["render_follow_offset"]=[-0.2, 3.0, 0.1] + isaac_opts["render_follow_target_offset"]=[-0.2, -1.0, 0.0] + isaac_opts["rendering_dt"]=15*isaac_opts["physics_dt"] + isaac_opts["camera_prim_path"]="/OmniverseKit_Persp" + isaac_opts["render_resolution"]=[1280, 720] # [1024, 576] + + isaac_opts["render_panoramic_cam"]=True + 
isaac_opts["render_panoramic_cam_height"]=2.0 + isaac_opts["render_panoramic_cam_target_xy"]=[10.0, 14.] + isaac_opts["render_panoramic_cam_target_z"]=1.2 + + # ground opts + isaac_opts["use_flat_ground"]=True + isaac_opts["static_friction"]=0.5 + isaac_opts["dynamic_friction"]=0.5 + isaac_opts["restitution"]=0.1 + isaac_opts["ground_type"]="random" + isaac_opts["ground_size"]=800 + isaac_opts["terrain_border"]=isaac_opts["ground_size"]/2 + isaac_opts["dh_ground"]=0.03 + isaac_opts["step_height_lb"]=0.08 + isaac_opts["step_height_ub"]=0.12 + isaac_opts["step_width_lb"]=1.0 + isaac_opts["step_width_ub"]= 1.5 + isaac_opts["contact_prims"] = [] + isaac_opts["sensor_radii"] = 0.1 + isaac_opts["contact_offsets"] = {} + + isaac_opts["enable_livestream"] = False + isaac_opts["enable_viewport"] = False + + isaac_opts["use_diff_vels"] = False + + # random perturbations (impulse, durations, directions are sampled uniformly, force/torque computed accordinly) + isaac_opts["use_random_pertub"]=False + isaac_opts["pert_planar_only"]=True # if True, linear pushes only in xy plane and no torques + + isaac_opts["pert_wrenches_rate"]=30.0 # on average 1 pert every pert_wrenches_rate seconds + isaac_opts["pert_wrenches_min_duration"]=0.6 + isaac_opts["pert_wrenches_max_duration"]=3.5 # [s] + isaac_opts["pert_force_max_weight_scale"]=0.4 # clip force norm to scale*weight + isaac_opts["pert_force_min_weight_scale"]=0.1 # optional min force norm as scale*weight + isaac_opts["pert_torque_max_weight_scale"]=1.0 # clip torque norm to scale*weight*max_ang_impulse_lever + + isaac_opts["pert_target_delta_v"]=2.0 # [m/s] desired max impulse = m*delta_v + isaac_opts["det_pert_rate"]=True + + # max impulse (unitless scale multiplied by weight to get N*s): delta_v/g + isaac_opts["max_lin_impulse_norm"]=isaac_opts["pert_target_delta_v"]/9.81 + isaac_opts["lin_impulse_mag_min"]=1.0 # [0, 1] -> min fraction of max_lin_impulse_norm when sampling + isaac_opts["lin_impulse_mag_max"]=1.0 # [0, 1] -> max 
fraction of max_lin_impulse_norm when sampling + + isaac_opts["max_ang_impulse_lever"]=0.2 # [m] + isaac_opts["max_ang_impulse_norm"]=isaac_opts["max_lin_impulse_norm"]*isaac_opts["max_ang_impulse_lever"] + isaac_opts["terrain_hit_log_period"]=1000 + + # opts definition end + + isaac_opts.update(self._env_opts) # update defaults with provided opts + isaac_opts["rendering_freq"]=int(isaac_opts["rendering_dt"]/isaac_opts["physics_dt"]) + # isaac_opts["rendering_dt"]=isaac_opts["physics_dt"] # forcing rendering_dt==physics_dt + # for some mystic reason simulation is infuenced by the rendering dt (why ??????) + + # modify things + isaac_opts["cloning_offset"] = np.array([[0.0, 0.0, isaac_opts["spawning_height"]]]*self._num_envs) + if not isaac_opts["use_gpu"]: # don't use GPU at all + isaac_opts["use_gpu_pipeline"]=False + isaac_opts["device"]="cpu" + isaac_opts["sim_device"]="cpu" + else: # use GPU + if isaac_opts["use_gpu_pipeline"]: + isaac_opts["device"]="cuda" + isaac_opts["sim_device"]="cuda" + else: # cpu pipeline + isaac_opts["device"]="cpu" + isaac_opts["sim_device"]="cpu" + isaac_opts["use_gpu"]=False + # isaac_opts["sim_device"]=isaac_opts["device"] + # overwrite env opts in case some sim params were missing + self._env_opts=isaac_opts + + # update device flag based on sim opts + self._device=isaac_opts["device"] + self._use_gpu=isaac_opts["use_gpu"] + + def _calc_robot_distrib(self): + + import math + # we distribute robots in a single env. 
along the + # circumference of a circle of given radius + n_robots = len(self._robot_names) + offset_baseangle = 2 * math.pi / n_robots + for i in range(n_robots): + offset_angle = offset_baseangle * (i + 1) + robot_offset_wrt_center = torch.tensor([self._spawning_radius * math.cos(offset_angle), + self._spawning_radius * math.sin(offset_angle), 0], + device=self._device, + dtype=self._dtype) + # list with n references to the original tensor + tensor_list = [robot_offset_wrt_center] * self._num_envs + self._distr_offset[self._robot_names[i]] = torch.stack(tensor_list, dim=0) + + def _init_world(self): + + self._cloner = GridCloner(spacing=self._env_opts["env_spacing"]) + self._cloner.define_base_env(self._env_opts["envs_ns"]) + prim_utils.define_prim(self._env_opts["template_env_ns"]) + self._envs_prim_paths = self._cloner.generate_paths(self._env_opts["envs_ns"] + "/env", + self._num_envs) + + # parse device based on sim_param settings + + info = "Using sim device: " + str(self._env_opts["sim_device"]) + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + + self._world = World( + physics_dt=self._env_opts["physics_dt"], + rendering_dt=self._env_opts["physics_dt"], # == physics dt (rendering is actually done manually by this env) + backend=self._backend, + device=str(self._env_opts["sim_device"]), + physics_prim_path=self._env_opts["physics_prim_path"], + set_defaults = False, # set to True to use the defaults settings [physics_dt = 1.0/ 60.0, + # stage units in meters = 0.01 (i.e in cms), rendering_dt = 1.0 / 60.0, gravity = -9.81 m / s + # ccd_enabled, stabilization_enabled, gpu dynamics turned off, + # broadcast type is MBP, solver type is TGS] + sim_params=self._env_opts + ) + + big_info = "[World] Creating Isaac simulation " + self._name + "\n" + \ + "use_gpu_pipeline: " + str(self._env_opts["use_gpu_pipeline"]) + "\n" + \ + "device: " + str(self._env_opts["sim_device"]) + "\n" +\ + "backend: " + 
str(self._backend) + "\n" +\ + "integration_dt: " + str(self.physics_dt()) + "\n" + \ + "rendering_dt: " + str(self.rendering_dt()) + "\n" + Journal.log(self.__class__.__name__, + "_init_world", + big_info, + LogType.STAT, + throw_when_excep = True) + + # we get the physics context to expose additional low-level ## + # settings of the simulation + self._physics_context = self._world.get_physics_context() + self._physics_scene_path = self._physics_context.prim_path + # self._physics_context.enable_gpu_dynamics(True) + self._physics_context.enable_stablization(True) + self._physics_scene_prim = self._physics_context.get_current_physics_scene_prim() + self._solver_type = self._physics_context.get_solver_type() + + if "gpu_max_rigid_contact_count" in self._env_opts: + self._physics_context.set_gpu_max_rigid_contact_count(self._env_opts["gpu_max_rigid_contact_count"]) + if "gpu_max_rigid_patch_count" in self._env_opts: + self._physics_context.set_gpu_max_rigid_patch_count(self._env_opts["gpu_max_rigid_patch_count"]) + if "gpu_found_lost_pairs_capacity" in self._env_opts: + self._physics_context.set_gpu_found_lost_pairs_capacity(self._env_opts["gpu_found_lost_pairs_capacity"]) + if "gpu_found_lost_aggregate_pairs_capacity" in self._env_opts: + self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(self._env_opts["gpu_found_lost_aggregate_pairs_capacity"]) + if "gpu_total_aggregate_pairs_capacity" in self._env_opts: + self._physics_context.set_gpu_total_aggregate_pairs_capacity(self._env_opts["gpu_total_aggregate_pairs_capacity"]) + if "gpu_max_soft_body_contacts" in self._env_opts: + self._physics_context.set_gpu_max_soft_body_contacts(self._env_opts["gpu_max_soft_body_contacts"]) + if "gpu_max_particle_contacts" in self._env_opts: + self._physics_context.set_gpu_max_particle_contacts(self._env_opts["gpu_max_particle_contacts"]) + if "gpu_heap_capacity" in self._env_opts: + self._physics_context.set_gpu_heap_capacity(self._env_opts["gpu_heap_capacity"]) + if 
"gpu_temp_buffer_capacity" in self._env_opts: + self._physics_context.set_gpu_temp_buffer_capacity(self._env_opts["gpu_temp_buffer_capacity"]) + if "gpu_max_num_partitions" in self._env_opts: + self._physics_context.set_gpu_max_num_partitions(self._env_opts["gpu_max_num_partitions"]) + + # overwriting defaults + # self._physics_context.set_gpu_max_rigid_contact_count(2 * self._physics_context.get_gpu_max_rigid_contact_count()) + # self._physics_context.set_gpu_max_rigid_patch_count(2 * self._physics_context.get_gpu_max_rigid_patch_count()) + # self._physics_context.set_gpu_found_lost_pairs_capacity(2 * self._physics_context.get_gpu_found_lost_pairs_capacity()) + # self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity()) + # self._physics_context.set_gpu_total_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_total_aggregate_pairs_capacity()) + # self._physics_context.set_gpu_heap_capacity(2 * self._physics_context.get_gpu_heap_capacity()) + # self._physics_context.set_gpu_temp_buffer_capacity(20 * self._physics_context.get_gpu_heap_capacity()) + # self._physics_context.set_gpu_max_num_partitions(20 * self._physics_context.get_gpu_temp_buffer_capacity()) + + # GPU buffers + self._gpu_max_rigid_contact_count = self._physics_context.get_gpu_max_rigid_contact_count() + self._gpu_max_rigid_patch_count = self._physics_context.get_gpu_max_rigid_patch_count() + self._gpu_found_lost_pairs_capacity = self._physics_context.get_gpu_found_lost_pairs_capacity() + self._gpu_found_lost_aggregate_pairs_capacity = self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity() + self._gpu_total_aggregate_pairs_capacity = self._physics_context.get_gpu_total_aggregate_pairs_capacity() + self._gpu_max_soft_body_contacts = self._physics_context.get_gpu_max_soft_body_contacts() + self._gpu_max_particle_contacts = self._physics_context.get_gpu_max_particle_contacts() + self._gpu_heap_capacity 
= self._physics_context.get_gpu_heap_capacity() + self._gpu_temp_buffer_capacity = self._physics_context.get_gpu_temp_buffer_capacity() + # self._gpu_max_num_partitions = physics_context.get_gpu_max_num_partitions() # BROKEN->method does not exist + + big_info2 = "[physics context]:" + "\n" + \ + "gpu_max_rigid_contact_count: " + str(self._gpu_max_rigid_contact_count) + "\n" + \ + "gpu_max_rigid_patch_count: " + str(self._gpu_max_rigid_patch_count) + "\n" + \ + "gpu_found_lost_pairs_capacity: " + str(self._gpu_found_lost_pairs_capacity) + "\n" + \ + "gpu_found_lost_aggregate_pairs_capacity: " + str(self._gpu_found_lost_aggregate_pairs_capacity) + "\n" + \ + "gpu_total_aggregate_pairs_capacity: " + str(self._gpu_total_aggregate_pairs_capacity) + "\n" + \ + "gpu_max_soft_body_contacts: " + str(self._gpu_max_soft_body_contacts) + "\n" + \ + "gpu_max_particle_contacts: " + str(self._gpu_max_particle_contacts) + "\n" + \ + "gpu_heap_capacity: " + str(self._gpu_heap_capacity) + "\n" + \ + "gpu_temp_buffer_capacity: " + str(self._gpu_temp_buffer_capacity) + "\n" + \ + "use_gpu_sim: " + str(self._world.get_physics_context().use_gpu_sim) + "\n" + \ + "use_gpu_pipeline: " + str(self._world.get_physics_context().use_gpu_pipeline) + "\n" + \ + "use_fabric: " + str(self._world.get_physics_context().use_fabric) + "\n" + \ + "world device: " + str(self._world.get_physics_context().device) + "\n" + \ + "physics context device: " + str(self._world.get_physics_context().device) + "\n" + + Journal.log(self.__class__.__name__, + "set_task", + big_info2, + LogType.STAT, + throw_when_excep = True) + + self._scene = self._world.scene + self._physics_context = self._world.get_physics_context() + + self._stage = get_context().get_stage() + + # strong, uniform lighting: bright sun + dome fill to cover the whole terrain + prim_utils.define_prim("/World/Lighting", "Xform") + sun_path = "/World/Lighting/SunLight" + dome_path = "/World/Lighting/AmbientDome" + + distantLight = 
UsdLux.DistantLight.Define(self._stage, Sdf.Path(sun_path)) + distantLight.CreateIntensityAttr(450.0) + distantLight.CreateAngleAttr(0.5) # soften shadows a bit + distantLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0)) + # Shadow attr naming differs across versions; set the underlying USD attribute directly. + distantLight.GetPrim().CreateAttribute("shadow:enable", Sdf.ValueTypeNames.Bool).Set(True) + + domeLight = UsdLux.DomeLight.Define(self._stage, Sdf.Path(dome_path)) + domeLight.CreateIntensityAttr(200.0) + domeLight.CreateExposureAttr(1.0) + domeLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0)) + + self._configure_scene() + + # if "enable_viewport" in sim_params: + # self._render = sim_params["enable_viewport"] + + def _get_baselink_candidate(self, + robot_name: str): + + stage=get_current_stage() + all_prims = [prim.GetPath().pathString for prim in stage.Traverse()] + filtered_prims = [prim for prim in all_prims if f"/{robot_name}/" in prim] + + matching=min(filtered_prims, key=len) if filtered_prims else None + + return matching.split('/')[-1] + + def _configure_scene(self): + + # environment + self._fix_base = [self._env_opts["is_fixed_base"]] * len(self._robot_names) + self._self_collide = [self._env_opts["self_collide"]] * len(self._robot_names) + self._merge_fixed = [self._env_opts["merge_fixed_jnts"]] * len(self._robot_names) + + Journal.log(self.__class__.__name__, + "_configure_scene", + "cloning environments...", + LogType.STAT, + throw_when_excep = True) + + self._ground_plane_prim_paths=[] + self._ground_plane=None + self.terrain_generator = None + if not self._env_opts["use_flat_ground"]: + # ensure terrain is large enough to contain all env clones + spacing = float(self._env_opts["env_spacing"]) + num_envs = self._num_envs + num_per_row = max(1, int(math.sqrt(num_envs))) + num_rows = int(math.ceil(num_envs / num_per_row)) + num_cols = int(math.ceil(num_envs / num_rows)) + row_offset = 0.5 * spacing * (num_rows - 1) + col_offset = 0.5 * spacing * 
(num_cols - 1) + margin = spacing # leave a full spacing as border cushion + required_size = 2.0 * (max(row_offset, col_offset) + margin) + + if required_size > self._env_opts["ground_size"]: + old_size = self._env_opts["ground_size"] + self._env_opts["ground_size"] = required_size + self._env_opts["terrain_border"] = self._env_opts["ground_size"] / 2.0 + Journal.log(self.__class__.__name__, + "_configure_scene", + f"Ground size increased from {old_size} to {required_size} to fit {num_envs} envs (spacing {spacing}).", + LogType.WARN, + throw_when_excep = True) + + min_height=-self._env_opts["dh_ground"] + max_height=self._env_opts["dh_ground"] + step=max_height-min_height + if self._env_opts["ground_type"]=="random": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_random_uniform_terrain(terrain_size=self._env_opts["ground_size"], + min_height=min_height, + max_height=max_height, + step=step, + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"]) + elif self._env_opts["ground_type"]=="random_patches": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif_patches" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_random_patched_terrain(terrain_size=self._env_opts["ground_size"], + min_height=min_height, + max_height=max_height, + step=step, + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + patch_ratio=0.8, + 
patch_size=10 + ) + elif self._env_opts["ground_type"]=="slopes": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_slopes" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_sloped_terrain(terrain_size=self._env_opts["ground_size"], + slope=-0.5, + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"] + ) + elif self._env_opts["ground_type"]=="stairs": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stairs" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_stairs_terrain(terrain_size=self._env_opts["ground_size"], + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + ) + elif self._env_opts["ground_type"]=="stepup": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_stepup_terrain( + terrain_size=self._env_opts["ground_size"], + stairs_ratio=0.3, + min_steps=1, + max_steps=1, + pyramid_platform_size=15.0, + position=np.array([0.0, 0.0, 0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + step_height=0.15 + ) + elif self._env_opts["ground_type"]=="stepup_prim": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup_prim" + 
self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_stepup_prim_terrain( + terrain_size=self._env_opts["ground_size"], + stairs_ratio=0.9, + platform_size=50.0, + step_height_lb=self._env_opts["step_height_lb"], + step_height_ub=self._env_opts["step_height_ub"], + min_step_width=self._env_opts.get("step_width_lb", None), + max_step_width=self._env_opts.get("step_width_ub", None), + position=np.array([0.0, 0.0, 0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + n_steps=25, + area_factor=0.7, + random_n_steps=False + ) + # apply a custom checker material to the terrain primitives + mat_path = self._ensure_lightblue_checker_material() + self._apply_checker_material_to_terrain(terrain_root_path=terrain_prim_path, material_path=mat_path) + self._add_checker_overlay_plane(terrain_root_path=terrain_prim_path, material_path=mat_path) + self._add_checker_overlays_on_tiles(terrain_root_path=terrain_prim_path, material_path=mat_path) + else: + ground_type=self._env_opts["ground_type"] + Journal.log(self.__class__.__name__, + "_configure_scene", + f"Terrain type {ground_type} not supported. 
Will default to flat ground.", + LogType.EXCEP, + throw_when_excep = True) + + # add offsets to intial height depending on the terrain heightmap + if self.terrain_generator is not None: + stage = get_current_stage() + up_axis = UsdGeom.GetStageUpAxis(stage) + + spacing = self._env_opts["env_spacing"] + num_envs = self._num_envs + + num_per_row = max(1, int(np.sqrt(num_envs))) + num_rows = int(np.ceil(num_envs / num_per_row)) + num_cols = int(np.ceil(num_envs / num_rows)) + + row_offset = 0.5 * spacing * (num_rows - 1) + col_offset = 0.5 * spacing * (num_cols - 1) + + offsets = np.array(self._env_opts["cloning_offset"], dtype=float) + + for env_idx in range(num_envs): + row = env_idx // num_cols + col = env_idx % num_cols + x = row_offset - row * spacing + y = col * spacing - col_offset + + half_extent = self._env_opts["spawn_height_check_half_extent"] + if up_axis == UsdGeom.Tokens.z: + height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent) + else: + height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent) + + offsets[env_idx][2] += height + self._env_opts["spawn_height_cushion"] + + self._env_opts["cloning_offset"] = offsets + + else: + defaul_prim_path=self._env_opts["ground_plane_prim_path"]+"_default" + self._ground_plane_prim_paths.append(defaul_prim_path) + self._ground_plane=self._scene.add_default_ground_plane(z_position=0, + name="terrain", + prim_path=defaul_prim_path, + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"]) + self._terrain_collision=None + + physics_material=UsdPhysics.MaterialAPI.Apply(self._ground_plane.prim) + physics_material.CreateDynamicFrictionAttr(self._env_opts["dynamic_friction"]) + physics_material.CreateStaticFrictionAttr(self._env_opts["static_friction"]) + physics_material.CreateRestitutionAttr(self._env_opts["restitution"]) + # 
self._ground_plane.apply_physics_material(physics_material) + + self._terrain_collision=PhysxSchema.PhysxCollisionAPI.Get(get_current_stage(), self._ground_plane.prim_path) + self._terrain_material=UsdPhysics.MaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path) + self._terrain_physix_material=PhysxSchema.PhysxMaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path) + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + urdf_path = self._robot_urdf_paths[robot_name] + srdf_path = self._robot_srdf_paths[robot_name] + fix_base = self._fix_base[i] + self_collide = self._self_collide[i] + merge_fixed = self._merge_fixed[i] + + self._generate_rob_descriptions(robot_name=robot_name, + urdf_path=urdf_path, + srdf_path=srdf_path) + self._import_urdf(robot_name, + fix_base=fix_base, + self_collide=self_collide, + merge_fixed=merge_fixed) + + self._cloner.clone( + source_prim_path=self._env_opts["template_env_ns"], + prim_paths=self._envs_prim_paths, + replicate_physics=self._env_opts["replicate_physics"], + position_offsets=self._env_opts["cloning_offset"] + ) # we can clone the environment in which all the robos are + + base_link_name=self._env_opts["base_linkname"] + if self._env_opts["deduce_base_link"]: + base_link_name=self._get_baselink_candidate(robot_name=robot_name) + + self._robots_art_views[robot_name] = ArticulationView(name = robot_name + "ArtView", + prim_paths_expr = self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name, + reset_xform_properties=False) + self._robots_articulations[robot_name] = self._scene.add(self._robots_art_views[robot_name]) + + # height grid sensor (terrain may be None if using flat ground) + self._height_sensors[robot_name] = HeightGridSensor( + terrain_utils=self.terrain_generator if not self._env_opts["use_flat_ground"] else None, + grid_size=int(self._env_opts["height_sensor_pixels"]), + resolution=float(self._env_opts["height_sensor_resolution"]), + 
forward_offset=float(self._env_opts["height_sensor_forward_offset"]), + lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]), + n_envs=self._num_envs, + device=self._device, + dtype=self._dtype) + # ensure shared-data flags are set if a height sensor is active + self._env_opts["height_sensor_pixels"] = int(self._env_opts["height_sensor_pixels"]) + self._env_opts["height_sensor_resolution"] = float(self._env_opts["height_sensor_resolution"]) + self._enable_height_shared = True + self._height_sensor_pixels = self._env_opts["height_sensor_pixels"] + self._height_sensor_resolution = self._env_opts["height_sensor_resolution"] + self._height_vis_step[robot_name] = 0 + if self._env_opts["enable_height_vis"]: + self._height_vis[robot_name] = HeightGridVisualizer( + robot_name=robot_name, + num_envs=self._num_envs, + grid_size=int(self._env_opts["height_sensor_pixels"]), + resolution=float(self._env_opts["height_sensor_resolution"]), + marker_radius=float(self._env_opts["height_vis_radius"]), + forward_offset=float(self._env_opts["height_sensor_forward_offset"]), + lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]), + device=self._device, + dtype=self._dtype) + + self._blink_rigid_prim_views[robot_name] = RigidPrimView(prim_paths_expr=self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name, + name = robot_name + "RigidPrimView") # base link prim views + self._scene.add(self._blink_rigid_prim_views[robot_name]) # need to add so it is properly initialized when resetting world + + # self._robots_geom_prim_views[robot_name] = GeometryPrimView(name = robot_name + "GeomView", + # prim_paths_expr = self._env_ns + "/env*"+ "/" + robot_name, + # # prepare_contact_sensors = True + # ) + # self._robots_geom_prim_views[robot_name].apply_collision_apis() # to be able to apply contact sensors + + # init contact sensors + self._init_contact_sensors(robot_name=robot_name) # IMPORTANT: this has to be called + # after calling the 
clone() method and initializing articulation views!! + + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_1/collisions/mesh_1") + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_2/collisions/mesh_1") + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_3/collisions/mesh_1") + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_4/collisions/mesh_1") + + # filter collisions between default ground plane and custom terrains + # self._cloner.filter_collisions(physicsscene_path = self._physics_context.prim_path, + # collision_root_path = "/World/terrain_collisions", + # prim_paths=[self._ground_plane_prim_paths[1]], + # global_paths=[self._ground_plane_prim_paths[0]] + # ) + + # delete_prim(self._env_opts["ground_plane_prim_path"] + "/SphereLight") # we remove the default spherical light + + # set default camera viewport position and target + camera_position=[4.2, 4.2, 1.5] + camera_target=[0, 0, 0] + # use a single camera prim (configurable) for both viewport and render products + camera_prim = self._env_opts["camera_prim_path"] + self._set_initial_camera_params(camera_position=camera_position, + camera_target=camera_target, + camera_prim_path=camera_prim) + + if self._env_opts["render_to_file"]: + # base output dir + from datetime import datetime + timestamp = datetime.now().strftime("h%H_m%M_s%S_%d_%m_%Y") + self._render_output_dir = f"/tmp/IsaacRenderings/{timestamp}" + res = tuple(int(x) for x in self._env_opts["render_resolution"]) + + # create render product from chosen camera prim + self._render_product = rep.create.render_product(camera_prim, + res, name='rendering_camera') + self._render_writer = rep.WriterRegistry.get("BasicWriter") + self._render_writer.initialize(output_dir=self._render_output_dir, + rgb=True) + self._render_writer.attach([self._render_product]) + # optional top-down capture + if 
self._env_opts["render_panoramic_cam"]: + td_height = float(self._env_opts["render_panoramic_cam_height"]) + td_dir = self._render_output_dir + "/panoramic_cam" + td_offset = self._env_opts["render_panoramic_cam_target_xy"] + td_target_z = float(self._env_opts["render_panoramic_cam_target_z"]) + pos = [8.0, 11.0, td_height] + self._panoramic_cam_camera = rep.create.camera(focal_length=12, + name='rendering_camera_panoramic_cam', + clipping_range = (1, 200), + position = pos, + look_at = [td_offset[0], td_offset[1], td_target_z]) + self._panoramic_cam_render_product = rep.create.render_product(self._panoramic_cam_camera, + res, name='rendering_camera_panoramic_cam') + self._panoramic_cam_writer = rep.WriterRegistry.get("BasicWriter") + self._panoramic_cam_writer.initialize(output_dir=td_dir, rgb=True) + self._panoramic_cam_writer.attach([self._panoramic_cam_render_product]) + + self.apply_collision_filters(self._physics_context.prim_path, + "/World/collisions") + + self._reset_sim() + + self._fill_robot_info_from_world() + # initializes robot state data + + # update solver options + self._update_art_solver_options() + self._get_solver_info() # get again solver option before printing everything + self._print_envs_info() # debug print + + # for n in range(self._n_init_steps): # run some initialization steps + # self._step_sim() + + self._init_robots_state() + + self.scene_setup_completed = True + + Journal.log(self.__class__.__name__, + "set_up_scene", + "finished scene setup...", + LogType.STAT, + throw_when_excep = True) + + self._is = _sensor.acquire_imu_sensor_interface() + self._dyn_control=_dynamic_control.acquire_dynamic_control_interface() + + def _set_contact_links_material(self, prim_path: str): + prim=get_prim_at_path(prim_path) + physics_material=UsdPhysics.MaterialAPI.Apply(prim) + physics_material.CreateDynamicFrictionAttr(0) + physics_material.CreateStaticFrictionAttr(0) + physics_material.CreateRestitutionAttr(1.0) + 
physxMaterialAPI=PhysxSchema.PhysxMaterialAPI.Apply(prim) + physxMaterialAPI.CreateFrictionCombineModeAttr().Set("multiply") # average, min, multiply, max + physxMaterialAPI.CreateRestitutionCombineModeAttr().Set("multiply") + + def _get_lightblue_checker_texture_path(self) -> str: + tex_rel_path = os.path.join(os.path.dirname(__file__), "..", "assets", "textures", "ibrido_terrain_texture.png") + return os.path.abspath(tex_rel_path) + + def _ensure_lightblue_checker_material(self): + """Create (or reuse) a light-blue checker material for primitive terrains.""" + stage = get_current_stage() + mat_path = "/World/Looks/IbridoCheckerMaterial" + mat_prim = stage.GetPrimAtPath(mat_path) + if mat_prim.IsValid(): + return mat_path + + texture_path = self._get_lightblue_checker_texture_path() + + material = UsdShade.Material.Define(stage, mat_path) + + st_reader = UsdShade.Shader.Define(stage, f"{mat_path}/PrimvarReader_st") + st_reader.CreateIdAttr("UsdPrimvarReader_float2") + st_reader.CreateInput("varname", Sdf.ValueTypeNames.Token).Set("st") + st_reader.CreateOutput("result", Sdf.ValueTypeNames.Float2) + + uv_transform = UsdShade.Shader.Define(stage, f"{mat_path}/UVTransform") + uv_transform.CreateIdAttr("UsdTransform2d") + # keep UV scale at 1 here; tiling is controlled via mesh UVs + uv_transform.CreateInput("in", Sdf.ValueTypeNames.Float2).ConnectToSource(st_reader.GetOutput("result")) + uv_transform.CreateInput("scale", Sdf.ValueTypeNames.Float2).Set(Gf.Vec2f(1.0, 1.0)) + uv_transform.CreateOutput("result", Sdf.ValueTypeNames.Float2) + + tex = UsdShade.Shader.Define(stage, f"{mat_path}/CheckerTex") + tex.CreateIdAttr("UsdUVTexture") + tex.CreateInput("file", Sdf.ValueTypeNames.Asset).Set(Sdf.AssetPath(texture_path)) + tex.CreateInput("st", Sdf.ValueTypeNames.Float2).ConnectToSource(uv_transform.GetOutput("result")) + tex.CreateInput("wrapS", Sdf.ValueTypeNames.Token).Set("repeat") + tex.CreateInput("wrapT", Sdf.ValueTypeNames.Token).Set("repeat") + 
tex.CreateInput("minFilter", Sdf.ValueTypeNames.Token).Set("nearest") + tex.CreateInput("magFilter", Sdf.ValueTypeNames.Token).Set("nearest") + # disable mipmaps to avoid blurring sharp edges + tex.CreateInput("mipFilter", Sdf.ValueTypeNames.Token).Set("nearest") + tex.CreateInput("enableMipMap", Sdf.ValueTypeNames.Bool).Set(False) + tex.CreateInput("fallback", Sdf.ValueTypeNames.Color4f).Set(Gf.Vec4f(0.69, 0.85, 1.0, 1.0)) + tex.CreateOutput("rgb", Sdf.ValueTypeNames.Float3) + tex.CreateOutput("a", Sdf.ValueTypeNames.Float) + + pbr = UsdShade.Shader.Define(stage, f"{mat_path}/PBRShader") + pbr.CreateIdAttr("UsdPreviewSurface") + pbr.CreateInput("diffuseColor", Sdf.ValueTypeNames.Color3f).ConnectToSource(tex.GetOutput("rgb")) + pbr.CreateInput("roughness", Sdf.ValueTypeNames.Float).Set(0.45) + pbr.CreateInput("metallic", Sdf.ValueTypeNames.Float).Set(0.0) + pbr.CreateOutput("surface", Sdf.ValueTypeNames.Token) + + material.CreateSurfaceOutput().ConnectToSource(pbr.GetOutput("surface")) + + return mat_path + + def _ensure_groundplane_material(self): + """Guarantee a ground-plane material exists (default checker) and return its path.""" + stage = get_current_stage() + mat_path = "/World/Looks/groundPlaneMaterial" + mat_prim = stage.GetPrimAtPath(mat_path) + if mat_prim.IsValid(): + return mat_path + + # create a temporary default ground plane to spawn the checker material, then delete the geom + tmp_gp_path = "/World/tmp_ground_for_material" + self._scene.add_default_ground_plane(z_position=-1000.0, + name="tmp_ground_for_material", + prim_path=tmp_gp_path, + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"]) + + mat_prim = stage.GetPrimAtPath(mat_path) + prim_utils.delete_prim(tmp_gp_path) + + return mat_path if mat_prim.IsValid() else None + + def _apply_checker_material_to_terrain(self, terrain_root_path: str, material_path: str): + """Bind the checker material to 
all terrain prims (visual only).""" + + stage = get_current_stage() + mat_prim = stage.GetPrimAtPath(material_path) + if not mat_prim.IsValid(): + return + material = UsdShade.Material(mat_prim) + for prim in stage.Traverse(): + path = prim.GetPath().pathString + if not path.startswith(terrain_root_path): + continue + binding = UsdShade.MaterialBindingAPI.Apply(prim) + binding.Bind(material, UsdShade.Tokens.strongerThanDescendants) + + def _add_checker_overlay_plane(self, terrain_root_path: str, material_path: str): + """Create a thin visual-only mesh with UVs so the checker pattern shows up even on cube prims.""" + stage = get_current_stage() + plane_path = f"{terrain_root_path}/visual_checker" + plane_prim = stage.GetPrimAtPath(plane_path) + if plane_prim.IsValid(): + # if already exists, just (re)bind material + mat_prim = stage.GetPrimAtPath(material_path) + if mat_prim.IsValid(): + UsdShade.MaterialBindingAPI.Apply(plane_prim).Bind(UsdShade.Material(mat_prim), UsdShade.Tokens.strongerThanDescendants) + return + + # try to read base slab dimensions/position to size the overlay + slab_path = terrain_root_path + "_slab" + slab_prim = stage.GetPrimAtPath(slab_path) + center = Gf.Vec3f(0.0, 0.0, 0.0) + width = float(self._env_opts.get("ground_size", 50.0)) + length = width + thickness = 0.1 + if slab_prim.IsValid(): + xformable = UsdGeom.Xformable(slab_prim) + for op in xformable.GetOrderedXformOps(): + if op.GetOpType() == UsdGeom.XformOp.TypeTranslate: + center = Gf.Vec3f(op.Get()) + elif op.GetOpType() == UsdGeom.XformOp.TypeScale: + scale = op.Get() + width = float(scale[0]) + length = float(scale[1]) + thickness = float(scale[2]) + + half_w = 0.5 * width + half_l = 0.5 * length + z = center[2] + 0.5 * thickness + 1e-3 # slightly above the slab to avoid z-fighting + + plane = UsdGeom.Mesh.Define(stage, plane_path) + plane.CreatePointsAttr([ + Gf.Vec3f(center[0] - half_w, center[1] - half_l, z), + Gf.Vec3f(center[0] + half_w, center[1] - half_l, z), + 
Gf.Vec3f(center[0] + half_w, center[1] + half_l, z), + Gf.Vec3f(center[0] - half_w, center[1] + half_l, z), + ]) + plane.CreateFaceVertexCountsAttr([4]) + plane.CreateFaceVertexIndicesAttr([0, 1, 2, 3]) + plane.CreateDoubleSidedAttr(True) + + # increase tiling density; adjustable via env opt + uv_repeats = max(1, int((width / 10.0) * 3.0)) + primvars = UsdGeom.PrimvarsAPI(plane) + st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex) + st.Set(Vt.Vec2fArray([ + Gf.Vec2f(0.0, 0.0), + Gf.Vec2f(uv_repeats, 0.0), + Gf.Vec2f(uv_repeats, uv_repeats), + Gf.Vec2f(0.0, uv_repeats), + ])) + st.SetInterpolation(UsdGeom.Tokens.vertex) + + mat_prim = stage.GetPrimAtPath(material_path) + if mat_prim.IsValid(): + material = UsdShade.Material(mat_prim) + UsdShade.MaterialBindingAPI.Apply(plane.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants) + + def _add_checker_overlays_on_tiles(self, terrain_root_path: str, material_path: str): + """Add visual quads on top of each tile cube so the checker texture appears on raised steps.""" + stage = get_current_stage() + mat_prim = stage.GetPrimAtPath(material_path) + if not mat_prim.IsValid(): + return + material = UsdShade.Material(mat_prim) + for prim in stage.Traverse(): + path = prim.GetPath().pathString + if not path.startswith(terrain_root_path): + continue + if prim.GetTypeName() != "Cube": + continue + name = path.split("/")[-1] + if "wall" in name or name.endswith("_slab"): + continue + xformable = UsdGeom.Xformable(prim) + center = Gf.Vec3f(0.0, 0.0, 0.0) + scale = Gf.Vec3f(1.0, 1.0, 1.0) + for op in xformable.GetOrderedXformOps(): + if op.GetOpType() == UsdGeom.XformOp.TypeTranslate: + center = Gf.Vec3f(op.Get()) + elif op.GetOpType() == UsdGeom.XformOp.TypeScale: + scale = Gf.Vec3f(op.Get()) + width, length, height = float(scale[0]), float(scale[1]), float(scale[2]) + half_w = 0.5 * width + half_l = 0.5 * length + z = center[2] + 0.5 * height + 1e-3 + overlay_path = 
f"{path}_checker" + if stage.GetPrimAtPath(overlay_path).IsValid(): + UsdShade.MaterialBindingAPI.Apply(stage.GetPrimAtPath(overlay_path)).Bind(material, UsdShade.Tokens.strongerThanDescendants) + continue + mesh = UsdGeom.Mesh.Define(stage, overlay_path) + mesh.CreatePointsAttr([ + Gf.Vec3f(center[0] - half_w, center[1] - half_l, z), + Gf.Vec3f(center[0] + half_w, center[1] - half_l, z), + Gf.Vec3f(center[0] + half_w, center[1] + half_l, z), + Gf.Vec3f(center[0] - half_w, center[1] + half_l, z), + ]) + mesh.CreateFaceVertexCountsAttr([4]) + mesh.CreateFaceVertexIndicesAttr([0, 1, 2, 3]) + mesh.CreateDoubleSidedAttr(True) + uv_repeats = max(1, int((width / 10.0) * float(self._env_opts.get("checker_uv_density", 3.0)))) + primvars = UsdGeom.PrimvarsAPI(mesh) + st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex) + st.Set(Vt.Vec2fArray([ + Gf.Vec2f(0.0, 0.0), + Gf.Vec2f(uv_repeats, 0.0), + Gf.Vec2f(uv_repeats, uv_repeats), + Gf.Vec2f(0.0, uv_repeats), + ])) + st.SetInterpolation(UsdGeom.Tokens.vertex) + UsdShade.MaterialBindingAPI.Apply(mesh.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants) + + def _is_link(self, prim): + return prim.GetTypeName() == 'Xform' + + def _is_joint(self, prim): + return prim.GetTypeName() == 'PhysicsRevoluteJoint' + + def _create_collision_group(self, group_path, link_paths): + """ + Create a collision group under the given group_path that contains the links. + Args: + group_path (str): Path to create the collision group. + link_paths (List[str]): List of link paths to include in this group. 
+ """ + collision_group = Sdf.PrimSpec( + self._stage.GetRootLayer().GetPrimAtPath(group_path), + group_path.split("/")[-1], + Sdf.SpecifierDef, + "PhysicsCollisionGroup" + ) + # Add the links to the collision group + for link_path in link_paths: + includes_rel = Sdf.RelationshipSpec(collision_group, "collection:colliders:includes", False) + includes_rel.targetPathList.Append(link_path) + + def _add_collision_filter(self, group_path, link1, link2): + """ + Filters collision between two successive links. + + Args: + group_path (str): Path of the collision group. + link1 (str): Path of the first link. + link2 (str): Path of the second link. + """ + # Create a relationship to filter collisions between the two links + filtered_groups = Sdf.RelationshipSpec( + self._stage.GetPrimAtPath(group_path), + "physics:filteredGroups", + False + ) + filtered_groups.targetPathList.Append(link1) + filtered_groups.targetPathList.Append(link2) + + def _render_sim(self, mode="human"): + + if mode == "human": + # follow requested robot/env + if self._env_opts["use_follow_camera"]: + ridx = int(self._env_opts["render_follow_robot_idx"]) + eidx = int(self._env_opts["render_follow_env_idx"]) + if ridx < len(self._robot_names) and eidx < self._num_envs: + rname = self._robot_names[ridx] + pos = self._root_p.get(rname, None) + if pos is not None and pos.shape[0] > eidx: + base = pos[eidx].detach().cpu() + offset = torch.as_tensor(self._env_opts["render_follow_offset"], + device=base.device, dtype=base.dtype) + target_offset = torch.as_tensor(self._env_opts["render_follow_target_offset"], + device=base.device, dtype=base.dtype) + quat = self._root_q.get(rname, None) + if quat is not None and quat.shape[0] > eidx: + q = quat[eidx].detach().cpu() + w, x, y, z = q.unbind(-1) + yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z)) + cy, sy = torch.cos(yaw), torch.sin(yaw) + rot = torch.tensor([[cy, -sy], [sy, cy]], device=base.device, dtype=base.dtype) + offset_xy = torch.matmul(rot, 
offset[:2]) + target_offset_xy = torch.matmul(rot, target_offset[:2]) + offset = torch.stack((offset_xy[0], offset_xy[1], offset[2])) + target_offset = torch.stack((target_offset_xy[0], target_offset_xy[1], target_offset[2])) + eye = (base + offset).tolist() + target = (base + target_offset).tolist() + set_camera_view(eye=eye, target=target, camera_prim_path=self._env_opts["camera_prim_path"]) + + self._world.render() + # optional height grid visualization + if self._env_opts["enable_height_vis"]: + for robot_name, vis in self._height_vis.items(): + # use latest stored states + if robot_name not in self._height_imgs or robot_name not in self._height_sensors: + continue + heights = self._height_imgs.get(robot_name, None) + if heights is None or heights.numel() == 0: + continue + pos_src = self._root_p.get(robot_name, None) + quat_src = self._root_q.get(robot_name, None) + if pos_src is None or quat_src is None: + continue + step = self._height_vis_step.get(robot_name, 0) + period = max(1, int(self._env_opts["height_vis_update_period"])) + if step % period == 0: + try: + vis.update( + base_positions=pos_src, + base_quats=quat_src, + heights=heights) + except Exception as exc: + print(f"[height_vis] update failed for {robot_name}: {exc}") + self._height_vis_step[robot_name] = step + 1 + return None + elif mode == "rgb_array": + # check if viewport is enabled -- if not, then complain because we won't get any data + if not self._render or not self._record: + exception = f"Cannot render '{mode}' when rendering is not enabled. Please check the provided" + \ + "arguments to the environment class at initialization." 
+ Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + # obtain the rgb data + rgb_data = self._rgb_annotator.get_data() + # convert to numpy array + rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) + # return the rgb data + return rgb_data[:, :, :3] + else: + return None + + def _create_viewport_render_product(self, resolution=None): + """Create a render product of the viewport for rendering.""" + + try: + + # create render product + camera_prim = self._env_opts["camera_prim_path"] + res = resolution + if res is None: + res = tuple(int(x) for x in self._env_opts["render_resolution"]) + self._render_product = rep.create.render_product(camera_prim, res) + # create rgb annotator -- used to read data from the render product + self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") + self._rgb_annotator.attach([self._render_product]) + self._record = True + except Exception as e: + carb.log_info("omni.replicator.core could not be imported. 
Skipping creation of render product.") + carb.log_info(str(e)) + + def _close(self): + if self._simulation_app.is_running(): + self._simulation_app.close() + + def _step_world(self): + self._world.step(render=False, step_sim=True) + + if (self._render) and (self._render_step_counter%self._env_opts["rendering_freq"]==0): + # if self._env_opts["render_to_file"]: + # rep.orchestrator.step() + self._render_sim() # manually trigger rendering (World.step(render=True) for some reason + # will step the simulation for a dt==rendering_dt) + self._render_step_counter += 1 + + def _generate_jnt_imp_control(self, robot_name: str): + + jnt_imp_controller = OmniJntImpCntrl(articulation=self._robots_articulations[robot_name], + device=self._device, + dtype=self._dtype, + enable_safety=True, + urdf_path=self._urdf_dump_paths[robot_name], + config_path=self._jnt_imp_config_paths[robot_name], + enable_profiling=False, + debug_checks=self._debug, + override_art_controller=self._override_low_lev_controller) + + return jnt_imp_controller + + def _reset_sim(self): + self._world.reset(soft=False) + + def _reset_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + randomize: bool = False): + + if env_indxs is not None: + if self._debug: + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" 
+ Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + if randomize: + self._randomize_yaw(robot_name=robot_name,env_indxs=env_indxs) + + # root q + self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][env_indxs, :], + orientations=self._root_q_default[robot_name][env_indxs, :], + indices = env_indxs) + # jnts q + self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][env_indxs, :], + indices = env_indxs) + # root v and omega + self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][env_indxs, :], + indices = env_indxs) + # jnts v + concatenated_vel = torch.cat((self._root_v_default[robot_name][env_indxs, :], + self._root_omega_default[robot_name][env_indxs, :]), dim=1) + self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel, + indices = env_indxs) + # jnts eff + self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][env_indxs, :], + indices = env_indxs) + self._reset_perturbations(robot_name=robot_name, env_indxs=env_indxs) + else: + + if randomize: + self._randomize_yaw(robot_name=robot_name,env_indxs=None) + + # root q + self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][:, :], + orientations=self._root_q_default[robot_name][:, :], + indices = None) + # jnts q + self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][:, :], + indices = None) + # root v and omega + self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][:, :], + indices = None) + # jnts v + concatenated_vel = torch.cat((self._root_v_default[robot_name][:, :], + self._root_omega_default[robot_name][:, :]), dim=1) + self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel, + indices = None) + # 
jnts eff + self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][:, :], + indices = None) + self._reset_perturbations(robot_name=robot_name, env_indxs=None) + + # we update the robots state + self._read_root_state_from_robot(env_indxs=env_indxs, + robot_name=robot_name) + self._read_jnts_state_from_robot(env_indxs=env_indxs, + robot_name=robot_name) + + def _reset_perturbations(self, robot_name: str, env_indxs: torch.Tensor = None): + """Clear perturbation state and wrenches for selected envs.""" + if robot_name not in self._pert_active: + return + if env_indxs is None: + self._pert_active[robot_name].zero_() + self._pert_steps_remaining[robot_name].zero_() + self._pert_forces_world[robot_name].zero_() + self._pert_torques_world[robot_name].zero_() + self._pert_det_counter[robot_name].zero_() + else: + self._pert_active[robot_name][env_indxs] = False + self._pert_steps_remaining[robot_name][env_indxs] = 0 + self._pert_forces_world[robot_name][env_indxs, :] = 0 + self._pert_torques_world[robot_name][env_indxs, :] = 0 + self._pert_det_counter[robot_name][env_indxs] = 0 + + def _process_perturbations(self): + + # Iterate over each robot view + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + # Pre-fetch views for code clarity (references, not copies) + active = self._pert_active[robot_name] + steps_rem = self._pert_steps_remaining[robot_name] + forces_world = self._pert_forces_world[robot_name] + torques_world = self._pert_torques_world[robot_name] + planar_only = self._env_opts["pert_planar_only"] if "pert_planar_only" in self._env_opts else False + + # --- 1. Update Active Counters (In-Place) --- + if active.any(): + # In-place subtraction + steps_rem[active] -= 1 + + # --- 2. 
Reset Finished Perturbations (In-Place) --- + # Logic: Active AND (Steps <= 0) + # Note: Creating 'newly_ended' boolean mask is a tiny unavoidable allocation + newly_ended = active & (steps_rem <= 0) + + if newly_ended.any(): + # Use masked_fill_ for in-place zeroing + active.masked_fill_(newly_ended, False) + forces_world[newly_ended, :]=0 + torques_world[newly_ended, :]=0 + steps_rem.masked_fill_(newly_ended, 0) + + # --- 3. Trigger New Perturbations --- + + det_rate = self._env_opts["det_pert_rate"] + if det_rate: + # deterministic spacing: count physics steps and trigger when threshold reached (if not already active) + det_counter = self._pert_det_counter[robot_name] + det_counter += 1 + trigger_mask = (det_counter >= self._pert_det_steps) & (~active) + else: + # Reuse scratch buffer for probability check + # Assumes self._pert_scratch is (num_envs, 1) pre-allocated + self._pert_scratch[robot_name].uniform_(0.0, 1.0) # used for triggering new perturbations + # Check probs against threshold + # Flatten scratch to (N,) to match 'active' mask + trigger_mask = (self._pert_scratch[robot_name].flatten() < self._pert_wrenches_prob) & (~active) + + if trigger_mask.any(): + + # Cache weights (references) + weight = self._weights[robot_name] + + # we now treat the configured "max_*_impulse_*" as the maximum **impulse** (N·s or N·m·s), + # and convert impulse -> force/torque by dividing by the sampled duration (seconds). + # Use per-robot weight scaling as before. 
+ lin_impulse_max = self._env_opts["max_lin_impulse_norm"] * weight + ang_impulse_max = self._env_opts["max_ang_impulse_norm"] * weight + + # --- Force (Impulse) Direction Generation (Reuse _pert_lindir buffer) --- + lindir = self._pert_lindir[robot_name] # (N, 3) + + if planar_only: + # planar push direction from random yaw + yaw_angles = self._pert_scratch[robot_name].uniform_(0.0, 2*math.pi).flatten() + lindir[:, 0] = torch.cos(yaw_angles) + lindir[:, 1] = torch.sin(yaw_angles) + lindir[:, 2] = 0.0 + else: + # 1. Fill with Standard Normal noise in-place + lindir.normal_() + # 2. Normalize in-place + norms = torch.norm(lindir, dim=1, keepdim=True).clamp_min_(1e-6) + lindir.div_(norms) + + # 3. Sample linear impulse magnitudes (reuse scratch) + # scratch has shape (N,1) - uniform [0,1] + self._pert_scratch[robot_name].uniform_(self._env_opts["lin_impulse_mag_min"], self._env_opts["lin_impulse_mag_max"]) + # impulse vectors = unit_dir * (rand * lin_impulse_max) + + lindir.mul_(self._pert_scratch[robot_name] * lin_impulse_max) # now contains linear impulses (N,3) + + # --- Angular (Impulse) Direction Generation (Reuse _pert_angdir buffer) --- + angdir = self._pert_angdir[robot_name] # (N, 3) + + if planar_only: + angdir.zero_() # no torque when planar-only is requested + else: + # 1. Fill with Standard Normal noise + angdir.normal_() + # 2. Normalize + norms = torch.norm(angdir, dim=1, keepdim=True).clamp_min_(1e-6) + angdir.div_(norms) + + # 3. 
Sample angular impulse magnitudes (reuse scratch) + self._pert_scratch[robot_name].uniform_(0.0, 1.0) + angdir.mul_(self._pert_scratch[robot_name] * ang_impulse_max) # now contains angular impulses (N,3) + + # --- Duration Generation (Reuse _pert_durations) --- + # Keep integer steps sampling (same shape/device/dtype) + self._pert_durations[robot_name] = torch.randint_like( + self._pert_durations[robot_name], + low=self._pert_min_steps, + high=self._pert_max_steps + 1 + ) + + # --- convert to float + duration_steps = self._pert_durations[robot_name].to(dtype=lindir.dtype, device=lindir.device) + # duration in seconds (shape (N,)) + duration_seconds = duration_steps * self.physics_dt() + # avoid divide-by-zero + duration_seconds = duration_seconds.clamp_min_(1e-6) + + # compute per-step forces/torques = impulse / duration_seconds + # lindir currently holds linear impulses, angdir holds angular impulses + forces_to_apply = lindir / duration_seconds + torques_to_apply = angdir / duration_seconds + + # Optional clipping based on robot weight (min/max) + f_norm = torch.norm(forces_to_apply, dim=1, keepdim=True).clamp_min_(1e-9) + target_norm = f_norm + + f_clip_scale_max = self._env_opts["pert_force_max_weight_scale"] + if f_clip_scale_max > 0.0: + force_max = self._weights[robot_name] * f_clip_scale_max # (N,1) + target_norm = torch.minimum(target_norm, force_max) + + f_clip_scale_min = self._env_opts["pert_force_min_weight_scale"] + if f_clip_scale_min > 0.0: + force_min = self._weights[robot_name] * f_clip_scale_min + target_norm = torch.maximum(target_norm, force_min) + + forces_to_apply = forces_to_apply * (target_norm / f_norm) + + t_clip_scale = self._env_opts["pert_torque_max_weight_scale"] + if t_clip_scale > 0.0: + torque_max = self._weights[robot_name] * self._env_opts["max_ang_impulse_lever"] * t_clip_scale + t_norm = torch.norm(torques_to_apply, dim=1, keepdim=True).clamp_min_(1e-9) + t_scale = torch.minimum(torch.ones_like(t_norm), torque_max / t_norm) + 
torques_to_apply = torques_to_apply * t_scale + + # --- Update State Buffers --- + # Use boolean indexing to scatter only triggered values + active[trigger_mask] = True + steps_rem[trigger_mask] = self._pert_durations[robot_name][trigger_mask, :].flatten() + forces_world[trigger_mask, :] = forces_to_apply[trigger_mask, :] + torques_world[trigger_mask, :] = torques_to_apply[trigger_mask, :] + if det_rate: + det_counter[trigger_mask] = 0 + + # --- 4. Apply Wrenches (Vectorized) --- + # Only call API if there are active perturbations to minimize overhead + + forces_world[~active, :]=0 + torques_world[~active, :]=0 + + self._blink_rigid_prim_views[robot_name].apply_forces_and_torques_at_pos( + forces=forces_world, + torques=torques_world, + positions=None, # body frame origin + is_global=True + ) + + @override + def _pre_step(self): + + if self._env_opts["use_random_pertub"]: + self._process_perturbations() + + success=super()._pre_step() + + return success + + @override + def _pre_step_db(self): + + if self._env_opts["use_random_pertub"]: + self._process_perturbations() + + success=super()._pre_step_db() + + return success + + @override + def _update_contact_state(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + super()._update_contact_state(robot_name, env_indxs) + + if self._env_opts["use_random_pertub"]: + # write APPLIED perturbations to root wrench (mainly for debug) + self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_forces_world[robot_name][env_indxs, :], + data_type="f", + contact_name="root", + robot_idxs = env_indxs, + gpu=self._use_gpu) + self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_torques_world[robot_name][env_indxs, :], + data_type="t", + contact_name="root", + robot_idxs = env_indxs, + gpu=self._use_gpu) + + def _post_warmup_validation(self, robot_name: str): + """Validate warmup state: base height, tilt, and (if available) contacts.""" + envs = 
torch.arange(self._num_envs, device=self._device) + + # terrain height query + def _terrain_height(xy: torch.Tensor): + if self._env_opts["use_flat_ground"] or self.terrain_generator is None: + return torch.zeros((xy.shape[0],), device=xy.device, dtype=self._dtype) + heights = [] + half_extent = self._env_opts["spawn_height_check_half_extent"] + for k in range(xy.shape[0]): + h = self.terrain_generator.get_max_height_in_rect( + float(xy[k, 0]), float(xy[k, 1]), half_extent=half_extent) + heights.append(h) + return torch.as_tensor(heights, device=xy.device, dtype=self._dtype) + + # base height check + base_xy = self._root_p[robot_name][:, 0:2] + base_z = self._root_p[robot_name][:, 2] + ground_z = _terrain_height(base_xy) + margin = float(self._env_opts["spawn_height_cushion"]) + bad_z = base_z < (ground_z + margin) + + # tilt check (angle between base up and world up) + q = self._root_q[robot_name] + # quaternion to up vector + w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3] + up = torch.stack([ + 2 * (x * z - w * y), + 2 * (y * z + w * x), + 1 - 2 * (x * x + y * y) + ], dim=1) + cos_tilt = up[:, 2].clamp(-1.0, 1.0) + tilt_thresh_deg = 35.0 + bad_tilt = cos_tilt < math.cos(math.radians(tilt_thresh_deg)) + + # contact check (only if sensors exist) + # bad_contact = torch.zeros_like(base_z, dtype=torch.bool) + # if robot_name in self.omni_contact_sensors and self.omni_contact_sensors[robot_name] is not None: + # counts = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device) + # for link in self._contact_names.get(robot_name, []): + # f_contact = self._get_contact_f(robot_name=robot_name, + # contact_link=link, + # env_indxs=None) + # if f_contact is None: + # continue + # # use normal component (assume z-up); ignore tangential forces + # active = f_contact[:, 2] > 1e-3 + # counts += active.int() + # bad_contact = counts < 3 + + failing = torch.nonzero(bad_z | bad_tilt, as_tuple=False).flatten() + if failing.numel() > 0: + # remediate: lift to 
terrain+margin, upright (preserve yaw), zero root velocities + # yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z)) + # safe_z = (ground_z + margin)[failing] + # self._root_p[robot_name][failing, 2] = safe_z + # cos_h = torch.cos(yaw[failing] / 2) + # sin_h = torch.sin(yaw[failing] / 2) + # upright = torch.zeros((failing.shape[0], 4), device=self._device, dtype=self._dtype) + # upright[:, 0] = cos_h + # upright[:, 3] = sin_h + # self._root_q[robot_name][failing, :] = upright + # self._root_v[robot_name][failing, :] = 0.0 + # self._root_omega[robot_name][failing, :] = 0.0 + + msgs = [] # print db message + if bad_z.any(): + msgs.append(f"low_z envs {torch.nonzero(bad_z, as_tuple=False).flatten().tolist()}") + if bad_tilt.any(): + msgs.append(f"tilt envs {torch.nonzero(bad_tilt, as_tuple=False).flatten().tolist()}") + Journal.log(self.__class__.__name__, + "_post_warmup_validation", + f"Warmup validation failures for {robot_name}: " + "; ".join(msgs), + LogType.WARN, + throw_when_excep=False) + return failing + + def _import_urdf(self, + robot_name: str, + fix_base = False, + self_collide = False, + merge_fixed = True): + + import_config=_urdf.ImportConfig() + # status,import_config=omni_kit.commands.execute("URDFCreateImportConfig") + + Journal.log(self.__class__.__name__, + "update_root_offsets", + "importing robot URDF", + LogType.STAT, + throw_when_excep = True) + _urdf.acquire_urdf_interface() + # we overwrite some settings which are bound to be fixed + import_config.merge_fixed_joints = merge_fixed # makes sim more stable + # in case of fixed joints with light objects + import_config.import_inertia_tensor = True + # import_config.convex_decomp = False + import_config.fix_base = fix_base + import_config.self_collision = self_collide + # import_config.distance_scale = 1 + # import_config.make_default_prim = True + # import_config.create_physics_scene = True + # import_config.default_drive_strength = 1047.19751 + # 
import_config.default_position_drive_damping = 52.35988 + # import_config.default_drive_type = _urdf.UrdfJointTargetType.JOINT_DRIVE_POSITION + # import URDF + success, robot_prim_path_default = omni_kit.commands.execute( + "URDFParseAndImportFile", + urdf_path=self._urdf_dump_paths[robot_name], + import_config=import_config, + # get_articulation_root=True, + ) + + robot_base_prim_path = self._env_opts["template_env_ns"] + "/" + robot_name + + if success: + Journal.log(self.__class__.__name__, + "_import_urdf", + "Successfully importedf URDF into IsaacSim", + LogType.STAT) + else: + Journal.log(self.__class__.__name__, + "_import_urdf", + "Failed to import URDF into IsaacSim", + LogType.EXCEP, + throw_when_excep = True) + + # moving default prim to base prim path for cloning + move_prim(robot_prim_path_default, # from + robot_base_prim_path) # to + + robot_base_prim = prim_utils.get_prim_at_path(robot_base_prim_path) + children = prim_utils.get_prim_children(robot_base_prim) + # log imported prim children to the journal (print was getting truncated in logs) + Journal.log(self.__class__.__name__, + "_import_urdf", + f"Imported robot URDF children: {children}", + LogType.STAT) + + # improve collision tesselation for cylinders (e.g., wheels) if requested + # self._apply_collision_refinement(robot_base_prim_path, + # self._env_opts["collision_refinement_level"]) + + return success + + def _apply_collision_refinement(self, robot_base_prim_path: str, refinement_level: int): + """Set refinement level on collision cylinders to avoid coarse faceting.""" + if refinement_level is None: + return + stage = get_current_stage() + coll_prefix = robot_base_prim_path + "/collisions" + count = 0 + for prim in stage.Traverse(): + if not prim.IsValid(): + continue + path_str = prim.GetPath().pathString + if not path_str.startswith(coll_prefix): + continue + if prim.GetTypeName() == "Cylinder": + attr = prim.GetAttribute("refinementLevel") + if not attr.IsValid(): + attr = 
prim.CreateAttribute("refinementLevel", Sdf.ValueTypeNames.Int) + attr.Set(int(refinement_level)) + count += 1 + Journal.log(self.__class__.__name__, + "_apply_collision_refinement", + f"Applied refinement level {refinement_level} to {count} cylinder collision prims under {coll_prefix}", + LogType.STAT) + + def apply_collision_filters(self, + physicscene_path: str, + coll_root_path: str): + + self._cloner.filter_collisions(physicsscene_path = physicscene_path, + collision_root_path = coll_root_path, + prim_paths=self._envs_prim_paths, + global_paths=self._ground_plane_prim_paths # can collide with these prims + ) + + def _read_root_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + self._get_root_state(numerical_diff=self._env_opts["use_diff_vels"], + env_indxs=env_indxs, + robot_name=robot_name) + + # height grid sensor readout + if robot_name in self._height_sensors: + pos_src = self._root_p[robot_name] if env_indxs is None else self._root_p[robot_name][env_indxs] + quat_src = self._root_q[robot_name] if env_indxs is None else self._root_q[robot_name][env_indxs] + heights = self._height_sensors[robot_name].read(pos_src, quat_src) + if self._env_opts["use_flat_ground"]: + heights.zero_() + if env_indxs is None: + self._height_imgs[robot_name] = heights + else: + # clone to avoid overlapping write/read views + self._height_imgs[robot_name][env_indxs] = heights.clone() + + # print("height image") + # print(self._height_imgs[robot_name][0, :, : ]) + + def _read_jnts_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + self._get_robots_jnt_state( + numerical_diff=self._env_opts["use_diff_vels"], + env_indxs=env_indxs, + robot_name=robot_name) + + def _get_root_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + numerical_diff: bool = False, + base_loc: bool = True): + + # reading = self._is.get_sensor_reading("/World/Cube/Imu_Sensor", + # use_latest_data = True) + + 
dt=self._cluster_dt[robot_name] # getting diff state always at cluster rate + + # measurements from simulator are in world frame + if env_indxs is not None: + + pose = self._robots_art_views[robot_name].get_world_poses( + clone = True, + indices=env_indxs) # tuple: (pos, quat), quat is [w, i, j, k] in Isaac4.2 + + self._root_p[robot_name][env_indxs, :] = pose[0] + + going_to_fly=self._root_p[robot_name][env_indxs, 0:2]>(self._env_opts["terrain_border"]-0.1) + if going_to_fly.any(): + flying=going_to_fly.sum().item() + warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!" + Journal.log(self.__class__.__name__, + "_get_root_state", + warn, + LogType.WARN, + throw_when_excep = True) + self._root_q[robot_name][env_indxs, :] = pose[1] # root orientation + if not numerical_diff: + # we get velocities from the simulation. This is not good since + # these can actually represent artifacts which do not have physical meaning. + # It's better to obtain them by differentiation to avoid issues with controllers, etc... + self._root_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_linear_velocities( + clone = True, + indices=env_indxs) # root lin. velocity + self._root_omega[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_angular_velocities( + clone = True, + indices=env_indxs) # root ang. 
velocity + + # for now obtain root a numerically + self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \ + self._root_v_prev[robot_name][env_indxs, :]) / dt + self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \ + self._root_omega_prev[robot_name][env_indxs, :]) / dt + + self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :] + self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :] + else: + # differentiate numerically + self._root_v[robot_name][env_indxs, :] = (self._root_p[robot_name][env_indxs, :] - \ + self._root_p_prev[robot_name][env_indxs, :]) / dt + self._root_omega[robot_name][env_indxs, :] = quat_to_omega( + self._root_q_prev[robot_name][env_indxs, :], + self._root_q[robot_name][env_indxs, :], + dt) + + self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \ + self._root_v_prev[robot_name][env_indxs, :]) / dt + self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \ + self._root_omega_prev[robot_name][env_indxs, :]) / dt + + # update "previous" data for numerical differentiation + self._root_p_prev[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :] + self._root_q_prev[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :] + self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :] + self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :] + self._track_terrain_hits(robot_name=robot_name, env_indxs=env_indxs) + + else: + # updating data for all environments + pose = self._robots_art_views[robot_name].get_world_poses( + clone = True) # tuple: (pos, quat) + self._root_p[robot_name][:, :] = pose[0] + self._root_q[robot_name][:, :] = pose[1] # root orientation + + going_to_fly=self._root_p[robot_name][:, 0:2]>(self._env_opts["terrain_border"]-0.1) + if 
going_to_fly.any(): + flying=going_to_fly.sum().item() + warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!" + Journal.log(self.__class__.__name__, + "_get_root_state", + warn, + LogType.WARN, + throw_when_excep = True) + + if not numerical_diff: + # we get velocities from the simulation. This is not good since + # these can actually represent artifacts which do not have physical meaning. + # It's better to obtain them by differentiation to avoid issues with controllers, etc... + self._root_v[robot_name][:, :] = self._robots_art_views[robot_name].get_linear_velocities( + clone = True) # root lin. velocity + self._root_omega[robot_name][:, :] = self._robots_art_views[robot_name].get_angular_velocities( + clone = True) # root ang. velocity + + self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \ + self._root_v_prev[robot_name][:, :]) / dt + self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \ + self._root_omega_prev[robot_name][:, :]) / dt + + self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :] + self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :] + else: + # differentiate numerically + self._root_v[robot_name][:, :] = (self._root_p[robot_name][:, :] - \ + self._root_p_prev[robot_name][:, :]) / dt + self._root_omega[robot_name][:, :] = quat_to_omega(self._root_q_prev[robot_name][:, :], + self._root_q[robot_name][:, :], + dt) + + self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \ + self._root_v_prev[robot_name][:, :]) / dt + self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \ + self._root_omega_prev[robot_name][:, :]) / dt + + # update "previous" data for numerical differentiation + self._root_p_prev[robot_name][:, :] = self._root_p[robot_name][:, :] + self._root_q_prev[robot_name][:, :] = self._root_q[robot_name][:, :] + self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :] + 
self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :] + self._track_terrain_hits(robot_name=robot_name, env_indxs=None) + + if base_loc: + # rotate robot twist in base local + twist_w=torch.cat((self._root_v[robot_name], + self._root_omega[robot_name]), + dim=1) + twist_base_loc=torch.cat((self._root_v_base_loc[robot_name], + self._root_omega_base_loc[robot_name]), + dim=1) + world2base_frame(t_w=twist_w,q_b=self._root_q[robot_name],t_out=twist_base_loc) + self._root_v_base_loc[robot_name]=twist_base_loc[:, 0:3] + self._root_omega_base_loc[robot_name]=twist_base_loc[:, 3:6] + + # rotate robot a in base local + a_w=torch.cat((self._root_a[robot_name], + self._root_alpha[robot_name]), + dim=1) + a_base_loc=torch.cat((self._root_a_base_loc[robot_name], + self._root_alpha_base_loc[robot_name]), + dim=1) + world2base_frame(t_w=a_w,q_b=self._root_q[robot_name],t_out=a_base_loc) + self._root_a_base_loc[robot_name]=a_base_loc[:, 0:3] + self._root_alpha_base_loc[robot_name]=a_base_loc[:, 3:6] + + # rotate gravity in base local + world2base_frame3D(v_w=self._gravity_normalized[robot_name],q_b=self._root_q[robot_name], + v_out=self._gravity_normalized_base_loc[robot_name]) + + def _get_robots_jnt_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + numerical_diff: bool = False): + + dt= self.physics_dt() if self._override_low_lev_controller else self._cluster_dt[robot_name] + + # measurements from simulator are in world frame + if env_indxs is not None: + + self._jnts_q[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_positions( + clone = True, + indices=env_indxs) # joint positions + if not numerical_diff: + self._jnts_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_velocities( + clone = True, + indices=env_indxs) # joint velocities + else: + # differentiate numerically + self._jnts_v[robot_name][env_indxs, :] = (self._jnts_q[robot_name][env_indxs, :] - \ + 
self._jnts_q_prev[robot_name][env_indxs, :]) / dt + # update "previous" data for numerical differentiation + self._jnts_q_prev[robot_name][env_indxs, :] = self._jnts_q[robot_name][env_indxs, :] + + self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts( + clone = True, + joint_indices=None, + indices=env_indxs) # measured joint efforts (computed by joint force solver) + + else: + self._jnts_q[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_positions( + clone = True) # joint positions + if not numerical_diff: + self._jnts_v[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_velocities( + clone = True) # joint velocities + else: + self._jnts_v[robot_name][:, :] = (self._jnts_q[robot_name][:, :] - \ + self._jnts_q_prev[robot_name][:, :]) / dt + + self._jnts_q_prev[robot_name][:, :] = self._jnts_q[robot_name][:, :] + + self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts( + clone = True) # measured joint efforts (computed by joint force solver) + + def _get_contact_f(self, + robot_name: str, + contact_link: str, + env_indxs: torch.Tensor) -> torch.Tensor: + + if self.omni_contact_sensors[robot_name] is not None: + return self.omni_contact_sensors[robot_name].get(dt=self.physics_dt(), + contact_link=contact_link, + env_indxs=env_indxs, + clone=False) + + def _set_jnts_to_homing(self, robot_name: str): + self._robots_art_views[robot_name].set_joints_default_state(positions=self._homing, + velocities = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \ + dtype=self._dtype, device=self._device), + efforts = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \ + dtype=self._dtype, device=self._device)) + + def _set_root_to_defconfig(self, robot_name: str): + self._robots_art_views[robot_name].set_default_state(positions=self._root_p_default[robot_name], + orientations=self._root_q_default[robot_name]) + + def 
_alter_twist_warmup(self, robot_name: str, env_indxs: torch.Tensor = None): + """Zero angular velocities and joint velocities for the given robot/envs.""" + if env_indxs is None: + twist = self._robots_art_views[robot_name].get_velocities(clone=True) + twist[:, 0] = 0.0 + twist[:, 1] = 0.0 + twist[:, 3:] = 0.0 # zero angular part, preserve current linear + self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=None) + + # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities( + # indices = None, clone=True) + # jnt_vel[:, :] = 0.0 + + # self._robots_art_views[robot_name].set_joint_velocities( + # velocities = jnt_vel, + # indices = None) + + else: + twist = self._robots_art_views[robot_name].get_velocities(clone=True, indices=env_indxs) + # twist[:, 0:2] = 0.0 + twist[:, 0] = 0.0 + twist[:, 1] = 0.0 + twist[:, 3:] = 0.0 + self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=env_indxs) + + # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities( + # indices = env_indxs, clone=True) + # jnt_vel[:, :] = 0.0 + + # self._robots_art_views[robot_name].set_joint_velocities( + # velocities = jnt_vel, + # indices = env_indxs) + + def _get_solver_info(self): + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + self._solver_position_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_position_iteration_counts() + self._solver_velocity_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_velocity_iteration_counts() + self._solver_stabilization_threshs[robot_name] = self._robots_art_views[robot_name].get_stabilization_thresholds() + + def _update_art_solver_options(self): + + # sets new solver iteration options for specifc articulations + self._get_solver_info() # gets current solver info for the articulations of the + # environments, so that dictionaries are filled properly + + for i in range(0, len(self._robot_names)): + robot_name = 
self._robot_names[i] + # increase by a factor + self._solver_position_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_position_iteration_count) + self._solver_velocity_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_velocity_iteration_count) + self._solver_stabilization_threshs[robot_name] = torch.full((self._num_envs,), self._solver_stabilization_thresh) + self._robots_art_views[robot_name].set_solver_position_iteration_counts(self._solver_position_iteration_counts[robot_name]) + self._robots_art_views[robot_name].set_solver_velocity_iteration_counts(self._solver_velocity_iteration_counts[robot_name]) + self._robots_art_views[robot_name].set_stabilization_thresholds(self._solver_stabilization_threshs[robot_name]) + self._get_solver_info() # gets again solver info for articulation, so that it's possible to debug if + # the operation was successful + + def _print_envs_info(self): + + ground_info = f"[Ground info]" + "\n" + \ + "static friction coeff.: " + str(self._terrain_material.GetStaticFrictionAttr().Get()) + "\n" + \ + "dynamics friction coeff.: " + str(self._terrain_material.GetDynamicFrictionAttr().Get()) + "\n" + \ + "restitution coeff.: " + str(self._terrain_material.GetRestitutionAttr().Get()) + "\n" +\ + "friction comb. mode: " + str(self._terrain_physix_material.GetFrictionCombineModeAttr().Get()) + "\n" + \ + "damping comb. mode: " + str(self._terrain_physix_material.GetDampingCombineModeAttr().Get()) + "\n" + \ + "restitution comb. mode: " + str(self._terrain_physix_material.GetRestitutionCombineModeAttr().Get()) + "\n" + + Journal.log(self.__class__.__name__, + "_print_envs_info", + ground_info, + LogType.STAT, + throw_when_excep = True) + + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + task_info = f"[{robot_name}]" + "\n" + \ + "bodies: " + str(self._robots_art_views[robot_name].body_names) + "\n" + \ + "n. 
prims: " + str(self._robots_art_views[robot_name].count) + "\n" + \ + "prims names: " + str(self._robots_art_views[robot_name].prim_paths) + "\n" + \ + "n. bodies: " + str(self._robots_art_views[robot_name].num_bodies) + "\n" + \ + "n. dofs: " + str(self._robots_art_views[robot_name].num_dof) + "\n" + \ + "dof names: " + str(self._robots_art_views[robot_name].dof_names) + "\n" + \ + "solver_position_iteration_counts: " + str(self._solver_position_iteration_counts[robot_name]) + "\n" + \ + "solver_velocity_iteration_counts: " + str(self._solver_velocity_iteration_counts[robot_name]) + "\n" + \ + "stabiliz. thresholds: " + str(self._solver_stabilization_threshs[robot_name]) + # print("dof limits: " + str(self._robots_art_views[robot_name].get_dof_limits())) + # print("effort modes: " + str(self._robots_art_views[robot_name].get_effort_modes())) + # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains())) + # print("dof max efforts: " + str(self._robots_art_views[robot_name].get_max_efforts())) + # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains())) + # print("physics handle valid: " + str(self._robots_art_views[robot_name].is_physics_handle_valid()) + Journal.log(self.__class__.__name__, + "_print_envs_info", + task_info, + LogType.STAT, + throw_when_excep = True) + + def _fill_robot_info_from_world(self): + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + self._robot_bodynames[robot_name] = self._robots_art_views[robot_name].body_names + self._robot_n_links[robot_name] = self._robots_art_views[robot_name].num_bodies + self._robot_n_dofs[robot_name] = self._robots_art_views[robot_name].num_dof + self._robot_dof_names[robot_name] = self._robots_art_views[robot_name].dof_names + + def _set_initial_camera_params(self, + camera_position=[10, 10, 3], + camera_target=[0, 0, 0], + camera_prim_path="/OmniverseKit_Persp"): + set_camera_view(eye=camera_position, + target=camera_target, + 
camera_prim_path=camera_prim_path) + + def _init_contact_sensors(self, robot_name: str): + self.omni_contact_sensors[robot_name]=None + sensor_radii={} + contact_offsets={} + self._contact_names[robot_name]=self._env_opts["contact_prims"] + for contact_prim in self._env_opts["contact_prims"]: + sensor_radii[contact_prim]=self._env_opts["sensor_radii"] + contact_offsets[contact_prim]=np.array([0.0, 0.0, 0.0]) + if not (len(self._env_opts["contact_prims"])==0): + self.omni_contact_sensors[robot_name]=OmniContactSensors( + name=robot_name, + n_envs=self._num_envs, + contact_prims=self._env_opts["contact_prims"], + contact_offsets=contact_offsets, + sensor_radii=sensor_radii, + device=self._device, + dtype=self._dtype, + enable_debug=self._debug, + filter_paths=self._ground_plane_prim_paths) + self.omni_contact_sensors[robot_name].create_contact_sensors( + self._world, + envs_namespace=self._env_opts["envs_ns"]) + + def _init_robots_state(self): + + self._masses = {} + self._weights = {} + + self._pert_active = {} # bool mask: (num_envs,) + self._pert_steps_remaining = {} # int steps: (num_envs,) + self._pert_forces_world = {} # (num_envs,3) + self._pert_torques_world = {} # (num_envs,3) + self._pert_force_local = {} # (num_envs,3) (if needed) + self._pert_torque_local = {} # (num_envs,3) + self._pert_lindir = {} + self._pert_angdir = {} + self._pert_durations = {} + self._pert_scratch = {} + self._pert_det_counter = {} + + # convert durations in seconds to integer physics steps (min 1 step) + self._pert_min_steps = max(1, int(math.ceil(self._env_opts["pert_wrenches_min_duration"] / self.physics_dt()))) + self._pert_max_steps = max(self._pert_min_steps, int(math.ceil(self._env_opts["pert_wrenches_max_duration"] / self.physics_dt()))) + + pert_wrenches_step_rate=self._env_opts["pert_wrenches_rate"]/self.physics_dt() # 1 pert every n physics steps + self._pert_det_steps = max(1, int(round(pert_wrenches_step_rate))) + self._pert_wrenches_prob=min(1.0, 
1.0/pert_wrenches_step_rate) # sampling prob to be used when not deterministic + + self._calc_robot_distrib() + + for i in range(0, len(self._robot_names)): + + robot_name = self._robot_names[i] + + pose = self._robots_art_views[robot_name].get_world_poses( + clone = True) # tuple: (pos, quat) + + # root p (measured, previous, default) + self._root_p[robot_name] = pose[0] + self._root_p_prev[robot_name] = torch.clone(pose[0]) + # print(self._root_p_default[robot_name].device) + self._root_p_default[robot_name] = torch.clone(pose[0]) + self._distr_offset[robot_name] + # root q (measured, previous, default) + self._root_q[robot_name] = pose[1] # root orientation + self._root_q_prev[robot_name] = torch.clone(pose[1]) + self._root_q_default[robot_name] = torch.clone(pose[1]) + # jnt q (measured, previous, default) + self._jnts_q[robot_name] = self._robots_art_views[robot_name].get_joint_positions( + clone = True) # joint positions + self._jnts_q_prev[robot_name] = self._robots_art_views[robot_name].get_joint_positions( + clone = True) + self._jnts_q_default[robot_name] = torch.full((self._jnts_q[robot_name].shape[0], + self._jnts_q[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + + # root v (measured, default) + self._root_v[robot_name] = self._robots_art_views[robot_name].get_linear_velocities( + clone = True) # root lin. velocityù + self._root_v_base_loc[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_v_prev[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_v_default[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + # root omega (measured, default) + self._root_omega[robot_name] = self._robots_art_views[robot_name].get_angular_velocities( + clone = True) # root ang. 
velocity + self._root_omega_prev[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0) + + self._root_omega_base_loc[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0) + self._root_omega_default[robot_name] = torch.full((self._root_omega[robot_name].shape[0], self._root_omega[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + # root a (measured,) + self._root_a[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_a_base_loc[robot_name] = torch.full_like(self._root_a[robot_name], fill_value=0.0) + self._root_alpha[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_alpha_base_loc[robot_name] = torch.full_like(self._root_alpha[robot_name], fill_value=0.0) + + # height grid sensor storage + grid_sz = int(self._env_opts["height_sensor_pixels"]) + self._height_imgs[robot_name] = torch.zeros((self._num_envs, grid_sz, grid_sz), + dtype=self._dtype, + device=self._device) + + # joints v (measured, default) + self._jnts_v[robot_name] = self._robots_art_views[robot_name].get_joint_velocities( + clone = True) # joint velocities + self._jnts_v_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + + # joints efforts (measured, default) + self._jnts_eff[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + self._jnts_eff_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + self._root_pos_offsets[robot_name] = torch.zeros((self._num_envs, 3), + device=self._device) # reference position offses + + self._root_q_offsets[robot_name] = torch.zeros((self._num_envs, 4), + device=self._device) + self._root_q_offsets[robot_name][:, 0] = 1.0 # init to 
valid identity quaternion + + # boolean active flag per env + self._pert_active[robot_name] = torch.zeros((self._num_envs,), dtype=torch.bool, device=self._device) + # remaining steps as integer tensor + self._pert_steps_remaining[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device) + # world force & torque (N and N*m) stored as floats + self._pert_forces_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + self._pert_torques_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + # local frame copies (if you want to store local-frame versions) + self._pert_force_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + self._pert_torque_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + + self._pert_lindir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + self._pert_angdir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + + self._pert_durations[robot_name] = torch.zeros((self._num_envs, 1), dtype=torch.int32, device=self._device) + + self._pert_scratch[robot_name] = torch.zeros((self._num_envs, 1), dtype=self._dtype, device=self._device) + self._pert_det_counter[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device) + + self._masses[robot_name] = torch.sum(self._robots_art_views[robot_name].get_body_masses(clone=True), dim=1).to(dtype=self._dtype, device=self._device) + + self._weights[robot_name] = (self._masses[robot_name] * abs(self._env_opts["gravity"][2].item())).reshape((self._num_envs, 1)) + + def _track_terrain_hits(self, robot_name: str, env_indxs: torch.Tensor = None): + """Track transitions into the terrain boundary margin (1 m) and count hits per env.""" + if self._env_opts["use_flat_ground"]: + return + border = float(self._env_opts["terrain_border"]) + 
threshold = max(0.0, border - self._terrain_hit_margin) + state = self._terrain_hit_active[robot_name] + counts = self._terrain_hit_counts[robot_name] + if env_indxs is None: + pos_xy = self._root_p[robot_name][:, 0:2] + hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1) + new_hits = (~state) & hitting + if new_hits.any(): + counts[new_hits] += 1 + state.copy_(hitting) + else: + pos_xy = self._root_p[robot_name][env_indxs, 0:2] + hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1) + prev_state = state[env_indxs] + new_hits = (~prev_state) & hitting + if new_hits.any(): + counts[env_indxs[new_hits]] += 1 + state[env_indxs] = hitting + + def _maybe_log_terrain_hits(self): + """Log boundary hits at low frequency only when counters change.""" + if self._env_opts["use_flat_ground"]: + return + period = int(self._env_opts["terrain_hit_log_period"]) + if period <= 0 or (self.step_counter % period != 0): + return + for robot_name in self._robot_names: + active = self._terrain_hit_active.get(robot_name, None) + if active is None: + continue + active_now = int(active.sum().item()) + if active_now == 0: + continue + counts = self._terrain_hit_counts[robot_name] + last = self._terrain_hit_counts_last_logged.get(robot_name, None) + if last is not None and torch.equal(counts, last): + continue + total_hits = int(counts.sum().item()) + msg = f"{active_now} {robot_name} robots within {self._terrain_hit_margin}m of terrain border. Total hits: {total_hits}." 
+ Journal.log(self.__class__.__name__, + "_terrain_hits", + msg, + LogType.WARN, + throw_when_excep = True) + self._terrain_hit_counts_last_logged[robot_name] = counts.clone() + + @override + def _post_world_step(self) -> bool: + res = super()._post_world_step() + self._maybe_log_terrain_hits() + return res + + @override + def _post_world_step_db(self) -> bool: + res = super()._post_world_step_db() + self._maybe_log_terrain_hits() + return res + + def current_tstep(self): + self._world.current_time_step_index + + def world_time(self, robot_name: str): + return self._world.current_time + + def physics_dt(self): + return self._world.get_physics_dt() + + def rendering_dt(self): + return self._env_opts["rendering_dt"] + + def set_physics_dt(self, physics_dt:float): + self._world.set_simulation_dt(physics_dt=physics_dt,rendering_dt=None) + + def set_rendering_dt(self, rendering_dt:float): + self._world.set_simulation_dt(physics_dt=None,rendering_dt=rendering_dt) + + def _robot_jnt_names(self, robot_name: str): + return self._robots_art_views[robot_name].dof_names diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/jnt_imp_config.yaml b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/jnt_imp_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88357f457e8a8de67d698e27491ce07fd2bcafa0 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/jnt_imp_config.yaml @@ -0,0 +1,46 @@ +XBotInterface: # just used for retrieving homing in sim with xbot_mujoco + urdf_path: $PWD/xmj_env_files/centauro.urdf + srdf_path: $PWD/xmj_env_files/centauro.srdf + +# defaults +motor_pd: + "j_arm*_1": [500, 10] + "j_arm*_2": [500, 10] + "j_arm*_3": [500, 10] + "j_arm*_4": [500, 10] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [3000, 30] + "hip_pitch_*": [3000, 30] + "knee_pitch_*": 
[3000, 30] + "ankle_pitch_*": [1000, 10] + "ankle_yaw_*": [300, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] + +startup_motor_pd: # iannis combo + "j_arm*_1": [500, 30] + "j_arm*_2": [500, 30] + "j_arm*_3": [500, 30] + "j_arm*_4": [500, 30] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [200, 60] + "hip_pitch_*": [200, 60] + "knee_pitch_*": [200, 60] + "ankle_pitch_*": [200, 60] + "ankle_yaw_*": [120, 30] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/jnt_imp_config_with_ub.yaml b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/jnt_imp_config_with_ub.yaml new file mode 100644 index 0000000000000000000000000000000000000000..6c15761ef005e69087b96c5355fc10028c6088c4 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/jnt_imp_config_with_ub.yaml @@ -0,0 +1,46 @@ +XBotInterface: # just used for retrieving homing in sim with xbot_mujoco + urdf_path: $PWD/xmj_env_files/centauro.urdf + srdf_path: $PWD/xmj_env_files/centauro.srdf + +# defaults +motor_pd: + "j_arm*_1": [500, 10] + "j_arm*_2": [500, 10] + "j_arm*_3": [500, 10] + "j_arm*_4": [500, 10] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [3000, 30] + "hip_pitch_*": [3000, 30] + "knee_pitch_*": [3000, 30] + "ankle_pitch_*": [1000, 10] + "ankle_yaw_*": [300, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] + +startup_motor_pd: # iannis combo + "j_arm*_1": [100, 25] + "j_arm*_2": [100, 25] + 
"j_arm*_3": [100, 25] + "j_arm*_4": [100, 25] + "j_arm*_5": [20, 8] + "j_arm*_6": [20, 8] + "j_arm*_7": [20, 8] + "hip_yaw_*": [200, 60] + "hip_pitch_*": [200, 60] + "knee_pitch_*": [200, 60] + "ankle_pitch_*": [200, 60] + "ankle_yaw_*": [120, 30] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [400, 60] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/launch_control_cluster.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/launch_control_cluster.py new file mode 100644 index 0000000000000000000000000000000000000000..9c484f45d52cd3cff4da3401453a30258d1e53ab --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/launch_control_cluster.py @@ -0,0 +1,106 @@ +import os +import argparse +import multiprocessing as mp +import importlib.util +import inspect + +from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict + +from EigenIPC.PyEigenIPC import Journal, LogType + +this_script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0] + +# Function to dynamically import a module from a specific file path +def import_env_module(env_path): + spec = importlib.util.spec_from_file_location("env_module", env_path) + env_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(env_module) + return env_module + +if __name__ == "__main__": + + # Parse command line arguments for CPU affinity + parser = argparse.ArgumentParser(description="Set CPU affinity for the script.") + parser.add_argument('--ns', type=str, help='Namespace to be used for cluster shared memory') + parser.add_argument('--urdf_path', type=str, help='Robot description package path for URDF ') + parser.add_argument('--srdf_path', type=str, help='Robot description package path for SRDF ') + 
parser.add_argument('--size', type=int, help='cluster size', default=1) + + # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8 + parser.add_argument('--cloop',action='store_true', help='whether to use RHC controllers in closed loop mode') + parser.add_argument('--cluster_dt', type=float, default=0.05, help='dt used by MPC controllers for discretization') + parser.add_argument('--n_nodes', type=int, default=31, help='n nodes used by MPC controllers') + + parser.add_argument('--verbose',action='store_true', help='run in verbose mode') + + parser.add_argument('--enable_debug',action='store_true', help='enable debug mode for cluster client and all controllers') + + parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data") + + parser.add_argument('--no_mp_fork',action='store_true', help='whether to multiprocess with forkserver context') + + parser.add_argument('--set_affinity',action='store_true', help='set affinity to a core for each controller') + + parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="") + parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000) + parser.add_argument('--codegen_override_dir', type=str, help='Path to base dir where codegen is to be loaded', default="") + + parser.add_argument('--custom_args_names', nargs='+', default=None, + help='list of custom arguments names') + parser.add_argument('--custom_args_vals', nargs='+', default=None, + help='list of custom arguments values') + parser.add_argument('--custom_args_dtype', nargs='+', default=None, + help='list of custom arguments data types') + + parser.add_argument('--cluster_client_fname', type=str, + default="aug_mpc.controllers.rhc.hybrid_quad_client", + help="cluster client file import pattern (without extension)") + + args = parser.parse_args() + + # Ensure 
custom_args_names and custom_args_vals have the same length + custom_opts = generate_custom_arg_dict(args=args) + custom_opts.update({"cloop": args.cloop, + "cluster_dt": args.cluster_dt, + "n_nodes": args.n_nodes, + "codegen_override_dir": args.codegen_override_dir}) + if args.no_mp_fork: # this needs to be in the main + mp.set_start_method('spawn') + else: + # mp.set_start_method('forkserver') + mp.set_start_method('fork') + + cluster_module=importlib.import_module(args.cluster_client_fname) + # Get all classes defined in the module + classes_in_module = [name for name, obj in inspect.getmembers(cluster_module, inspect.isclass) + if obj.__module__ == cluster_module.__name__] + if len(classes_in_module) == 1: + cluster_classname=classes_in_module[0] + ClusterClient = getattr(cluster_module, cluster_classname) + cluster_client = ClusterClient(namespace=args.ns, + cluster_size=args.size, + urdf_xacro_path=args.urdf_path, + srdf_xacro_path=args.srdf_path, + open_loop=not args.cloop, + use_mp_fork = not args.no_mp_fork, + verbose=args.verbose, + debug=args.enable_debug, + base_dump_dir=args.dmpdir, + timeout_ms=args.timeout_ms, + custom_opts=custom_opts, + codegen_override=args.codegen_override_dir, + set_affinity=args.set_affinity) + cluster_client.run() + + else: + class_list_str = ", ".join(classes_in_module) + Journal.log("launch_control_cluster.py", + "", + f"Found more than one class in cluster client file {args.cluster_client_fname}. 
Found: {class_list_str}", + LogType.EXCEP, + throw_when_excep = False) + exit() + + # control_cluster_client = + # control_cluster_client.run() # spawns the controllers on separate processes (blocking) + diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/launch_train_env.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/launch_train_env.py new file mode 100644 index 0000000000000000000000000000000000000000..1864f2e4f20d85297c2b5568666ea16d9bd8f644 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/launch_train_env.py @@ -0,0 +1,358 @@ +from aug_mpc.utils.determinism import deterministic_run + +from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo +from mpc_hive.utilities.shared_data.cluster_data import SharedClusterInfo + +from EigenIPC.PyEigenIPC import VLevel, Journal, LogType +from EigenIPC.PyEigenIPC import StringTensorServer + +import os, argparse, sys, types, inspect + +from perf_sleep.pyperfsleep import PerfSleep + +import importlib.util +import torch +import signal + +algo = None # global to make it accessible by signal handler +exit_request=False +dummy_step_exit_req=False + +def handle_sigint(signum, frame): + global exit_request, dummy_step_exit_req + Journal.log("launch_train_env.py", + "", + f"Received sigint. 
Will stop training.", + LogType.WARN) + exit_request=True + dummy_step_exit_req=True # in case dummy_step_loop was used + +# Function to dynamically import a module from a specific file path +# def import_env_module(env_path): +# spec = importlib.util.spec_from_file_location("env_module", env_path) +# env_module = importlib.util.module_from_spec(spec) +# spec.loader.exec_module(env_module) +# return env_module + +def import_env_module(env_path, local_env_root: str = None): + """ + env_path: full path to the child env .py file to exec + local_env_root: directory where local copies of aug_mpc_envs.training_envs modules live + """ + if local_env_root is not None: + local_env_root = os.path.abspath(local_env_root) + # override aug_mpc_envs.training_envs package to point to the local_env_root + pkg_name = "aug_mpc_envs.training_envs" + if pkg_name not in sys.modules: + mod = types.ModuleType(pkg_name) + mod.__path__ = [local_env_root] # tell Python to look here first + sys.modules[pkg_name] = mod + else: + existing = getattr(sys.modules[pkg_name], "__path__", None) + if existing is None: + sys.modules[pkg_name].__path__ = [local_env_root] + elif local_env_root not in existing: + existing.insert(0, local_env_root) + + # load the module as usual + spec = importlib.util.spec_from_file_location("env_module", env_path) + env_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(env_module) + return env_module + +def log_env_hierarchy(env_class, env_path, env_type="training"): + """ + Logs the env class, its file, and full inheritance hierarchy with file paths. 
+ env_class: the child env class + env_path: file path where the child class was loaded from + env_type: string label, e.g., "training", "evaluation", "resumed_training" + """ + def get_bases_recursive(cls): + """Recursively get all base classes with their file paths.""" + info = [] + for base in cls.__bases__: + try: + file = inspect.getfile(base) + except TypeError: + file = "built-in or unknown" + info.append(f"{base.__name__} (from {file})") + # Recurse unless it's object + if base is not object: + info.extend(get_bases_recursive(base)) + return info + + hierarchy_info = get_bases_recursive(env_class) + hierarchy_str = " -> ".join(hierarchy_info) if hierarchy_info else "No parents" + + Journal.log( + "launch_train_env.py", + "", + f"loading {env_type} env {env_class.__name__} (from {env_path}) " + f"with hierarchy: {hierarchy_str}", + LogType.INFO, + throw_when_excep=True + ) + +def dummy_step_loop(env): + global dummy_step_exit_req + while True: + if dummy_step_exit_req: + return True + step_ok=env.step(action=env.safe_action) # not a busy loop because of MPC in the step + if not step_ok: + return False + +if __name__ == "__main__": + + signal.signal(signal.SIGINT, handle_sigint) + + # Parse command line arguments for CPU affinity + parser = argparse.ArgumentParser(description="Set CPU affinity for the script.") + + parser.add_argument('--run_name', type=str, default=None, help='Name of training run') + parser.add_argument('--ns', type=str, help='Namespace to be used for shared memory') + parser.add_argument('--timeout_ms', type=int, help='Connection timeout after which the script self-terminates', default=60000) + parser.add_argument('--drop_dir', type=str, help='Directory root where all run data will be dumped') + parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="") + parser.add_argument('--seed', type=int, help='Seed', default=1) + parser.add_argument('--use_cpu',action='store_true', help='If set, all 
the training (data included) will be performed on CPU')
+
+    parser.add_argument('--db',action='store_true', help='Whether to enable local data logging for the algorithm (reward metrics, etc.)')
+    parser.add_argument('--env_db',action='store_true', help='Whether to enable env db data logging on shared mem (e.g. reward metrics are not available for reading anymore)')
+    parser.add_argument('--full_env_db',action='store_true', help='Whether to enable detailed episodic data storage (data over single transitions)')
+    parser.add_argument('--rmdb',action='store_true', help='Whether to enable remote debug (e.g. data logging on remote servers)')
+
+    parser.add_argument('--tot_tsteps', type=int, help='Total number of timesteps to be collected', default=int(30e6))
+    parser.add_argument('--action_repeat', type=int, help='Frame skipping (1-> no skip)', default=1)
+    parser.add_argument('--discount_factor', type=float, help='', default=0.99)
+    parser.add_argument('--obs_norm',action='store_true', help='Whether to enable the use of running normalizer in agent')
+    parser.add_argument('--obs_rescale',action='store_true', help='Whether to rescale observation depending on their expected range')
+    parser.add_argument('--add_weight_norm',action='store_true', help='Whether to add weight normalization to agent internal layers')
+    parser.add_argument('--add_layer_norm',action='store_true', help='Whether to add layer normalization to agent internal layers')
+    parser.add_argument('--add_batch_norm',action='store_true', help='Whether to add batch normalization to agent internal layers')
+
+    parser.add_argument('--act_rescale_critic',action='store_true', help='Whether to rescale actions provided to critic (if SAC) to be in range [-1, 1]')
+    parser.add_argument('--use_period_resets',action='store_true', help='')
+
+    parser.add_argument('--sac',action='store_true', help='Use SAC, otherwise PPO, unless dummy is set')
+    parser.add_argument('--dummy',action='store_true', help='Use dummy agent 
(useful for testing and debugging environments)')
+
+    parser.add_argument('--dump_checkpoints',action='store_true', help='Whether to dump model checkpoints during training')
+
+    parser.add_argument('--demo_envs_perc', type=float, help='[0, 1]', default=0.0)
+    parser.add_argument('--demo_stop_thresh', type=float, default=None,
+        help='Performance threshold above which demonstration envs should be deactivated.')
+
+    parser.add_argument('--expl_envs_perc', type=float, help='[0, 1]', default=0)
+
+    parser.add_argument('--use_rnd',action='store_true', help='Whether to use RND for exploration')
+
+    parser.add_argument('--eval',action='store_true', help='Whether to perform an evaluation run')
+    parser.add_argument('--n_eval_timesteps', type=int, help='Total number of timesteps to be evaluated', default=int(1e6))
+    parser.add_argument('--det_eval',action='store_true', help='Whether to perform a deterministic eval (only action mean is used). Only valid if --eval.')
+    parser.add_argument('--allow_expl_during_eval',action='store_true', help='Whether to allow expl envs during evaluation (useful to tune exploration)')
+
+    parser.add_argument('--resume',action='store_true', help='Resume a previous training using a checkpoint')
+    parser.add_argument('--mpath', type=str, help='Model path to be used for policy evaluation', default=None)
+    parser.add_argument('--mname', type=str, help='Model name', default=None)
+    parser.add_argument('--override_env',action='store_true', help='Whether to override env when running evaluation')
+
+    parser.add_argument('--anomaly_detect',action='store_true', help='Whether to enable anomaly detection (useful for debug)')
+
+    parser.add_argument('--compression_ratio', type=float,
+        help='If e.g. 
0.8, the first layer will be of dimension [input_features_size x (input_features_size*compression_ratio)]', default=-1.0)
+    parser.add_argument('--actor_lwidth', type=int, help='Actor network layer width', default=128)
+    parser.add_argument('--critic_lwidth', type=int, help='Critic network layer width', default=256)
+    parser.add_argument('--actor_n_hlayers', type=int, help='Actor network size', default=3)
+    parser.add_argument('--critic_n_hlayers', type=int, help='Critic network size', default=4)
+
+    parser.add_argument('--env_fname', type=str, default="twist_tracking_env", help='Training env file name (without extension)')
+    parser.add_argument('--env_classname', type=str, default="TwistTrackingEnv", help='Training env class name')
+    parser.add_argument('--override_agent_actions',action='store_true', help='Whether to override agent actions with custom ones from shared mem (useful for db)')
+    parser.add_argument('--override_agent_refs',action='store_true', help='Whether to override automatically generated agent refs (useful for debug)')
+
+    parser.add_argument('--step_while_setup',action='store_true', help='Continue stepping env with default actions while setting up agent, etc..')
+    parser.add_argument('--reset_on_init',action='store_true', help='Whether to reset the environment on initialization')
+
+    args = parser.parse_args()
+    args_dict = vars(args)
+
+    if args.eval and args.resume:
+        Journal.log("launch_train_env.py",
+            "",
+            f"Cannot set both --eval and --resume flags. 
Exiting.", + LogType.EXCEP, + throw_when_excep = True) + + deterministic_run(seed=args.seed, torch_det_algos=False) + + anomaly_detect=False + if args.anomaly_detect: + torch.autograd.set_detect_anomaly(True) + + if (not args.mpath is None) and (not args.mname is None): + mpath_full = os.path.join(args.mpath, args.mname) + else: + mpath_full=None + + env_fname=args.env_fname + env_classname = args.env_classname + env_path="" + env_module=None + if (not args.eval and not args.resume) or (args.override_env): + # if starting a fresh traning or overriding env, load from a fresh env from aug_mpc + env_path = f"aug_mpc_envs.training_envs.{env_fname}" + env_module = importlib.import_module(env_path) + else: + if args.mpath is None: + Journal.log("launch_train_env.py", + "", + f"no mpath provided! Cannot load env. Either provide a mpath or run with --override_env", + LogType.EXCEP, + throw_when_excep = True) + + env_path = os.path.join(args.mpath, env_fname + ".py") + env_module = import_env_module(env_path, local_env_root=args.mpath) + + EnvClass = getattr(env_module, env_classname) + env_type = "training" if not args.eval else "evaluation" + if args.resume: + env_type = "resumed_training" + log_env_hierarchy(EnvClass, env_path, env_type) # db print of env class + + env = EnvClass(namespace=args.ns, + verbose=True, + vlevel=VLevel.V2, + use_gpu=not args.use_cpu, + debug=args.env_db, + override_agent_refs=args.override_agent_refs, + timeout_ms=args.timeout_ms, + env_opts=args_dict) + if not env.is_ready(): # something went wrong + exit() + + dummy_step_thread = None + if args.step_while_setup: + import threading + # spawn step thread (we don't true parallelization, thread is fine) + # start the dummy stepping in a separate thread so setup can continue concurrently + dummy_step_thread = threading.Thread(target=dummy_step_loop, args=(env,), daemon=True) + dummy_step_thread.start() + + # getting some sim info for debugging + sim_data = {} + sim_info_shared = 
SharedEnvInfo(namespace=args.ns, + is_server=False, + safe=False) + sim_info_shared.run() + sim_info_keys = sim_info_shared.param_keys + sim_info_data = sim_info_shared.get().flatten() + for i in range(len(sim_info_keys)): + sim_data[sim_info_keys[i]] = sim_info_data[i] + + # getting come cluster info for debugging + cluster_data={} + cluste_info_shared = SharedClusterInfo(namespace=args.ns, + is_server=False, + safe=False) + cluste_info_shared.run() + cluster_info_keys = cluste_info_shared.param_keys + cluster_info_data = cluste_info_shared.get().flatten() + for i in range(len(cluster_info_keys)): + cluster_data[cluster_info_keys[i]] = cluster_info_data[i] + + custom_args={} + custom_args["uname_host"]="user_host" + try: + username = os.getlogin() # add machine info to db data + hostname = os.uname().nodename + user_host = f"{username}@{hostname}" + custom_args["uname_host"]=user_host + except: + pass + + algo=None + if not args.dummy: + if args.sac: + from aug_mpc.training_algs.sac.sac import SAC + + algo = SAC(env=env, + debug=args.db, + remote_db=args.rmdb, + seed=args.seed) + else: + from aug_mpc.training_algs.ppo.ppo import PPO + + algo = PPO(env=env, + debug=args.db, + remote_db=args.rmdb, + seed=args.seed) + else: + from aug_mpc.training_algs.dummy.dummy import Dummy + + algo=Dummy(env=env, + debug=args.db, + remote_db=args.rmdb, + seed=args.seed) + + custom_args.update(args_dict) + custom_args.update(cluster_data) + custom_args.update(sim_data) + + run_name=env_classname if args.run_name is None else args.run_name + algo.setup(run_name=run_name, + ns=args.ns, + verbose=True, + drop_dir_name=args.drop_dir, + custom_args=custom_args, + comment=args.comment, + eval=args.eval, + resume=args.resume, + model_path=mpath_full, + n_eval_timesteps=args.n_eval_timesteps, + dump_checkpoints=args.dump_checkpoints, + norm_obs=args.obs_norm, + rescale_obs=args.obs_rescale) + + full_drop_dir=algo.drop_dir() + shared_drop_dir = StringTensorServer(length=1, + 
basename="SharedTrainingDropDir", + name_space=args.ns, + verbose=True, + vlevel=VLevel.V2, + force_reconnection=True) + shared_drop_dir.run() + + while True: + if not shared_drop_dir.write_vec([full_drop_dir], 0): + ns=1000000000 + PerfSleep.thread_sleep(ns) + continue + else: + break + + if args.step_while_setup: + # stop dummy step thread and give algo authority on step + dummy_step_exit_req=True + # wait for thread to join + if dummy_step_thread is not None: + dummy_step_thread.join() + Journal.log("launch_train_env.py", + "", + f"Dummy env step thread joined. Moving step authority to algo.", + LogType.INFO) + + eval=args.eval + if args.override_agent_actions: + eval=True + if not eval: + while not exit_request: + if not algo.learn(): + break + else: # eval phase + with torch.no_grad(): # no need for grad computation + while not exit_request: + if not algo.eval(): + break + + algo.done() # make sure to terminate training properly diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/launch_world_interface.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/launch_world_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..1836cedcdd2191f40b35db291e86f4ff4a7b8295 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/launch_world_interface.py @@ -0,0 +1,207 @@ +import os +import argparse +import importlib.util +import inspect + +from aug_mpc.utils.rt_factor import RtFactor +from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict +from aug_mpc.utils.determinism import deterministic_run + +from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0] + +# Function to dynamically import a module from a 
specific file path +def import_world_module(env_path): + spec = importlib.util.spec_from_file_location("world_module", env_path) + world_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(world_module) + return world_module + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description="Sim. env launcher") + # Add arguments + parser.add_argument('--robot_name', type=str, help='Alias to be used for the robot and also shared memory') + parser.add_argument('--urdf_path', type=str, help='path to the URDF file description for each robot') + parser.add_argument('--srdf_path', type=str, help='path to the SRDF file description for each robot (used for homing)') + parser.add_argument('--jnt_imp_config_path', type=str, help='path to a valid YAML file containing information on jnt impedance gains') + parser.add_argument('--num_envs', type=int, default=1) + parser.add_argument('--n_contacts', type=int, default=4) + parser.add_argument('--cluster_dt', type=float, default=0.03, help='dt at which the control cluster runs') + parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data") + parser.add_argument('--remote_stepping',action='store_true', + help='Whether to use remote stepping for cluster triggering (to be set during training)') + + # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8 + parser.add_argument('--use_gpu',action='store_true', help='Whether to use gpu simulation') + + parser.add_argument('--enable_debug',action='store_true', help='Whether to enable debug mode (may introduce significant overhead)') + + parser.add_argument('--headless',action='store_true', help='Whether to run simulation in headless mode') + + parser.add_argument('--verbose',action='store_true', help='Enable verbose mode') + + parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="") + 
parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000) + parser.add_argument('--physics_dt', type=float, default=5e-4, help='') + + parser.add_argument('--use_custom_jnt_imp',action='store_true', + help='Whether to override the default PD controller with a custom one') + + parser.add_argument('--diff_vels',action='store_true', + help='Whether to obtain velocities by differentiation or not') + + parser.add_argument('--init_timesteps', type=int, help='initialization timesteps', default=None) + parser.add_argument('--seed', type=int, help='seed', default=0) + + parser.add_argument('--custom_args_names', nargs='+', default=None, + help='list of custom arguments names') + parser.add_argument('--custom_args_vals', nargs='+', default=None, + help='list of custom arguments values') + parser.add_argument('--custom_args_dtype', nargs='+', default=None, + help='list of custom arguments data types') + + parser.add_argument('--world_iface_fname', type=str, + default="aug_mpc_envs.world_interfaces.isaac_world_interface", + help="world interface file import pattern (without extension)") + + args = parser.parse_args() + + deterministic_run(seed=args.seed, torch_det_algos=False) + + default_init_duration=3.0 # [s] + default_init_tsteps=int(default_init_duration/args.physics_dt) + init_tsteps=args.init_timesteps + if init_tsteps is None: + init_tsteps=default_init_tsteps + # Ensure custom_args_names, custom_args_vals, and custom_args_dtype have the same length + custom_opt = generate_custom_arg_dict(args=args) + + Journal.log("launch_world_interface.py", + "", + f"Will warmup world interface for {default_init_duration}s ({default_init_tsteps} physics steps)", + LogType.STAT) + + robot_names = [args.robot_name] + robot_urdf_paths = [args.urdf_path] + robot_srdf_paths = [args.srdf_path] + control_clust_dts = [float(args.cluster_dt)] + use_remote_stepping = [args.remote_stepping] + n_contacts = [args.n_contacts] + 
jnt_imp_config_paths = [args.jnt_imp_config_path] + num_envs = args.num_envs + control_clust_dt = args.cluster_dt # [s]. Dt at which RHC controllers run + headless = args.headless + + # simulation parameters + remote_env_params = {} + remote_env_params["physics_dt"] = args.physics_dt # physics_dt? + remote_env_params["n_envs"] = num_envs + remote_env_params["use_gpu"] = args.use_gpu + remote_env_params["substepping_dt"] = control_clust_dts[0] + remote_env_params["headless"] = headless + remote_env_params["debug_enabled"] = args.enable_debug + remote_env_params["seed"] = args.seed + remote_env_params.update(custom_opt) + # sim info to be broadcasted on shared memory + # adding some data to dict for debugging + + shared_sim_infos = [] + for i in range(len(robot_names)): + shared_sim_infos.append(SharedEnvInfo( + namespace=robot_names[i], + is_server=True, + env_params_dict=remote_env_params, + verbose=True, + vlevel=VLevel.V2, + force_reconnection=True)) + shared_sim_infos[i].run() + + world_module=importlib.import_module(args.world_iface_fname) + classes_in_module = [name for name, obj in inspect.getmembers(world_module, inspect.isclass) + if obj.__module__ == world_module.__name__] + if len(classes_in_module) == 1: + cluster_classname=classes_in_module[0] + WorldInterface = getattr(world_module, cluster_classname) + else: + class_list_str = ", ".join(classes_in_module) + Journal.log("launch_world_interface.py", + "", + f"Found more than one class in world file {args.world_iface_fname}. 
Found: {class_list_str}", + LogType.EXCEP, + throw_when_excep = False) + exit() + + world_interface = WorldInterface(robot_names=robot_names, + robot_urdf_paths=robot_urdf_paths, + robot_srdf_paths=robot_srdf_paths, + cluster_dt=control_clust_dts, + jnt_imp_config_paths=jnt_imp_config_paths, + n_contacts=n_contacts, + use_remote_stepping=use_remote_stepping, + name=classes_in_module[0], + num_envs=num_envs, + debug=args.enable_debug, + verbose=args.verbose, + vlevel=VLevel.V2, + n_init_step=init_tsteps, + timeout_ms=args.timeout_ms, + env_opts=remote_env_params, + use_gpu=args.use_gpu, + override_low_lev_controller=args.use_custom_jnt_imp) # create environment + # reset_ok=world_interface.reset(reset_sim=True) + # if not reset_ok: + # world_interface.close() + # exit() + + rt_factor = RtFactor(dt_nom=world_interface.physics_dt(), + window_size=100) + + while True: + + if rt_factor.reset_due(): + rt_factor.reset() + + step_ok=world_interface.step() + + if not step_ok: + break + + rt_factor.update() + + for i in range(len(robot_names)): + robot_name=robot_names[i] + n_steps = world_interface.cluster_sim_step_counters[robot_name] + sol_counter = world_interface.cluster_servers[robot_name].solution_counter() + trigger_counter = world_interface.cluster_servers[robot_name].trigger_counter() + shared_sim_infos[i].write(dyn_info_name=["sim_rt_factor", + "total_rt_factor", + "env_stepping_dt", + "world_stepping_dt", + "time_to_get_states_from_env", + "cluster_state_update_dt", + "cluster_sol_time", + "n_sim_steps", + "n_cluster_trigger_steps", + "n_cluster_sol_steps", + "sim_time", + "cluster_time"], + val=[rt_factor.get(), + rt_factor.get() * num_envs, + rt_factor.get_avrg_step_time(), + world_interface.debug_data["time_to_step_world"], + world_interface.debug_data["time_to_get_states_from_env"], + world_interface.debug_data["cluster_state_update_dt"][robot_name], + world_interface.debug_data["cluster_sol_time"][robot_name], + n_steps, + trigger_counter, + sol_counter, + 
world_interface.debug_data["sim_time"][robot_name], + sol_counter*world_interface.cluster_servers[robot_name].cluster_dt() + ]) + + world_interface.close() diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/__init__.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/augmpc_cluster_client.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/augmpc_cluster_client.py new file mode 100644 index 0000000000000000000000000000000000000000..4d88c7b914b4bb37297e0371dfd1acbd7eafee04 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/augmpc_cluster_client.py @@ -0,0 +1,123 @@ +from mpc_hive.cluster_client.control_cluster_client import ControlClusterClient +from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf +from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds +from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds + +from EigenIPC.PyEigenIPC import Journal, LogType + +from typing import List, Dict + +import os + +from abc import abstractmethod + +class AugMpcClusterClient(ControlClusterClient): + + def _import_aux_libs(self): + super()._import_aux_libs() + + def __init__(self, + namespace: str, + urdf_xacro_path: str, + srdf_xacro_path: str, + cluster_size: int, + set_affinity: bool = False, + use_mp_fork: bool = False, + isolated_cores_only: bool = False, + core_ids_override_list: List[int] = None, + verbose: bool = False, + debug: bool = False, + codegen_base_dirname: str = "CodeGen", + base_dump_dir: str = "/tmp", + codegen_override: str = None, + custom_opts: Dict = {}): 
+ + self._base_dump_dir = base_dump_dir + + self._temp_path = base_dump_dir + "/" + f"{self.__class__.__name__}" + f"_{namespace}" + + self._codegen_base_dirname = codegen_base_dirname + self._codegen_basedir = self._temp_path + "/" + self._codegen_base_dirname + + self._codegen_override = codegen_override # can be used to manually override + # the default codegen dir + + if not os.path.exists(self._temp_path): + os.makedirs(self._temp_path) + if not os.path.exists(self._codegen_basedir): + os.makedirs(self._codegen_basedir) + + self._urdf_xacro_path = urdf_xacro_path + self._srdf_xacro_path = srdf_xacro_path + self._urdf_path="" + self._srdf_path="" + + super().__init__(namespace = namespace, + cluster_size=cluster_size, + isolated_cores_only = isolated_cores_only, + set_affinity = set_affinity, + use_mp_fork = use_mp_fork, + core_ids_override_list = core_ids_override_list, + verbose = verbose, + debug = debug, + custom_opts=custom_opts) + self._generate_srdf(namespace=namespace) + + self._generate_urdf(namespace=namespace) + + + def codegen_dir(self): + + return self._codegen_basedir + + def codegen_dir_override(self): + + return self._codegen_override + + def _generate_srdf(self,namespace:str): + + custom_xacro_args=extract_custom_xacro_args(self._custom_opts) + cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(), + new_cmds=custom_xacro_args) + + # call _xrdf_cmds_override in case some cmds need to be overridden + override_cmds=self._xrdf_cmds_override() + cmds=merge_xacro_cmds(prev_cmds=cmds, + new_cmds=override_cmds) + + self._srdf_path=generate_srdf(robot_name=namespace, + xacro_path=self._srdf_xacro_path, + dump_path=self._temp_path, + xrdf_cmds=cmds) + + def _generate_urdf(self,namespace:str): + + custom_xacro_args=extract_custom_xacro_args(self._custom_opts) + cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(), + new_cmds=custom_xacro_args) + + # call _xrdf_cmds_override in case some cmds need to be overridden + override_cmds=self._xrdf_cmds_override() + 
cmds=merge_xacro_cmds(prev_cmds=cmds, + new_cmds=override_cmds) + + self._urdf_path=generate_urdf(robot_name=namespace, + xacro_path=self._urdf_xacro_path, + dump_path=self._temp_path, + xrdf_cmds=cmds) + + @abstractmethod + def _xrdf_cmds(self): + + # to be implemented by parent class ( + # for an example have a look at utils/centauro_xrdf_gen.py) + + pass + + def _xrdf_cmds_override(self): + + # to be overridden by parent class + + to_be_overridden = ["dummy_cmd:=true"] + + return to_be_overridden diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/augmpc_cluster_server.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/augmpc_cluster_server.py new file mode 100644 index 0000000000000000000000000000000000000000..ea5cf7226db1ae834b7bb599c33b426022fcc25d --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/augmpc_cluster_server.py @@ -0,0 +1,43 @@ +from mpc_hive.cluster_server.control_cluster_server import ControlClusterServer +from typing import List +from EigenIPC.PyEigenIPC import VLevel + +class AugMpcClusterServer(ControlClusterServer): + + def __init__(self, + robot_name: str, + cluster_size: int, + cluster_dt: float, + control_dt: float, + jnt_names: List[str], + n_contacts: int, + contact_linknames: List[str] = None, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + debug: bool = False, + use_gpu: bool = True, + force_reconnection: bool = True, + timeout_ms: int = 60000, + enable_height_sensor: bool = False, + height_grid_size: int = None, + height_grid_resolution: float = None): + + self.robot_name = robot_name + + super().__init__( + namespace=self.robot_name, + cluster_size=cluster_size, + cluster_dt=cluster_dt, + control_dt=control_dt, + jnt_names=jnt_names, + n_contacts = n_contacts, + contact_linknames = contact_linknames, + verbose=verbose, + vlevel=vlevel, + debug=debug, + 
use_gpu=use_gpu, + force_reconnection=force_reconnection, + timeout_ms=timeout_ms, + enable_height_sensor=enable_height_sensor, + height_grid_size=height_grid_size, + height_grid_resolution=height_grid_resolution) diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/__init__.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db15baee95a0dcc257d9c1ccb788a7aabdb6e456 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml @@ -0,0 +1,200 @@ +# A dummy example of a rhc controller configuration built on top of horizon +# and iLQR + +solver: + type: ilqr + ilqr.tol: 0.01 + ilqr.constr_viol_tol: 0.01 + ilqr.suppress_all_output: 'yes' + ilqr.codegen_enabled: true +# ilqr.codegen_workdir: /tmp/tyhio + ilqr.enable_gn: true + ilqr.hxx_reg_base: 0.0 + ilqr.n_threads: 0 + print_time: 0 + +constraints: + - ball_1_contact + - ball_2_contact + - ball_3_contact + - ball_4_contact + - z_ball_1 + - z_ball_2 + - z_ball_3 + - z_ball_4 + +costs: + - joint_regularization + - joint_posture + - base_position + - base_orientation + +.define: + - &w_base_pos 10 + - &w_base_ori 1 + - &w_ball_z 1 + # - &w_postural 0.0001 + - &w_jnt_v_reg 0.01 + - &w_jnt_a_reg 0.001 + - &w_jnt_f_reg 0.0001 + - &wheel_radius 0.124 + +base_position: + type: Cartesian + distal_link: base_link + 
indices: [0, 1, 2] + nodes: ${N} + weight: *w_base_pos + +base_orientation: + type: Cartesian + distal_link: base_link + indices: [3, 4, 5] + nodes: ${N} + weight: *w_base_ori + +# =============================== + +rolling_contact_1: + type: Rolling + frame: wheel_1 + radius: *wheel_radius + +rolling_contact_2: + type: Rolling + frame: wheel_2 + radius: *wheel_radius + +rolling_contact_3: + type: Rolling + frame: wheel_3 + radius: *wheel_radius + +rolling_contact_4: + type: Rolling + frame: wheel_4 + radius: *wheel_radius + +# contact_1: +# type: Cartesian +# distal_link: ball_1 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# contact_2: +# type: Cartesian +# distal_link: ball_2 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# contact_3: +# type: Cartesian +# distal_link: ball_3 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# contact_4: +# type: Cartesian +# distal_link: ball_4 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# ================================== + +interaction_wheel_1: + type: VertexForce + frame: ball_1 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_1 + +interaction_wheel_2: + type: VertexForce + frame: ball_2 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_2 + +interaction_wheel_3: + type: VertexForce + frame: ball_3 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_3 + +interaction_wheel_4: + type: VertexForce + frame: ball_4 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_4 + +ball_1_contact: + type: Contact + subtask: [interaction_wheel_1, rolling_contact_1] + +ball_2_contact: + type: Contact + subtask: [interaction_wheel_2, rolling_contact_2] + +ball_3_contact: + type: Contact + subtask: [interaction_wheel_3, rolling_contact_3] + +ball_4_contact: + type: Contact + subtask: [interaction_wheel_4, rolling_contact_4] + +# joint_posture: +# type: Postural +# weight: *w_postural +# indices: 
[0, 1, 2, +# 4, 5, 6, +# 8, 9, 10, +# 12, 13, 14] +# nodes: ${range(N-8, N)} + +# todo: wrong, as the order COUNTS. If I add the contacts after the joint regularization, they wont get considered. +joint_regularization: + type: Regularization + nodes: all # maybe not on first nodes?? + weight: + velocity: *w_jnt_v_reg + acceleration: *w_jnt_a_reg + force: *w_jnt_f_reg + +z_ball_1: + type: Cartesian + distal_link: ball_1 + indices: [2] + cartesian_type: position + weight: *w_ball_z + +z_ball_2: + type: Cartesian + distal_link: ball_2 + indices: [2] + cartesian_type: position + weight: *w_ball_z + +z_ball_3: + type: Cartesian + distal_link: ball_3 + indices: [2] + cartesian_type: position + weight: *w_ball_z + +z_ball_4: + type: Cartesian + distal_link: ball_4 + indices: [2] + cartesian_type: position + weight: *w_ball_z \ No newline at end of file diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/gait_manager.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/gait_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..448a35a2251bef0c51e573d6b32d93fcb2e7d9ce --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/gait_manager.py @@ -0,0 +1,566 @@ +import numpy as np + +from aug_mpc.controllers.rhc.horizon_based.horizon_imports import * + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +from typing import Dict + +class GaitManager: + + def __init__(self, + task_interface: TaskInterface, + phase_manager: pymanager.PhaseManager, + injection_node: int = None, + keep_yaw_vert: bool = False, + yaw_vertical_weight: float = None, + vertical_landing: bool = False, + landing_vert_weight: float = None, + phase_force_reg: float = None, + flight_duration: int = 15, + post_flight_stance: int = 3, + 
step_height: float = 0.1, + dh: float = 0.0, + custom_opts: Dict = {}): + + self._custom_opts=custom_opts + + self._is_open_loop=False + if "is_open_loop" in self._custom_opts: + self._is_open_loop=self._custom_opts["is_open_loop"] + + self.task_interface = task_interface + self._n_nodes_prb=self.task_interface.prb.getNNodes() + + self._phase_manager = phase_manager + self._model=self.task_interface.model + self._q0=self._model.q0 + self._kin_dyn=self.task_interface.model.kd + + # phase weights and regs + self._keep_yaw_vert=keep_yaw_vert + self._yaw_vertical_weight=yaw_vertical_weight + self._vertical_landing=vertical_landing + self._landing_vert_weight=landing_vert_weight + self._phase_force_reg=phase_force_reg + self._total_weight = np.atleast_2d(np.array([0, 0, self._kin_dyn.mass() * 9.81])).T + + self._f_reg_ref={} + + # flight parameters + self._post_flight_stance=post_flight_stance + self._flight_info_now=None + self._flight_duration_max=self._n_nodes_prb-(injection_node+1) + self._flight_duration_min=3 + self._flight_duration_default=flight_duration + # apex bounds/defaults + self._step_height_default=step_height + self._step_height_min=0.0 + self._step_height_max=0.5 + # end height bounds/defaults + self._dh_default=dh + self._dh_min=0.0 + self._dh_max=0.5 + # landing dx, dy bounds/defaults + self._land_dx_default=0.0 + self._land_dx_min=-0.5 + self._land_dx_max=0.5 + self._land_dy_default=0.0 + self._land_dy_min=-0.5 + self._land_dy_max=0.5 + + # timeline data + self._contact_timelines = dict() + self.timeline_names=[] + + self._flight_phases = {} + self._touchdown_phases = {} + self._contact_phases = {} + self._fk_contacts = {} + self._fkd_contacts = {} + self._f_reg_ref = {} + + # reference traj + self._tg = trajectoryGenerator.TrajectoryGenerator() + self._traj_der= [None, 0, 0] + self._traj_second_der=[None, 0, 0] + self._third_traj_der=[None, None, 0] + + self._ref_trjs = {} + self._ref_vtrjs = {} + + if injection_node is None: + self._injection_node 
= round(self.task_interface.prb.getNNodes()/2.0) + else: + self._injection_node = injection_node + + self._init_contact_timelines() + + self._reset_contact_timelines() + + def _init_contact_timelines(self): + + short_stance_duration=1 + flight_phase_short_duration=1 + + self.n_contacts=len(self._model.cmap.keys()) + self._dt=float(self.task_interface.prb.getDt()) + + self._name_to_idx_map={} + + j=0 + for contact in self._model.cmap.keys(): + + self._fk_contacts[contact]=self._kin_dyn.fk(contact) + self._fkd_contacts[contact]=self._kin_dyn.frameVelocity(contact, self._model.kd_frame) + self.timeline_names.append(contact) + self._contact_timelines[contact]=self._phase_manager.createTimeline(f'{contact}_timeline') + # stances + self._contact_phases[contact] = self._contact_timelines[contact].createPhase(short_stance_duration, + f'stance_{contact}_short') + + if self.task_interface.getTask(f'{contact}') is not None: + self._contact_phases[contact].addItem(self.task_interface.getTask(f'{contact}')) + else: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"contact task {contact} not found", + LogType.EXCEP, + throw_when_excep=True) + i=0 + self._f_reg_ref[contact]=[] + for force in self._model.cmap[contact]: + f_ref=self.task_interface.prb.createParameter(name=f"{contact}_force_reg_f{i}_ref", + dim=3) + force_reg=self.task_interface.prb.createResidual(f'{contact}_force_reg_f{i}', self._phase_force_reg * (force - f_ref), + nodes=[]) + self._f_reg_ref[contact].append(f_ref) + self.set_f_reg(contact_name=contact, scale=4) + self._contact_phases[contact].addCost(force_reg, nodes=list(range(0, short_stance_duration))) + i+=1 + + # flights + self._flight_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration, + f'flight_{contact}_short') + + # sanity checks (z trajectory) + self._zpos_task_found=True + self._zvel_task_found=True + self._xypos_task_found=True + self._xyvel_task_found=True + if 
self.task_interface.getTask(f'z_{contact}') is None: + self._zpos_task_found=False + if self.task_interface.getTask(f'vz_{contact}') is None: + self._zvel_task_found=False + if self.task_interface.getTask(f'xy_{contact}') is None: + self._xypos_task_found=False + if self.task_interface.getTask(f'vxy_{contact}') is None: + self._xyvel_task_found=False + if (not self._zpos_task_found) and (not self._zvel_task_found): + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"neither pos or vel task for contacts were found! Aborting.", + LogType.EXCEP, + throw_when_excep=True) + if (not self._zpos_task_found) and self._is_open_loop: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"Running in open loop, but no contact pos task found. Aborting.", + LogType.EXCEP, + throw_when_excep=True) + if self._zpos_task_found and self._xyvel_task_found: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.", + LogType.EXCEP, + throw_when_excep=True) + if self._zvel_task_found and self._xypos_task_found: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.", + LogType.EXCEP, + throw_when_excep=True) + if (not self._xypos_task_found) and (not self._xyvel_task_found): + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"neither pos or vel task for contact {contact} xy were found! Will proceed without xy landing constraints.", + LogType.WARN) + # if (not self._zvel_task_found) and (not self._is_open_loop): + # Journal.log(self.__class__.__name__, + # "_init_contact_timelines", + # f"Running in closed loop, but contact vel task not found. 
Aborting", + # LogType.EXCEP, + # throw_when_excep=True) + + self._ref_trjs[contact]=None + self._ref_vtrjs[contact]=None + self._touchdown_phases[contact]=None + + if self._zpos_task_found: # we use pos trajectory + self._ref_trjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()]) + init_z_foot = self._fk_contacts[contact](q=self._q0)['ee_pos'].elements()[2] + if self._is_open_loop: + self._ref_trjs[contact][2, :] = np.atleast_2d(init_z_foot) + else: + self._ref_trjs[contact][2, :] = 0.0 # place foot at ground level initially () + + # z + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'z_{contact}'), + self._ref_trjs[contact][2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + + if self._xypos_task_found: # xy, we add a landing phase of unit duration to enforce landing pos costs + + self._touchdown_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration, + f'touchdown_{contact}_short') + + self._touchdown_phases[contact].addItemReference(self.task_interface.getTask(f'xy_{contact}'), + self._ref_trjs[contact][0:2, 0:1], + nodes=list(range(0, short_stance_duration))) + + else: # foot traj in velocity + # ref vel traj + self._ref_vtrjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()]) # allocate traj + # of max length eual to number of nodes + self._ref_vtrjs[contact][2, :] = np.atleast_2d(0) + + # z + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vz_{contact}'), + self._ref_vtrjs[contact][2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + + if self._xyvel_task_found: # xy (when in vel the xy vel is set on the whole flight phase) + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vxy_{contact}'), + self._ref_vtrjs[contact][0:2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + + if self._vertical_landing: # add touchdown phase for vertical landing + 
self._touchdown_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration, + f'touchdown_{contact}_short') + + if self._vertical_landing and self._touchdown_phases[contact] is not None: + v_xy=self._fkd_contacts[contact](q=self._model.q, qdot=self._model.v)['ee_vel_linear'][0:2] + vertical_landing=self.task_interface.prb.createResidual(f'{contact}_only_vert_v', + self._landing_vert_weight * v_xy, + nodes=[]) + self._touchdown_phases[contact].addCost(vertical_landing, nodes=list(range(0, short_stance_duration))) + + if self._keep_yaw_vert: + # keep ankle vertical over the whole horizon (can be useful with wheeled robots) + c_ori = self._model.kd.fk(contact)(q=self._model.q)['ee_rot'][2, :] + cost_ori = self.task_interface.prb.createResidual(f'{contact}_ori', self._yaw_vertical_weight * (c_ori.T - np.array([0, 0, 1]))) + # flight_phase.addCost(cost_ori, nodes=list(range(0, flight_duration+post_landing_stance))) + + self._name_to_idx_map[contact]=j + + j+=1 + + # current pos [c0, c1, ....], current length, nominal length, nom. apex, no. 
landing height, landing dx, landing dy (local world aligned) + self._flight_info_now=np.zeros(shape=(7*self.n_contacts)) + + self.update() + + def _reset_contact_timelines(self): + + for contact in self._model.cmap.keys(): + + idx=self._name_to_idx_map[contact] + # we follow same order as in shm for more efficient writing + self._flight_info_now[idx]= -1.0 # pos [nodes] + self._flight_info_now[idx+1*self.n_contacts]= -1.0 # duration (remaining) [nodes] + self._flight_info_now[idx+2*self.n_contacts]=self._flight_duration_default # [nodes] + self._flight_info_now[idx+3*self.n_contacts]=self._step_height_default + self._flight_info_now[idx+4*self.n_contacts]=self._dh_default + self._flight_info_now[idx+5*self.n_contacts]=self._land_dx_default + self._flight_info_now[idx+6*self.n_contacts]=self._land_dy_default + # fill timeline with stances + contact_timeline=self._contact_timelines[contact] + contact_timeline.clear() # remove phases + short_stance_phase = contact_timeline.getRegisteredPhase(f'stance_{contact}_short') + while contact_timeline.getEmptyNodes() > 0: + contact_timeline.addPhase(short_stance_phase) + + self.update() + + def reset(self): + # self.phase_manager.clear() + self.task_interface.reset() + self._reset_contact_timelines() + + def set_f_reg(self, + contact_name, + scale: float = 4.0): + f_refs=self._f_reg_ref[contact_name] + for force in f_refs: + ref=self._total_weight/(scale*len(f_refs)) + force.assign(ref) + + def set_flight_duration(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts]=val + + def get_flight_duration(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts] + + def set_step_apexdh(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts]=val + + def get_step_apexdh(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts] 
+ + def set_step_enddh(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts]=val + + def get_step_enddh(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts] + + def get_step_landing_dx(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts] + + def set_step_landing_dx(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts]=val + + def get_step_landing_dy(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts] + + def set_step_landing_dy(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts]=val + + def add_stand(self, contact_name): + # always add stand at the end of the horizon + timeline = self._contact_timelines[contact_name] + if timeline.getEmptyNodes() > 0: + timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short')) + + def add_flight(self, contact_name, + robot_q: np.ndarray): + + timeline = self._contact_timelines[contact_name] + + flights_on_horizon=self._contact_timelines[contact_name].getPhaseIdx(self._flight_phases[contact_name]) + + last_flight_idx=self._injection_node-1 # default to make things work + if not len(flights_on_horizon)==0: # some flight phases are there + last_flight_idx=flights_on_horizon[-1]+self._post_flight_stance + + if last_flight_idx1: + Journal.log(self.__class__.__name__, + "add_flight", + f"Got flight duration {flight_duration_req} < 1!", + LogType.WARN, + throw_when_excep=True) + + # process requests to ensure flight params are valid + # duration + if flight_duration_reqself._flight_duration_max: + flight_duration_req=self._flight_duration_max + # apex height + if flight_apex_reqself._step_height_max: + flight_apex_req=self._step_height_max + # landing height + if 
flight_enddh_reqself._dh_max: + flight_enddh_req=self._dh_max + # landing dx + if flight_land_dx_reqself._land_dx_max: + flight_land_dx_req=self._land_dx_max + # landing dy + if flight_land_dy_reqself._land_dy_max: + flight_land_dy_req=self._land_dy_max + + land_dx_w = flight_land_dx_req + land_dy_w = flight_land_dy_req + if self._xypos_task_found or self._xyvel_task_found: + # landing dx/dy are specified in horizontal frame; rotate into world aligned frame + q_base = robot_q[3:7] + if q_base.ndim == 1: + q_base = q_base.reshape(-1, 1) + q_w = q_base[3, 0] + q_x = q_base[0, 0] + q_y = q_base[1, 0] + q_z = q_base[2, 0] + r11 = 1 - 2 * (q_y * q_y + q_z * q_z) + r21 = 2 * (q_x * q_y + q_z * q_w) + norm = np.hypot(r11, r21) + if norm > 0.0: + cos_yaw = r11 / norm + sin_yaw = r21 / norm + else: + cos_yaw = 1.0 + sin_yaw = 0. + + land_dx_w = flight_land_dx_req * cos_yaw - flight_land_dy_req * sin_yaw + land_dy_w = flight_land_dx_req * sin_yaw + flight_land_dy_req * cos_yaw + + if self._ref_vtrjs[contact_name] is not None and \ + self._ref_trjs[contact_name] is not None: # only allow one mode (pos/velocity traj) + Journal.log(self.__class__.__name__, + "add_flight", + f"Both pos and vel traj for contact {contact_name} are not None! 
This is not allowed, aborting.", + LogType.EXCEP, + throw_when_excep=True) + + # inject pos traj if pos mode + if self._ref_trjs[contact_name] is not None: + # recompute trajectory online (just needed if using pos traj) + foot_pos=self._fk_contacts[contact_name](q=robot_q)['ee_pos'].elements() + starting_pos=foot_pos[2] # compute foot traj (local world aligned) + starting_x_pos=foot_pos[0] + starting_y_pos=foot_pos[1] + # starting_pos=0.0 + self._ref_trjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.from_derivatives( + flight_duration_req, + p_start=starting_pos, + p_goal=starting_pos+flight_enddh_req, + clearance=flight_apex_req, + derivatives=self._traj_der, + second_der=self._traj_second_der, + third_der=self._third_traj_der) + ) + if self._xypos_task_found: # we use _ref_trjs to write xy pos references + self._ref_trjs[contact_name][0, -1]=starting_x_pos+land_dx_w + self._ref_trjs[contact_name][1, -1]=starting_y_pos+land_dy_w + + for i in range(flight_duration_req): + res, phase_token_flight=timeline.addPhase(self._flight_phases[contact_name], + pos=self._injection_node+i, + absolute_position=True) + phase_token_flight.setItemReference(f'z_{contact_name}', + self._ref_trjs[contact_name][:, i]) + + if self._touchdown_phases[contact_name] is not None: + # add touchdown phase after flight + res, phase_token_touchdown=timeline.addPhase(self._touchdown_phases[contact_name], + pos=self._injection_node+flight_duration_req, + absolute_position=True) + if self._xypos_task_found: + phase_token_touchdown.setItemReference(f'xy_{contact_name}', + self._ref_trjs[contact_name][:, -1]) + + # inject vel traj if vel mode + if self._ref_vtrjs[contact_name] is not None: + self._ref_vtrjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.derivative_of_trajectory( + flight_duration_req, + p_start=0.0, + p_goal=flight_enddh_req, + clearance=flight_apex_req, + derivatives=self._traj_der, + second_der=self._traj_second_der, + 
third_der=self._third_traj_der)) + if self._xyvel_task_found: # compute vel reference using problem dt and flight length + flight_duration_sec=float(flight_duration_req)*self._dt + self._ref_vtrjs[contact_name][0, 0:flight_duration_req]=land_dx_w/flight_duration_sec + self._ref_vtrjs[contact_name][1, 0:flight_duration_req]=land_dy_w/flight_duration_sec + + for i in range(flight_duration_req): + res, phase_token=timeline.addPhase(self._flight_phases[contact_name], + pos=self._injection_node+i, + absolute_position=True) + phase_token.setItemReference(f'vz_{contact_name}', + self._ref_vtrjs[contact_name][2:3, i:i+1]) + if self._touchdown_phases[contact_name] is not None: + # add touchdown phase for forcing vertical landing + res, phase_token=timeline.addPhase(self._touchdown_phases[contact_name], + pos=self._injection_node+flight_duration_req, + absolute_position=True) + + if timeline.getEmptyNodes() > 0: # fill empty nodes at the end of the horizon, if any, with stance + timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short')) + + def update(self): + self._phase_manager.update() + + def update_flight_info(self, timeline_name): + + # get current position and remaining duration of flight phases over the horizon for a single contact + + # phase indexes over timeline + phase_idxs=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name]) + + if not len(phase_idxs)==0: # at least one flight phase on horizon -> read info from timeline + + # all active phases on timeline + active_phases=self._contact_timelines[timeline_name].getActivePhases() + + phase_idx_start=phase_idxs[0] + # active_nodes_start=active_phases[phase_idx_start].getActiveNodes() + pos_start=active_phases[phase_idx_start].getPosition() + # n_nodes_start=active_phases[phase_idx_start].getNNodes() + + phase_idx_last=phase_idxs[-1] # just get info for last phase on the horizon + # active_nodes_last=active_phases[phase_idx_last].getActiveNodes() + 
pos_last=active_phases[phase_idx_last].getPosition() + # n_nodes_last=active_phases[phase_idx_last].getNNodes() + + # write to info + self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts]=pos_last + self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts]=pos_last - pos_start + + return True + + return False + + def get_flight_info(self, timeline_name): + return (self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+2*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+3*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+4*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+5*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+6*self.n_contacts]) + + def get_flight_info_all(self): + return self._flight_info_now + + def set_ref_pos(self, + timeline_name:str, + ref_height: np.array = None, + threshold: float = 0.05): + + if ref_height is not None: + self._ref_trjs[timeline_name][2, :]=ref_height + if ref_height>threshold: + self.add_flight(timeline_name=timeline_name) + this_flight_token_idx=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name])[-1] + active_phases=self._contact_timelines[timeline_name].getActivePhases() + active_phases[this_flight_token_idx].setItemReference(f'z_{timeline_name}', + self._ref_trjs[timeline_name]) + else: + self.add_stand(timeline_name=timeline_name) + + def set_force_feedback(self, + timeline_name: str, + force_norm: float): + + flight_tokens=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name]) + contact_tokens=self._contact_phases[timeline_name].getPhaseIdx(self._contact_phases[timeline_name]) + if not len(flight_tokens)==0: + 
first_flight=flight_tokens[0] + first_flight + + def check_horizon_full(self, + timeline_name): + timeline = self._contact_timelines[timeline_name] + if timeline.getEmptyNodes() > 0: + error = f"Empty nodes detected over the horizon for timeline {timeline}! Make sure to fill the whole horizon with valid phases!!" + Journal.log(self.__class__.__name__, + "check_horizon_full", + error, + LogType.EXCEP, + throw_when_excep = True) diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports.py new file mode 100644 index 0000000000000000000000000000000000000000..991636dd18679c11050f97d3c47dd7df2d97eb5e --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports.py @@ -0,0 +1,18 @@ +# robot modeling and automatic differentiation +import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn +import casadi as cs + +# horizon stuff +import horizon.utils.kin_dyn as kd +from horizon.problem import Problem +from horizon.rhc.model_description import FullModelInverseDynamics +from horizon.rhc.taskInterface import TaskInterface +from horizon.rhc.tasks.interactionTask import VertexContact +from horizon.rhc.tasks.contactTask import ContactTask +from horizon.utils import trajectoryGenerator, utils + +# phase managing +import phase_manager.pymanager as pymanager +import phase_manager.pyphase as pyphase +import phase_manager.pytimeline as pytimeline + diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py new file mode 100644 index 
0000000000000000000000000000000000000000..679e7d460ab545b0bd2f741904b5ac8af893f897 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py @@ -0,0 +1,28 @@ +""" +Dynamically import all necessary Horizon and related dependencies. +This function is intended to be used within the import_child_lib method +of a class, enabling the parent process to load all required libraries. +""" +def import_horizon_global(): + # Global imports to make modules accessible in child processes + global casadi_kin_dyn, cs, kd, Problem, FullModelInverseDynamics + global TaskInterface, VertexContact, ContactTask, trajectoryGenerator, utils + global pymanager, pyphase, pytimeline + + # robot modeling and automatic differentiation + import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn + import casadi as cs + + # horizon stuff + import horizon.utils.kin_dyn as kd + from horizon.problem import Problem + from horizon.rhc.model_description import FullModelInverseDynamics + from horizon.rhc.taskInterface import TaskInterface + from horizon.rhc.tasks.interactionTask import VertexContact + from horizon.rhc.tasks.contactTask import ContactTask + from horizon.utils import trajectoryGenerator, utils + + # phase managing + import phase_manager.pymanager as pymanager + import phase_manager.pyphase as pyphase + import phase_manager.pytimeline as pytimeline diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..5cec4e610e97e3100c1468ee4e828fe2e64a28c4 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py @@ -0,0 +1,1324 @@ 
+from mpc_hive.controllers.rhc import RHController + +from aug_mpc.controllers.rhc.horizon_based.horizon_imports import * + +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs +from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +import numpy as np + +import os + +import time + +from typing import Dict, List + +class HybridQuadRhc(RHController): + + def __init__(self, + srdf_path: str, + urdf_path: str, + config_path: str, + robot_name: str, # used for shared memory namespaces + codegen_dir: str, + n_nodes:float = 25, + injection_node:int = 10, + dt: float = 0.02, + max_solver_iter = 1, # defaults to rt-iteration + open_loop: bool = True, + close_loop_all: bool = False, + dtype = np.float32, + verbose = False, + debug = False, + refs_in_hor_frame = True, + timeout_ms: int = 60000, + custom_opts: Dict = {}): + + self._refs_in_hor_frame = refs_in_hor_frame + + self._injection_node = injection_node + + self._open_loop = open_loop + self._close_loop_all = close_loop_all + + self._codegen_dir = codegen_dir + if not os.path.exists(self._codegen_dir): + os.makedirs(self._codegen_dir) + # else: + # # Directory already exists, delete it and recreate + # shutil.rmtree(self._codegen_dir) + # os.makedirs(self._codegen_dir) + + self.step_counter = 0 + self.sol_counter = 0 + + self.max_solver_iter = max_solver_iter + + self._timer_start = time.perf_counter() + self._prb_update_time = time.perf_counter() + self._phase_shift_time = time.perf_counter() + self._task_ref_update_time = time.perf_counter() + self._rti_time = time.perf_counter() + + self.robot_name = robot_name + + self.config_path = config_path + + self.urdf_path = urdf_path + # read urdf and srdf files + with open(self.urdf_path, 'r') as file: + self.urdf = file.read() + self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + + self._c_timelines = dict() + 
self._f_reg_timelines = dict() + + self._custom_opts={"replace_continuous_joints": False, + "use_force_feedback": False, + "lin_a_feedback": False, + "is_open_loop": self._open_loop, # fully open (just for db) + "fully_closed": False, # closed loop with full feedback (just for db) + "closed_partial": False, # closed loop with partial feedback + "adaptive_is": True, # closed loop with adaptation + "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase + "alpha_from_outside": False, # alpha set ext. from shared memory + "alpha_half": 1.0, + "only_vel_wheels": True, # whether wheels (if present) are just vel controlled + "use_jnt_v_feedback": False + } + + self._custom_opts.update(custom_opts) + + self._alpha_half=self._custom_opts["alpha_half"] + + if self._open_loop: + self._custom_opts["fully_closed"]=False + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + else: + self._custom_opts["is_open_loop"]=False + if self._custom_opts["fully_closed"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["closed_partial"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["fully_closed"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["adaptive_is"]: + self._custom_opts["closed_partial"]=False + self._custom_opts["fully_closed"]=False + + super().__init__(srdf_path=srdf_path, + n_nodes=n_nodes, + dt=dt, + namespace=self.robot_name, + dtype=dtype, + verbose=verbose, + debug=debug, + timeout_ms=timeout_ms) + + self._rhc_fpaths.append(self.config_path) + + def _config_override(self): + pass + + def _post_problem_init(self): + + self.rhc_costs={} + self.rhc_constr={} + + self._fail_idx_scale=0.0 + self._expl_idx_window_size=int(1*self._n_nodes) + self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size)) + self._expl_idx_counter=0 + self._expl_idx_buffer_counter=0 + + 
self._pred_node_idx=self._n_nodes-1 + + self._nq=self.nq() + self._nq_jnts=self._nq-7# assuming floating base + self._nv=self.nv() + self._nv_jnts=self._nv-6 + + self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype) + self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype) + self._alphas_twist_root=np.zeros((6, 1), dtype=self._dtype) + self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype) + self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype) + self._alphas_q_root[:, :]=1.0 # default to all open + self._alphas_q_jnts[:, :]=1.0 + self._alphas_twist_root[:, :]=1.0 + self._alphas_v_jnts[:, :]=1.0 + self._alphas_a[:, :]=1.0 + + def _init_problem(self, + fixed_jnt_patterns: List[str] = None, + wheels_patterns: List[str] = None, + foot_linkname: str = None, + flight_duration: int = 10, + post_flight_stance: int = 3, + step_height: float = 0.12, + keep_yaw_vert: bool = False, + yaw_vertical_weight: float = 2.0, + vertical_landing: bool = False, + vertical_land_weight: float = 1.0, + phase_force_reg: float = 1e-2, + vel_bounds_weight: float = 1.0): + + self._fixed_jnt_patterns=fixed_jnt_patterns + + self._config_override() + + Journal.log(self.__class__.__name__, + "_init_problem", + f" Will use horizon config file at {self.config_path}", + LogType.INFO, + throw_when_excep=True) + + self._vel_bounds_weight=vel_bounds_weight + self._phase_force_reg=phase_force_reg + self._yaw_vertical_weight=yaw_vertical_weight + self._vertical_land_weight=vertical_land_weight + # overrides parent + self._prb = Problem(self._n_intervals, + receding=True, + casadi_type=cs.SX) + self._prb.setDt(self._dt) + + if "replace_continuous_joints" in self._custom_opts: + # continous joints are parametrized in So2 + if self._custom_opts["replace_continuous_joints"]: + self.urdf = self.urdf.replace('continuous', 'revolute') + else: + self.urdf = self.urdf.replace('continuous', 'revolute') + + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint 
names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + + self._init_robot_homer() + + # handle fixed joints + fixed_joint_map={} + if self._fixed_jnt_patterns is not None: + for jnt_name in self._get_robot_jnt_names(): + for fixed_jnt_pattern in self._fixed_jnt_patterns: + if fixed_jnt_pattern in jnt_name: + fixed_joint_map.update({f"{jnt_name}": + self._homer.get_homing_val(jnt_name=jnt_name)}) + break # do not search for other pattern matches + + if not len(fixed_joint_map)==0: # we need to recreate kin dyn and homers + Journal.log(self.__class__.__name__, + "_init_problem", + f"Will fix following joints: \n{str(fixed_joint_map)}", + LogType.INFO, + throw_when_excep=True) + # with the fixed joint map + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map) + # assign again controlled joints names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + # updated robot homer for controlled joints + self._init_robot_homer() + + # handle continuous joints (need to change homing and retrieve + # cont jnts indexes) and homing + self._continuous_joints=self._get_continuous_jnt_names() + # reduced + self._continuous_joints_idxs=[] + self._continuous_joints_idxs_cos=[] + self._continuous_joints_idxs_sin=[] + self._continuous_joints_idxs_red=[] + self._rev_joints_idxs=[] + self._rev_joints_idxs_red=[] + # qfull + self._continuous_joints_idxs_qfull=[] + self._continuous_joints_idxs_cos_qfull=[] + self._continuous_joints_idxs_sin_qfull=[] + self._continuous_joints_idxs_red_qfull=[] + self._rev_joints_idxs_qfull=[] + self._rev_joints_idxs_red_qfull=[] + jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints)) + jnt_names=self._get_robot_jnt_names() + for i in range(len(jnt_names)): + jnt=jnt_names[i] + index=self._get_jnt_id(jnt)# accounting for floating joint + homing_idx=index-7 # homing is only for actuated joints + homing_value=self._homer.get_homing_val(jnt) + if 
jnt in self._continuous_joints: + jnt_homing[homing_idx]=np.cos(homing_value).item() + jnt_homing[homing_idx+1]=np.sin(homing_value).item() + # just actuated joints + self._continuous_joints_idxs.append(homing_idx) # cos + self._continuous_joints_idxs.append(homing_idx+1) # sin + self._continuous_joints_idxs_cos.append(homing_idx) + self._continuous_joints_idxs_sin.append(homing_idx+1) + self._continuous_joints_idxs_red.append(i) + # q full + self._continuous_joints_idxs_qfull.append(index) # cos + self._continuous_joints_idxs_qfull.append(index+1) # sin + self._continuous_joints_idxs_cos_qfull.append(index) + self._continuous_joints_idxs_sin_qfull.append(index+1) + self._continuous_joints_idxs_red_qfull.append(i+7) + else: + jnt_homing[homing_idx]=homing_value + # just actuated joints + self._rev_joints_idxs.append(homing_idx) + self._rev_joints_idxs_red.append(i) + # q full + self._rev_joints_idxs_qfull.append(index) + self._rev_joints_idxs_red_qfull.append(i+7) + + self._jnts_q_reduced=None + if not len(self._continuous_joints)==0: + cont_joints=", ".join(self._continuous_joints) + + Journal.log(self.__class__.__name__, + "_init_problem", + f"The following continuous joints were found: \n{cont_joints}", + LogType.INFO, + throw_when_excep=True) + # preallocating data + self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype) + self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype) + self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + else: + self._custom_opts["replace_continuous_joints"]=True + Journal.log(self.__class__.__name__, + "_init_problem", + f"No continuous joints were found.", + LogType.INFO, + throw_when_excep=True) + + # retrieve wheels indexes (not considering continuous joints, + # ok just for v, eff, etc..) 
+ self._wheel_patterns=wheels_patterns + self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns) + self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81] + + # we can create an init for the base + init = self._base_init.tolist() + jnt_homing + + if foot_linkname is not None: + FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height + ground_level = FK(q=init)['ee_pos'] + self._base_init[2] = -ground_level[2] # override init + + self._model = FullModelInverseDynamics(problem=self._prb, + kd=self._kin_dyn, + q_init=self._homer.get_homing_map(), + base_init=self._base_init) + + self._ti = TaskInterface(prb=self._prb, + model=self._model, + max_solver_iter=self.max_solver_iter, + debug = self._debug, + verbose = self._verbose, + codegen_workdir = self._codegen_dir) + self._ti.setTaskFromYaml(self.config_path) + + # setting initial base pos ref if exists + base_pos = self._ti.getTask('base_height') + if base_pos is not None: + base_pos.setRef(np.atleast_2d(self._base_init).T) + + self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes????? 
+ + self._gm = GaitManager(self._ti, + self._pm, + self._injection_node, + keep_yaw_vert=keep_yaw_vert, + yaw_vertical_weight=self._yaw_vertical_weight, + vertical_landing=vertical_landing, + landing_vert_weight=self._vertical_land_weight, + phase_force_reg=self._phase_force_reg, + custom_opts=self._custom_opts, + flight_duration=flight_duration, + post_flight_stance=post_flight_stance, + step_height=step_height, + dh=0.0) + + self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0) + self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0) + self._ti.model.q.setInitialGuess(self._ti.model.q0) + self._ti.model.v.setInitialGuess(self._ti.model.v0) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f=len(cforces) + for c in cforces: + c.setInitialGuess(np.array(self._f0)/n_contact_f) + + vel_lims = self._model.kd.velocityLimits() + import horizon.utils as utils + self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:])) + self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:])) + + self._meas_lin_a_par=None + # if self._custom_opts["lin_a_feedback"]: + # # acceleration feedback on first node + # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback", + # dim=3, nodes=0) + # base_lin_a_prb=self._prb.getInput().getVars()[0:3] + # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par, + # nodes=[0]) + + # if not self._open_loop: + # # we create a residual cost to be used as an attractor to the measured state on the first node + # # hard constraints injecting meas. states are pure EVIL! 
+ # prb_state=self._prb.getState() + # full_state=prb_state.getVars() + # state_dim=prb_state.getBounds()[0].shape[0] + # meas_state=self._prb.createParameter(name="measured_state", + # dim=state_dim, nodes=0) + # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state), + # nodes=[0]) + + self._ti.finalize() + self._ti.bootstrap() + + self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes + self._ti.load_initial_guess() + + self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we + # know n_dofs -> we assign it (by default = None) + + self.n_contacts = len(self._model.cmap.keys()) + + # remove variables bounds (before they were necessary to have a good + # quality bootstrap solution) + self._q_inf=np.zeros((self.nq(), 1)) + self._q_inf[:, :]=np.inf + self._v_inf=np.zeros((self.nv(), 1)) + self._v_inf[:, :]=np.inf + self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0) + self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0) + + # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb) + + def get_file_paths(self): + # can be overriden by child + paths = super().get_file_paths() + return paths + + def _get_quat_remap(self): + # overrides parent + return [1, 2, 3, 0] # mapping from robot quat. 
to Horizon's quaternion convention + + def _zmp(self, model): + + num = cs.SX([0, 0]) + den = cs.SX([0]) + pos_contact = dict() + force_val = dict() + + q = cs.SX.sym('q', model.nq) + v = cs.SX.sym('v', model.nv) + a = cs.SX.sym('a', model.nv) + + com = model.kd.centerOfMass()(q=q, v=v, a=a)['com'] + + n = cs.SX([0, 0, 1]) + for c in model.fmap.keys(): + pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos'] + force_val[c] = cs.SX.sym('force_val', 3) + num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n) + den += cs.dot(force_val[c], n) + + zmp = com[0:2] + (num / den) + input_list = [] + input_list.append(q) + input_list.append(v) + input_list.append(a) + + for elem in force_val.values(): + input_list.append(elem) + + f = cs.Function('zmp', input_list, [zmp]) + return f + + def _add_zmp(self): + + input_zmp = [] + + input_zmp.append(self._model.q) + input_zmp.append(self._model.v) + input_zmp.append(self._model.a) + + for f_var in self._model.fmap.values(): + input_zmp.append(f_var) + + c_mean = cs.SX([0, 0, 0]) + for c_name, f_var in self._model.fmap.items(): + fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos'] + c_mean += fk_c_pos + + c_mean /= len(self._model.cmap.keys()) + + zmp_nominal_weight = 10. 
+ zmp_fun = self._zmp(self._model)(*input_zmp) + + if 'wheel_joint_1' in self._model.kd.joint_names(): + zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2])) + + def _quaternion_multiply(self, + q1, q2): + x1, y1, z1, w1 = q1 + x2, y2, z2, w2 = q2 + w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 + x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 + y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 + z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 + return np.array([x, y, z, w]) + + def _get_continuous_jnt_names(self): + import xml.etree.ElementTree as ET + root = ET.fromstring(self.urdf) + continuous_joints = [] + for joint in root.findall('joint'): + joint_type = joint.get('type') + if joint_type == 'continuous': + joint_name = joint.get('name') + continuous_joints.append(joint_name) + return continuous_joints + + def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]): + jnt_names=self._get_robot_jnt_names() + wheels_idxs=[] + for i in range(len(jnt_names)): + jnt_name=jnt_names[i] + for wheel_pattern in wheel_patterns: + if wheel_pattern in jnt_name: + wheels_idxs.append(i) + break + return wheels_idxs + + def _get_jnt_id(self, jnt_name): + return self._kin_dyn.joint_iq(jnt_name) + + def _init_rhc_task_cmds(self): + + rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm, + robot_index_shm=self.controller_index, + robot_index_view=0, # when using optimize_mem the view if always of shape 1x... 
+ namespace=self.namespace, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + use_force_feedback=self._custom_opts["use_force_feedback"], + optimize_mem=True) + + rhc_refs.run() + + rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller) + rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap()) + + rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3], + q_ref=np.atleast_2d(self._base_init)[:, 3:7]) + + return rhc_refs + + def get_vertex_fnames_from_ti(self): + tasks=self._ti.task_list + contact_f_names=[] + for task in tasks: + if isinstance(task, ContactTask): + interaction_task=task.dynamics_tasks[0] + contact_f_names.append(interaction_task.vertex_frames[0]) + return contact_f_names + + def _get_contact_names(self): + # should get contact names from vertex frames + # list(self._ti.model.cmap.keys()) + return self.get_vertex_fnames_from_ti() + + def _get_robot_jnt_names(self): + + joints_names = self._kin_dyn.joint_names() + to_be_removed = ["universe", + "reference", + "world", + "floating", + "floating_base"] + for name in to_be_removed: + if name in joints_names: + joints_names.remove(name) + + return joints_names + + def _get_ndofs(self): + return len(self._model.joint_names) + + def nq(self): + return self._kin_dyn.nq() + + def nv(self): + return self._kin_dyn.nv() + + def _get_robot_mass(self): + + return self._kin_dyn.mass() + + def _get_root_full_q_from_sol(self, node_idx=1): + + root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype) + + np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) + np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full) + + return root_q_full + + def _get_full_q_from_sol(self, node_idx=1): + + return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype) + + def _get_root_twist_from_sol(self, node_idx=1): + # provided in world frame + twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1, 
6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3] + # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6] + return twist_base_local + + def _get_root_a_from_sol(self, node_idx=0): + # provided in world frame + a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3] + # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6] + return a_base_local + + def _get_jnt_q_from_sol(self, node_idx=0, + reduce: bool = True, + clamp: bool = True): + + full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype) + + np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place + np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place + + if self._custom_opts["replace_continuous_joints"] or (not reduce): + if clamp: + return np.fmod(full_jnts_q, 2*np.pi) + else: + return full_jnts_q + else: + cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2) + # copy rev joint vals + self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1) + # and continuous + self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1) + return self._jnts_q_reduced + + def _get_jnt_v_from_sol(self, node_idx=1): + + jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place + # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place + + return jnt_v_sol + + def 
_get_jnt_a_from_sol(self, node_idx=1): + + return self._get_a_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + + def _get_jnt_eff_from_sol(self, node_idx=0): + + efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx) + + # if self._custom_opts["only_vel_wheels"]: + + jnt_efforts=efforts_on_node[6:, 0] + + if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v: + jnt_efforts[self._wheels_idxs_v]=0.0 + + return jnt_efforts.reshape(1, + self._nv_jnts).astype(self._dtype) + + def _get_rhc_cost(self): + + return self._ti.solution["opt_cost"] + + def _get_rhc_constr_viol(self): + + return self._ti.solution["residual_norm"] + + def _get_rhc_nodes_cost(self): + + cost = self._ti.solver_rti.getCostValOnNodes() + return cost.reshape((1, -1)) + + def _get_rhc_nodes_constr_viol(self): + + constr_viol = self._ti.solver_rti.getConstrValOnNodes() + return constr_viol.reshape((1, -1)) + + def _get_rhc_niter_to_sol(self): + + return self._ti.solution["n_iter2sol"] + + def _set_ig_bootstrap(self, + q_state: np.ndarray = None, + v_state: np.ndarray = None): + + xig = self._ti.solution['x_opt'].copy() + uig = self._ti.solution['u_opt'].copy() + + # Normalize and keep quaternion in the same hemisphere as the previous + # solution to avoid artificial 180-deg jumps in the bootstrap warm start. 
+ q_state_boot = q_state.copy() + q_prev = xig[3:7, 0] + q_now = q_state_boot[3:7, 0] + + q_now_norm = np.linalg.norm(q_now) + if q_now_norm > 1e-9: + q_state_boot[3:7, :] /= q_now_norm + else: + q_state_boot[3:7, :] = np.array([[0.0], [0.0], [0.0], [1.0]], dtype=self._dtype) + + q_prev_norm = np.linalg.norm(q_prev) + if q_prev_norm > 1e-9: + q_prev = q_prev / q_prev_norm + + q_now = q_state_boot[3:7, 0] + if np.dot(q_prev, q_now) < 0.0: + q_state_boot[3:7, :] *= -1.0 + + xig[0:self._nq, :] = q_state_boot + xig[self._nq:self._nq + self._nv, :] = 0.0 # 0 velocity on first nodes + uig[0:self._nv, :]=0.0 # 0 acceleration + + # assigning ig + self._prb.getState().setInitialGuess(xig) + self._prb.getInput().setInitialGuess(uig) + # self._prb.getVariables("a").setInitialGuess(np.zeros((self._nv, 1), dtype=self._dtype)) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f = len(cforces) + if n_contact_f == 0: + continue + f_guess = np.array(self._f0, dtype=self._dtype) / n_contact_f + for c in cforces: + c.setInitialGuess(f_guess) + + # print("initial guesses") + # print(self._nq) + # print(self._nv) + # print("q") + # qig=self._ti.model.q.getInitialGuess() + # print(qig.shape) + # print(qig) + # print("v") + # print(self._ti.model.v.getInitialGuess()) + # print("a") + # print(self._ti.model.a.getInitialGuess()) + # for _, cforces in self._ti.model.cmap.items(): + # for c in cforces: + # print("force") + # print(c.getInitialGuess()) + + return xig, uig + + def _set_ig(self): + + shift_num = -1 # shift data by one node + + x_opt = self._ti.solution['x_opt'] + u_opt = self._ti.solution['u_opt'] + + # building ig for state + xig = np.roll(x_opt, + shift_num, axis=1) # rolling state sol. + for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + xig[:, -1 - i] = x_opt[:, -1] + # building ig for inputs + uig = np.roll(u_opt, + shift_num, axis=1) # rolling state sol. 
+ for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + uig[:, -1 - i] = u_opt[:, -1] + + # assigning ig + self._prb.getState().setInitialGuess(xig) + self._prb.getInput().setInitialGuess(uig) + + return xig, uig + + def _update_open_loop(self, + bootstrap: bool = False): + + q_state, v_state, a_state=self._set_is_open() + + if not bootstrap: + self._set_ig() + else: + self._set_ig_bootstrap(q_state=q_state, v_state=v_state) + + # robot_state=xig[:, 0] + # # open loop update: + # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0 + # # is node 1 in the last opt solution) + + return q_state, v_state, a_state + + def _update_closed_loop(self, + bootstrap: bool = False): + + # set initial state + q_state=None + v_state=None + a_state=None + if self._custom_opts["adaptive_is"]: + # adaptive closed loop + q_state, v_state, a_state=self._set_is_adaptive() + elif self._custom_opts["fully_closed"]: + q_state, v_state, a_state=self._set_is_full() + elif self._custom_opts["closed_partial"]: + q_state, v_state, a_state=self._set_is_partial() + else: + Journal.log(self.__class__.__name__, + "_update_closed_loop", + "Neither adaptive_is, fully_closed, or closed_partial.", + LogType.EXCEP, + throw_when_excep = False) + q_state, v_state, a_state=self._set_is() + + # set initial guess for controller + if not bootstrap: + self._set_ig() + else: + self._set_ig_bootstrap(q_state=q_state, v_state=v_state) + + return q_state, v_state, a_state + + def _set_is_open(self): + + # overriding states with rhc data + q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1) + + twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1) + v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_q_full_rhc=q[0:7] # 
root full q + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_twist_rhc=vel[0:6] # lin v. + jnts_v_rhc=vel[6:] # jnts v + + self.rhc_refs.set_alpha(alpha=1.0) # fully open + + # close state on known quantities + root_q_full_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_twist_rhc.setBounds(lb=twist_root, + ub=twist_root, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((twist_root, v_jnts), + axis=0) + + return (q_state, v_state, None) + + def _set_is_full(self): + + # measurements + q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_full_q_rhc=q[0:7] # root p + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_full_q_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) # leaving lin v of the base free (estimated from constraints) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_partial(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", 
robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # overriding states with rhc data (-> all overridden state are open looop) + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc # position is always open loop + + if not self._custom_opts["estimate_v_root"]: + v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1) + # override v jnts with the ones from controller + if not self._custom_opts["use_jnt_v_feedback"]: + v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # root_twist_from_rhc=self._get_root_twist_from_sol(node_idx=1) + # root_v_from_rhc=root_twist_from_rhc[:, 0:3].reshape(-1, 1) + # root_omega_from_rhc=root_twist_from_rhc[:, 3:6].reshape(-1, 1) + # jnt_q_from_rhc=self._get_jnt_q_from_sol(node_idx=1,reduce=False,clamp=False).reshape(-1, 1) + # jnt_v_from_rhc=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + 
q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root, + ub=q_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints) + else: # get it from controller + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_adaptive(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = 
self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + # getting prediction defects + root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # close state on known quantities, estimate some (e.g. lin vel) and + # open loop if thing start to explode + alpha_now=1.0 + delta=0.0 + if self._custom_opts["alpha_from_outside"]: + alpha_now=self.rhc_refs.get_alpha() + else: # "autotuned" alpha + if self._custom_opts["estimate_v_root"]: # we copmute delta based on jnt v (since we use meas.) 
+ delta=np.max(np.abs(jnt_v_delta)) + else: + delta=np.max(np.abs(omega_root_delta)) + # fail_idx=self._get_failure_index() + # fail_idx=self._get_explosion_idx()/self._fail_idx_thresh + alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0 + + bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1] + self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db + self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db + + self._alphas_q_root[:]=alpha_now # for now single alpha for everything + self._alphas_q_jnts[:]=alpha_now + self._alphas_twist_root[:]=alpha_now + self._alphas_v_jnts[:]=alpha_now + self._alphas_a[:]=alpha_now + if not self._custom_opts["estimate_v_root"]: + self._alphas_twist_root[0:3]=1.0 # open + self._alphas_v_jnts[:]=1.0 # open + + # position is always open loop + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc + + # expaning meas q if continuous joints + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + + # continous joints position is always open loop, but we need a delta vector of matching dimension + q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1) + + self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:] + + self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\ + np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.cos(q_jnts[self._continuous_joints_idxs_red, :]) + self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\ + np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.sin(q_jnts[self._continuous_joints_idxs_red, :]) + + q_jnts=self._jnts_q_expanded.reshape(-1,1) # overriting with expanded jnts + jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overriting with expanded jnts + + self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop + self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop + + # self._alphas_v_jnts[self._continuous_joints_idxs_red, :]=0.0 # open loop + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta, + ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta, + ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0) + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) + else: + root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, + ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0) + 
root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, + ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta, + ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0) + if self._custom_opts["lin_a_feedback"]: + lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _solve(self): + + if self._debug: + return self._db_solve(bootstrap=False) + else: + return self._min_solve(bootstrap=False) + + def _bootstrap(self): + + if self._debug: + return self._db_solve(bootstrap=True) + else: + return self._min_solve(bootstrap=True) + + def _min_solve(self, bootstrap: bool = False): + # minimal solve version -> no debug + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using robot measurements + + self._pm.shift() # shifts phases of one dt + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + try: + if not bootstrap: + converged = self._ti.rti() # RTI step + else: + converged = self._ti.bootstrap() # full solve (to convergence) + self.sol_counter = self.sol_counter + 1 + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + return False + + def _db_solve(self, bootstrap: bool = False): + + self._timer_start = time.perf_counter() + + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using robot measurements + + self._prb_update_time = time.perf_counter() + self._pm.shift() # shifts phases of one dt + self._phase_shift_time = time.perf_counter() + + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + self._task_ref_update_time = time.perf_counter() + + try: + if not bootstrap: + converged = self._ti.rti() # RTI step + else: + converged = self._ti.bootstrap() # full solve bootstrap + self._rti_time = time.perf_counter() + self.sol_counter = self.sol_counter + 1 + self._update_db_data() + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + if self._verbose: + solve_mode = "RTI" if not bootstrap else "Bootstrap" + exception = f"{solve_mode}() for controller {self.controller_index} failed" + \ + f" with exception {type(e).__name__}" + Journal.log(self.__class__.__name__, + "solve", + exception, + LogType.EXCEP, + throw_when_excep = False) + self._update_db_data() + return False + + def _get_fail_idx(self): + + self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx() + self._expl_idx_buffer_counter+=1 + self._expl_idx_counter+=1 + if self._expl_idx_counter%self._expl_idx_window_size==0: + self._expl_idx_buffer_counter=0 # restart from 0 + + running_avrg=np.mean(self._explosion_idx_buffer).item() + + return running_avrg + + def _get_explosion_idx(self): + explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale + return explosion_index + + def _update_db_data(self): + + self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start + self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time + self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time 
- self._phase_shift_time + self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time + self.rhc_costs.update(self._ti.solver_rti.getCostsValues()) + self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues()) + + def _reset(self): + + # reset task interface (ig, solvers, etc..) + + # phase manager and sets bootstap as solution + self._gm.reset() + self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution + self._expl_idx_counter=0.0 + self._expl_idx_buffer_counter=0 + + def _get_cost_info(self): + + cost_dict = self._ti.solver_rti.getCostsValues() + cost_names = list(cost_dict.keys()) + cost_dims = [1] * len(cost_names) # costs are always scalar + return cost_names, cost_dims + + def _get_constr_info(self): + + constr_dict = self._ti.solver_rti.getConstraintsValues() + constr_names = list(constr_dict.keys()) + constr_dims = [-1] * len(constr_names) + i = 0 + for constr in constr_dict: + constr_val = constr_dict[constr] + constr_shape = constr_val.shape + constr_dims[i] = constr_shape[0] + i+=1 + return constr_names, constr_dims + + def _get_q_from_sol(self): + full_q=self._ti.solution['q'].astype(self._dtype) + if self._custom_opts["replace_continuous_joints"]: + return full_q + else: + cont_jnts=full_q[self._continuous_joints_idxs_qfull, :] + cos=cont_jnts[::2, :] + sin=cont_jnts[1::2, :] + # copy root + self._full_q_reduced[0:7, :]=full_q[0:7, :] + # copy rev joint vals + self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :] + # and continuous + angle=np.arctan2(sin, cos) + self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle + return self._full_q_reduced + + def _get_v_from_sol(self): + return self._ti.solution['v'].astype(self._dtype) + + def _get_a_from_sol(self): + return self._ti.solution['a'].astype(self._dtype) + + def _get_a_dot_from_sol(self): + return None + + def _get_f_from_sol(self): + # to be overridden by child 
class + contact_names =self._get_contacts() # we use controller-side names + try: + data=[] + for key in contact_names: + contact_f=self._ti.solution["f_" + key].astype(self._dtype) + np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False) + np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f) + data.append(contact_f) + return np.concatenate(data, axis=0) + except: + return None + + def _get_f_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_eff_from_sol(self): + # to be overridden by child class + return None + + def _get_cost_from_sol(self, + cost_name: str): + return self.rhc_costs[cost_name] + + def _get_constr_from_sol(self, + constr_name: str): + return self.rhc_constr[constr_name] diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py new file mode 100644 index 0000000000000000000000000000000000000000..1c751aa8a6e8d32c028f94d4224123863866a933 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py @@ -0,0 +1,381 @@ +from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager +from aug_mpc.controllers.rhc.horizon_based.utils.math_utils import hor2w_frame + +from mpc_hive.utilities.shared_data.rhc_data import RhcRefs + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal + +from typing import Union + +import numpy as np + +class HybridQuadRhcRefs(RhcRefs): + + def __init__(self, + gait_manager: GaitManager, + robot_index_shm: int, + robot_index_view: int, + namespace: str, # namespace used for shared mem + verbose: bool = True, + vlevel: bool = VLevel.V2, + safe: bool = True, + use_force_feedback: 
bool = False, + optimize_mem: bool = False): + + self.robot_index = robot_index_shm + self.robot_index_view = robot_index_view + self.robot_index_np_view = np.array(self.robot_index_view) + + self._step_idx = 0 + self._print_frequency = 100 + + self._verbose = verbose + + self._use_force_feedback=use_force_feedback + + if optimize_mem: + super().__init__( + is_server=False, + with_gpu_mirror=False, + namespace=namespace, + safe=safe, + verbose=verbose, + vlevel=vlevel, + optimize_mem=optimize_mem, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + else: + super().__init__( + is_server=False, + with_gpu_mirror=False, + namespace=namespace, + safe=safe, + verbose=verbose, + vlevel=vlevel) + + if not isinstance(gait_manager, GaitManager): + exception = f"Provided gait_manager argument should be of GaitManager type!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + + self.gait_manager = gait_manager + + self.timeline_names = self.gait_manager.timeline_names + + # task interfaces from horizon for setting commands to rhc + self._get_tasks() + + self._p_ref_default=np.zeros((1, 3)) + self._q_ref_default=np.zeros((1, 4)) + self._q_ref_default[0, 0]=1 + + def _get_tasks(self): + # can be overridden by child + # cartesian tasks are in LOCAL_WORLD_ALIGNED (frame centered at distal link, oriented as WORLD) + self.base_lin_velxy = self.gait_manager.task_interface.getTask('base_lin_velxy') + self.base_lin_velz = self.gait_manager.task_interface.getTask('base_lin_velz') + self.base_omega = self.gait_manager.task_interface.getTask('base_omega') + self.base_height = self.gait_manager.task_interface.getTask('base_height') + + def run(self): + + super().run() + if not (self.robot_index < self.rob_refs.n_robots()): + exception = f"Provided \(0-based\) robot index {self.robot_index} exceeds number of " + \ + " available robots 
{self.rob_refs.n_robots()}." + Journal.log(self.__class__.__name__, + "run", + exception, + LogType.EXCEP, + throw_when_excep = True) + contact_names = list(self.gait_manager.task_interface.model.cmap.keys()) + if not (self.n_contacts() == len(contact_names)): + exception = f"N of contacts within problem {len(contact_names)} does not match n of contacts {self.n_contacts()}" + Journal.log(self.__class__.__name__, + "run", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # set some defaults from gait manager + for i in range(self.n_contacts()): + self.flight_settings_req.set(data=self.gait_manager.get_flight_duration(contact_name=contact_names[i]), + data_type="len_remain", + robot_idxs=self.robot_index_np_view, + contact_idx=i) + self.flight_settings_req.set(data=self.gait_manager.get_step_apexdh(contact_name=contact_names[i]), + data_type="apex_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i) + self.flight_settings_req.set(data=self.gait_manager.get_step_enddh(contact_name=contact_names[i]), + data_type="end_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i) + + self.flight_settings_req.synch_retry(row_index=self.robot_index, + col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, + n_cols=self.flight_settings_req.n_cols, + read=False) + + def step(self, qstate: np.ndarray = None, + vstate: np.ndarray = None, + force_norm: np.ndarray = None): + + if self.is_running(): + + # updates robot refs from shared mem + self.rob_refs.synch_from_shared_mem(robot_idx=self.robot_index, robot_idx_view=self.robot_index_view) + self.phase_id.synch_all(read=True, retry=True, + row_index=self.robot_index, + row_index_view=self.robot_index_view) + self.contact_flags.synch_all(read=True, retry=True, + row_index=self.robot_index, + row_index_view=self.robot_index_view) + self.flight_settings_req.synch_all(read=True, retry=True, + row_index=self.robot_index, + row_index_view=self.robot_index_view) + self._set_contact_phases(q_full=qstate) 
+ + # updated internal references with latest available ones + q_base=qstate[3:7,0:1] # quaternion + self._apply_refs_to_tasks(q_base=q_base) + + # if self._use_force_feedback: + # self._set_force_feedback(force_norm=force_norm) + + self._step_idx +=1 + + else: + exception = f"{self.__class__.__name__} is not running" + Journal.log(self.__class__.__name__, + "step", + exception, + LogType.EXCEP, + throw_when_excep = True) + + def _set_contact_phases(self, + q_full: np.ndarray): + + # phase_id = self.phase_id.read_retry(row_index=self.robot_index, + # col_index=0)[0] + + contact_flags_refs = self.contact_flags.get_numpy_mirror()[self.robot_index_np_view, :] + target_n_limbs_in_contact=np.sum(contact_flags_refs).item() + if target_n_limbs_in_contact==0: + target_n_limbs_in_contact=4 + + is_contact = contact_flags_refs.flatten().tolist() + n_contacts=len(is_contact) + + for i in range(n_contacts): # loop through contact timelines + timeline_name = self.timeline_names[i] + + self.gait_manager.set_f_reg(contact_name=timeline_name, + scale=target_n_limbs_in_contact) + + if is_contact[i]==False: # release contact + + # flight parameters requests are set only when inserting a flight phase + len_req_now=int(self.flight_settings_req.get(data_type="len_remain", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item()) + apex_req_now=self.flight_settings_req.get(data_type="apex_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + end_req_now=self.flight_settings_req.get(data_type="end_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + landing_dx_req_now=self.flight_settings_req.get(data_type="land_dx", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + landing_dy_req_now=self.flight_settings_req.get(data_type="land_dy", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + + # set flight phase properties depending on last value on shared memory + 
self.gait_manager.set_flight_duration(contact_name=timeline_name, + val=len_req_now) + self.gait_manager.set_step_apexdh(contact_name=timeline_name, + val=apex_req_now) + self.gait_manager.set_step_enddh(contact_name=timeline_name, + val=end_req_now) + self.gait_manager.set_step_landing_dx(contact_name=timeline_name, + val=landing_dx_req_now) + self.gait_manager.set_step_landing_dy(contact_name=timeline_name, + val=landing_dy_req_now) + # insert flight phase over the horizon + self.gait_manager.add_flight(contact_name=timeline_name, + robot_q=q_full) + + else: # contact phase + self.gait_manager.add_stand(contact_name=timeline_name) + + at_least_one_flight=self.gait_manager.update_flight_info(timeline_name) + # flight_info=self.gait_manager.get_flight_info(timeline_name) + + self.gait_manager.check_horizon_full(timeline_name=timeline_name) + + # write flight info to shared mem for all contacts in one shot (we follow same order as in flight_info shm) + all_flight_info=self.gait_manager.get_flight_info_all() + flight_info_shared=self.flight_info.get_numpy_mirror() + flight_info_shared[self.robot_index_np_view, :]=all_flight_info + self.flight_info.synch_retry(row_index=self.robot_index, + col_index=0, + row_index_view=self.robot_index_np_view, + n_rows=1, n_cols=self.flight_info.n_cols, + read=False) + + self.gait_manager.update() + + def _apply_refs_to_tasks(self, q_base = None): + # overrides parent + if q_base is not None: # rhc refs are assumed to be specified in the so called "horizontal" + # frame, i.e. 
a vertical frame, with the x axis aligned with the projection of the base x axis + # onto the plane + root_pose = self.rob_refs.root_state.get(data_type = "q_full", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) # this should also be + # rotated into the horizontal frame (however, for now only the z componet is used, so it's ok) + + root_twist_ref = self.rob_refs.root_state.get(data_type="twist", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) + + root_twist_ref_h = root_twist_ref.copy() + + hor2w_frame(root_twist_ref, q_base, root_twist_ref_h) # horizon works in local world aligned frame + + if self.base_lin_velxy is not None: + self.base_lin_velxy.setRef(root_twist_ref_h[0:2, :]) + if self.base_omega is not None: + self.base_omega.setRef(root_twist_ref_h[3:, :]) + if self.base_lin_velz is not None: + self.base_lin_velz.setRef(root_twist_ref_h[2:3, :]) + if self.base_height is not None: + self.base_height.setRef(root_pose) + else: + root_pose = self.rob_refs.root_state.get(data_type = "q_full", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) + root_twist_ref = self.rob_refs.root_state.get(data_type="twist", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) + + if self.base_lin_velxy is not None: + self.base_lin_velxy.setRef(root_twist_ref[0:2, :]) + if self.base_omega is not None: + self.base_omega.setRef(root_twist_ref[3:, :]) + if self.base_lin_velz is not None: + self.base_lin_velz.setRef(root_twist_ref[2:3, :]) + if self.base_height is not None: + self.base_height.setRef(root_pose) + + # def _set_force_feedback(self, + # force_norm: np.ndarray = None): + + # is_contact=force_norm>1.0 + + # for i in range(len(is_contact)): + # timeline_name = self._timeline_names[i] + # self.gait_manager.set_force_feedback(timeline_name=timeline_name, + # force_norm=force_norm[i]) + + # if not is_contact[i]: + + + def set_default_refs(self, + p_ref: np.ndarray, + q_ref: np.ndarray): + + self._p_ref_default[:, :]=p_ref + self._q_ref_default[:, :]=q_ref + + 
def set_alpha(self, alpha:float): + # set provided value + alpha_shared=self.alpha.get_numpy_mirror() + alpha_shared[self.robot_index_np_view, :] = alpha + self.alpha.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.alpha.n_cols, + read=False) + + def get_alpha(self): + self.alpha.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.alpha.n_cols, + read=True) + alpha=self.alpha.get_numpy_mirror()[self.robot_index_np_view, :].item() + return alpha + + def set_bound_relax(self, bound_relax:float): + # set provided value + bound_rel_shared=self.bound_rel.get_numpy_mirror() + bound_rel_shared[self.robot_index_np_view, :] = bound_relax + self.bound_rel.synch_retry(row_index=self.robot_index, col_index=0, n_rows=1, + row_index_view=self.robot_index_view, + n_cols=self.alpha.n_cols, + read=False) + + def reset(self): + + if self.is_running(): + + # resets shared mem + contact_flags_current = self.contact_flags.get_numpy_mirror() + phase_id_current = self.phase_id.get_numpy_mirror() + contact_flags_current[self.robot_index_np_view, :] = np.full((1, self.n_contacts()), dtype=np.bool_, fill_value=True) + phase_id_current[self.robot_index_np_view, :] = -1 # defaults to custom phase id + + contact_pos_current=self.rob_refs.contact_pos.get_numpy_mirror() + contact_pos_current[self.robot_index_np_view, :] = 0.0 + + flight_info_current=self.flight_info.get_numpy_mirror() + flight_info_current[self.robot_index_np_view, :] = 0.0 + + alpha=self.alpha.get_numpy_mirror() + alpha[self.robot_index_np_view, :] = 0.0 + + self.rob_refs.root_state.set(data_type="p", data=self._p_ref_default, robot_idxs=self.robot_index_np_view) + self.rob_refs.root_state.set(data_type="q", data=self._q_ref_default, robot_idxs=self.robot_index_np_view) + self.rob_refs.root_state.set(data_type="twist", data=np.zeros((1, 6)), robot_idxs=self.robot_index_np_view) + + 
self.contact_flags.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.contact_flags.n_cols, + read=False) + self.phase_id.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.phase_id.n_cols, + read=False) + self.rob_refs.root_state.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.rob_refs.root_state.n_cols, + read=False) + + self.rob_refs.contact_pos.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.rob_refs.contact_pos.n_cols, + read=False) + + self.flight_info.synch_retry(row_index=self.robot_index, + col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.flight_info.n_cols, + read=False) + + # should also empty the timeline for stepping phases + self._step_idx = 0 + + else: + exception = f"Cannot call reset() since run() was not called!" 
+ Journal.log(self.__class__.__name__, + "reset", + exception, + LogType.EXCEP, + throw_when_excep = True) + diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/utils/__init__.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8d4173d5b51ee8c644a54008ca0a40f1b34d1841 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py @@ -0,0 +1,165 @@ + +import numpy as np + +def w2hor_frame(t_w: np.ndarray, + q_b: np.ndarray, + t_out: np.ndarray): + """ + Transforms a twist vector expressed in WORLD frame to + an "horizontal" frame (z aligned as world, x aligned as the projection + of the x-axis of the base frame described by q_b). This is useful for specifying locomotion + references in a "game"-like fashion. 
+ t_out will hold the result + """ + # q_b = q_b / q_b.norm(dim=1, keepdim=True) + q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :] + + R_11 = 1 - 2 * (q_j ** 2 + q_k ** 2) + R_21 = 2 * (q_i * q_j + q_k * q_w) + + norm = np.sqrt(R_11 ** 2 + R_21 ** 2) + x_proj_x = R_11 / norm + x_proj_y = R_21 / norm + + y_proj_x = -x_proj_y + y_proj_y = x_proj_x + + t_out[0, :] = t_w[0, :] * x_proj_x + t_w[1, :] * x_proj_y + t_out[1, :] = t_w[0, :] * y_proj_x + t_w[1, :] * y_proj_y + t_out[2, :] = t_w[2, :] # z-component remains the same + t_out[3, :] = t_w[3, :] * x_proj_x + t_w[4, :] * x_proj_y + t_out[4, :] = t_w[3, :] * y_proj_x + t_w[4, :] * y_proj_y + t_out[5, :] = t_w[5, :] # z-component remains the same + +def hor2w_frame(t_h: np.ndarray, + q_b: np.ndarray, + t_out: np.ndarray): + """ + Transforms a velocity vector expressed in "horizontal" frame to WORLD + v_out will hold the result + """ + + # Extract quaternion components + q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :] + + # Compute rotation matrix elements + R_11 = 1 - 2 * (q_j ** 2 + q_k ** 2) + R_21 = 2 * (q_i * q_j + q_k * q_w) + + # Normalize to get projection components + norm = np.sqrt(R_11 ** 2 + R_21 ** 2) + x_proj_x = R_11 / norm + x_proj_y = R_21 / norm + + # Orthogonal vector components + y_proj_x = -x_proj_y + y_proj_y = x_proj_x + + # Transform velocity vector components from horizontal to world frame + t_out[0, :] = t_h[0, :] * x_proj_x + t_h[1, :] * y_proj_x + t_out[1, :] = t_h[0, :] * x_proj_y + t_h[1, :] * y_proj_y + t_out[2, :] = t_h[2, :] # z-component remains the same + t_out[3, :] = t_h[3, :] * x_proj_x + t_h[4, :] * y_proj_x + t_out[4, :] = t_h[3, :] * x_proj_y + t_h[4, :] * y_proj_y + t_out[5, :] = t_h[5, :] # z-component remains the same + +def base2world_frame(t_b: np.ndarray, + q_b: np.ndarray, + t_out: np.ndarray): + """ + Transforms a velocity vector expressed in the base frame to + the WORLD frame using the given quaternion that describes the orientation + 
of the base with respect to the world frame. The result is written in v_out. + """ + # q_b = q_b / q_b.norm(dim=1, keepdim=True) + q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :] + + R_00 = 1 - 2 * (q_j ** 2 + q_k ** 2) + R_01 = 2 * (q_i * q_j - q_k * q_w) + R_02 = 2 * (q_i * q_k + q_j * q_w) + + R_10 = 2 * (q_i * q_j + q_k * q_w) + R_11 = 1 - 2 * (q_i ** 2 + q_k ** 2) + R_12 = 2 * (q_j * q_k - q_i * q_w) + + R_20 = 2 * (q_i * q_k - q_j * q_w) + R_21 = 2 * (q_j * q_k + q_i * q_w) + R_22 = 1 - 2 * (q_i ** 2 + q_j ** 2) + + # Transform the velocity to the world frame + t_out[0, :] = t_b[0, :] * R_00 + t_b[1, :] * R_01 + t_b[2, :] * R_02 + t_out[1, :] = t_b[0, :] * R_10 + t_b[1, :] * R_11 + t_b[2, :] * R_12 + t_out[2, :] = t_b[0, :] * R_20 + t_b[1, :] * R_21 + t_b[2, :] * R_22 + t_out[3, :] = t_b[3, :] * R_00 + t_b[4, :] * R_01 + t_b[5, :] * R_02 + t_out[4, :] = t_b[3, :] * R_10 + t_b[4, :] * R_11 + t_b[5, :] * R_12 + t_out[5, :] = t_b[3, :] * R_20 + t_b[4, :] * R_21 + t_b[5, :] * R_22 + +def world2base_frame(t_w: np.ndarray, + q_b: np.ndarray, + t_out: np.ndarray): + """ + Transforms a velocity vector expressed in the WORLD frame to + the base frame using the given quaternion that describes the orientation + of the base with respect to the world frame. The result is written in v_out. 
+ """ + # q_b = q_b / q_b.norm(dim=1, keepdim=True) + q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :] + + R_00 = 1 - 2 * (q_j ** 2 + q_k ** 2) + R_01 = 2 * (q_i * q_j - q_k * q_w) + R_02 = 2 * (q_i * q_k + q_j * q_w) + + R_10 = 2 * (q_i * q_j + q_k * q_w) + R_11 = 1 - 2 * (q_i ** 2 + q_k ** 2) + R_12 = 2 * (q_j * q_k - q_i * q_w) + + R_20 = 2 * (q_i * q_k - q_j * q_w) + R_21 = 2 * (q_j * q_k + q_i * q_w) + R_22 = 1 - 2 * (q_i ** 2 + q_j ** 2) + + # Transform the velocity to the base frame using the transpose of the rotation matrix + t_out[0, :] = t_w[0, :] * R_00 + t_w[1, :] * R_10 + t_w[2, :] * R_20 + t_out[1, :] = t_w[0, :] * R_01 + t_w[1, :] * R_11 + t_w[2, :] * R_21 + t_out[2, :] = t_w[0, :] * R_02 + t_w[1, :] * R_12 + t_w[2, :] * R_22 + t_out[3, :] = t_w[3, :] * R_00 + t_w[4, :] * R_10 + t_w[5, :] * R_20 + t_out[4, :] = t_w[3, :] * R_01 + t_w[4, :] * R_11 + t_w[5, :] * R_21 + t_out[5, :] = t_w[3, :] * R_02 + t_w[4, :] * R_12 + t_w[5, :] * R_22 + +if __name__ == "__main__": + + n_envs = 5000 + t_b = np.random.rand(6, n_envs) + + q_b = np.random.rand(4, n_envs) + q_b_norm = q_b / np.linalg.norm(q_b, axis=0, keepdims=True) + + t_w = np.zeros_like(t_b) # To hold horizontal frame velocities + t_b_recovered = np.zeros_like(t_b) # To hold recovered world frame velocities + base2world_frame(t_b, q_b_norm, t_w) + world2base_frame(t_w, q_b_norm, t_b_recovered) + assert np.allclose(t_b, t_b_recovered, atol=1e-6), "Test failed: t_w_recovered does not match t_b" + print("Forward test passed: t_b_recovered matches t_b") + + t_b2 = np.zeros_like(t_b) # To hold horizontal frame velocities + t_w_recovered = np.zeros_like(t_b) + world2base_frame(t_b, q_b_norm, t_b2) + base2world_frame(t_b2, q_b_norm, t_w_recovered) + assert np.allclose(t_b, t_w_recovered, atol=1e-6), "Test failed: t_w_recovered does not match t_b" + print("Backward test passed: t_w_recovered matches t_w") + + # test transf. 
world-horizontal frame + v_h = np.zeros_like(t_b) # To hold horizontal frame velocities + v_recovered = np.zeros_like(t_b) + w2hor_frame(t_b, q_b_norm, v_h) + hor2w_frame(v_h, q_b_norm, v_recovered) + assert np.allclose(t_b, v_recovered, atol=1e-6), "Test failed: v_recovered does not match t_b" + print("horizontal forward frame test passed: matches ") + + t_w = np.zeros_like(t_b) # To hold horizontal frame velocities + v_h_recovered = np.zeros_like(t_b) + hor2w_frame(t_b, q_b_norm, t_w) + w2hor_frame(t_w, q_b_norm, v_h_recovered) + assert np.allclose(t_b, v_h_recovered, atol=1e-6), "Test failed: v_h_recovered does not match t_b" + print("horizontal backward frame test passed: matches ") + + \ No newline at end of file diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/hybrid_quad_client.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/hybrid_quad_client.py new file mode 100644 index 0000000000000000000000000000000000000000..757dc3aba72a500bbbf6c8fa21145f4ea63a2337 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/other/hybrid_quad_client.py @@ -0,0 +1,95 @@ +from aug_mpc.controllers.rhc.augmpc_cluster_client import AugMpcClusterClient + +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc +from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds_horizon +from aug_mpc.utils.sys_utils import PathsGetter + +from typing import List, Dict + +class HybridQuadrupedClusterClient(AugMpcClusterClient): + + def _import_aux_libs(self): + super()._import_aux_libs() + # Import Horizon and related dependencies as global libs + from aug_mpc.controllers.rhc.horizon_based.horizon_imports_glob import import_horizon_global + import_horizon_global() + + def __init__(self, + namespace: str, + urdf_xacro_path: str, + srdf_xacro_path: str, + cluster_size: int, + set_affinity: bool = 
False, + use_mp_fork: bool = False, + isolated_cores_only: bool = False, + core_ids_override_list: List[int] = None, + verbose: bool = False, + debug: bool = False, + open_loop: bool = True, + base_dump_dir: str = "/tmp", + timeout_ms: int = 60000, + codegen_override: str = None, + custom_opts: Dict = {}): + + self._open_loop = open_loop + + self._paths = PathsGetter() + + self._codegen_dir_name = namespace + + self._timeout_ms = timeout_ms + + super().__init__(namespace = namespace, + urdf_xacro_path = urdf_xacro_path, + srdf_xacro_path = srdf_xacro_path, + cluster_size=cluster_size, + set_affinity = set_affinity, + use_mp_fork = use_mp_fork, + isolated_cores_only = isolated_cores_only, + core_ids_override_list = core_ids_override_list, + verbose = verbose, + debug = debug, + base_dump_dir=base_dump_dir, + codegen_override=codegen_override, + custom_opts=custom_opts) + + self._n_nodes = 31 if not ("n_nodes" in self._custom_opts) else self._custom_opts["n_nodes"] + self._dt = 0.05 if not ("cluster_dt" in self._custom_opts) else self._custom_opts["cluster_dt"] + + def _xrdf_cmds(self): + parts = self._urdf_path.split('/') + urdf_descr_root_path = '/'.join(parts[:-2]) + cmds = get_xrdf_cmds_horizon(urdf_descr_root_path=urdf_descr_root_path) + return cmds + + def _process_codegen_dir(self,idx:int): + + codegen_dir = self.codegen_dir() + f"/{self._codegen_dir_name}Rhc{idx}" + codegen_dir_ovveride = self.codegen_dir_override() + if not (codegen_dir_ovveride=="" or \ + codegen_dir_ovveride=="none" or \ + codegen_dir_ovveride=="None" or \ + (codegen_dir_ovveride is None)): # if overrde was provided + codegen_dir = f"{codegen_dir_ovveride}{idx}"# override + + return codegen_dir + + def _generate_controller(self, + idx: int): + + codegen_dir=self._process_codegen_dir(idx=idx) + + controller = HybridQuadRhc( + urdf_path=self._urdf_path, + srdf_path=self._srdf_path, + config_path = self._paths.CONFIGPATH, + robot_name=self._namespace, + codegen_dir=codegen_dir, + 
n_nodes=self._n_nodes, + dt=self._dt, + max_solver_iter = 1, # rti + open_loop = self._open_loop, + verbose = self._verbose, + debug = self._debug) + + return controller \ No newline at end of file diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rhc.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..9dda84a8ac9973021ad1d00683549a762e1c6a0e --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/rhc.py @@ -0,0 +1,1256 @@ +# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com) +# +# This file is part of MPCHive and distributed under the General Public License version 2 license. +# +# MPCHive is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# MPCHive is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with MPCHive. If not, see . 
+# +from abc import ABC, abstractmethod +# from perf_sleep.pyperfsleep import PerfSleep +# from mpc_hive.utilities.cpu_utils.core_utils import get_memory_usage + +import time + +import numpy as np + +from mpc_hive.utilities.shared_data.rhc_data import RobotState +from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred, RhcPredDelta +from mpc_hive.utilities.shared_data.rhc_data import RhcStatus +from mpc_hive.utilities.shared_data.rhc_data import RhcInternal +from mpc_hive.utilities.shared_data.cluster_profiling import RhcProfiling +from mpc_hive.utilities.remote_triggering import RemoteTriggererClnt + +from mpc_hive.utilities.homing import RobotHomer + +from mpc_hive.utilities.math_utils import world2base_frame + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType +from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper +from EigenIPC.PyEigenIPC import dtype + +from typing import List +# from typing import TypeVar, Union + +import signal +import os +import inspect + +class RHController(ABC): + + def __init__(self, + srdf_path: str, + n_nodes: int, + dt: float, + namespace: str, # shared mem namespace + dtype = np.float32, + verbose = False, + debug = False, + timeout_ms: int = 60000, + allow_less_jnts: bool = True): + + signal.signal(signal.SIGINT, self._handle_sigint) + + self._allow_less_jnts = allow_less_jnts # whether to allow less joints in rhc controller than the ones on the robot + # (e.g. some joints might not be desirable for control purposes) + + self.namespace = namespace + self._dtype = dtype + self._verbose = verbose + self._debug = debug + + # if not self._debug: + np.seterr(over='ignore') # ignoring overflows + + self._n_nodes = n_nodes + self._dt = dt + self._n_intervals = self._n_nodes - 1 + self._t_horizon = self._n_intervals * dt + self._set_rhc_pred_idx() # prection is by default written on last node + self._set_rhc_cmds_idx() # default to idx 2 (i.e. 
cmds to get to third node) + self.controller_index = None # will be assigned upon registration to a cluster + self.controller_index_np = None + self._robot_mass=1.0 + + self.srdf_path = srdf_path # using for parsing robot homing + + self._registered = False + self._closed = False + + self._allow_triggering_when_failed = True + + self._profiling_data_dict = {} + self._profiling_data_dict["full_solve_dt"] = np.nan + self._profiling_data_dict["rti_solve_dt"] = np.nan + self._profiling_data_dict["problem_update_dt"] = np.nan + self._profiling_data_dict["phases_shift_dt"] = np.nan + self._profiling_data_dict["task_ref_update"] = np.nan + + self.n_dofs = None + self.n_contacts = None + + # shared mem + self.robot_state = None + self.rhc_status = None + self.rhc_internal = None + self.cluster_stats = None + self.robot_cmds = None + self.robot_pred = None + self.rhc_pred_delta = None + self.rhc_refs = None + self._remote_triggerer = None + self._remote_triggerer_timeout = timeout_ms # [ms] + + # remote termination + self._remote_term = None + self._term_req_received = False + + # jnt names + self._env_side_jnt_names = [] + self._controller_side_jnt_names = [] + self._got_jnt_names_from_controllers = False + + # data maps + self._to_controller = [] + self._quat_remap = [0, 1, 2, 3] # defaults to no remap (to be overridden) + + self._got_contact_names = False + + self._received_trigger = False # used for proper termination + + self._n_resets = 0 + self._n_fails = 0 + self._fail_idx_thresh = 5e3 + self._contact_f_thresh = 1e5 + + self._failed = False + + self._start_time = time.perf_counter() # used for profiling when in debug mode + + self._homer = None # robot homing manager + self._homer_env = None # used for setting homing to jnts not contained in rhc prb + + self._class_name_base = f"{self.__class__.__name__}" + self._class_name = self._class_name_base # will append controller index upon registration + + self._contact_force_base_loc_aux=np.zeros((1,3),dtype=self._dtype) 
+ self._norm_grav_vector_w=np.zeros((1,3),dtype=self._dtype) + self._norm_grav_vector_w[:, 2]=-1.0 + self._norm_grav_vector_base_loc=np.zeros((1,3),dtype=self._dtype) + + self._init() # initialize controller + + if not hasattr(self, '_rhc_fpaths'): + self._rhc_fpaths = [] + self._rhc_fpaths.append(os.path.abspath(__file__)) + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + # Get the file path of the class being initialized and append it to the list + if not hasattr(cls, '_rhc_fpaths'): + cls._rhc_fpaths = [] + child_class_file_path = os.path.abspath(inspect.getfile(cls)) + cls._rhc_fpaths.append(child_class_file_path) + + def this_paths(self): + return self._rhc_fpaths + + def __del__(self): + self._close() + + def _handle_sigint(self, signum, frame): + Journal.log(self._class_name, + "_handle_sigint", + "SIGINT received", + LogType.WARN) + self._term_req_received = True + + def _set_rhc_pred_idx(self): + # default to last node + self._pred_node_idx=self._n_nodes-1 + + def _set_rhc_cmds_idx(self): + # use index 2 by default to compensate for + # the inevitable action delay + # (get_state, trigger sol -> +dt -> apply sol ) + # if we apply action from second node (idenx 1) + # that action will already be one dt old. 
We assume we were already + # applying the right action to get to the state of node idx 1 and get the + # cmds for reaching the state at idx 1 + self._rhc_cmds_node_idx=2 + + def _close(self): + if not self._closed: + self._unregister_from_cluster() + if self.robot_cmds is not None: + self.robot_cmds.close() + if self.robot_pred is not None: + self.robot_pred.close() + if self.rhc_pred_delta is not None: + self.rhc_pred_delta.close() + if self.robot_state is not None: + self.robot_state.close() + if self.rhc_status is not None: + self.rhc_status.close() + if self.rhc_internal is not None: + self.rhc_internal.close() + if self.cluster_stats is not None: + self.cluster_stats.close() + if self._remote_triggerer is not None: + self._remote_triggerer.close() + if self._remote_term is not None: + self._remote_term.close() + self._closed = True + + def close(self): + self._close() + + def get_file_paths(self): + # can be overriden by child + base_paths = [] + base_paths.append(self._this_path) + return base_paths + + def init_rhc_task_cmds(self): + + self.rhc_refs = self._init_rhc_task_cmds() + self.rhc_refs.reset() + + def _init_states(self): + + quat_remap = self._get_quat_remap() + self.robot_state = RobotState(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + self.robot_state.run() + self.robot_cmds = RhcCmds(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # 
got from server + n_contacts=None # got from server + ) + self.robot_cmds.run() + self.robot_pred = RhcPred(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + self.robot_pred.run() + self.rhc_pred_delta = RhcPredDelta(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + self.rhc_pred_delta.run() + + def _rhc(self, rti: bool = True): + if self._debug: + self._rhc_db(rti=rti) + else: + self._rhc_min(rti=rti) + + def _rhc_db(self, rti: bool = True): + # rhc with debug data + self._start_time = time.perf_counter() + + self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with + # latest data on shared mem + + self._compute_pred_delta() + + if not self.failed(): + # we can solve only if not in failure state + if rti: + self._failed = not self._solve() # solve actual TO with RTI + else: + self._failed = not self._bootstrap() # full bootstrap solve + if (self._failed): + # perform failure procedure + self._on_failure() + else: + if not self._allow_triggering_when_failed: + Journal.log(self._class_name, + "solve", + f"Received solution req, but controller is in failure state. 
" + \ + " You should have reset() the controller!", + LogType.EXCEP, + throw_when_excep = True) + else: + if self._verbose: + Journal.log(self._class_name, + "solve", + f"Received solution req, but controller is in failure state. No solution will be performed. " + \ + " Use the reset() method to continue solving!", + LogType.WARN) + + self._write_cmds_from_sol() # we update update the views of the cmds + # from the latest solution + + # in debug, rhc internal state is streamed over + # shared mem. + self._update_rhc_internal() + self._profiling_data_dict["full_solve_dt"] = time.perf_counter() - self._start_time + self._update_profiling_data() # updates all profiling data + if self._verbose: + Journal.log(self._class_name, + "solve", + f"RHC full solve loop execution time -> " + str(self._profiling_data_dict["full_solve_dt"]), + LogType.INFO, + throw_when_excep = True) + + def _rhc_min(self, rti: bool = True): + + self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with + # latest data on shared mem + + self._compute_pred_delta() + + if not self.failed(): + # we can solve only if not in failure state + if rti: + self._failed = not self._solve() # solve actual TO with RTI + else: + self._failed = not self._bootstrap() # full bootstrap solve + if (self._failed): + # perform failure procedure + self._on_failure() + else: + if not self._allow_triggering_when_failed: + Journal.log(self._class_name, + "solve", + f"Received solution req, but controller is in failure state. " + \ + " You should have reset() the controller!", + LogType.EXCEP, + throw_when_excep = True) + else: + if self._verbose: + Journal.log(self._class_name, + "solve", + f"Received solution req, but controller is in failure state. No solution will be performed. 
" + \ + " Use the reset() method to continue solving!", + LogType.WARN) + + self._write_cmds_from_sol() # we update the views of the cmds + # from the latest solution even if failed + + def solve_once(self): + # run a single iteration of the solve loop (used for pooling) + if self._term_req_received: + return False + + if not self._remote_triggerer.wait(self._remote_triggerer_timeout): + Journal.log(self._class_name, + f"solve", + "Didn't receive any remote trigger req within timeout!", + LogType.EXCEP, + throw_when_excep = False) + return False + + self._received_trigger = True + + if self.rhc_status.resets.read_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0)[0]: + self.reset() # rhc is reset + + if self.rhc_status.trigger.read_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0)[0]: + rti_solve = self.rhc_status.rti_solve.read_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0)[0] + self._rhc(rti=rti_solve) # run solution with requested solve mode + self.rhc_status.trigger.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) # allow next solution trigger + + self._remote_triggerer.ack() # send ack signal to server + self._received_trigger = False + + self._term_req_received = self._term_req_received or self._remote_term.read_retry(row_index=0, + col_index=0, + row_index_view=0)[0] + + if self._term_req_received: + self.close() + return False + + return True + + def solve(self): + + # run the solution loop and wait for trigger signals + # using cond. 
variables (efficient) + while True: + if not self.solve_once(): + break + + self.close() # is not stricly necessary + + def reset(self): + + if not self._closed: + self.reset_rhc_data() + self._failed = False # allow triggering + self._n_resets += 1 + self.rhc_status.fails.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.resets.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + def _create_jnt_maps(self): + + # retrieve env-side joint names from shared mem + self._env_side_jnt_names = self.robot_state.jnt_names() + self._check_jnt_names_compatibility() # will raise exception if not self._allow_less_jnts + if not self._got_jnt_names_from_controllers: + exception = f"Cannot run the solve(). assign_env_side_jnt_names() was not called!" + Journal.log(self._class_name, + "_create_jnt_maps", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # robot homer guarantees that _controller_side_jnt_names is at least contained in self._env_side_jnt_names (or equal) + self._to_controller = [self._env_side_jnt_names.index(element) for element in self._controller_side_jnt_names] + # set joint remappings for shared data from env data to controller + # all shared data is by convention specified according to the env (jnts are ordered that way) + # the remapping is used so that when data is returned, its a remapped view from env to controller, + # so that data can be assigned direclty from the rhc controller without any issues + self.robot_state.set_jnts_remapping(jnts_remapping=self._to_controller) + self.robot_cmds.set_jnts_remapping(jnts_remapping=self._to_controller) + self.robot_pred.set_jnts_remapping(jnts_remapping=self._to_controller) + self.rhc_pred_delta.set_jnts_remapping(jnts_remapping=self._to_controller) + + return True + + def reset_rhc_data(self): + + self._reset() # custom reset (e.g. 
it should set the current solution to some + # default solution, like a bootstrap) + + self.rhc_refs.reset() # reset rhc refs to default (has to be called after _reset) + + self._write_cmds_from_sol() # use latest solution (e.g. from bootstrap if called before running + # the first solve) as default state + + def failed(self): + return self._failed + + def robot_mass(self): + return self._robot_mass + + def _assign_cntrl_index(self, reg_state: np.ndarray): + state = reg_state.flatten() # ensure 1D tensor + free_spots = np.nonzero(~state.flatten())[0] + return free_spots[0].item() # just return the first free spot + + def _register_to_cluster(self): + + self.rhc_status = RhcStatus(is_server=False, + namespace=self.namespace, + verbose=self._verbose, + vlevel=VLevel.V2, + with_torch_view=False, + with_gpu_mirror=False, + optimize_mem=True, + cluster_size=1, # we just need the row corresponding to this controller + n_contacts=None, # we get this from server + n_nodes=None # we get this from server + ) + self.rhc_status.run() # rhc status (reg. flags, failure, tot cost, tot cnstrl viol, etc...) + + # acquire semaphores since we have to perform non-atomic operations + # on the whole memory views + self.rhc_status.registration.data_sem_acquire() + self.rhc_status.controllers_counter.data_sem_acquire() + self.rhc_status.controllers_counter.synch_all(retry = True, + read = True) + + available_spots = self.rhc_status.cluster_size + # from here on all pre registration ops can be done + + # incrementing cluster controllers counter + controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror() + if controllers_counter[0, 0] + 1 > available_spots: # no space left -> return + self.rhc_status.controllers_counter.data_sem_release() + self.rhc_status.registration.data_sem_release() + exception = "Cannot register to cluster. 
No space left " + \ + f"({controllers_counter[0, 0]} controllers already registered)" + Journal.log(self._class_name, + "_register_to_cluster", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # now we can register + + # increment controllers counter + controllers_counter += 1 + self.controller_index = controllers_counter.item() -1 + + # actually register to cluster + self.rhc_status.controllers_counter.synch_all(retry = True, + read = False) # writes to shared mem (just one for all controllers (i.e n_rows = 1)) + + # read current registration state + self.rhc_status.registration.synch_all(retry = True, + read = True, + row_index=self.controller_index, + row_index_view=0) + registrations = self.rhc_status.registration.get_numpy_mirror() + # self.controller_index = self._assign_cntrl_index(registrations) + + + self._class_name_base = self._class_name_base+str(self.controller_index) + # self.controller_index_np = np.array(self.controller_index) + self.controller_index_np = np.array(0) # given that we use optimize_mem, the shared mem copy has shape 1 x n_cols (we can write and read at [0, :]) + + registrations[self.controller_index_np, 0] = True + self.rhc_status.registration.synch_all(retry = True, + read = False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + # now all heavy stuff that would otherwise make the registration slow + self._remote_term = SharedTWrapper(namespace=self.namespace, + basename="RemoteTermination", + is_server=False, + verbose = self._verbose, + vlevel = VLevel.V2, + with_gpu_mirror=False, + with_torch_view=False, + dtype=dtype.Bool) + self._remote_term.run() + + # other initializations + + self.cluster_stats = RhcProfiling(is_server=False, + name=self.namespace, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=True, + optimize_mem=True, + cluster_size=1 # we just need the row corresponding to this controller + ) # profiling data + self.cluster_stats.run() + self.cluster_stats.synch_info() + + 
self._create_jnt_maps() + self.init_rhc_task_cmds() # initializes rhc interface to external commands (defined by child class) + self._consinstency_checks() # sanity checks + + if self._homer is None: + self._init_robot_homer() # call this in case it wasn't called by child + + self._robot_mass = self._get_robot_mass() # uses child class implemented method + self._contact_f_scale = self._get_robot_mass() * 9.81 + + # writing some static info about this controller + # self.rhc_status.rhc_static_info.synch_all(retry = True, + # read = True, + # row_index=self.controller_index, + # col_index=0) # first read current static info from other controllers (not necessary if optimize_mem==True) + self.rhc_status.rhc_static_info.set(data=np.array(self._dt), + data_type="dts", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self._t_horizon), + data_type="horizons", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self._n_nodes), + data_type="nnodes", + rhc_idxs=self.controller_index_np, + gpu=False) + # writing some static rhc info which is only available after problem init + self.rhc_status.rhc_static_info.set(data=np.array(len(self._get_contacts())), + data_type="ncontacts", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self.robot_mass()), + data_type="robot_mass", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self._pred_node_idx), + data_type="pred_node_idx", + rhc_idxs=self.controller_index_np, + gpu=False) + + self.rhc_status.rhc_static_info.synch_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.rhc_status.rhc_static_info.n_cols, + read=False) + + # we set homings also for joints which are not in the rhc homing map + # since this is usually required on server side + + homing_full = 
self._homer_env.get_homing().reshape(1, + self.robot_cmds.n_jnts()) + + null_action = np.zeros((1, self.robot_cmds.n_jnts()), + dtype=self._dtype) + + self.robot_cmds.jnts_state.set(data=homing_full, data_type="q", + robot_idxs=self.controller_index_np, + no_remap=True) + self.robot_cmds.jnts_state.set(data=null_action, data_type="v", + robot_idxs=self.controller_index_np, + no_remap=True) + self.robot_cmds.jnts_state.set(data=null_action, data_type="eff", + robot_idxs=self.controller_index_np, + no_remap=True) + + # write all joints (including homing for env-only ones) + self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) # only write data corresponding to this controller + + self.reset() # reset controller + self._n_resets=0 + + # for last we create the trigger client + self._remote_triggerer = RemoteTriggererClnt(namespace=self.namespace, + verbose=self._verbose, + vlevel=VLevel.V2) # remote triggering + self._remote_triggerer.run() + + if self._debug: + # internal solution is published on shared mem + # we assume the user has made available the cost + # and constraint data at this point (e.g. 
through + # the solution of a bootstrap) + cost_data = self._get_cost_info() + constr_data = self._get_constr_info() + config = RhcInternal.Config(is_server=True, + enable_q= True, + enable_v=True, + enable_a=True, + enable_a_dot=False, + enable_f=True, + enable_f_dot=False, + enable_eff=False, + cost_names=cost_data[0], + cost_dims=cost_data[1], + constr_names=constr_data[0], + constr_dims=constr_data[1], + ) + self.rhc_internal = RhcInternal(config=config, + namespace=self.namespace, + rhc_index = self.controller_index, + n_contacts=self.n_contacts, + n_jnts=self.n_dofs, + jnt_names=self._controller_side_jnt_names, + n_nodes=self._n_nodes, + verbose = self._verbose, + vlevel=VLevel.V2, + force_reconnection=True, + safe=True) + self.rhc_internal.run() + + self._class_name = self._class_name + f"-{self.controller_index}" + + Journal.log(self._class_name, + "_register_to_cluster", + f"controller registered", + LogType.STAT, + throw_when_excep = True) + + # we can now release everything so that other controllers can register + self.rhc_status.controllers_counter.data_sem_release() + self.rhc_status.registration.data_sem_release() + + self._registered = True + + def _unregister_from_cluster(self): + + if self._received_trigger: + # received interrupt during solution --> + # send ack signal to server anyway + self._remote_triggerer.ack() + if self._registered: + # acquire semaphores since we have to perform operations + # on the whole memory views + self.rhc_status.registration.data_sem_acquire() + self.rhc_status.controllers_counter.data_sem_acquire() + self.rhc_status.registration.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self._deactivate() + # decrementing controllers counter + self.rhc_status.controllers_counter.synch_all(retry = True, + read = True) + controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror() + controllers_counter -= 1 + self.rhc_status.controllers_counter.synch_all(retry = True, + 
read = False) + Journal.log(self._class_name, + "_unregister_from_cluster", + "Done", + LogType.STAT, + throw_when_excep = True) + # we can now release everything + self.rhc_status.registration.data_sem_release() + self.rhc_status.controllers_counter.data_sem_release() + self._registered = False + + def _get_quat_remap(self): + # to be overridden by child class if necessary + return [0, 1, 2, 3] + + def _consinstency_checks(self): + + # check controller dt + server_side_cluster_dt = self.cluster_stats.get_info(info_name="cluster_dt") + if not (abs(server_side_cluster_dt - self._dt) < 1e-4): + exception = f"Trying to initialize a controller with control dt {self._dt}, which" + \ + f"does not match the cluster control dt {server_side_cluster_dt}" + Journal.log(self._class_name, + "_consinstency_checks", + exception, + LogType.EXCEP, + throw_when_excep = True) + # check contact names + + server_side_contact_names = set(self.robot_state.contact_names()) + control_side_contact_names = set(self._get_contacts()) + + if not server_side_contact_names == control_side_contact_names: + warn = f"Controller-side contact names do not match server-side names!" + \ + f"\nServer: {self.robot_state.contact_names()}\n Controller: {self._get_contacts()}" + Journal.log(self._class_name, + "_consinstency_checks", + warn, + LogType.WARN, + throw_when_excep = True) + if not len(self.robot_state.contact_names()) == len(self._get_contacts()): + # at least, we need the n of contacts to match! + exception = f"Controller-side n contacts {self._get_contacts()} do not match " + \ + f"server-side n contacts {len(self.robot_state.contact_names())}!" 
+ Journal.log(self._class_name, + "_consinstency_checks", + exception, + LogType.EXCEP, + throw_when_excep = True) + + def _init(self): + + stat = f"Trying to initialize RHC controller " + \ + f"with dt: {self._dt} s, t_horizon: {self._t_horizon} s, n_intervals: {self._n_intervals}" + Journal.log(self._class_name, + "_init", + stat, + LogType.STAT, + throw_when_excep = True) + + self._init_states() # initializes shared mem. states + + self._init_problem() # we call the child's initialization method for the actual problem + self._post_problem_init() + + self._register_to_cluster() # registers the controller to the cluster + + Journal.log(self._class_name, + "_init", + f"RHC controller initialized with cluster index {self.controller_index} on process {os.getpid()}", + LogType.STAT, + throw_when_excep = True) + + def _deactivate(self): + # signal controller deactivation over shared mem + self.rhc_status.activation_state.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + # also set cmds to homing for safety + # self.reset_rhc_data() + + def _on_failure(self): + + self.rhc_status.fails.write_retry(True, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self._deactivate() + self._n_fails += 1 + self.rhc_status.controllers_fail_counter.write_retry(self._n_fails, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + def _init_robot_homer(self): + self._homer = RobotHomer(srdf_path=self.srdf_path, + jnt_names=self._controller_side_jnt_names) + + self._homer_env = RobotHomer(srdf_path=self.srdf_path, + jnt_names=self.robot_state.jnt_names()) + + def _update_profiling_data(self): + + # updated debug data on shared memory + # with the latest info available + self.cluster_stats.solve_loop_dt.write_retry(self._profiling_data_dict["full_solve_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + 
self.cluster_stats.rti_sol_time.write_retry(self._profiling_data_dict["rti_solve_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.cluster_stats.prb_update_dt.write_retry(self._profiling_data_dict["problem_update_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.cluster_stats.phase_shift_dt.write_retry(self._profiling_data_dict["phases_shift_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.cluster_stats.task_ref_update_dt.write_retry(self._profiling_data_dict["task_ref_update"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + def _write_cmds_from_sol(self): + + # gets data from the solution and updates the view on the shared data + + # cmds for robot + # jnts + self.robot_cmds.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q", robot_idxs=self.controller_index_np) + self.robot_cmds.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="v", robot_idxs=self.controller_index_np) + self.robot_cmds.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a", robot_idxs=self.controller_index_np) + self.robot_cmds.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np) + # root + self.robot_cmds.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q_full", robot_idxs=self.controller_index_np) + self.robot_cmds.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="twist", robot_idxs=self.controller_index_np) + self.robot_cmds.root_state.set(data=self._get_root_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np) + self.robot_cmds.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._rhc_cmds_node_idx-1), 
data_type="gn", robot_idxs=self.controller_index_np) + f_contact = self._get_f_from_sol() + if f_contact is not None: + contact_names = self.robot_state.contact_names() + node_idx_f_estimate=self._rhc_cmds_node_idx-1 # we always write the force to reach the desired state (prev node) + rhc_q_estimate=self._get_root_full_q_from_sol(node_idx=node_idx_f_estimate)[:, 3:7] + for i in range(len(contact_names)): + contact = contact_names[i] + contact_idx = i*3 + contact_force_rhc_world=f_contact[contact_idx:(contact_idx+3), node_idx_f_estimate:node_idx_f_estimate+1].T + world2base_frame(v_w=contact_force_rhc_world, + q_b=rhc_q_estimate, + v_out=self._contact_force_base_loc_aux, + is_q_wijk=False # horizon q is ijkw + ) + self.robot_cmds.contact_wrenches.set(data=self._contact_force_base_loc_aux, + data_type="f", + robot_idxs=self.controller_index_np, + contact_name=contact) + + # prediction data from MPC horizon + self.robot_pred.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._pred_node_idx), data_type="q", robot_idxs=self.controller_index_np) + self.robot_pred.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._pred_node_idx), data_type="v", robot_idxs=self.controller_index_np) + self.robot_pred.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a", robot_idxs=self.controller_index_np) + self.robot_pred.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._pred_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np) + self.robot_pred.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._pred_node_idx), data_type="q_full", robot_idxs=self.controller_index_np) + self.robot_pred.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._pred_node_idx), data_type="twist", robot_idxs=self.controller_index_np) + self.robot_pred.root_state.set(data=self._get_root_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np) + 
self.robot_pred.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._pred_node_idx-1), data_type="gn", robot_idxs=self.controller_index_np) + + # write robot commands + self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) # jnt state + self.robot_cmds.root_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.root_state.n_cols, + read=False) # root state, in case it was written + self.robot_cmds.contact_wrenches.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.contact_wrenches.n_cols, + read=False) # contact state + + # write robot pred + self.robot_pred.jnts_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) + self.robot_pred.root_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.root_state.n_cols, + read=False) + + # we also fill other data (cost, constr. violation, etc..) 
+ self.rhc_status.rhc_cost.write_retry(self._get_rhc_cost(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_constr_viol.write_retry(self._get_rhc_constr_viol(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_n_iter.write_retry(self._get_rhc_niter_to_sol(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_nodes_cost.write_retry(data=self._get_rhc_nodes_cost(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_nodes_constr_viol.write_retry(data=self._get_rhc_nodes_constr_viol(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_fail_idx.write_retry(self._get_failure_index(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) # write idx on shared mem + + def _compute_pred_delta(self): + + # measurements + q_full_root_meas = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np) + twist_root_meas = self.robot_state.root_state.get(data_type="twist", robot_idxs=self.controller_index_np) + a_root_meas = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np) + g_vec_root_meas = self.robot_state.root_state.get(data_type="gn", robot_idxs=self.controller_index_np) + + q_jnts_meas = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np) + v_jnts_meas = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np) + a_jnts_meas = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np) + eff_jnts_meas = self.robot_state.jnts_state.get(data_type="eff", robot_idxs=self.controller_index_np) + + # prediction from rhc + delta_root_q_full=self._get_root_full_q_from_sol(node_idx=1)-q_full_root_meas + delta_root_twist=self._get_root_twist_from_sol(node_idx=1)-twist_root_meas + 
delta_root_a=self._get_root_a_from_sol(node_idx=0)-a_root_meas + delta_g_vec=self._get_norm_grav_vector_from_sol(node_idx=0)-g_vec_root_meas + + delta_jnts_q=self._get_jnt_q_from_sol(node_idx=1)-q_jnts_meas + delta_jnts_v=self._get_jnt_v_from_sol(node_idx=1)-v_jnts_meas + delta_jnts_a=self._get_jnt_a_from_sol(node_idx=0)-a_jnts_meas + delta_jnts_eff=self._get_jnt_eff_from_sol(node_idx=0)-eff_jnts_meas + + # writing pred. errors + self.rhc_pred_delta.root_state.set(data=delta_root_q_full, data_type="q_full", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.root_state.set(data=delta_root_twist,data_type="twist", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.root_state.set(data=delta_root_a,data_type="a_full", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.root_state.set(data=delta_g_vec, data_type="gn", robot_idxs=self.controller_index_np) + + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_q,data_type="q", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_v,data_type="v", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_a,data_type="a", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_eff, data_type="eff", robot_idxs=self.controller_index_np) + + # write on shared memory + self.rhc_pred_delta.jnts_state.synch_retry(row_index=self.controller_index, + col_index=0, + n_rows=1, + row_index_view=0, + n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) # jnt state + self.rhc_pred_delta.root_state.synch_retry(row_index=self.controller_index, + col_index=0, + n_rows=1, + row_index_view=0, + n_cols=self.robot_cmds.root_state.n_cols, + read=False) # root state + + def _assign_controller_side_jnt_names(self, + jnt_names: List[str]): + + self._controller_side_jnt_names = jnt_names + self._got_jnt_names_from_controllers = True + + def _check_jnt_names_compatibility(self): + + set_rhc = 
set(self._controller_side_jnt_names) + set_env = set(self._env_side_jnt_names) + + if not set_rhc == set_env: + rhc_is_missing=set_env-set_rhc + env_is_missing=set_rhc-set_env + + msg_type=LogType.WARN + message="" + if not len(rhc_is_missing)==0: # allowed + message = "\nSome env-side joint names are missing on rhc client-side!\n" + \ + "RHC-SERVER-SIDE-> \n" + \ + " ".join(self._env_side_jnt_names) + "\n" +\ + "RHC-CLIENT-SIDE -> \n" + \ + " ".join(self._controller_side_jnt_names) + "\n" \ + "\MISSING -> \n" + \ + " ".join(list(rhc_is_missing)) + "\n" + if not self._allow_less_jnts: # raise exception + msg_type=LogType.EXCEP + + if not len(env_is_missing)==0: # not allowed + message = "\nSome rhc-side joint names are missing on rhc server-side!\n" + \ + "RHC-SERVER-SIDE-> \n" + \ + " ".join(self._env_side_jnt_names) + \ + "RHC-CLIENT-SIDE -> \n" + \ + " ".join(self._controller_side_jnt_names) + "\n" \ + "\nmissing -> \n" + \ + " ".join(list(env_is_missing)) + msg_type=LogType.EXCEP + + Journal.log(self._class_name, + "_check_jnt_names_compatibility", + message, + msg_type, + throw_when_excep = True) + + def _get_cost_info(self): + # to be overridden by child class + return None, None + + def _get_constr_info(self): + # to be overridden by child class + return None, None + + def _get_fail_idx(self): + # to be overriden by parent + return 0.0 + + def _get_failure_index(self): + fail_idx=self._get_fail_idx()/self._fail_idx_thresh + if fail_idx>1.0: + fail_idx=1.0 + return fail_idx + + def _check_rhc_failure(self): + # we use fail idx viol to detect failures + idx = self._get_failure_index() + return idx>=1.0 + + def _update_rhc_internal(self): + # data which is not enabled in the config is not actually + # written so overhead is minimal for non-enabled data + self.rhc_internal.write_q(data= self._get_q_from_sol(), + retry=True) + self.rhc_internal.write_v(data= self._get_v_from_sol(), + retry=True) + self.rhc_internal.write_a(data= self._get_a_from_sol(), + 
retry=True) + self.rhc_internal.write_a_dot(data= self._get_a_dot_from_sol(), + retry=True) + self.rhc_internal.write_f(data= self._get_f_from_sol(), + retry=True) + self.rhc_internal.write_f_dot(data= self._get_f_dot_from_sol(), + retry=True) + self.rhc_internal.write_eff(data= self._get_eff_from_sol(), + retry=True) + for cost_idx in range(self.rhc_internal.config.n_costs): + # iterate over all costs and update all values + cost_name = self.rhc_internal.config.cost_names[cost_idx] + self.rhc_internal.write_cost(data= self._get_cost_from_sol(cost_name = cost_name), + cost_name = cost_name, + retry=True) + for constr_idx in range(self.rhc_internal.config.n_constr): + # iterate over all constraints and update all values + constr_name = self.rhc_internal.config.constr_names[constr_idx] + self.rhc_internal.write_constr(data= self._get_constr_from_sol(constr_name=constr_name), + constr_name = constr_name, + retry=True) + + def _get_contacts(self): + contact_names = self._get_contact_names() + self._got_contact_names = True + return contact_names + + def _get_q_from_sol(self): + # to be overridden by child class + return None + + def _get_v_from_sol(self): + # to be overridden by child class + return None + + def _get_a_from_sol(self): + # to be overridden by child class + return None + + def _get_a_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_f_from_sol(self): + # to be overridden by child class + return None + + def _get_f_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_eff_from_sol(self): + # to be overridden by child class + return None + + def _get_cost_from_sol(self, + cost_name: str): + # to be overridden by child class + return None + + def _get_constr_from_sol(self, + constr_name: str): + # to be overridden by child class + return None + + @abstractmethod + def _reset(self): + pass + + @abstractmethod + def _init_rhc_task_cmds(self): + pass + + @abstractmethod + def 
_get_robot_jnt_names(self): + pass + + @abstractmethod + def _get_contact_names(self): + pass + + @abstractmethod + def _get_jnt_q_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_jnt_v_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_jnt_a_from_sol(self, node_idx=0) -> np.ndarray: + pass + + @abstractmethod + def _get_jnt_eff_from_sol(self, node_idx=0) -> np.ndarray: + pass + + @abstractmethod + def _get_root_full_q_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_full_q_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_root_twist_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_root_a_from_sol(self, node_idx=0) -> np.ndarray: + pass + + def _get_norm_grav_vector_from_sol(self, node_idx=1) -> np.ndarray: + rhc_q=self._get_root_full_q_from_sol(node_idx=node_idx)[:, 3:7] + world2base_frame(v_w=self._norm_grav_vector_w,q_b=rhc_q,v_out=self._norm_grav_vector_base_loc, + is_q_wijk=False) + return self._norm_grav_vector_base_loc + + def _get_rhc_cost(self): + # to be overridden + return np.nan + + def _get_rhc_constr_viol(self): + # to be overridden + return np.nan + + def _get_rhc_nodes_cost(self): + # to be overridden + return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype) + + def _get_rhc_nodes_constr_viol(self): + # to be overridden + return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype) + + def _get_rhc_niter_to_sol(self) -> np.ndarray: + # to be overridden + return np.nan + + @abstractmethod + def _update_open_loop(self): + # updates rhc controller + # using the internal state + pass + + @abstractmethod + def _update_closed_loop(self): + # uses meas. 
from robot + pass + + @abstractmethod + def _solve(self) -> bool: + pass + + @abstractmethod + def _bootstrap(self) -> bool: + pass + + @abstractmethod + def _get_ndofs(self): + pass + + abstractmethod + def _get_robot_mass(self): + pass + + @abstractmethod + def _init_problem(self): + # initialized horizon's TO problem + pass + + @abstractmethod + def _post_problem_init(self): + pass diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/sac.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/sac.py new file mode 100644 index 0000000000000000000000000000000000000000..fd40a13287ccf6ccda694c218d114ede8d4b9d4b --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/sac.py @@ -0,0 +1,680 @@ +import torch +import torch.nn as nn +from torch.distributions.normal import Normal +import math + +from aug_mpc.utils.nn.normalization_utils import RunningNormalizer +from aug_mpc.utils.nn.layer_utils import llayer_init + +from typing import List + +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal +from EigenIPC.PyEigenIPC import VLevel + +class SACAgent(nn.Module): + def __init__(self, + obs_dim: int, + actions_dim: int, + obs_ub: List[float] = None, + obs_lb: List[float] = None, + actions_ub: List[float] = None, + actions_lb: List[float] = None, + rescale_obs: bool = False, + norm_obs: bool = True, + use_action_rescale_for_critic: bool = True, + device:str="cuda", + dtype=torch.float32, + is_eval:bool=False, + load_qf:bool=False, + epsilon:float=1e-8, + debug:bool=False, + compression_ratio:float=-1.0, # > 0; if [0, 1] compression, >1 "expansion" + layer_width_actor:int=256, + n_hidden_layers_actor:int=2, + layer_width_critic:int=512, + n_hidden_layers_critic:int=4, + torch_compile: bool = False, + add_weight_norm: bool = False, + add_layer_norm: bool = False, + add_batch_norm: bool = False): + + 
super().__init__() + + self._use_torch_compile=torch_compile + + self._layer_width_actor=layer_width_actor + self._layer_width_critic=layer_width_critic + self._n_hidden_layers_actor=n_hidden_layers_actor + self._n_hidden_layers_critic=n_hidden_layers_critic + + self._obs_dim=obs_dim + self._actions_dim=actions_dim + self._actions_ub=actions_ub + self._actions_lb=actions_lb + + self._add_weight_norm=add_weight_norm + self._add_layer_norm=add_layer_norm + self._add_batch_norm=add_batch_norm + + self._is_eval=is_eval + self._load_qf=load_qf + + self._epsilon=epsilon + + if compression_ratio > 0.0: + self._layer_width_actor=int(compression_ratio*obs_dim) + self._layer_width_critic=int(compression_ratio*(obs_dim+actions_dim)) + + self._normalize_obs = norm_obs + self._rescale_obs=rescale_obs + if self._rescale_obs and self._normalize_obs: + Journal.log(self.__class__.__name__, + "__init__", + f"Both running normalization and obs rescaling is enabled. Was this intentional?", + LogType.WARN, + throw_when_excep = True) + + self._use_action_rescale_for_critic=use_action_rescale_for_critic + + self._rescaling_epsi=1e-9 + + self._debug = debug + + self._torch_device = device + self._torch_dtype = dtype + + # obs scale and bias + if obs_ub is None: + obs_ub = [1] * obs_dim + if obs_lb is None: + obs_lb = [-1] * obs_dim + if (len(obs_ub) != obs_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Observations ub list length should be equal to {obs_dim}, but got {len(obs_ub)}", + LogType.EXCEP, + throw_when_excep = True) + if (len(obs_lb) != obs_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Observations lb list length should be equal to {obs_dim}, but got {len(obs_lb)}", + LogType.EXCEP, + throw_when_excep = True) + + self._obs_ub = torch.tensor(obs_ub, dtype=self._torch_dtype, + device=self._torch_device) + self._obs_lb = torch.tensor(obs_lb, dtype=self._torch_dtype, + device=self._torch_device) + obs_scale = torch.full((obs_dim, ), + fill_value=0.0, 
+ dtype=self._torch_dtype, + device=self._torch_device) + obs_scale[:] = (self._obs_ub-self._obs_lb)/2.0 + self.register_buffer( + "obs_scale", obs_scale + ) + obs_bias = torch.full((obs_dim, ), + fill_value=0.0, + dtype=self._torch_dtype, + device=self._torch_device) + obs_bias[:] = (self._obs_ub+self._obs_lb)/2.0 + self.register_buffer( + "obs_bias", obs_bias) + + self._build_nets() + + self._init_obs_norm() + + msg=f"Created SAC agent with actor [{self._layer_width_actor}, {self._n_hidden_layers_actor}]\ + and critic [{self._layer_width_critic}, {self._n_hidden_layers_critic}] sizes.\ + \n Runningobs normalizer: {type(self.obs_running_norm)} \ + \n Batch normalization: {self._add_batch_norm} \ + \n Layer normalization: {self._add_layer_norm} \ + \n Weight normalization: {self._add_weight_norm} \ + \n Critic input actions are descaled: {self._use_action_rescale_for_critic}" + Journal.log(self.__class__.__name__, + "__init__", + msg, + LogType.INFO) + + def _init_obs_norm(self): + + self.obs_running_norm=None + if self._normalize_obs: + self.obs_running_norm = RunningNormalizer((self._obs_dim,), + epsilon=self._epsilon, + device=self._torch_device, dtype=self._torch_dtype, + freeze_stats=True, # always start with freezed stats + debug=self._debug) + self.obs_running_norm.type(self._torch_dtype) # ensuring correct dtype for whole module + + def _build_nets(self): + + if self._add_weight_norm: + Journal.log(self.__class__.__name__, + "__init__", + f"Will use weight normalization reparametrization\n", + LogType.INFO) + + self.actor=None + self.qf1=None + self.qf2=None + self.qf1_target=None + self.qf2_target=None + + self.actor = Actor(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + actions_ub=self._actions_ub, + actions_lb=self._actions_lb, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_actor, + n_hidden_layers=self._n_hidden_layers_actor, + add_weight_norm=self._add_weight_norm, + 
add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm, + ) + + if (not self._is_eval) or self._load_qf: # just needed for training or during eval + # for debug, if enabled + self.qf1 = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + self.qf1_target = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + + self.qf2 = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + self.qf2_target = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + + self.qf1_target.load_state_dict(self.qf1.state_dict()) + self.qf2_target.load_state_dict(self.qf2.state_dict()) + + if self._use_torch_compile: + self.obs_running_norm=torch.compile(self.obs_running_norm) + self.actor = torch.compile(self.actor) + if (not self._is_eval) or self._load_qf: + self.qf1 = torch.compile(self.qf1) + self.qf2 = torch.compile(self.qf2) + self.qf1_target = torch.compile(self.qf1_target) + 
self.qf2_target = torch.compile(self.qf2_target) + + def reset(self, reset_stats: bool = False): + # we should just reinitialize the parameters, but for easiness + # we recreate the networks + + # force deallocation of objects + import gc + del self.actor + del self.qf1 + del self.qf2 + del self.qf1_target + del self.qf2_target + gc.collect() + + self._build_nets() + + if reset_stats: # we also reinitialize obs norm + self._init_obs_norm() + + # self.obs_running_norm.reset() + + def layer_width_actor(self): + return self._layer_width_actor + + def n_hidden_layers_actor(self): + return self._n_hidden_layers_actor + + def layer_width_critic(self): + return self._layer_width_critic + + def n_hidden_layers_critic(self): + return self._n_hidden_layers_critic + + def get_impl_path(self): + import os + return os.path.abspath(__file__) + + def update_obs_bnorm(self, x): + self.obs_running_norm.unfreeze() + self.obs_running_norm.manual_stat_update(x) + self.obs_running_norm.freeze() + + def _obs_scaling_layer(self, x): + x=(x-self.obs_bias) + x=x/(self.obs_scale+self._rescaling_epsi) + return x + + def _preprocess_obs(self, x): + if self._rescale_obs: + x = self._obs_scaling_layer(x) + if self.obs_running_norm is not None: + x = self.obs_running_norm(x) + return x + + def _preprocess_actions(self, a): + if self._use_action_rescale_for_critic: + a=self.actor.remove_scaling(a=a) # rescale to be in range [-1, 1] + return a + + def get_action(self, x): + x = self._preprocess_obs(x) + return self.actor.get_action(x) + + def get_qf1_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return self.qf1(x, a) + + def get_qf2_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return self.qf2(x, a) + + def get_qf1t_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return self.qf1_target(x, a) + + def get_qf2t_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + 
return self.qf2_target(x, a) + + def load_state_dict(self, param_dict): + + missing, unexpected = super().load_state_dict(param_dict, + strict=False) + if not len(missing)==0: + Journal.log(self.__class__.__name__, + "load_state_dict", + f"These parameters are missing from the provided state dictionary: {str(missing)}\n", + LogType.EXCEP, + throw_when_excep = True) + if not len(unexpected)==0: + Journal.log(self.__class__.__name__, + "load_state_dict", + f"These parameters present in the provided state dictionary are not needed: {str(unexpected)}\n", + LogType.WARN) + + # sanity check on running normalizer + import re + running_norm_pattern = r"running_norm" + error=f"Found some keys in model state dictionary associated with a running normalizer. Are you running the agent with norm_obs=True?\n" + if any(re.match(running_norm_pattern, key) for key in unexpected): + Journal.log(self.__class__.__name__, + "load_state_dict", + error, + LogType.EXCEP, + throw_when_excep=True) + +class CriticQ(nn.Module): + def __init__(self, + obs_dim: int, + actions_dim: int, + device: str = "cuda", + dtype = torch.float32, + layer_width: int = 512, + n_hidden_layers: int = 4, + add_weight_norm: bool = False, + add_layer_norm: bool = False, + add_batch_norm: bool = False): + + super().__init__() + + self._lrelu_slope=0.01 + + self._torch_device = device + self._torch_dtype = dtype + + self._obs_dim = obs_dim + self._actions_dim = actions_dim + self._q_net_dim = self._obs_dim + self._actions_dim + + self._first_hidden_layer_width=self._q_net_dim # fist layer fully connected and of same dim + # as input + + init_type="kaiming_uniform" # maintains the variance of activations throughout the network + nonlinearity="leaky_relu" # suited for kaiming + + # Input layer + layers=llayer_init( + layer=nn.Linear(self._q_net_dim, self._first_hidden_layer_width), + init_type=init_type, + nonlinearity=nonlinearity, + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + 
dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Hidden layers + layers.extend( + llayer_init( + layer=nn.Linear(self._first_hidden_layer_width, layer_width), + init_type=init_type, + nonlinearity=nonlinearity, + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + for _ in range(n_hidden_layers - 1): + layers.extend( + llayer_init( + layer=nn.Linear(layer_width, layer_width), + init_type=init_type, + nonlinearity=nonlinearity, + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Output layer + layers.extend( + llayer_init( + layer=nn.Linear(layer_width, 1), + init_type="uniform", + uniform_biases=False, # contact biases + bias_const=-0.1, # negative to prevent overestimation + scale_weight=1e-2, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=False, + add_layer_norm=False, + add_batch_norm=False + ) + ) + + # Creating the full sequential network + self._q_net = nn.Sequential(*layers) + self._q_net.to(self._torch_device).type(self._torch_dtype) + + print("Critic architecture") + print(self._q_net) + + def get_n_params(self): + return sum(p.numel() for p in self.parameters()) + + def forward(self, x, a): + x = torch.cat([x, a], dim=1) + return self._q_net(x) + +class Actor(nn.Module): 
+ def __init__(self, + obs_dim: int, + actions_dim: int, + actions_ub: List[float] = None, + actions_lb: List[float] = None, + device: str = "cuda", + dtype = torch.float32, + layer_width: int = 256, + n_hidden_layers: int = 2, + add_weight_norm: bool = False, + add_layer_norm: bool = False, + add_batch_norm: bool = False): + + super().__init__() + + self._lrelu_slope=0.01 + + self._torch_device = device + self._torch_dtype = dtype + + self._obs_dim = obs_dim + self._actions_dim = actions_dim + + self._first_hidden_layer_width=self._obs_dim # fist layer fully connected and of same dim + + # Action scale and bias + if actions_ub is None: + actions_ub = [1] * actions_dim + if actions_lb is None: + actions_lb = [-1] * actions_dim + if (len(actions_ub) != actions_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Actions ub list length should be equal to {actions_dim}, but got {len(actions_ub)}", + LogType.EXCEP, + throw_when_excep = True) + if (len(actions_lb) != actions_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Actions lb list length should be equal to {actions_dim}, but got {len(actions_lb)}", + LogType.EXCEP, + throw_when_excep = True) + + self._actions_ub = torch.tensor(actions_ub, dtype=self._torch_dtype, + device=self._torch_device) + self._actions_lb = torch.tensor(actions_lb, dtype=self._torch_dtype, + device=self._torch_device) + action_scale = torch.full((actions_dim, ), + fill_value=0.0, + dtype=self._torch_dtype, + device=self._torch_device) + action_scale[:] = (self._actions_ub-self._actions_lb)/2.0 + self.register_buffer( + "action_scale", action_scale + ) + actions_bias = torch.full((actions_dim, ), + fill_value=0.0, + dtype=self._torch_dtype, + device=self._torch_device) + actions_bias[:] = (self._actions_ub+self._actions_lb)/2.0 + self.register_buffer( + "action_bias", actions_bias) + + # Network configuration + self.LOG_STD_MAX = 2 + self.LOG_STD_MIN = -5 + + # Input layer followed by hidden layers + 
layers=llayer_init(nn.Linear(self._obs_dim, self._first_hidden_layer_width), + init_type="kaiming_uniform", + nonlinearity="leaky_relu", + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Hidden layers + layers.extend( + llayer_init(nn.Linear(self._first_hidden_layer_width, layer_width), + init_type="kaiming_uniform", + nonlinearity="leaky_relu", + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + for _ in range(n_hidden_layers - 1): + layers.extend( + llayer_init(nn.Linear(layer_width, layer_width), + init_type="kaiming_uniform", + nonlinearity="leaky_relu", + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Sequential layers for the feature extractor + self._fc12 = nn.Sequential(*layers) + + # Mean and log_std layers + out_fc_mean=llayer_init(nn.Linear(layer_width, self._actions_dim), + init_type="uniform", + uniform_biases=False, # constant bias init + bias_const=0.0, + scale_weight=1e-3, # scaling (output layer) + scale_bias=1.0, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=False, + add_layer_norm=False, + add_batch_norm=False + ) + self.fc_mean = nn.Sequential(*out_fc_mean) + out_fc_logstd= 
llayer_init(nn.Linear(layer_width, self._actions_dim),
+                init_type="uniform",
+                uniform_biases=False,
+                bias_const=math.log(0.5),
+                scale_weight=1e-3, # scaling (output layer)
+                scale_bias=1.0,
+                device=self._torch_device,
+                dtype=self._torch_dtype,
+                add_weight_norm=False,
+                add_layer_norm=False,
+                add_batch_norm=False,
+                )
+        self.fc_logstd = nn.Sequential(*out_fc_logstd)
+
+        # Move all components to the specified device and dtype
+        self._fc12.to(device=self._torch_device, dtype=self._torch_dtype)
+        self.fc_mean.to(device=self._torch_device, dtype=self._torch_dtype)
+        self.fc_logstd.to(device=self._torch_device, dtype=self._torch_dtype)
+
+        print("Actor architecture")
+        print(self._fc12)
+        print(self.fc_mean)
+        print(self.fc_logstd)
+
+    def get_n_params(self):
+        return sum(p.numel() for p in self.parameters())
+
+    def forward(self, x):
+        x = self._fc12(x)
+        mean = self.fc_mean(x)
+        log_std = self.fc_logstd(x)
+        log_std = torch.tanh(log_std)
+        log_std = self.LOG_STD_MIN + 0.5 * (self.LOG_STD_MAX - self.LOG_STD_MIN) * (log_std + 1) # From SpinUp / Denis Yarats
+        return mean, log_std
+
+    def get_action(self, x):
+        mean, log_std = self(x)
+        std = log_std.exp()
+        normal = torch.distributions.Normal(mean, std)
+        x_t = normal.rsample() # Reparameterization trick (for SAC we need action
+        # to be differentiable since we use Q nets. Using sample() would break the
+        # comp. 
graph and not allow gradients to flow)
+        y_t = torch.tanh(x_t)
+        action = y_t * self.action_scale + self.action_bias
+        log_prob_vec = normal.log_prob(x_t) # per-dimension log prob before tanh
+        log_prob_vec = log_prob_vec - torch.log(self.action_scale * (1 - y_t.pow(2)) + 1e-6) # tanh Jacobian + scaling
+        log_prob_sum = log_prob_vec.sum(1, keepdim=True)
+        mean = torch.tanh(mean) * self.action_scale + self.action_bias
+        return action, (log_prob_sum, log_prob_vec), mean
+
+    def remove_scaling(self, a):
+        return (a - self.action_bias)/self.action_scale
+
+if __name__ == "__main__":
+    device = "cpu" # or "cuda"
+    import time
+    obs_dim = 273
+    agent = SACAgent(
+        obs_dim=obs_dim,
+        actions_dim=10,
+        actions_lb=None,
+        actions_ub=None,
+        obs_lb=None,
+        obs_ub=None,
+        rescale_obs=False,
+        norm_obs=True,
+        use_action_rescale_for_critic=True,
+        is_eval=True,
+        compression_ratio=0.6,
+        layer_width_actor=128,
+        layer_width_critic=256,
+        n_hidden_layers_actor=3,
+        n_hidden_layers_critic=3,
+        device=device,
+        dtype=torch.float32,
+        add_weight_norm=True,
+        add_layer_norm=False,
+        add_batch_norm=False
+    )
+
+    n_samples = 10000
+    random_obs = torch.rand((1, obs_dim), dtype=torch.float32, device=device)
+
+    if device == "cuda":
+        torch.cuda.synchronize()
+    start = time.time()
+
+    for i in range(n_samples):
+        actions, _, mean = agent.get_action(x=random_obs)
+        actions = actions.detach()
+        actions[:, :] = mean.detach()
+
+    if device == "cuda":
+        torch.cuda.synchronize()
+    end = time.time()
+
+    avrg_eval_time = (end - start) / n_samples
+    print(f"Average policy evaluation time on {device}: {avrg_eval_time:.6f} s")
diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/sactor_critic_algo.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/sactor_critic_algo.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7106d55df943adc2679fde3692336130f187398
--- /dev/null
+++ 
b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/sactor_critic_algo.py @@ -0,0 +1,2391 @@ +from aug_mpc.agents.sactor_critic.sac import SACAgent +from aug_mpc.agents.dummies.dummy import DummyAgent + +from aug_mpc.utils.shared_data.algo_infos import SharedRLAlgorithmInfo, QfVal, QfTrgt +from aug_mpc.utils.shared_data.training_env import SubReturns, TotReturns +from aug_mpc.utils.nn.rnd import RNDFull + +import torch +import torch.optim as optim +import torch.nn as nn + +import random +import math +from typing import Dict + +import os +import shutil + +import time + +import wandb +import h5py +import numpy as np + +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal +from EigenIPC.PyEigenIPC import VLevel + +from abc import ABC, abstractmethod + +class SActorCriticAlgoBase(ABC): + + # base class for actor-critic RL algorithms + + def __init__(self, + env, + debug = False, + remote_db = False, + seed: int = 1): + + self._env = env + self._seed = seed + + self._eval = False + self._det_eval = True + + self._full_env_db=False + + self._agent = None + + self._debug = debug + self._remote_db = remote_db + + self._writer = None + + self._run_name = None + self._drop_dir = None + self._dbinfo_drop_fname = None + self._model_path = None + + self._policy_update_db_data_dict = {} + self._custom_env_data_db_dict = {} + self._rnd_db_data_dict = {} + self._hyperparameters = {} + self._wandb_d={} + + # get params from env + self._get_params_from_env() + + self._torch_device = torch.device("cpu") # defaults to cpu + + self._setup_done = False + + self._verbose = False + + self._is_done = False + + self._shared_algo_data = None + + self._this_child_path = None + self._this_basepath = os.path.abspath(__file__) + + def __del__(self): + + self.done() + + def _get_params_from_env(self): + + self._env_name = self._env.name() + self._episodic_reward_metrics = self._env.ep_rewards_metrics() + self._use_gpu = 
self._env.using_gpu() + self._dtype = self._env.dtype() + self._env_opts=self._env.env_opts() + self._num_envs = self._env.n_envs() + self._obs_dim = self._env.obs_dim() + self._actions_dim = self._env.actions_dim() + self._episode_timeout_lb, self._episode_timeout_ub = self._env.episode_timeout_bounds() + self._task_rand_timeout_lb, self._task_rand_timeout_ub = self._env.task_rand_timeout_bounds() + self._env_n_action_reps = self._env.n_action_reps() + self._is_continuous_actions_bool=self._env.is_action_continuous() + self._is_continuous_actions=torch.where(self._is_continuous_actions_bool)[0] + self._is_discrete_actions_bool=self._env.is_action_discrete() + self._is_discrete_actions=torch.where(self._is_discrete_actions_bool)[0] + + # default to all debug envs + self._db_env_selector=torch.tensor(list(range(0, self._num_envs)), dtype=torch.int) + self._db_env_selector_bool=torch.full((self._num_envs, ), + dtype=torch.bool, device="cpu", + fill_value=True) + # default to no expl envs + self._expl_env_selector=None + self._expl_env_selector_bool=torch.full((self._num_envs, ), dtype=torch.bool, device="cpu", + fill_value=False) + self._pert_counter=0.0 + # demo envs + self._demo_stop_thresh=None # performance metrics above which demo envs are deactivated + # (can be overridden thorugh the provided options) + self._demo_env_selector=self._env.demo_env_idxs() + self._demo_env_selector_bool=self._env.demo_env_idxs(get_bool=True) + + def learn(self): + + if not self._setup_done: + self._should_have_called_setup() + + self._start_time = time.perf_counter() + + # experience collection + with torch.no_grad(): # don't need grad computation here + for i in range(self._collection_freq): + if not self._collect_transition(): + return False + self._vec_transition_counter+=1 + + self._collection_t = time.perf_counter() + + if self._vec_transition_counter % self._bnorm_vecfreq == 0: + with torch.no_grad(): # don't need grad computation here + 
self._update_batch_norm(bsize=self._bnorm_bsize) + + # policy update + self._policy_update_t_start = time.perf_counter() + for i in range(self._update_freq): + self._update_policy() + self._update_counter+=1 + + self._policy_update_t = time.perf_counter() + + with torch.no_grad(): + if self._validate and (self._vec_transition_counter % self._validation_db_vecstep_freq == 0): + # validation + self._update_validation_losses() + self._validation_t = time.perf_counter() + self._post_step() + + if self._use_period_resets: + # periodic policy resets + if not self._adaptive_resets: + self._periodic_resets_on=(self._vec_transition_counter >= self._reset_vecstep_start) and \ + (self._vec_transition_counter < self._reset_vecstep_end) + + if self._periodic_resets_on and \ + (self._vec_transition_counter-self._reset_vecstep_start) % self._periodic_resets_vecfreq == 0: + self._reset_agent() + else: # trigger reset based on overfit metric + if self._overfit_idx > self._overfit_idx_thresh: + self._reset_agent() + + return not self.is_done() + + def eval(self): + + if not self._setup_done: + self._should_have_called_setup() + + self._start_time = time.perf_counter() + + if not self._collect_eval_transition(): + return False + self._vec_transition_counter+=1 + + self._collection_t = time.perf_counter() + + self._post_step() + + return not self.is_done() + + @abstractmethod + def _collect_transition(self)->bool: + pass + + @abstractmethod + def _collect_eval_transition(self)->bool: + pass + + @abstractmethod + def _update_policy(self): + pass + + @abstractmethod + def _update_validation_losses(self): + pass + + def _update_overfit_idx(self, loss, val_loss): + overfit_now=(val_loss-loss)/loss + self._overfit_idx=self._overfit_idx_alpha*overfit_now+\ + (1-self._overfit_idx_alpha)*self._overfit_idx + + def setup(self, + run_name: str, + ns: str, + custom_args: Dict = {}, + verbose: bool = False, + drop_dir_name: str = None, + eval: bool = False, + resume: bool = False, + model_path: 
str = None, + n_eval_timesteps: int = None, + comment: str = "", + dump_checkpoints: bool = False, + norm_obs: bool = True, + rescale_obs: bool = False): + + tot_tsteps=int(100e6) + if "tot_tsteps" in custom_args: + tot_tsteps=custom_args["tot_tsteps"] + + self._verbose = verbose + + self._ns=ns # only used for shared mem stuff + + self._dump_checkpoints = dump_checkpoints + + self._init_algo_shared_data(static_params=self._hyperparameters) # can only handle dicts with + # numeric values + + if "full_env_db" in custom_args: + self._full_env_db=custom_args["full_env_db"] + + self._eval = eval + self._resume=resume + if self._eval and self._resume: + Journal.log(self.__class__.__name__, + "setup", + f"Cannot set both eval and resume to true. Exiting.", + LogType.EXCEP, + throw_when_excep = True) + + self._load_qf=False + if self._eval: + if "load_qf" in custom_args: + self._load_qf=custom_args["load_qf"] + if self._resume: + self._load_qf=True # must load qf when resuming + self._eval=False + try: + self._det_eval=custom_args["det_eval"] + except: + pass + + self._override_agent_actions=False + if "override_agent_actions" in custom_args: + self._override_agent_actions=custom_args["override_agent_actions"] + + if self._override_agent_actions: # force evaluation mode + Journal.log(self.__class__.__name__, + "setup", + "will force evaluation mode since override_agent_actions was set to true", + LogType.INFO, + throw_when_excep = True) + self._eval=True + self._validate=False + self._load_qf=False + self._det_eval=False + self._resume=False + + self._run_name = run_name + from datetime import datetime + self._time_id = datetime.now().strftime('d%Y_%m_%d_h%H_m%M_s%S') + self._unique_id = self._time_id + "-" + self._run_name + + self._hyperparameters["unique_run_id"]=self._unique_id + self._hyperparameters.update(custom_args) + + self._torch_device = torch.device("cuda" if torch.cuda.is_available() and self._use_gpu else "cpu") + + try: + 
layer_width_actor=self._hyperparameters["actor_lwidth"] + layer_width_critic=self._hyperparameters["critic_lwidth"] + n_hidden_layers_actor=self._hyperparameters["actor_n_hlayers"] + n_hidden_layers_critic=self._hyperparameters["critic_n_hlayers"] + except: + layer_width_actor=256 + layer_width_critic=512 + n_hidden_layers_actor=2 + n_hidden_layers_critic=4 + pass + + use_torch_compile=False + add_weight_norm=False + add_layer_norm=False + add_batch_norm=False + compression_ratio=-1.0 + if "use_torch_compile" in self._hyperparameters and \ + self._hyperparameters["use_torch_compile"]: + use_torch_compile=True + if "add_weight_norm" in self._hyperparameters and \ + self._hyperparameters["add_weight_norm"]: + add_weight_norm=True + if "add_layer_norm" in self._hyperparameters and \ + self._hyperparameters["add_layer_norm"]: + add_layer_norm=True + if "add_batch_norm" in self._hyperparameters and \ + self._hyperparameters["add_batch_norm"]: + add_batch_norm=True + if "compression_ratio" in self._hyperparameters: + compression_ratio=self._hyperparameters["compression_ratio"] + + act_rescale_critic=False + if "act_rescale_critic" in self._hyperparameters: + act_rescale_critic=self._hyperparameters["act_rescale_critic"] + if not self._override_agent_actions: + self._agent = SACAgent(obs_dim=self._env.obs_dim(), + obs_ub=self._env.get_obs_ub().flatten().tolist(), + obs_lb=self._env.get_obs_lb().flatten().tolist(), + actions_dim=self._env.actions_dim(), + actions_ub=None, # agent will assume actions are properly normalized in [-1, 1] by the env + actions_lb=None, + rescale_obs=rescale_obs, + norm_obs=norm_obs, + use_action_rescale_for_critic=act_rescale_critic, + compression_ratio=compression_ratio, + device=self._torch_device, + dtype=self._dtype, + is_eval=self._eval, + load_qf=self._load_qf, + debug=self._debug, + layer_width_actor=layer_width_actor, + layer_width_critic=layer_width_critic, + n_hidden_layers_actor=n_hidden_layers_actor, + 
n_hidden_layers_critic=n_hidden_layers_critic, + torch_compile=use_torch_compile, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm) + else: # we use a fake agent + self._agent = DummyAgent(obs_dim=self._env.obs_dim(), + actions_dim=self._env.actions_dim(), + actions_ub=None, + actions_lb=None, + device=self._torch_device, + dtype=self._dtype, + debug=self._debug) + + # loging actual widths and layers in case they were override inside agent init + self._hyperparameters["actor_lwidth_actual"]=self._agent.layer_width_actor() + self._hyperparameters["actor_n_hlayers_actual"]=self._agent.n_hidden_layers_actor() + self._hyperparameters["critic_lwidth_actual"]=self._agent.layer_width_critic() + self._hyperparameters["critic_n_hlayers_actual"]=self._agent.n_hidden_layers_critic() + + # load model if necessary + if self._eval and (not self._override_agent_actions): # load pretrained model + if model_path is None: + msg = f"No model path provided in eval mode! Was this intentional? \ + No jnt remapping will be available and a randomly init agent will be used." + Journal.log(self.__class__.__name__, + "setup", + msg, + LogType.WARN, + throw_when_excep = True) + if n_eval_timesteps is None: + Journal.log(self.__class__.__name__, + "setup", + f"When eval is True, n_eval_timesteps should be provided!!", + LogType.EXCEP, + throw_when_excep = True) + # everything is ok + self._model_path = model_path + if self._model_path is not None: + self._load_model(self._model_path) + + # overwrite init params + self._init_params(tot_tsteps=n_eval_timesteps, + custom_args=custom_args) + else: + if self._resume: + if model_path is None: + msg = f"No model path provided in resume mode! Please provide a valid checkpoint path." 
+ Journal.log(self.__class__.__name__, + "setup", + msg, + LogType.EXCEP, + throw_when_excep = True) + self._model_path = model_path + if self._model_path is not None: + self._load_model(self._model_path) # load model from checkpoint (including q functions and running normalizers) + self._init_params(tot_tsteps=tot_tsteps, + custom_args=custom_args) + + # adding additional db info + self._hyperparameters["obs_names"]=self._env.obs_names() + self._hyperparameters["action_names"]=self._env.action_names() + self._hyperparameters["sub_reward_names"]=self._env.sub_rew_names() + self._hyperparameters["sub_trunc_names"]=self._env.sub_trunc_names() + self._hyperparameters["sub_term_names"]=self._env.sub_term_names() + + self._allow_expl_during_eval=False + if "allow_expl_during_eval" in self._hyperparameters: + self._allow_expl_during_eval=self._hyperparameters["allow_expl_during_eval"] + + # reset environment + self._env.reset() + if self._eval: + self._env.switch_random_reset(on=False) + + if self._debug and (not self._override_agent_actions): + with torch.no_grad(): + init_obs = self._env.get_obs(clone=True) + _, init_log_pi, _ = self._agent.get_action(init_obs) + init_policy_entropy = (-init_log_pi[0]).mean().item() + init_policy_entropy_per_action = init_policy_entropy / float(self._actions_dim) + Journal.log(self.__class__.__name__, + "setup", + f"Initial policy entropy per action: {init_policy_entropy_per_action:.4f})", + LogType.INFO, + throw_when_excep = True) + + # create dump directory + copy important files for debug + self._init_drop_dir(drop_dir_name) + self._hyperparameters["drop_dir"]=self._drop_dir + + # add env options to hyperparameters + self._hyperparameters.update(self._env_opts) + + if not self._eval: + self._init_agent_optimizers() + + self._init_replay_buffers() # only needed when training + if self._validate: + self._init_validation_buffers() + + if self._autotune: + self._init_alpha_autotuning() + + if self._use_rnd: + self._rnd_net = 
RNDFull(input_dim=self._rnd_indim, output_dim=self._rnd_outdim, + layer_width=self._rnd_lwidth, n_hidden_layers=self._rnd_hlayers, + device=self._torch_device, + dtype=self._dtype, + normalize=norm_obs # normalize if also used for SAC agent + ) + self._rnd_optimizer = torch.optim.Adam(self._rnd_net.rnd_predictor_net.parameters(), + lr=self._rnd_lr) + + self._rnd_input = torch.full(size=(self._batch_size, self._rnd_net.input_dim()), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._rnd_bnorm_input = torch.full(size=(self._bnorm_bsize, self._rnd_net.input_dim()), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + + self._proc_exp_bonus_all = torch.full(size=(self._batch_size, 1), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._raw_exp_bonus_all = torch.full(size=(self._batch_size, 1), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + # if self._autotune_rnd_scale: + # self._reward_normalizer=RunningNormalizer((1,), epsilon=1e-8, + # device=self._torch_device, dtype=self._dtype, + # freeze_stats=False, + # debug=self._debug) + + self._init_dbdata() + + if (self._debug): + if self._remote_db: + job_type = "evaluation" if self._eval else "training" + project="IBRIDO-ablations" + wandb.init( + project=project, + group=self._run_name, + name=self._unique_id, + id=self._unique_id, + job_type=job_type, + # tags=None, + notes=comment, + resume="never", # do not allow runs with the same unique id + mode="online", # "online", "offline" or "disabled" + entity=None, + sync_tensorboard=True, + config=self._hyperparameters, + monitor_gym=True, + save_code=True, + dir=self._drop_dir + ) + wandb.watch((self._agent), log="all", log_freq=1000, log_graph=False) + + if "demo_stop_thresh" in self._hyperparameters: + self._demo_stop_thresh=self._hyperparameters["demo_stop_thresh"] + + actions = 
self._env.get_actions() + self._random_uniform = torch.full_like(actions, fill_value=0.0) # used for sampling random actions (preallocated + # for efficiency) + self._random_normal = torch.full_like(self._random_uniform,fill_value=0.0) + # for efficiency) + + self._actions_override=None + if self._override_agent_actions: + from aug_mpc.utils.shared_data.training_env import Actions + self._actions_override = Actions(namespace=ns+"_override", + n_envs=self._num_envs, + action_dim=actions.shape[1], + action_names=self._env.action_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + self._actions_override.run() + + self._start_time_tot = time.perf_counter() + + self._start_time = time.perf_counter() + + self._replay_bf_full = False + self._validation_bf_full = False + self._bpos=0 + self._bpos_val=0 + + self._is_done = False + self._setup_done = True + + def is_done(self): + + return self._is_done + + def model_path(self): + + return self._model_path + + def _init_params(self, + tot_tsteps: int, + custom_args: Dict = {}): + + self._collection_freq=1 + self._update_freq=4 + + self._replay_buffer_size_vec=10*self._task_rand_timeout_ub # cover at least a number of eps + self._replay_buffer_size = self._replay_buffer_size_vec*self._num_envs + if self._replay_buffer_size_vec < 0: # in case env did not properly define _task_rand_timeout_ub + self._replay_buffer_size = int(1e6) + self._replay_buffer_size_vec = self._replay_buffer_size//self._num_envs + self._replay_buffer_size=self._replay_buffer_size_vec*self._num_envs + + self._batch_size = 16394 + + new_transitions_per_batch=self._collection_freq*self._num_envs/self._replay_buffer_size # assumes uniform sampling + self._utd_ratio=self._update_freq/(new_transitions_per_batch*self._batch_size) + + self._lr_policy = 1e-3 + self._lr_q = 5e-4 + + self._discount_factor = 0.99 + if "discount_factor" in 
custom_args: + self._discount_factor=custom_args["discount_factor"] + + self._smoothing_coeff = 0.01 + + self._policy_freq = 2 + self._trgt_net_freq = 1 + self._rnd_freq = 1 + + # exploration + + # entropy regularization (separate "discrete" and "continuous" actions) + self._entropy_metric_high = 0.5 + self._entropy_metric_low = 0.0 + + # self._entropy_disc_start = -0.05 + # self._entropy_disc_end = -0.5 + + # self._entropy_cont_start = -0.05 + # self._entropy_cont_end = -2.0 + + self._entropy_disc_start = -0.2 + self._entropy_disc_end = -0.2 + + self._entropy_cont_start = -0.8 + self._entropy_cont_end = -0.8 + + # enable/disable entropy annealing (default: enabled) + self._anneal_entropy = False + + self._trgt_avrg_entropy_per_action_disc = self._entropy_disc_start + self._trgt_avrg_entropy_per_action_cont = self._entropy_cont_start + + self._disc_idxs = self._is_discrete_actions.clone().to(torch.long) + self._cont_idxs = self._is_continuous_actions.clone().to(torch.long) + + self._target_entropy_disc = float(self._disc_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_disc) + self._target_entropy_cont = float(self._cont_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_cont) + self._target_entropy = self._target_entropy_disc + self._target_entropy_cont + self._trgt_avrg_entropy_per_action = self._target_entropy / float(max(self._actions_dim, 1)) + self._hyperparameters["anneal_entropy"] = self._anneal_entropy + + self._autotune = True + self._alpha_disc = 0.2 # initial values + self._alpha_cont = 0.2 + self._alpha = 0.5*(self._alpha_disc + self._alpha_cont) + self._log_alpha_disc = math.log(self._alpha_disc) + self._log_alpha_cont = math.log(self._alpha_cont) + self._a_optimizer_disc = None + self._a_optimizer_cont = None + + # random expl ens + self._expl_envs_perc=0.0 # [0, 1] + if "expl_envs_perc" in custom_args: + self._expl_envs_perc=custom_args["expl_envs_perc"] + self._n_expl_envs = int(self._num_envs*self._expl_envs_perc) # n of random envs 
on which noisy actions will be applied + self._noise_freq_vec = 100 # substeps + self._noise_duration_vec = 5 # should be less than _noise_freq + # correct with env substepping + self._noise_freq_vec=self._noise_freq_vec//self._env_n_action_reps + self._noise_duration_vec=self._noise_duration_vec//self._env_n_action_reps + + self._continuous_act_expl_noise_std=0.3 # wrt actions scale + self._discrete_act_expl_noise_std=1.2 # setting it a bit > 1 helps in ensuring discr. actions range is explored + + # rnd + self._use_rnd=True + self._rnd_net=None + self._rnd_optimizer = None + self._rnd_lr = 1e-3 + if "use_rnd" in custom_args and (not self._eval): + self._use_rnd=custom_args["use_rnd"] + self._rnd_weight=1.0 + self._alpha_rnd=0.0 + self._novelty_scaler=None + if self._use_rnd: + from adarl.utils.NoveltyScaler import NoveltyScaler + + self._novelty_scaler=NoveltyScaler(th_device=self._torch_device, + bonus_weight=self._rnd_weight, + avg_alpha=self._alpha_rnd) + + self._rnd_lwidth=512 + self._rnd_hlayers=3 + self._rnd_outdim=16 + self._rnd_indim=self._obs_dim+self._actions_dim + + # batch normalization + self._bnorm_bsize = 4096 + self._bnorm_vecfreq_nom = 5 # wrt vec steps + # make sure _bnorm_vecfreq is a multiple of _collection_freq + self._bnorm_vecfreq = (self._bnorm_vecfreq_nom//self._collection_freq)*self._collection_freq + if self._bnorm_vecfreq == 0: + self._bnorm_vecfreq=self._collection_freq + self._reward_normalizer=None + + self._total_timesteps = int(tot_tsteps) + # self._total_timesteps = self._total_timesteps//self._env_n_action_reps # correct with n of action reps + self._total_timesteps_vec = self._total_timesteps // self._num_envs + self._total_steps = self._total_timesteps_vec//self._collection_freq + self._total_timesteps_vec = self._total_steps*self._collection_freq # correct to be a multiple of self._total_steps + self._total_timesteps = self._total_timesteps_vec*self._num_envs # actual n transitions + + # self._warmstart_timesteps = int(5e3) + 
warmstart_length_single_env=min(self._episode_timeout_lb, self._episode_timeout_ub, + self._task_rand_timeout_lb, self._task_rand_timeout_ub) + self._warmstart_timesteps=warmstart_length_single_env*self._num_envs + if self._warmstart_timesteps < self._batch_size: # ensure we collect sufficient experience before + # starting training + self._warmstart_timesteps=4*self._batch_size + self._warmstart_vectimesteps = self._warmstart_timesteps//self._num_envs + # ensuring multiple of collection_freq + self._warmstart_timesteps = self._num_envs*self._warmstart_vectimesteps # actual + + # period nets resets (for tackling the primacy bias) + self._use_period_resets=False + if "use_period_resets" in custom_args: + self._use_period_resets=custom_args["use_period_resets"] + self._adaptive_resets=True # trigger reset based on overfit metric + self._just_one_reset=False + self._periodic_resets_freq=int(4e6) + self._periodic_resets_start=int(1.5e6) + self._periodic_resets_end=int(0.8*self._total_timesteps) + + self._periodic_resets_vecfreq=self._periodic_resets_freq//self._num_envs + self._periodic_resets_vecfreq = (self._periodic_resets_vecfreq//self._collection_freq)*self._collection_freq + self._reset_vecstep_start=self._periodic_resets_start//self._num_envs + self._reset_vecstep_end=self._periodic_resets_end//self._num_envs + + if self._just_one_reset: + # we set the end as the fist reset + a fraction of the reset frequency (this way only one reset will happen) + self._reset_vecstep_end=int(self._reset_vecstep_start+0.8*self._periodic_resets_vecfreq) + + self._periodic_resets_on=False + + # debug + self._m_checkpoint_freq_nom = 1e6 # n totoal timesteps after which a checkpoint model is dumped + self._m_checkpoint_freq= self._m_checkpoint_freq_nom//self._num_envs + + # expl envs + if self._n_expl_envs>0 and ((self._num_envs-self._n_expl_envs)>0): # log data only from envs which are not altered (e.g. 
by exploration noise) + # computing expl env selector + self._expl_env_selector = torch.randperm(self._num_envs, device="cpu")[:self._n_expl_envs] + self._expl_env_selector_bool[self._expl_env_selector]=True + + # demo envs + if self._demo_env_selector_bool is None: + self._db_env_selector_bool[:]=~self._expl_env_selector_bool + else: # we log db data separately for env which are neither for demo nor for random exploration + self._demo_env_selector_bool=self._demo_env_selector_bool.cpu() + self._demo_env_selector=self._demo_env_selector.cpu() + self._db_env_selector_bool[:]=torch.logical_and(~self._expl_env_selector_bool, ~self._demo_env_selector_bool) + + self._n_expl_envs = self._expl_env_selector_bool.count_nonzero() + self._num_db_envs = self._db_env_selector_bool.count_nonzero() + + if not self._num_db_envs>0: + Journal.log(self.__class__.__name__, + "_init_params", + "No indipendent db env can be computed (check your demo and expl settings)! Will use all envs.", + LogType.EXCEP, + throw_when_excep = False) + self._num_db_envs=self._num_envs + self._db_env_selector_bool[:]=True + self._db_env_selector=torch.nonzero(self._db_env_selector_bool).flatten() + + self._transition_noise_freq=float(self._noise_duration_vec)/float(self._noise_freq_vec) + self._env_noise_freq=float(self._n_expl_envs)/float(self._num_envs) + self._noise_buff_freq=self._transition_noise_freq*self._env_noise_freq + + self._db_vecstep_frequency = 32 # log db data every n (vectorized) SUB timesteps + self._db_vecstep_frequency=round(self._db_vecstep_frequency/self._env_n_action_reps) # correcting with actions reps + # correct db vecstep frequency to ensure it's a multiple of self._collection_freq + self._db_vecstep_frequency=(self._db_vecstep_frequency//self._collection_freq)*self._collection_freq + if self._db_vecstep_frequency == 0: + self._db_vecstep_frequency=self._collection_freq + + self._env_db_checkpoints_vecfreq=150*self._db_vecstep_frequency # detailed db data from envs + + 
self._validate=True + self._validation_collection_vecfreq=50 # add vec transitions to val buffer with some vec freq + self._validation_ratio=1.0/self._validation_collection_vecfreq # [0, 1], 0.1 10% size of training buffer + self._validation_buffer_size_vec = int(self._replay_buffer_size*self._validation_ratio)//self._num_envs + self._validation_buffer_size = self._validation_buffer_size_vec*self._num_envs + self._validation_batch_size = int(self._batch_size*self._validation_ratio) + self._validation_db_vecstep_freq=self._db_vecstep_frequency + if self._eval: # no need for validation transitions during evaluation + self._validate=False + self._overfit_idx=0.0 + self._overfit_idx_alpha=0.03 # exponential MA + self._overfit_idx_thresh=2.0 + + self._n_policy_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq #TD3 delayed update + self._n_qf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq # qf updated at each vec timesteps + self._n_tqf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq//self._trgt_net_freq + + self._exp_to_policy_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_policy_updates_to_be_done) + self._exp_to_qf_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_qf_updates_to_be_done) + self._exp_to_qft_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_tqf_updates_to_be_done) + + self._db_data_size = round(self._total_timesteps_vec/self._db_vecstep_frequency)+self._db_vecstep_frequency + + # write them to hyperparam dictionary for debugging + self._hyperparameters["n_envs"] = self._num_envs + self._hyperparameters["obs_dim"] = self._obs_dim + self._hyperparameters["actions_dim"] = self._actions_dim + + self._hyperparameters["seed"] = self._seed + self._hyperparameters["using_gpu"] = self._use_gpu + self._hyperparameters["total_timesteps_vec"] = 
self._total_timesteps_vec + + self._hyperparameters["collection_freq"]=self._collection_freq + self._hyperparameters["update_freq"]=self._update_freq + self._hyperparameters["total_steps"]=self._total_steps + + self._hyperparameters["utd_ratio"] = self._utd_ratio + + self._hyperparameters["n_policy_updates_when_done"] = self._n_policy_updates_to_be_done + self._hyperparameters["n_qf_updates_when_done"] = self._n_qf_updates_to_be_done + self._hyperparameters["n_tqf_updates_when_done"] = self._n_tqf_updates_to_be_done + self._hyperparameters["experience_to_policy_grad_steps_ratio"] = self._exp_to_policy_grad_ratio + self._hyperparameters["experience_to_quality_fun_grad_steps_ratio"] = self._exp_to_qf_grad_ratio + self._hyperparameters["experience_to_trgt_quality_fun_grad_steps_ratio"] = self._exp_to_qft_grad_ratio + + self._hyperparameters["episodes timeout lb"] = self._episode_timeout_lb + self._hyperparameters["episodes timeout ub"] = self._episode_timeout_ub + self._hyperparameters["task rand timeout lb"] = self._task_rand_timeout_lb + self._hyperparameters["task rand timeout ub"] = self._task_rand_timeout_ub + + self._hyperparameters["warmstart_timesteps"] = self._warmstart_timesteps + self._hyperparameters["warmstart_vectimesteps"] = self._warmstart_vectimesteps + self._hyperparameters["replay_buffer_size"] = self._replay_buffer_size + self._hyperparameters["batch_size"] = self._batch_size + self._hyperparameters["total_timesteps"] = self._total_timesteps + self._hyperparameters["lr_policy"] = self._lr_policy + self._hyperparameters["lr_q"] = self._lr_q + self._hyperparameters["discount_factor"] = self._discount_factor + self._hyperparameters["smoothing_coeff"] = self._smoothing_coeff + self._hyperparameters["policy_freq"] = self._policy_freq + self._hyperparameters["trgt_net_freq"] = self._trgt_net_freq + self._hyperparameters["autotune"] = self._autotune + self._hyperparameters["target_entropy"] = self._target_entropy + 
self._hyperparameters["target_entropy_disc"] = self._target_entropy_disc + self._hyperparameters["target_entropy_cont"] = self._target_entropy_cont + self._hyperparameters["disc_entropy_idxs"] = self._disc_idxs.tolist() + self._hyperparameters["cont_entropy_idxs"] = self._cont_idxs.tolist() + self._hyperparameters["log_alpha_disc"] = None if self._log_alpha_disc is None else self._log_alpha_disc + self._hyperparameters["log_alpha_cont"] = None if self._log_alpha_cont is None else self._log_alpha_cont + self._hyperparameters["alpha"] = self._alpha + self._hyperparameters["alpha_disc"] = self._alpha_disc + self._hyperparameters["alpha_cont"] = self._alpha_cont + self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq + self._hyperparameters["db_vecstep_frequency"] = self._db_vecstep_frequency + self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq + + self._hyperparameters["use_period_resets"]= self._use_period_resets + self._hyperparameters["just_one_reset"]= self._just_one_reset + self._hyperparameters["period_resets_vecfreq"]= self._periodic_resets_vecfreq + self._hyperparameters["period_resets_vecstart"]= self._reset_vecstep_start + self._hyperparameters["period_resets_vecend"]= self._reset_vecstep_end + self._hyperparameters["period_resets_freq"]= self._periodic_resets_freq + self._hyperparameters["period_resets_start"]= self._periodic_resets_start + self._hyperparameters["period_resets_end"]= self._periodic_resets_end + + self._hyperparameters["use_rnd"] = self._use_rnd + self._hyperparameters["rnd_lwidth"] = self._rnd_lwidth + self._hyperparameters["rnd_hlayers"] = self._rnd_hlayers + self._hyperparameters["rnd_outdim"] = self._rnd_outdim + self._hyperparameters["rnd_indim"] = self._rnd_indim + + self._hyperparameters["n_db_envs"] = self._num_db_envs + self._hyperparameters["n_expl_envs"] = self._n_expl_envs + self._hyperparameters["noise_freq"] = self._noise_freq_vec + self._hyperparameters["noise_duration_vec"] = 
self._noise_duration_vec + self._hyperparameters["noise_buff_freq"] = self._noise_buff_freq + self._hyperparameters["continuous_act_expl_noise_std"] = self._continuous_act_expl_noise_std + self._hyperparameters["discrete_act_expl_noise_std"] = self._discrete_act_expl_noise_std + + self._hyperparameters["n_demo_envs"] = self._env.n_demo_envs() + + self._hyperparameters["bnorm_bsize"] = self._bnorm_bsize + self._hyperparameters["bnorm_vecfreq"] = self._bnorm_vecfreq + + self._hyperparameters["validate"] = self._validate + self._hyperparameters["validation_ratio"] = self._validation_ratio + self._hyperparameters["validation_buffer_size_vec"] = self._validation_buffer_size_vec + self._hyperparameters["validation_buffer_size"] = self._validation_buffer_size + self._hyperparameters["validation_batch_size"] = self._validation_batch_size + self._hyperparameters["validation_collection_vecfreq"] = self._validation_collection_vecfreq + + # small debug log + info = f"\nUsing \n" + \ + f"total (vectorized) timesteps to be simulated {self._total_timesteps_vec}\n" + \ + f"total timesteps to be simulated {self._total_timesteps}\n" + \ + f"warmstart timesteps {self._warmstart_timesteps}\n" + \ + f"training replay buffer size {self._replay_buffer_size}\n" + \ + f"training replay buffer vec size {self._replay_buffer_size_vec}\n" + \ + f"training batch size {self._batch_size}\n" + \ + f"validation enabled {self._validate}\n" + \ + f"validation buffer size {self._validation_buffer_size}\n" + \ + f"validation buffer vec size {self._validation_buffer_size_vec}\n" + \ + f"validation collection freq {self._validation_collection_vecfreq}\n" + \ + f"validation update freq {self._validation_db_vecstep_freq}\n" + \ + f"validation batch size {self._validation_batch_size}\n" + \ + f"policy update freq {self._policy_freq}\n" + \ + f"target networks freq {self._trgt_net_freq}\n" + \ + f"episode timeout max steps {self._episode_timeout_ub}\n" + \ + f"episode timeout min steps 
{self._episode_timeout_lb}\n" + \ + f"task rand. max n steps {self._task_rand_timeout_ub}\n" + \ + f"task rand. min n steps {self._task_rand_timeout_lb}\n" + \ + f"number of action reps {self._env_n_action_reps}\n" + \ + f"total policy updates to be performed: {self._n_policy_updates_to_be_done}\n" + \ + f"total q fun updates to be performed: {self._n_qf_updates_to_be_done}\n" + \ + f"total trgt q fun updates to be performed: {self._n_tqf_updates_to_be_done}\n" + \ + f"experience to policy grad ratio: {self._exp_to_policy_grad_ratio}\n" + \ + f"experience to q fun grad ratio: {self._exp_to_qf_grad_ratio}\n" + \ + f"experience to trgt q fun grad ratio: {self._exp_to_qft_grad_ratio}\n" + \ + f"amount of noisy transitions in replay buffer: {self._noise_buff_freq*100}% \n" + db_env_idxs=", ".join(map(str, self._db_env_selector.tolist())) + n_db_envs_str=f"db envs {self._num_db_envs}/{self._num_envs} \n" + info=info + n_db_envs_str + "Debug env. indexes are [" + db_env_idxs+"]\n" + if self._env.demo_env_idxs() is not None: + demo_idxs_str=", ".join(map(str, self._env.demo_env_idxs().tolist())) + n_demo_envs_str=f"demo envs {self._env.n_demo_envs()}/{self._num_envs} \n" + info=info + n_demo_envs_str + "Demo env. indexes are [" + demo_idxs_str+"]\n" + if self._expl_env_selector is not None: + random_expl_idxs=", ".join(map(str, self._expl_env_selector.tolist())) + n_expl_envs_str=f"expl envs {self._n_expl_envs}/{self._num_envs} \n" + info=info + n_expl_envs_str + "Random exploration env. 
indexes are [" + random_expl_idxs+"]\n" + + Journal.log(self.__class__.__name__, + "_init_params", + info, + LogType.INFO, + throw_when_excep = True) + + # init counters + self._step_counter = 0 + self._vec_transition_counter = 0 + self._update_counter = 0 + self._log_it_counter = 0 + + def _init_dbdata(self): + + # initalize some debug data + self._collection_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._collection_t = -1.0 + + self._env_step_fps = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._env_step_rt_factor = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._batch_norm_update_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._policy_update_t_start = -1.0 + self._policy_update_t = -1.0 + self._policy_update_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._policy_update_fps = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._validation_t = -1.0 + self._validation_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._n_of_played_episodes = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_timesteps_done = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_policy_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_qfun_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_tqfun_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._elapsed_min = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._ep_tsteps_env_distribution = 
torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.int32, fill_value=-1, device="cpu") + + self._reward_names = self._episodic_reward_metrics.reward_names() + self._reward_names_str = "[" + ', '.join(self._reward_names) + "]" + self._n_rewards = self._episodic_reward_metrics.n_rewards() + + # db environments + self._tot_rew_max = torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_min = torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_max_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_min_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_avrg_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_std_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._sub_rew_max = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_max_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") 
+ self._sub_rew_avrg_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_std_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # custom data from env # (log data just from db envs for simplicity) + self._custom_env_data = {} + db_data_names = list(self._env.custom_db_data.keys()) + for dbdatan in db_data_names: # loop thorugh custom data + + self._custom_env_data[dbdatan] = {} + + max = self._env.custom_db_data[dbdatan].get_max(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1) + avrg = self._env.custom_db_data[dbdatan].get_avrg(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1) + min = self._env.custom_db_data[dbdatan].get_min(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1) + max_over_envs = self._env.custom_db_data[dbdatan].get_max_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + min_over_envs = self._env.custom_db_data[dbdatan].get_min_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + avrg_over_envs = self._env.custom_db_data[dbdatan].get_avrg_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + std_over_envs = self._env.custom_db_data[dbdatan].get_std_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + + self._custom_env_data[dbdatan]["max"] =torch.full((self._db_data_size, + max.shape[0], + max.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["avrg"] =torch.full((self._db_data_size, + avrg.shape[0], + avrg.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["min"] =torch.full((self._db_data_size, + min.shape[0], + min.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["max_over_envs"] =torch.full((self._db_data_size, + max_over_envs.shape[0], + 
max_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["min_over_envs"] =torch.full((self._db_data_size, + min_over_envs.shape[0], + min_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["avrg_over_envs"] =torch.full((self._db_data_size, + avrg_over_envs.shape[0], + avrg_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["std_over_envs"] =torch.full((self._db_data_size, + std_over_envs.shape[0], + std_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # exploration envs + if self._n_expl_envs > 0: + self._ep_tsteps_expl_env_distribution = torch.full((self._db_data_size, self._n_expl_envs, 1), + dtype=torch.int32, fill_value=-1, device="cpu") + + # also log sub rewards metrics for exploration envs + self._sub_rew_max_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_max_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_std_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # demo 
environments + self._demo_envs_active = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._demo_perf_metric = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + if self._env.demo_env_idxs() is not None: + n_demo_envs=self._env.demo_env_idxs().shape[0] + + self._ep_tsteps_demo_env_distribution = torch.full((self._db_data_size, n_demo_envs, 1), + dtype=torch.int32, fill_value=-1, device="cpu") + + # also log sub rewards metrics for exploration envs + self._sub_rew_max_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_max_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_std_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # algorithm-specific db info + self._qf1_vals_mean = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_mean = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._min_qft_vals_mean = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + 
self._qf1_vals_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._min_qft_vals_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf1_vals_max = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf1_vals_min = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_max = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_min = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._qf1_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._actor_loss= torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_disc = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_cont = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + if self._validate: # add db data for validation losses + self._overfit_index = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf1_loss_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_loss_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._actor_loss_validation= 
torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_disc_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_cont_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._alphas = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alphas_disc = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alphas_cont = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._policy_entropy_mean=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_std=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_max=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_min=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_mean=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_std=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_max=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_min=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_cont_mean=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + 
self._policy_entropy_cont_std=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_cont_max=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_cont_min=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._running_mean_obs=None + self._running_std_obs=None + if self._agent.obs_running_norm is not None and not self._eval: + # some db data for the agent + self._running_mean_obs = torch.full((self._db_data_size, self._env.obs_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._running_std_obs = torch.full((self._db_data_size, self._env.obs_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + + # RND + self._rnd_loss=None + if self._use_rnd: + self._rnd_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._expl_bonus_raw_avrg = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._expl_bonus_raw_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_raw_min = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_raw_max = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._expl_bonus_proc_avrg = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._expl_bonus_proc_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_proc_min = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_proc_max = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + + 
self._n_rnd_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._running_mean_rnd_input = None + self._running_std_rnd_input = None + if self._rnd_net.obs_running_norm is not None: + self._running_mean_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._running_std_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + + def _init_agent_optimizers(self): + self._qf_optimizer = optim.Adam(list(self._agent.qf1.parameters()) + list(self._agent.qf2.parameters()), + lr=self._lr_q) + self._actor_optimizer = optim.Adam(list(self._agent.actor.parameters()), + lr=self._lr_policy) + + def _init_alpha_autotuning(self): + self._log_alpha_disc = torch.full((1,), fill_value=math.log(self._alpha_disc), requires_grad=True, device=self._torch_device) + self._log_alpha_cont = torch.full((1,), fill_value=math.log(self._alpha_cont), requires_grad=True, device=self._torch_device) + self._alpha_disc = self._log_alpha_disc.exp().item() + self._alpha_cont = self._log_alpha_cont.exp().item() + self._alpha = 0.5*(self._alpha_disc + self._alpha_cont) + self._a_optimizer_disc = optim.Adam([self._log_alpha_disc], lr=self._lr_q) + self._a_optimizer_cont = optim.Adam([self._log_alpha_cont], lr=self._lr_q) + + def _init_replay_buffers(self): + + self._bpos = 0 + + self._obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._actions = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._actions_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._rewards = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1), + fill_value=torch.nan, + dtype=self._dtype, + 
device=self._torch_device, + requires_grad=False) + self._next_obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._next_terminal = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1), + fill_value=False, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + + def _init_validation_buffers(self): + + self._bpos_val = 0 + + self._obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._actions_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._actions_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._rewards_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._next_obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._next_terminal_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1), + fill_value=False, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + + def _save_model(self, + is_checkpoint: bool = False): + + path = self._model_path + if is_checkpoint: # use iteration as id + path = path + "_checkpoint" + str(self._log_it_counter) + info = f"Saving model to {path}" + Journal.log(self.__class__.__name__, + "_save_model", + info, + LogType.INFO, + throw_when_excep = True) + agent_state_dict=self._agent.state_dict() + if not self._eval: # training + # we log the joints which were observed during training + observed_joints=self._env.get_observed_joints() + if 
observed_joints is not None: + agent_state_dict["observed_jnts"]=self._env.get_observed_joints() + + torch.save(agent_state_dict, path) # saves whole agent state + # torch.save(self._agent.parameters(), path) # only save agent parameters + info = f"Done." + Journal.log(self.__class__.__name__, + "_save_model", + info, + LogType.INFO, + throw_when_excep = True) + + def _dump_env_checkpoints(self): + + path = self._env_db_checkpoints_fname+str(self._log_it_counter) + + if path is not None: + info = f"Saving env db checkpoint data to {path}" + Journal.log(self.__class__.__name__, + "_dump_env_checkpoints", + info, + LogType.INFO, + throw_when_excep = True) + + with h5py.File(path+".hdf5", 'w') as hf: + + for key, value in self._hyperparameters.items(): + if value is None: + value = "None" + hf.attrs[key] = value + + # full training envs + sub_rew_full=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._db_env_selector) + tot_rew_full=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._db_env_selector) + + if self._n_expl_envs > 0: + sub_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._expl_env_selector) + tot_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._expl_env_selector) + if self._env.n_demo_envs() > 0: + sub_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._demo_env_selector) + tot_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._demo_env_selector) + + ep_vec_freq=self._episodic_reward_metrics.ep_vec_freq() # assuming all db data was collected with the same ep_vec_freq + + hf.attrs['sub_reward_names'] = self._reward_names + hf.attrs['log_iteration'] = self._log_it_counter + hf.attrs['n_timesteps_done'] = self._n_timesteps_done[self._log_it_counter] + hf.attrs['n_policy_updates'] = self._n_policy_updates[self._log_it_counter] + hf.attrs['elapsed_min'] = 
self._elapsed_min[self._log_it_counter] + hf.attrs['ep_vec_freq'] = ep_vec_freq + hf.attrs['n_expl_envs'] = self._n_expl_envs + hf.attrs['n_demo_envs'] = self._env.n_demo_envs() + + # first dump custom db data names + db_data_names = list(self._env.custom_db_data.keys()) + for db_dname in db_data_names: + episodic_data_names = self._env.custom_db_data[db_dname].data_names() + var_name = db_dname + hf.attrs[var_name+"_data_names"] = episodic_data_names + + for ep_idx in range(ep_vec_freq): # create separate datasets for each episode + ep_prefix=f'ep_{ep_idx}_' + + # rewards + hf.create_dataset(ep_prefix+'sub_rew', + data=sub_rew_full[ep_idx, :, :, :]) + hf.create_dataset(ep_prefix+'tot_rew', + data=tot_rew_full[ep_idx, :, :, :]) + if self._n_expl_envs > 0: + hf.create_dataset(ep_prefix+'sub_rew_expl', + data=sub_rew_full_expl[ep_idx, :, :, :]) + hf.create_dataset(ep_prefix+'tot_rew_expl', + data=tot_rew_full_expl[ep_idx, :, :, :]) + hf.create_dataset('expl_env_selector', data=self._expl_env_selector.cpu().numpy()) + if self._env.n_demo_envs() > 0: + hf.create_dataset(ep_prefix+'sub_rew_demo', + data=sub_rew_full_demo) + hf.create_dataset(ep_prefix+'tot_rew_demo', + data=tot_rew_full_demo[ep_idx, :, :, :]) + hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().cpu().numpy()) + + # dump all custom env data + db_data_names = list(self._env.custom_db_data.keys()) + for db_dname in db_data_names: + episodic_data=self._env.custom_db_data[db_dname] + var_name = db_dname + hf.create_dataset(ep_prefix+var_name, + data=episodic_data.get_full_episodic_data(env_selector=self._db_env_selector)[ep_idx, :, :, :]) + if self._n_expl_envs > 0: + hf.create_dataset(ep_prefix+var_name+"_expl", + data=episodic_data.get_full_episodic_data(env_selector=self._expl_env_selector)[ep_idx, :, :, :]) + if self._env.n_demo_envs() > 0: + hf.create_dataset(ep_prefix+var_name+"_demo", + data=episodic_data.get_full_episodic_data(env_selector=self._demo_env_selector)[ep_idx, :, :, :]) + + 
Journal.log(self.__class__.__name__, + "_dump_env_checkpoints", + "done.", + LogType.INFO, + throw_when_excep = True) + + def done(self): + + if not self._is_done: + + if not self._eval: + self._save_model() + + self._dump_dbinfo_to_file() + + if self._full_env_db: + self._dump_env_checkpoints() + + if self._shared_algo_data is not None: + self._shared_algo_data.write(dyn_info_name=["is_done"], + val=[1.0]) + self._shared_algo_data.close() # close shared memory + + self._env.close() + + self._is_done = True + + def _dump_dbinfo_to_file(self): + + info = f"Dumping debug info at {self._dbinfo_drop_fname}" + Journal.log(self.__class__.__name__, + "_dump_dbinfo_to_file", + info, + LogType.INFO, + throw_when_excep = True) + + with h5py.File(self._dbinfo_drop_fname+".hdf5", 'w') as hf: + n_valid = int(max(0, min(self._log_it_counter, self._db_data_size))) + + def _slice_valid(arr): + if isinstance(arr, torch.Tensor): + return arr[:n_valid] + if isinstance(arr, np.ndarray): + return arr[:n_valid] + return arr + + def _ds(name, arr): + data = _slice_valid(arr) + hf.create_dataset(name, data=data.numpy() if isinstance(data, torch.Tensor) else data) + + # hf.create_dataset('numpy_data', data=numpy_data) + # Write dictionaries to HDF5 as attributes + for key, value in self._hyperparameters.items(): + if value is None: + value = "None" + hf.attrs[key] = value + + # rewards + hf.create_dataset('sub_reward_names', data=self._reward_names, + dtype='S40') + _ds('sub_rew_max', self._sub_rew_max) + _ds('sub_rew_avrg', self._sub_rew_avrg) + _ds('sub_rew_min', self._sub_rew_min) + _ds('sub_rew_max_over_envs', self._sub_rew_max_over_envs) + _ds('sub_rew_min_over_envs', self._sub_rew_min_over_envs) + _ds('sub_rew_avrg_over_envs', self._sub_rew_avrg_over_envs) + _ds('sub_rew_std_over_envs', self._sub_rew_std_over_envs) + + _ds('tot_rew_max', self._tot_rew_max) + _ds('tot_rew_avrg', self._tot_rew_avrg) + _ds('tot_rew_min', self._tot_rew_min) + _ds('tot_rew_max_over_envs', 
self._tot_rew_max_over_envs) + _ds('tot_rew_min_over_envs', self._tot_rew_min_over_envs) + _ds('tot_rew_avrg_over_envs', self._tot_rew_avrg_over_envs) + _ds('tot_rew_std_over_envs', self._tot_rew_std_over_envs) + + _ds('ep_tsteps_env_distr', self._ep_tsteps_env_distribution) + + if self._n_expl_envs > 0: + # expl envs + _ds('sub_rew_max_expl', self._sub_rew_max_expl) + _ds('sub_rew_avrg_expl', self._sub_rew_avrg_expl) + _ds('sub_rew_min_expl', self._sub_rew_min_expl) + _ds('sub_rew_max_over_envs_expl', self._sub_rew_max_over_envs_expl) + _ds('sub_rew_min_over_envs_expl', self._sub_rew_min_over_envs_expl) + _ds('sub_rew_avrg_over_envs_expl', self._sub_rew_avrg_over_envs_expl) + _ds('sub_rew_std_over_envs_expl', self._sub_rew_std_over_envs_expl) + + _ds('ep_timesteps_expl_env_distr', self._ep_tsteps_expl_env_distribution) + + hf.create_dataset('expl_env_selector', data=self._expl_env_selector.numpy()) + + _ds('demo_envs_active', self._demo_envs_active) + _ds('demo_perf_metric', self._demo_perf_metric) + + if self._env.n_demo_envs() > 0: + # demo envs + _ds('sub_rew_max_demo', self._sub_rew_max_demo) + _ds('sub_rew_avrg_demo', self._sub_rew_avrg_demo) + _ds('sub_rew_min_demo', self._sub_rew_min_demo) + _ds('sub_rew_max_over_envs_demo', self._sub_rew_max_over_envs_demo) + _ds('sub_rew_min_over_envs_demo', self._sub_rew_min_over_envs_demo) + _ds('sub_rew_avrg_over_envs_demo', self._sub_rew_avrg_over_envs_demo) + _ds('sub_rew_std_over_envs_demo', self._sub_rew_std_over_envs_demo) + + _ds('ep_timesteps_demo_env_distr', self._ep_tsteps_demo_env_distribution) + + hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().numpy()) + + # profiling data + _ds('env_step_fps', self._env_step_fps) + _ds('env_step_rt_factor', self._env_step_rt_factor) + _ds('collection_dt', self._collection_dt) + _ds('batch_norm_update_dt', self._batch_norm_update_dt) + _ds('policy_update_dt', self._policy_update_dt) + _ds('policy_update_fps', self._policy_update_fps) + _ds('validation_dt', 
self._validation_dt) + + _ds('n_of_played_episodes', self._n_of_played_episodes) + _ds('n_timesteps_done', self._n_timesteps_done) + _ds('n_policy_updates', self._n_policy_updates) + _ds('n_qfun_updates', self._n_qfun_updates) + _ds('n_tqfun_updates', self._n_tqfun_updates) + + _ds('elapsed_min', self._elapsed_min) + + # algo data + _ds('qf1_vals_mean', self._qf1_vals_mean) + _ds('qf2_vals_mean', self._qf2_vals_mean) + _ds('qf1_vals_std', self._qf1_vals_std) + _ds('qf2_vals_std', self._qf2_vals_std) + _ds('qf1_vals_max', self._qf1_vals_max) + _ds('qf1_vals_min', self._qf1_vals_min) + _ds('qf2_vals_max', self._qf2_vals_max) + _ds('qf2_vals_min', self._qf1_vals_min) + + _ds('min_qft_vals_mean', self._min_qft_vals_mean) + _ds('min_qft_vals_std', self._min_qft_vals_std) + + _ds('qf1_loss', self._qf1_loss) + _ds('qf2_loss', self._qf2_loss) + _ds('actor_loss', self._actor_loss) + _ds('alpha_loss', self._alpha_loss) + _ds('alpha_loss_disc', self._alpha_loss_disc) + _ds('alpha_loss_cont', self._alpha_loss_cont) + if self._validate: + _ds('qf1_loss_validation', self._qf1_loss_validation) + _ds('qf2_loss_validation', self._qf2_loss_validation) + _ds('actor_loss_validation', self._actor_loss_validation) + _ds('alpha_loss_validation', self._alpha_loss_validation) + _ds('alpha_loss_disc_validation', self._alpha_loss_disc_validation) + _ds('alpha_loss_cont_validation', self._alpha_loss_cont_validation) + _ds('overfit_index', self._overfit_index) + + _ds('alphas', self._alphas) + _ds('alphas_disc', self._alphas_disc) + _ds('alphas_cont', self._alphas_cont) + + _ds('policy_entropy_mean', self._policy_entropy_mean) + _ds('policy_entropy_std', self._policy_entropy_std) + _ds('policy_entropy_max', self._policy_entropy_max) + _ds('policy_entropy_min', self._policy_entropy_min) + _ds('policy_entropy_disc_mean', self._policy_entropy_disc_mean) + _ds('policy_entropy_disc_std', self._policy_entropy_disc_std) + _ds('policy_entropy_disc_max', self._policy_entropy_disc_max) + 
_ds('policy_entropy_disc_min', self._policy_entropy_disc_min) + _ds('policy_entropy_cont_mean', self._policy_entropy_cont_mean) + _ds('policy_entropy_cont_std', self._policy_entropy_cont_std) + _ds('policy_entropy_cont_max', self._policy_entropy_cont_max) + _ds('policy_entropy_cont_min', self._policy_entropy_cont_min) + hf.create_dataset('target_entropy', data=self._target_entropy) + hf.create_dataset('target_entropy_disc', data=self._target_entropy_disc) + hf.create_dataset('target_entropy_cont', data=self._target_entropy_cont) + + if self._use_rnd: + _ds('n_rnd_updates', self._n_rnd_updates) + _ds('expl_bonus_raw_avrg', self._expl_bonus_raw_avrg) + _ds('expl_bonus_raw_std', self._expl_bonus_raw_std) + _ds('expl_bonus_proc_avrg', self._expl_bonus_proc_avrg) + _ds('expl_bonus_proc_std', self._expl_bonus_proc_std) + + if self._rnd_net.obs_running_norm is not None: + if self._running_mean_rnd_input is not None: + _ds('running_mean_rnd_input', self._running_mean_rnd_input) + if self._running_std_rnd_input is not None: + _ds('running_std_rnd_input', self._running_std_rnd_input) + + # dump all custom env data + db_data_names = list(self._env.custom_db_data.keys()) + for db_dname in db_data_names: + data=self._custom_env_data[db_dname] + subnames = list(data.keys()) + for subname in subnames: + var_name = db_dname + "_" + subname + _ds(var_name, data[subname]) + + # other data + if self._agent.obs_running_norm is not None: + if self._running_mean_obs is not None: + _ds('running_mean_obs', self._running_mean_obs) + if self._running_std_obs is not None: + _ds('running_std_obs', self._running_std_obs) + + info = f"done." 
# NOTE(review): the statements below are the tail of _dump_dbinfo_to_file(),
# whose `def` lies in an earlier chunk of this file; `info` is a local built there.
Journal.log(self.__class__.__name__,
    "_dump_dbinfo_to_file",
    info,
    LogType.INFO,
    throw_when_excep=True)

def _load_model(self,
        model_path: str):
    """Load an agent checkpoint from ``model_path`` into ``self._agent``.

    When the checkpoint carries an ``observed_jnts`` entry, the joints observed
    by the current env are validated against it (and a remapping is installed
    if only the ordering differs). In evaluation mode the agent is switched to
    inference mode after loading.
    """
    info = f"Loading model at {model_path}"
    Journal.log(self.__class__.__name__,
        "_load_model",
        info,
        LogType.INFO,
        throw_when_excep=True)
    # SECURITY: weights_only=False lets torch.load unpickle arbitrary objects;
    # only load checkpoints from trusted sources.
    model_dict = torch.load(model_path,
        map_location=self._torch_device,
        weights_only=False)

    observed_joints = self._env.get_observed_joints()
    if not ("observed_jnts" in model_dict):
        Journal.log(self.__class__.__name__,
            "_load_model",
            "No observed joints key found in loaded model dictionary! Let's hope joints are ordered in the same way.",
            LogType.WARN)
    else:
        required_joints = model_dict["observed_jnts"]
        self._check_observed_joints(observed_joints, required_joints)

    self._agent.load_state_dict(model_dict)

    if self._eval:
        self._switch_training_mode(False)

def _check_observed_joints(self,
        observed_joints,
        required_joints):
    """Validate env-observed joints against the checkpoint's required joints.

    Raises (via Journal.log with LogType.EXCEP) when required joints are
    missing or extra joints are observed. When the two lists hold the same
    joints in a different order, an index remapping is computed and installed
    in the env; otherwise the remapping is left as None.
    """
    observed = set(observed_joints)
    required = set(required_joints)

    all_required_joints_avail = required.issubset(observed)
    if not all_required_joints_avail:
        missing = [item for item in required if item not in observed]
        missing_str = ', '.join(missing)
        Journal.log(self.__class__.__name__,
            "_check_observed_joints",
            f"not all required joints are available. Missing {missing_str}",
            LogType.EXCEP,
            throw_when_excep=True)
    exceeding = observed - required
    if not len(exceeding) == 0:
        # do not support having more joints than the required
        exc_jnts = " ".join(list(exceeding))
        Journal.log(self.__class__.__name__,
            "_check_observed_joints",
            f"more than the required joints found in the observed joint: {exc_jnts}",
            LogType.EXCEP,
            throw_when_excep=True)

    # here we are sure that required and observed sets match
    self._to_agent_jnt_remap = None
    if not required_joints == observed_joints:
        Journal.log(self.__class__.__name__,
            "_check_observed_joints",
            "required jnt obs from agent have different ordering from observed ones. Will compute a remapping.",
            LogType.WARN,
            throw_when_excep=True)
        self._to_agent_jnt_remap = [observed_joints.index(element) for element in required_joints]

    self._env.set_jnts_remapping(remapping=self._to_agent_jnt_remap)

def drop_dir(self):
    """Return the root directory where this run drops models and db data."""
    return self._drop_dir

def _init_drop_dir(self,
        drop_dir_name: str = None):
    """Create the run's drop-directory tree and resolve output file names.

    Args:
        drop_dir_name: parent directory for the drop tree; the current working
            directory is used when None.

    Side effects: creates directories (fails if they already exist), resolves
    ``self._model_path`` / ``self._dbinfo_drop_fname`` and copies env/agent
    implementation files into the drop dir for reproducibility.
    """
    # main drop directory
    if drop_dir_name is None:
        # drop to current directory
        self._drop_dir = "./" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id
    else:
        self._drop_dir = drop_dir_name + "/" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id
    os.makedirs(self._drop_dir)

    self._env_db_checkpoints_dropdir = None
    self._env_db_checkpoints_fname = None
    if self._full_env_db > 0:
        self._env_db_checkpoints_dropdir = self._drop_dir + "/env_db_checkpoints"
        self._env_db_checkpoints_fname = self._env_db_checkpoints_dropdir + \
            "/" + self._unique_id + "_env_db_checkpoint"
        os.makedirs(self._env_db_checkpoints_dropdir)
    # model
    if not self._eval or (self._model_path is None):
        self._model_path = self._drop_dir + "/" + self._unique_id + "_model"
    else: # we copy the model under evaluation to the drop dir
        shutil.copy(self._model_path, self._drop_dir)

    # debug info
    # NOTE(review): sibling artifacts use a "_<suffix>" separator; "db_info"
    # lacks the underscore (yields "<id>db_info") — confirm this is intended
    # before changing it, since downstream readers may rely on the name.
    self._dbinfo_drop_fname = self._drop_dir + "/" + self._unique_id + "db_info" # extension added later

    # other auxiliary db files
    aux_drop_dir = self._drop_dir + "/other"
    os.makedirs(aux_drop_dir)
    filepaths = self._env.get_file_paths() # envs implementation
    filepaths.append(self._this_basepath) # algorithm implementation
    filepaths.append(self._this_child_path)
    filepaths.append(self._agent.get_impl_path()) # agent implementation
    for file in filepaths:
        shutil.copy(file, self._drop_dir)
    aux_dirs = self._env.get_aux_dir()
    for aux_dir in aux_dirs:
        shutil.copytree(aux_dir, aux_drop_dir, dirs_exist_ok=True)
def _get_performance_metric(self):
    """Performance metric used e.g. for demo-env deactivation; meant to be
    overridden by subclasses. Base implementation returns 0.0."""
    # to be overridden
    return 0.0

def _post_step(self):
    """Book-keeping run after each algorithm step: accumulate timings,
    collect episodic reward statistics, mirror data to shared memory,
    trigger logging and (periodic) checkpoint dumps.
    """
    self._collection_dt[self._log_it_counter] += \
        (self._collection_t - self._start_time)
    self._batch_norm_update_dt[self._log_it_counter] += \
        (self._policy_update_t_start - self._collection_t)
    self._policy_update_dt[self._log_it_counter] += \
        (self._policy_update_t - self._policy_update_t_start)
    if self._validate:
        self._validation_dt[self._log_it_counter] += \
            (self._validation_t - self._policy_update_t)

    self._step_counter += 1 # counts algo steps

    self._demo_envs_active[self._log_it_counter] = self._env.demo_active()
    self._demo_perf_metric[self._log_it_counter] = self._get_performance_metric()
    if self._env.n_demo_envs() > 0 and (self._demo_stop_thresh is not None):
        # check if deactivation condition applies
        # NOTE(review): this expression was garbled in the patch text
        # ("...[self._log_it_counter] 0:" — an HTML-like stripper ate the span
        # between '<' and the next '>'). Reconstructed minimally as a
        # "metric below threshold keeps demos active" comparison plus the
        # exploration-env guard below; additional statements may have been
        # lost in the eaten span — confirm against the upstream repository.
        self._env.switch_demo(active=self._demo_perf_metric[self._log_it_counter] < self._demo_stop_thresh)

    # exploration envs
    if self._n_expl_envs > 0:
        self._ep_tsteps_expl_env_distribution[self._log_it_counter, :] = \
            self._episodic_reward_metrics.step_counters(env_selector=self._expl_env_selector)*self._env_n_action_reps

        self._sub_rew_max_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._expl_env_selector)
        self._sub_rew_avrg_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._expl_env_selector)
        self._sub_rew_min_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._expl_env_selector)
        self._sub_rew_max_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._expl_env_selector)
        self._sub_rew_min_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._expl_env_selector)
        self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._expl_env_selector)
        self._sub_rew_std_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._expl_env_selector)

    # demo envs
    if self._env.n_demo_envs() > 0 and self._env.demo_active():
        # only log if demo envs are active (db data will remain nan otherwise)
        self._ep_tsteps_demo_env_distribution[self._log_it_counter, :] = \
            self._episodic_reward_metrics.step_counters(env_selector=self._demo_env_selector)*self._env_n_action_reps

        self._sub_rew_max_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._demo_env_selector)
        self._sub_rew_avrg_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._demo_env_selector)
        self._sub_rew_min_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._demo_env_selector)
        self._sub_rew_max_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._demo_env_selector)
        self._sub_rew_min_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._demo_env_selector)
        self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._demo_env_selector)
        self._sub_rew_std_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._demo_env_selector)

    # other data
    if self._agent.obs_running_norm is not None:
        if self._running_mean_obs is not None:
            self._running_mean_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_mean()
        if self._running_std_obs is not None:
            self._running_std_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_std()

    if self._use_rnd:
        if self._running_mean_rnd_input is not None:
            self._running_mean_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_mean()
        if self._running_std_rnd_input is not None:
            self._running_std_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_std()

    # write some episodic db info on shared mem
    sub_returns = self._sub_returns.get_torch_mirror(gpu=False)
    sub_returns[:, :] = self._episodic_reward_metrics.get_sub_rew_avrg()
    tot_returns = self._tot_returns.get_torch_mirror(gpu=False)
    tot_returns[:, :] = self._episodic_reward_metrics.get_tot_rew_avrg()
    self._sub_returns.synch_all(read=False)
    self._tot_returns.synch_all(read=False)

    self._log_info()

    self._log_it_counter += 1

    if self._dump_checkpoints and \
            (self._vec_transition_counter % self._m_checkpoint_freq == 0):
        self._save_model(is_checkpoint=True)

    if self._full_env_db and \
            (self._vec_transition_counter % self._env_db_checkpoints_vecfreq == 0):
        self._dump_env_checkpoints()

    if self._vec_transition_counter == self._total_timesteps_vec:
        self.done()

def _should_have_called_setup(self):
    """Raise (via Journal.log with throw_when_excep) because setup() was not
    called before using this object."""
    exception = f"setup() was not called!"
    Journal.log(self.__class__.__name__,
        "_should_have_called_setup",
        exception,
        LogType.EXCEP,
        throw_when_excep=True)
def _log_info(self):
    """Emit periodic debug/verbose logging: shared-memory counters, wandb
    histograms/scalars (when remote db is enabled) and a human-readable
    summary through Journal.

    NOTE(review): this body was reconstructed from collapsed patch text; the
    nesting of the wandb section under ``if self._remote_db:`` and of the
    running-norm blocks is inferred from comments and data flow — confirm
    against the upstream repository.
    """
    if self._debug or self._verbose:
        elapsed_h = self._elapsed_min[self._log_it_counter].item()/60.0
        est_remaining_time_h = elapsed_h * 1/(self._vec_transition_counter) * (self._total_timesteps_vec-self._vec_transition_counter)
        is_done = self._vec_transition_counter == self._total_timesteps_vec

        actual_tsteps_with_updates = -1
        experience_to_policy_grad_ratio = -1
        experience_to_qfun_grad_ratio = -1
        experience_to_tqfun_grad_ratio = -1
        if not self._eval:
            actual_tsteps_with_updates = (self._n_timesteps_done[self._log_it_counter].item()-self._warmstart_timesteps)
            epsi = 1e-6 # to avoid div by 0
            # NOTE(review): subtracting epsi yields a large NEGATIVE ratio when
            # no updates have happened yet; "+epsi" may have been intended.
            # Kept as-is (debug output only) — confirm upstream.
            experience_to_policy_grad_ratio = actual_tsteps_with_updates/(self._n_policy_updates[self._log_it_counter].item()-epsi)
            experience_to_qfun_grad_ratio = actual_tsteps_with_updates/(self._n_qfun_updates[self._log_it_counter].item()-epsi)
            experience_to_tqfun_grad_ratio = actual_tsteps_with_updates/(self._n_tqfun_updates[self._log_it_counter].item()-epsi)

        if self._debug:
            if self._remote_db:
                # write general algo debug info to shared memory
                info_names = self._shared_algo_data.dynamic_info.get()
                info_data = [
                    self._n_timesteps_done[self._log_it_counter].item(),
                    self._n_policy_updates[self._log_it_counter].item(),
                    experience_to_policy_grad_ratio,
                    elapsed_h,
                    est_remaining_time_h,
                    self._env_step_fps[self._log_it_counter].item(),
                    self._env_step_rt_factor[self._log_it_counter].item(),
                    self._collection_dt[self._log_it_counter].item(),
                    self._policy_update_fps[self._log_it_counter].item(),
                    self._policy_update_dt[self._log_it_counter].item(),
                    is_done,
                    self._n_of_played_episodes[self._log_it_counter].item(),
                    self._batch_norm_update_dt[self._log_it_counter].item(),
                ]
                self._shared_algo_data.write(dyn_info_name=info_names,
                    val=info_data)

                # write debug info to remote wandb server
                db_data_names = list(self._env.custom_db_data.keys())
                for dbdatan in db_data_names:
                    data = self._custom_env_data[dbdatan]
                    data_names = self._env.custom_db_data[dbdatan].data_names()

                    self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_max":
                        wandb.Histogram(data["max"][self._log_it_counter, :, :].numpy())})
                    self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_avrg":
                        wandb.Histogram(data["avrg"][self._log_it_counter, :, :].numpy())})
                    self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_min":
                        wandb.Histogram(data["min"][self._log_it_counter, :, :].numpy())})

                    self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_max_over_envs":
                        data["max_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
                    self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_min_over_envs":
                        data["min_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
                    self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_avrg_over_envs":
                        data["avrg_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})
                    self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_std_over_envs":
                        data["std_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))})

                self._wandb_d.update({'log_iteration' : self._log_it_counter})
                self._wandb_d.update(dict(zip(info_names, info_data)))

                # debug environments
                self._wandb_d.update({'correlation_db/ep_timesteps_env_distr':
                    wandb.Histogram(self._ep_tsteps_env_distribution[self._log_it_counter, :, :].numpy())})

                self._wandb_d.update({'tot_reward/tot_rew_max': wandb.Histogram(self._tot_rew_max[self._log_it_counter, :, :].numpy()),
                    'tot_reward/tot_rew_avrg': wandb.Histogram(self._tot_rew_avrg[self._log_it_counter, :, :].numpy()),
                    'tot_reward/tot_rew_min': wandb.Histogram(self._tot_rew_min[self._log_it_counter, :, :].numpy()),
                    'tot_reward/tot_rew_max_over_envs': self._tot_rew_max_over_envs[self._log_it_counter, :, :].item(),
                    'tot_reward/tot_rew_min_over_envs': self._tot_rew_min_over_envs[self._log_it_counter, :, :].item(),
                    'tot_reward/tot_rew_avrg_over_envs': self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item(),
                    'tot_reward/tot_rew_std_over_envs': self._tot_rew_std_over_envs[self._log_it_counter, :, :].item(),
                    })
                # sub rewards from db envs
                self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max":
                    wandb.Histogram(self._sub_rew_max.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
                self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min":
                    wandb.Histogram(self._sub_rew_min.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
                self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg":
                    wandb.Histogram(self._sub_rew_avrg.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})

                self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max_over_envs":
                    self._sub_rew_max_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
                self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min_over_envs":
                    self._sub_rew_min_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
                self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg_over_envs":
                    self._sub_rew_avrg_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
                self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_std_over_envs":
                    self._sub_rew_std_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})

                # exploration envs
                if self._n_expl_envs > 0:
                    self._wandb_d.update({'correlation_db/ep_timesteps_expl_env_distr':
                        wandb.Histogram(self._ep_tsteps_expl_env_distribution[self._log_it_counter, :, :].numpy())})

                    # sub reward from expl envs
                    self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_expl":
                        wandb.Histogram(self._sub_rew_max_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
                    self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_expl":
                        wandb.Histogram(self._sub_rew_avrg_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
                    self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_expl":
                        wandb.Histogram(self._sub_rew_min_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})

                    self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_over_envs_expl":
                        self._sub_rew_max_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
                    self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_over_envs_expl":
                        self._sub_rew_min_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
                    self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_over_envs_expl":
                        self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
                    self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_std_over_envs_expl":
                        self._sub_rew_std_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})

                # demo envs (only log if active, otherwise we log nans which wandb doesn't like)
                if self._env.n_demo_envs() > 0:
                    if self._env.demo_active():
                        # log histograms only if there are no nan in the data
                        self._wandb_d.update({'correlation_db/ep_timesteps_demo_env_distr':
                            wandb.Histogram(self._ep_tsteps_demo_env_distribution[self._log_it_counter, :, :].numpy())})

                        # sub reward from demo envs
                        self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_demo":
                            wandb.Histogram(self._sub_rew_max_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
                        self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_demo":
                            wandb.Histogram(self._sub_rew_avrg_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})
                        self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_demo":
                            wandb.Histogram(self._sub_rew_min_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))})

                        self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_over_envs_demo":
                            self._sub_rew_max_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
                        self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_over_envs_demo":
                            self._sub_rew_min_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
                        self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_over_envs_demo":
                            self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})
                        self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_std_over_envs_demo":
                            self._sub_rew_std_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))})

                if self._vec_transition_counter > (self._warmstart_vectimesteps-1):
                    # algo info
                    self._policy_update_db_data_dict.update({
                        "sac_q_info/qf1_vals_mean": self._qf1_vals_mean[self._log_it_counter, 0],
                        "sac_q_info/qf2_vals_mean": self._qf2_vals_mean[self._log_it_counter, 0],
                        "sac_q_info/min_qft_vals_mean": self._min_qft_vals_mean[self._log_it_counter, 0],
                        "sac_q_info/qf1_vals_std": self._qf1_vals_std[self._log_it_counter, 0],
                        "sac_q_info/qf2_vals_std": self._qf2_vals_std[self._log_it_counter, 0],
                        "sac_q_info/min_qft_vals_std": self._min_qft_vals_std[self._log_it_counter, 0],
                        "sac_q_info/qf1_vals_max": self._qf1_vals_max[self._log_it_counter, 0],
                        "sac_q_info/qf2_vals_max": self._qf2_vals_max[self._log_it_counter, 0],
                        "sac_q_info/qf1_vals_min": self._qf1_vals_min[self._log_it_counter, 0],
                        "sac_q_info/qf2_vals_min": self._qf2_vals_min[self._log_it_counter, 0],

                        "sac_actor_info/policy_entropy_mean": self._policy_entropy_mean[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_std": self._policy_entropy_std[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_max": self._policy_entropy_max[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_min": self._policy_entropy_min[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_disc_mean": self._policy_entropy_disc_mean[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_disc_std": self._policy_entropy_disc_std[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_disc_max": self._policy_entropy_disc_max[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_disc_min": self._policy_entropy_disc_min[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_cont_mean": self._policy_entropy_cont_mean[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_cont_std": self._policy_entropy_cont_std[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_cont_max": self._policy_entropy_cont_max[self._log_it_counter, 0],
                        "sac_actor_info/policy_entropy_cont_min": self._policy_entropy_cont_min[self._log_it_counter, 0],

                        "sac_q_info/qf1_loss": self._qf1_loss[self._log_it_counter, 0],
                        "sac_q_info/qf2_loss": self._qf2_loss[self._log_it_counter, 0],
                        "sac_actor_info/actor_loss": self._actor_loss[self._log_it_counter, 0]})
                    alpha_logs = {
                        "sac_alpha_info/alpha": self._alphas[self._log_it_counter, 0],
                        "sac_alpha_info/alpha_disc": self._alphas_disc[self._log_it_counter, 0],
                        "sac_alpha_info/alpha_cont": self._alphas_cont[self._log_it_counter, 0],
                        "sac_alpha_info/target_entropy": self._target_entropy,
                        "sac_alpha_info/target_entropy_disc": self._target_entropy_disc,
                        "sac_alpha_info/target_entropy_cont": self._target_entropy_cont
                    }
                    if self._autotune:
                        alpha_logs.update({
                            "sac_alpha_info/alpha_loss": self._alpha_loss[self._log_it_counter, 0],
                            "sac_alpha_info/alpha_loss_disc": self._alpha_loss_disc[self._log_it_counter, 0],
                            "sac_alpha_info/alpha_loss_cont": self._alpha_loss_cont[self._log_it_counter, 0],
                        })
                    self._policy_update_db_data_dict.update(alpha_logs)

                    if self._validate:
                        self._policy_update_db_data_dict.update({
                            "sac_q_info/qf1_loss_validation": self._qf1_loss_validation[self._log_it_counter, 0],
                            "sac_q_info/qf2_loss_validation": self._qf2_loss_validation[self._log_it_counter, 0],
                            "sac_q_info/overfit_index": self._overfit_index[self._log_it_counter, 0],
                            "sac_actor_info/actor_loss_validation": self._actor_loss_validation[self._log_it_counter, 0]})
                        if self._autotune:
                            self._policy_update_db_data_dict.update({
                                "sac_alpha_info/alpha_loss_validation": self._alpha_loss_validation[self._log_it_counter, 0],
                                "sac_alpha_info/alpha_loss_disc_validation": self._alpha_loss_disc_validation[self._log_it_counter, 0],
                                "sac_alpha_info/alpha_loss_cont_validation": self._alpha_loss_cont_validation[self._log_it_counter, 0]})

                    self._wandb_d.update(self._policy_update_db_data_dict)

                    if self._use_rnd:
                        self._rnd_db_data_dict.update({
                            "rnd_info/expl_bonus_raw_avrg": self._expl_bonus_raw_avrg[self._log_it_counter, 0],
                            "rnd_info/expl_bonus_raw_std": self._expl_bonus_raw_std[self._log_it_counter, 0],
                            "rnd_info/expl_bonus_proc_avrg": self._expl_bonus_proc_avrg[self._log_it_counter, 0],
                            "rnd_info/expl_bonus_proc_std": self._expl_bonus_proc_std[self._log_it_counter, 0],
                            "rnd_info/rnd_loss": self._rnd_loss[self._log_it_counter, 0],
                            })
                        self._wandb_d.update(self._rnd_db_data_dict)

                        if self._rnd_net.obs_running_norm is not None:
                            # adding info on running normalizer if used
                            if self._running_mean_rnd_input is not None:
                                self._wandb_d.update({f"rnd_info/running_mean_rhc_input": self._running_mean_rnd_input[self._log_it_counter, :]})
                            if self._running_std_rnd_input is not None:
                                self._wandb_d.update({f"rnd_info/running_std_rhc_input": self._running_std_rnd_input[self._log_it_counter, :]})

                if self._agent.obs_running_norm is not None:
                    # adding info on running normalizer if used
                    if self._running_mean_obs is not None:
                        self._wandb_d.update({f"running_norm/mean": self._running_mean_obs[self._log_it_counter, :]})
                    if self._running_std_obs is not None:
                        self._wandb_d.update({f"running_norm/std": self._running_std_obs[self._log_it_counter, :]})

                self._wandb_d.update(self._custom_env_data_db_dict)

                wandb.log(self._wandb_d)

        if self._verbose:
            info = f"\nTotal n. timesteps simulated: {self._n_timesteps_done[self._log_it_counter].item()}/{self._total_timesteps}\n" + \
                f"N. policy updates performed: {self._n_policy_updates[self._log_it_counter].item()}/{self._n_policy_updates_to_be_done}\n" + \
                f"N. q fun updates performed: {self._n_qfun_updates[self._log_it_counter].item()}/{self._n_qf_updates_to_be_done}\n" + \
                f"N. trgt q fun updates performed: {self._n_tqfun_updates[self._log_it_counter].item()}/{self._n_tqf_updates_to_be_done}\n" + \
                f"experience to policy grad ratio: {experience_to_policy_grad_ratio}\n" + \
                f"experience to q fun grad ratio: {experience_to_qfun_grad_ratio}\n" + \
                f"experience to trgt q fun grad ratio: {experience_to_tqfun_grad_ratio}\n" + \
                f"Warmstart completed: {self._vec_transition_counter > self._warmstart_vectimesteps or self._eval} ; ({self._vec_transition_counter}/{self._warmstart_vectimesteps})\n" + \
                f"Replay buffer full: {self._replay_bf_full}; current position {self._bpos}/{self._replay_buffer_size_vec}\n" + \
                f"Validation buffer full: {self._validation_bf_full}; current position {self._bpos_val}/{self._validation_buffer_size_vec}\n" + \
                f"Elapsed time: {self._elapsed_min[self._log_it_counter].item()/60.0} h\n" + \
                f"Estimated remaining training time: " + \
                f"{est_remaining_time_h} h\n" + \
                f"Total reward episodic data --> \n" + \
                f"max: {self._tot_rew_max_over_envs[self._log_it_counter, :, :].item()}\n" + \
                f"avg: {self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item()}\n" + \
                f"min: {self._tot_rew_min_over_envs[self._log_it_counter, :, :].item()}\n" + \
                f"Episodic sub-rewards episodic data --> \nsub rewards names: {self._reward_names_str}\n" + \
                f"max: {self._sub_rew_max_over_envs[self._log_it_counter, :]}\n" + \
                f"min: {self._sub_rew_min_over_envs[self._log_it_counter, :]}\n" + \
                f"avg: {self._sub_rew_avrg_over_envs[self._log_it_counter, :]}\n" + \
                f"std: {self._sub_rew_std_over_envs[self._log_it_counter, :]}\n" + \
                f"N. of episodes on which episodic rew stats are computed: {self._n_of_played_episodes[self._log_it_counter].item()}\n" + \
                f"Current env. step sps: {self._env_step_fps[self._log_it_counter].item()}, time for experience collection {self._collection_dt[self._log_it_counter].item()} s\n" + \
                f"Current env (sub-stepping) rt factor: {self._env_step_rt_factor[self._log_it_counter].item()}\n" + \
                f"Current policy update fps: {self._policy_update_fps[self._log_it_counter].item()}, time for policy updates {self._policy_update_dt[self._log_it_counter].item()} s\n" + \
                f"Time spent updating batch normalizations {self._batch_norm_update_dt[self._log_it_counter].item()} s\n" + \
                f"Time spent for computing validation {self._validation_dt[self._log_it_counter].item()} s\n" + \
                f"Demo envs are active: {self._demo_envs_active[self._log_it_counter].item()}. N demo envs if active {self._env.n_demo_envs()}\n" + \
                f"Performance metric now: {self._demo_perf_metric[self._log_it_counter].item()}\n" + \
                f"Entropy (disc): current {float(self._policy_entropy_disc_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_disc:.4f}\n" + \
                f"Entropy (cont): current {float(self._policy_entropy_cont_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_cont:.4f}\n"
            if self._use_rnd:
                info = info + f"N. rnd updates performed: {self._n_rnd_updates[self._log_it_counter].item()}\n"

            # NOTE(review): the context tag is "_post_step" (the caller), not
            # "_log_info" — possibly intentional, confirm upstream.
            Journal.log(self.__class__.__name__,
                "_post_step",
                info,
                LogType.INFO,
                throw_when_excep=True)
rnd updates performed: {self._n_rnd_updates[self._log_it_counter].item()}\n" + + Journal.log(self.__class__.__name__, + "_post_step", + info, + LogType.INFO, + throw_when_excep = True) + + def _add_experience(self, + obs: torch.Tensor, actions: torch.Tensor, rewards: torch.Tensor, + next_obs: torch.Tensor, + next_terminal: torch.Tensor) -> None: + + if self._validate and \ + (self._vec_transition_counter % self._validation_collection_vecfreq == 0): + # fill validation buffer + + self._obs_val[self._bpos_val] = obs + self._next_obs_val[self._bpos_val] = next_obs + self._actions_val[self._bpos_val] = actions + self._rewards_val[self._bpos_val] = rewards + self._next_terminal_val[self._bpos_val] = next_terminal + + self._bpos_val += 1 + if self._bpos_val == self._validation_buffer_size_vec: + self._validation_bf_full = True + self._bpos_val = 0 + + else: # fill normal replay buffer + self._obs[self._bpos] = obs + self._next_obs[self._bpos] = next_obs + self._actions[self._bpos] = actions + self._rewards[self._bpos] = rewards + self._next_terminal[self._bpos] = next_terminal + + self._bpos += 1 + if self._bpos == self._replay_buffer_size_vec: + self._replay_bf_full = True + self._bpos = 0 + + def _sample(self, size: int = None): + + if size is None: + size=self._batch_size + + batched_obs = self._obs.view((-1, self._env.obs_dim())) + batched_next_obs = self._next_obs.view((-1, self._env.obs_dim())) + batched_actions = self._actions.view((-1, self._env.actions_dim())) + batched_rewards = self._rewards.view(-1) + batched_terminal = self._next_terminal.view(-1) + + # sampling from the batched buffer + up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs + shuffled_buffer_idxs = torch.randint(0, up_to, + (size,)) + + sampled_obs = batched_obs[shuffled_buffer_idxs] + sampled_next_obs = batched_next_obs[shuffled_buffer_idxs] + sampled_actions = batched_actions[shuffled_buffer_idxs] + sampled_rewards = batched_rewards[shuffled_buffer_idxs] + 
sampled_terminal = batched_terminal[shuffled_buffer_idxs] + + return sampled_obs, sampled_actions,\ + sampled_next_obs,\ + sampled_rewards, \ + sampled_terminal + + def _sample_validation(self, size: int = None): + + if size is None: + size=self._validation_batch_size + + batched_obs = self._obs_val.view((-1, self._env.obs_dim())) + batched_next_obs = self._next_obs_val.view((-1, self._env.obs_dim())) + batched_actions = self._actions_val.view((-1, self._env.actions_dim())) + batched_rewards = self._rewards_val.view(-1) + batched_terminal = self._next_terminal_val.view(-1) + + # sampling from the batched buffer + up_to = self._validation_buffer_size if self._validation_bf_full else self._bpos_val*self._num_envs + shuffled_buffer_idxs = torch.randint(0, up_to, + (size,)) + + sampled_obs = batched_obs[shuffled_buffer_idxs] + sampled_next_obs = batched_next_obs[shuffled_buffer_idxs] + sampled_actions = batched_actions[shuffled_buffer_idxs] + sampled_rewards = batched_rewards[shuffled_buffer_idxs] + sampled_terminal = batched_terminal[shuffled_buffer_idxs] + + return sampled_obs, sampled_actions,\ + sampled_next_obs,\ + sampled_rewards, \ + sampled_terminal + + def _sample_random_actions(self): + + self._random_uniform.uniform_(-1,1) + random_actions = self._random_uniform + + return random_actions + + def _perturb_some_actions(self, + actions: torch.Tensor): + + if self._is_continuous_actions_bool.any(): # if there are any continuous actions + self._perturb_actions(actions, + action_idxs=self._is_continuous_actions, + env_idxs=self._expl_env_selector.to(actions.device), + normal=True, # use normal for continuous + scaling=self._continuous_act_expl_noise_std) + if self._is_discrete_actions_bool.any(): # actions to be treated as discrete + self._perturb_actions(actions, + action_idxs=self._is_discrete_actions, + env_idxs=self._expl_env_selector.to(actions.device), + normal=False, # use uniform distr for discrete + scaling=self._discrete_act_expl_noise_std) + 
self._pert_counter+=1 + if self._pert_counter >= self._noise_duration_vec: + self._pert_counter=0 + + def _perturb_actions(self, + actions: torch.Tensor, + action_idxs: torch.Tensor, + env_idxs: torch.Tensor, + normal: bool = True, + scaling: float = 1.0): + if normal: # gaussian + # not super efficient (in theory random_normal can be made smaller in size) + self._random_normal.normal_(mean=0, std=1) + noise=self._random_normal + else: # uniform + self._random_uniform.uniform_(-1,1) + noise=self._random_uniform + + env_indices = env_idxs.reshape(-1,1) # Get indices of True environments + action_indices = action_idxs.reshape(1,-1) # Get indices of True actions + action_indices_flat=action_indices.flatten() + + perturbation=noise[env_indices, action_indices]*scaling + perturbed_actions=actions[env_indices, action_indices]+perturbation + perturbed_actions.clamp_(-1.0, 1.0) # enforce normalized bounds + + actions[env_indices, action_indices]=\ + perturbed_actions + + def _update_batch_norm(self, bsize: int = None): + + if bsize is None: + bsize=self._batch_size # same used for training + + up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs + shuffled_buffer_idxs = torch.randint(0, up_to, + (bsize,)) + + # update obs normalization + # (we should sample also next obs, but if most of the transitions are not terminal, + # this is not an issue and is more efficient) + if (self._agent.obs_running_norm is not None) and \ + (not self._eval): + batched_obs = self._obs.view((-1, self._obs_dim)) + sampled_obs = batched_obs[shuffled_buffer_idxs] + self._agent.update_obs_bnorm(x=sampled_obs) + + if self._use_rnd: # update running norm for RND + batched_obs = self._obs.view((-1, self._obs_dim)) + batched_actions = self._actions.view((-1, self._actions_dim)) + sampled_obs = batched_obs[shuffled_buffer_idxs] + sampled_actions = batched_actions[shuffled_buffer_idxs] + torch.cat(tensors=(sampled_obs, sampled_actions), dim=1, + 
out=self._rnd_bnorm_input) + self._rnd_net.update_input_bnorm(x=self._rnd_bnorm_input) + + # update running norm on rewards also + # if self._reward_normalizer is not None: + # batched_rew = self._rewards.view(-1) + # sampled_rew = batched_rew[shuffled_buffer_idxs] + # self._reward_normalizer.manual_stat_update(x=sampled_rew) + + def _reset_agent(self): + # not super efficient, but effective -> + # brand new agent, brand new optimizers + self._agent.reset() + # forcing deallocation of previous optimizers + import gc + del self._qf_optimizer + del self._actor_optimizer + if self._autotune: + del self._a_optimizer_disc + del self._a_optimizer_cont + del self._log_alpha_disc + del self._log_alpha_cont + gc.collect() + self._init_agent_optimizers() + if self._autotune: # also reinitialize alpha optimization + self._init_alpha_autotuning() + + self._overfit_idx=0.0 + + def _switch_training_mode(self, + train: bool = True): + + self._agent.train(train) + + def _init_algo_shared_data(self, + static_params: Dict): + + self._shared_algo_data = SharedRLAlgorithmInfo(namespace=self._ns, + is_server=True, + static_params=static_params, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + + self._shared_algo_data.run() + + # write some initializations + self._shared_algo_data.write(dyn_info_name=["is_done"], + val=[0.0]) + + # only written to if flags where enabled + self._qf_vals=QfVal(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._qf_vals.run() + self._qf_trgt=QfTrgt(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._qf_trgt.run() + + # episodic returns + reward_names=self._episodic_reward_metrics.data_names() + self._sub_returns=SubReturns(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + 
n_rewards=len(reward_names), + reward_names=reward_names, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._sub_returns.run() + + self._tot_returns=TotReturns(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._tot_returns.run() diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/training_env_base.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/training_env_base.py new file mode 100644 index 0000000000000000000000000000000000000000..a1d82cd55b606a8fe783803efcb26726301c1693 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/training_env_base.py @@ -0,0 +1,2002 @@ +import torch +import math +from aug_mpc.utils.math_utils import quaternion_to_angular_velocity, quaternion_difference + +from mpc_hive.utilities.shared_data.rhc_data import RobotState +from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred +from mpc_hive.utilities.shared_data.rhc_data import RhcRefs +from mpc_hive.utilities.shared_data.rhc_data import RhcStatus +from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo + +from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperSrvr +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetSrvr +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest + +from aug_mpc.utils.shared_data.agent_refs import AgentRefs +from aug_mpc.utils.shared_data.training_env import SharedTrainingEnvInfo + +from aug_mpc.utils.shared_data.training_env import Observations, NextObservations +from aug_mpc.utils.shared_data.training_env import TotRewards +from aug_mpc.utils.shared_data.training_env import SubRewards +from aug_mpc.utils.shared_data.training_env import Actions +from aug_mpc.utils.shared_data.training_env 
import Terminations, SubTerminations +from aug_mpc.utils.shared_data.training_env import Truncations, SubTruncations +from aug_mpc.utils.shared_data.training_env import EpisodesCounter,TaskRandCounter,SafetyRandResetsCounter,RandomTruncCounter,SubStepAbsCounter + +from aug_mpc.utils.episodic_rewards import EpisodicRewards +from aug_mpc.utils.episodic_data import EpisodicData +from aug_mpc.utils.episodic_data import MemBuffer +from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother +from aug_mpc.utils.math_utils import check_capsize + +from mpc_hive.utilities.math_utils_torch import world2base_frame + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal +from EigenIPC.PyEigenIPC import StringTensorClient + +from perf_sleep.pyperfsleep import PerfSleep + +from abc import abstractmethod, ABC + +import os +from typing import List, Dict + +class AugMPCTrainingEnvBase(ABC): + + """Base class for a remote training environment tailored to Learning-based Receding Horizon Control""" + + def __init__(self, + namespace: str, + obs_dim: int, + actions_dim: int, + env_name: str = "", + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + debug: bool = True, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + self._this_path = os.path.abspath(__file__) + + self.custom_db_data = None + + self._random_reset_active=False + + self._action_smoother_continuous=None + self._action_smoother_discrete=None + + self._closed = False + self._ready=False + + self._namespace = namespace + self._with_gpu_mirror = True + self._safe_shared_mem = False + + self._obs_dim = obs_dim + self._actions_dim = actions_dim + + self._use_gpu = use_gpu + if self._use_gpu: + self._device="cuda" + else: + self._device="cpu" + + self._dtype = dtype + + self._verbose = verbose + self._vlevel = vlevel + + self._is_debug = debug + + 
self._env_name = env_name + + self._override_agent_refs = override_agent_refs + + self._substep_dt=1.0 # dt [s] between each substep + + self._env_opts={} + self._env_opts.update(env_opts) + self._process_env_opts() + + self._robot_state = None + self._rhc_cmds = None + self._rhc_pred = None + self._rhc_refs = None + self._rhc_status = None + + self._remote_stepper = None + self._remote_resetter = None + self._remote_reset_req = None + + self._agent_refs = None + + self._n_envs = 0 + + self._ep_timeout_counter = None + self._task_rand_counter = None + self._rand_safety_reset_counter = None + self._rand_trunc_counter = None + + self._actions_map={} # to be used to hold info like action idxs + self._obs_map={} + + self._obs = None + self._obs_ub = None + self._obs_lb = None + self._next_obs = None + self._actions = None + self._actual_actions = None + self._actions_ub = None + self._actions_lb = None + self._tot_rewards = None + self._sub_rewards = None + self._sub_terminations = None + self._sub_truncations = None + self._terminations = None + self._truncations = None + self._act_mem_buffer = None + + self._episodic_rewards_metrics = None + + self._timeout = timeout_ms + + self._height_grid_size = None + self._height_flat_dim = 0 + + self._attach_to_shared_mem() + + self._init_obs(obs_dim) + self._init_actions(actions_dim) + self._init_rewards() + self._init_terminations() + self._init_truncations() + self._init_custom_db_data() + + self._demo_setup() # setup for demo envs + + # to ensure maps are properly initialized + _ = self._get_action_names() + _ = self._get_obs_names() + _ = self._get_sub_trunc_names() + _ = self._get_sub_term_names() + + self._set_substep_rew() + self._set_substep_obs() + + self._custom_post_init() + + # update actions scale and offset in case it was modified in _custom_post_init + self._actions_scale = (self._actions_ub - self._actions_lb)/2.0 + self._actions_offset = (self._actions_ub + self._actions_lb)/2.0 + + if 
self._env_opts["use_action_smoothing"]: + self._init_action_smoothing() + + self._ready=self._init_step(reset_on_init=self._env_opts["reset_on_init"]) + + def _add_env_opt(self, + opts: Dict, + name: str, + default): + + if not name in opts: + opts[name]=default + + def _process_env_opts(self, ): + + self._check_for_env_opts("episode_timeout_lb", int) + self._check_for_env_opts("episode_timeout_ub", int) + self._check_for_env_opts("n_steps_task_rand_lb", int) + self._check_for_env_opts("n_steps_task_rand_ub", int) + self._check_for_env_opts("use_random_trunc", bool) + self._check_for_env_opts("random_trunc_freq", int) + self._check_for_env_opts("random_trunc_freq_delta", int) + self._check_for_env_opts("use_random_safety_reset", bool) + self._check_for_env_opts("random_reset_freq", int) + + self._check_for_env_opts("action_repeat", int) + + self._check_for_env_opts("n_preinit_steps", int) + + self._check_for_env_opts("demo_envs_perc", float) + + self._check_for_env_opts("vec_ep_freq_metrics_db", int) + + self._check_for_env_opts("srew_drescaling", bool) + + self._check_for_env_opts("use_action_history", bool) + self._check_for_env_opts("actions_history_size", int) + + self._check_for_env_opts("use_action_smoothing", bool) + self._check_for_env_opts("smoothing_horizon_c", float) + self._check_for_env_opts("smoothing_horizon_d", float) + + self._check_for_env_opts("add_heightmap_obs", bool) + + self._check_for_env_opts("reset_on_init", bool) + + # parse action repeat opt + get some sim information + if self._env_opts["action_repeat"] <=0: + self._env_opts["action_repeat"] = 1 + self._action_repeat=self._env_opts["action_repeat"] + # parse remote sim info + sim_info = {} + sim_info_shared = SharedEnvInfo(namespace=self._namespace, + is_server=False, + safe=False, + verbose=self._verbose, + vlevel=self._vlevel) + sim_info_shared.run() + sim_info_keys = sim_info_shared.param_keys + sim_info_data = sim_info_shared.get().flatten() + for i in range(len(sim_info_keys)): + 
sim_info[sim_info_keys[i]] = sim_info_data[i] + if "substepping_dt" in sim_info_keys: + self._substep_dt=sim_info["substepping_dt"] + self._env_opts.update(sim_info) + + self._env_opts["substep_dt"]=self._substep_dt + + self._env_opts["override_agent_refs"]=self._override_agent_refs + + self._env_opts["episode_timeout_lb"] = round(self._env_opts["episode_timeout_lb"]/self._action_repeat) + self._env_opts["episode_timeout_ub"] = round(self._env_opts["episode_timeout_ub"]/self._action_repeat) + + self._env_opts["n_steps_task_rand_lb"] = round(self._env_opts["n_steps_task_rand_lb"]/self._action_repeat) + self._env_opts["n_steps_task_rand_ub"] = round(self._env_opts["n_steps_task_rand_ub"]/self._action_repeat) + + if self._env_opts["random_reset_freq"] <=0: + self._env_opts["use_random_safety_reset"]=False + self._env_opts["random_reset_freq"]=-1 + self._random_reset_active=self._env_opts["use_random_safety_reset"] + + self._env_opts["random_trunc_freq"] = round(self._env_opts["random_trunc_freq"]/self._action_repeat) + self._env_opts["random_trunc_freq_delta"] = round(self._env_opts["random_trunc_freq_delta"]/self._action_repeat) + + if self._env_opts["random_trunc_freq"] <=0: + self._env_opts["use_random_trunc"]=False + self._env_opts["random_trunc_freq"]=-1 + + self._full_db=False + if "full_env_db" in self._env_opts: + self._full_db=self._env_opts["full_env_db"] + + def _check_for_env_opts(self, + name: str, + expected_type): + if not (name in self._env_opts): + Journal.log(self.__class__.__name__, + "_check_for_env_opts", + f"Required option {name} missing for env opts!", + LogType.EXCEP, + throw_when_excep=True) + if not isinstance(self._env_opts[name], expected_type): + Journal.log(self.__class__.__name__, + "_check_for_env_opts", + f"Option {name} in env opts is not of type {expected_type} (got {type(self._env_opts[name])})!", + LogType.EXCEP, + throw_when_excep=True) + + def __del__(self): + + self.close() + + def _demo_setup(self): + + 
self._demo_envs_idxs=None + self._demo_envs_idxs_bool=None + self._n_demo_envs=round(self._env_opts["demo_envs_perc"]*self._n_envs) + self._add_demos=False + if not self._n_demo_envs >0: + Journal.log(self.__class__.__name__, + "__init__", + "will not use demo environments", + LogType.INFO, + throw_when_excep=False) + else: + Journal.log(self.__class__.__name__, + "__init__", + f"Will run with {self._n_demo_envs} demonstration envs.", + LogType.INFO) + self._demo_envs_idxs = torch.randperm(self._n_envs, device=self._device)[:self._n_demo_envs] + self._demo_envs_idxs_bool=torch.full((self._n_envs, ), dtype=torch.bool, device=self._device, + fill_value=False) + self._demo_envs_idxs_bool[self._demo_envs_idxs]=True + + self._init_demo_envs() # custom logic + + demo_idxs_str=", ".join(map(str, self._demo_envs_idxs.tolist())) + Journal.log(self.__class__.__name__, + "__init__", + f"Demo env. indexes are [{demo_idxs_str}]", + LogType.INFO) + + def env_opts(self): + return self._env_opts + + def demo_env_idxs(self, get_bool: bool=False): + if get_bool: + return self._demo_envs_idxs_bool + else: + return self._demo_envs_idxs + + def _init_demo_envs(self): + pass + + def n_demo_envs(self): + return self._n_demo_envs + + def demo_active(self): + return self._add_demos + + def switch_demo(self, active: bool = False): + if self._demo_envs_idxs is not None: + self._add_demos=active + else: + Journal.log(self.__class__.__name__, + "switch_demo", + f"Cannot switch demostrations on. 
No demo envs available!", + LogType.EXCEP, + throw_when_excep=True) + + def _get_this_file_path(self): + return self._this_path + + def episode_timeout_bounds(self): + return self._env_opts["episode_timeout_lb"], self._env_opts["episode_timeout_ub"] + + def task_rand_timeout_bounds(self): + return self._env_opts["n_steps_task_rand_lb"], self._env_opts["n_steps_task_rand_ub"] + + def n_action_reps(self): + return self._action_repeat + + def get_file_paths(self): + from aug_mpc.utils.sys_utils import PathsGetter + path_getter = PathsGetter() + base_paths = [] + base_paths.append(self._get_this_file_path()) + base_paths.append(path_getter.REMOTENVPATH) + for script_path in path_getter.SCRIPTSPATHS: + base_paths.append(script_path) + + # rhc files + from EigenIPC.PyEigenIPC import StringTensorClient + from perf_sleep.pyperfsleep import PerfSleep + shared_rhc_shared_files = StringTensorClient( + basename="SharedRhcFilesDropDir", + name_space=self._namespace, + verbose=self._verbose, + vlevel=VLevel.V2) + shared_rhc_shared_files.run() + shared_rhc_files_vals=[""]*shared_rhc_shared_files.length() + while not shared_rhc_shared_files.read_vec(shared_rhc_files_vals, 0): + nsecs = 1000000000 # 1 sec + PerfSleep.thread_sleep(nsecs) # we just keep it alive + rhc_list=[] + for rhc_files in shared_rhc_files_vals: + file_list = rhc_files.split(", ") + rhc_list.extend(file_list) + rhc_list = list(set(rhc_list)) # removing duplicates + base_paths.extend(rhc_list) + + # world interface files + get_world_interface_paths = self.get_world_interface_paths() + base_paths.extend(get_world_interface_paths) + return base_paths + + def get_world_interface_paths(self): + paths = [] + shared_world_iface_files = StringTensorClient( + basename="SharedWorldInterfaceFilesDropDir", + name_space=self._namespace, + verbose=self._verbose, + vlevel=VLevel.V2) + shared_world_iface_files.run() + world_iface_vals=[""]*shared_world_iface_files.length() + while not 
shared_world_iface_files.read_vec(world_iface_vals, 0): + nsecs = 1000000000 # 1 sec + PerfSleep.thread_sleep(nsecs) # keep alive while waiting + shared_world_iface_files.close() + for files in world_iface_vals: + if files == "": + continue + file_list = files.split(", ") + for f in file_list: + if f not in paths: + paths.append(f) + return paths + + def get_aux_dir(self): + empty_list = [] + return empty_list + + def _init_step(self, reset_on_init: bool = True): + + self._check_controllers_registered(retry=True) + self._activate_rhc_controllers() + + # just an auxiliary tensor + initial_reset_aux = self._terminations.get_torch_mirror(gpu=self._use_gpu).clone() + initial_reset_aux[:, :] = reset_on_init # we reset all sim envs first + init_step_ok=True + init_step_ok=self._remote_sim_step() and init_step_ok + if not init_step_ok: + return False + init_step_ok=self._remote_reset(reset_mask=initial_reset_aux) and init_step_ok + if not init_step_ok: + return False + + for i in range(self._env_opts["n_preinit_steps"]): # perform some + # dummy remote env stepping to make sure to have meaningful + # initializations (doesn't increment step counter) + init_step_ok=self._remote_sim_step() and init_step_ok # 1 remote sim. 
step + if not init_step_ok: + return False + init_step_ok=self._send_remote_reset_req() and init_step_ok # fake reset request + if not init_step_ok: + return False + + self.reset() + + return init_step_ok + + def _debug(self): + + if self._use_gpu: + # using non_blocking which is not safe when GPU->CPU + self._obs.synch_mirror(from_gpu=True,non_blocking=True) # copy data from gpu to cpu view + self._next_obs.synch_mirror(from_gpu=True,non_blocking=True) + self._actions.synch_mirror(from_gpu=True,non_blocking=True) + self._truncations.synch_mirror(from_gpu=True,non_blocking=True) + self._sub_truncations.synch_mirror(from_gpu=True,non_blocking=True) + self._terminations.synch_mirror(from_gpu=True,non_blocking=True) + self._sub_terminations.synch_mirror(from_gpu=True,non_blocking=True) + self._tot_rewards.synch_mirror(from_gpu=True,non_blocking=True) + self._sub_rewards.synch_mirror(from_gpu=True,non_blocking=True) + # if we want reliable db data then we should synchronize data streams + torch.cuda.synchronize() + + # copy CPU view on shared memory + self._obs.synch_all(read=False, retry=True) + self._next_obs.synch_all(read=False, retry=True) + self._actions.synch_all(read=False, retry=True) + self._tot_rewards.synch_all(read=False, retry=True) + self._sub_rewards.synch_all(read=False, retry=True) + self._truncations.synch_all(read=False, retry = True) + self._sub_truncations.synch_all(read=False, retry = True) + self._terminations.synch_all(read=False, retry = True) + self._sub_terminations.synch_all(read=False, retry = True) + + self._debug_agent_refs() + + def _debug_agent_refs(self): + if self._use_gpu: + if not self._override_agent_refs: + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) + if not self._override_agent_refs: + self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True) + + def _remote_sim_step(self): + + self._remote_stepper.trigger() # triggers simulation + RHC + if not 
self._remote_stepper.wait_ack_from(1, self._timeout): + Journal.log(self.__class__.__name__, + "_remote_sim_step", + "Remote sim. env step ack not received within timeout", + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def _remote_reset(self, + reset_mask: torch.Tensor = None): + + reset_reqs = self._remote_reset_req.get_torch_mirror() + if reset_mask is None: # just send the signal to allow stepping, but do not reset any of + # the remote envs + reset_reqs[:, :] = False + else: + reset_reqs[:, :] = reset_mask # remotely reset envs corresponding to + # the mask (True--> to be reset) + self._remote_reset_req.synch_all(read=False, retry=True) # write on shared buffer + remote_reset_ok = self._send_remote_reset_req() # process remote request + + if reset_mask is not None: + self._synch_state(gpu=self._use_gpu) # if some env was reset, we use _obs + # to hold the states, including resets, while _next_obs will always hold the + # state right after stepping the sim env + # (could be a bit more efficient, since in theory we only need to read the envs + # corresponding to the reset_mask) + + + return remote_reset_ok + + def _send_remote_reset_req(self): + + self._remote_resetter.trigger() + if not self._remote_resetter.wait_ack_from(1, self._timeout): # remote reset completed + Journal.log(self.__class__.__name__, + "_post_step", + "Remote reset did not complete within the prescribed timeout!", + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def step(self, + action): + + actions_norm = action.detach() # IMPORTANT: assumes actions are already normalized in [-1, 1] + + actions = self._actions.get_torch_mirror(gpu=self._use_gpu) # will hold agent actions (real range) + + # scale normalized actions to physical space before interfacing with controllers + actions[:, :] = actions_norm*self._actions_scale + self._actions_offset + + self._override_actions_with_demo() # if necessary override some actions with expert 
demonstrations + # (getting actions with get_actions will return the modified actions tensor) + + actions.clamp_(self._actions_lb, self._actions_ub) # just to be safe + + if self._act_mem_buffer is not None: # store norm actions in memory buffer + self._act_mem_buffer.update(new_data=actions_norm) + + if self._env_opts["use_action_smoothing"]: + self._apply_actions_smoothing() # smooth actions if enabled (the tensor returned by + # get_actions does not contain smoothing and can be safely employed for experience collection) + + self._apply_actions_to_rhc() # apply last agent actions to rhc controller + + stepping_ok = True + tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu) + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu) + tot_rewards.zero_() + sub_rewards.zero_() + self._substep_rewards.zero_() + next_obs.zero_() # necessary for substep obs + + for i in range(0, self._action_repeat): + + self._pre_substep() # custom logic @ substep freq + + stepping_ok = stepping_ok and self._check_controllers_registered(retry=False) # does not make sense to run training + # if we lost some controllers + stepping_ok = stepping_ok and self._remote_sim_step() # blocking, + + # no sim substepping is allowed to fail + self._synch_state(gpu=self._use_gpu) # read state from shared mem (done in substeps also, + # since substeps rewards will need updated substep obs) + + self._custom_post_substp_pre_rew() # custom substepping logic + self._compute_substep_rewards() + self._assemble_substep_rewards() # includes rewards clipping + self._custom_post_substp_post_rew() # custom substepping logic + + # fill substep obs + self._fill_substep_obs(self._substep_obs) + self._assemble_substep_obs() + if not i==(self._action_repeat-1): + # sends reset signal to complete remote step sequence, + # but does not reset any remote env + stepping_ok = stepping_ok and self._remote_reset(reset_mask=None) + 
else: # last substep + + self._fill_step_obs(next_obs) # update next obs + self._clamp_obs(next_obs) # good practice + obs = self._obs.get_torch_mirror(gpu=self._use_gpu) + obs[:, :] = next_obs # start from next observation, unless reset (handled in post_step()) + + self._compute_step_rewards() # implemented by child + + tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu) + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + self._clamp_rewards(sub_rewards) # clamp all sub rewards + + tot_rewards[:, :] = torch.sum(sub_rewards, dim=1, keepdim=True) + + scale=1 # scale tot rew by the number of action repeats + if self._env_opts["srew_drescaling"]: # scale rewards depending on the n of subrewards + scale*=sub_rewards.shape[1] # n. dims rescaling + tot_rewards.mul_(1/scale) + + self._substep_abs_counter.increment() # @ substep freq + + if not stepping_ok: + return False + + stepping_ok = stepping_ok and self._post_step() # post sub-stepping operations + # (if action_repeat > 1, then just the db data at the last substep is logged) + # also, if a reset of an env occurs, obs will hold the reset state + + return stepping_ok + + def _post_step(self): + + # first increment counters + self._ep_timeout_counter.increment() # episode timeout + self._task_rand_counter.increment() # task randomization + if self._rand_trunc_counter is not None: # random truncations (for removing temp. 
correlations) + self._rand_trunc_counter.increment() + + # check truncation and termination conditions + self._check_truncations() # defined in child env + self._check_terminations() + terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu) + truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu) + ignore_ep_end=None + if self._rand_trunc_counter is not None: + ignore_ep_end=self._rand_trunc_counter.time_limits_reached() + if self._use_gpu: + ignore_ep_end=ignore_ep_end.cuda() + + truncated = torch.logical_or(truncated, + ignore_ep_end) # add truncation (sub truncations defined in child env + # remain untouched) + + episode_finished = torch.logical_or(terminated, + truncated) + episode_finished_cpu = episode_finished.cpu() + + if self._rand_safety_reset_counter is not None and self._random_reset_active: + self._rand_safety_reset_counter.increment(to_be_incremented=episode_finished_cpu.flatten()) + # truncated[:,:] = torch.logical_or(truncated, + # self._rand_safety_reset_counter.time_limits_reached().cuda()) + + if self._act_mem_buffer is not None: + self._act_mem_buffer.reset(to_be_reset=episode_finished.flatten(), + init_data=self._normalize_actions(self.default_action)) + + if self._action_smoother_continuous is not None: + self._action_smoother_continuous.reset(to_be_reset=episode_finished.flatten(), + reset_val=self.default_action[:, self._is_continuous_actions]) + if self._action_smoother_discrete is not None: + self._action_smoother_discrete.reset(to_be_reset=episode_finished.flatten(), + reset_val=self.default_action[:, ~self._is_continuous_actions]) + + # debug step if required (IMPORTANT: must be before remote reset so that we always db + # actual data from the step and not after reset) + if self._is_debug: + self._debug() # copies db data on shared memory + ignore_ep_end_cpu=ignore_ep_end if not self._use_gpu else ignore_ep_end.cpu() + self._update_custom_db_data(episode_finished=episode_finished_cpu, + 
ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc + ) + self._episodic_rewards_metrics.update(rewards = self._sub_rewards.get_torch_mirror(gpu=False), + ep_finished=episode_finished_cpu, + ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc + ) + + # remotely reset envs + to_be_reset=self._to_be_reset() + to_be_reset_custom=self._custom_reset() + if to_be_reset_custom is not None: + to_be_reset[:, :] = torch.logical_or(to_be_reset,to_be_reset_custom) + rm_reset_ok = self._remote_reset(reset_mask=to_be_reset) + + self._custom_post_step(episode_finished=episode_finished) # any additional logic from child env + # here, before actual reset taskes place (at this point the state is the reset one) + + # updating also prev pos and orientation in case some env was reset + self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + obs = self._obs.get_torch_mirror(gpu=self._use_gpu) + self._fill_step_obs(obs) + self._clamp_obs(obs) + + # updating prev step quantities + self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + # synchronize and reset counters for finished episodes + self._ep_timeout_counter.reset(to_be_reset=episode_finished) + self._task_rand_counter.reset(to_be_reset=episode_finished) + self._substep_abs_counter.reset(to_be_reset=torch.logical_or(terminated,to_be_reset), + randomize_offsets=True # otherwise timers across envs would be strongly correlated + ) # reset only if resetting environment or if terminal + + if self._rand_trunc_counter is not None: + # only reset when safety truncation was is triggered + self._rand_trunc_counter.reset(to_be_reset=self._rand_trunc_counter.time_limits_reached(), + randomize_limits=True, # we need to randomize otherwise the 
other counters will synchronize + # with the episode counters + randomize_offsets=False # always restart at 0 + ) + # safety reset counter is only when it reches its reset interval (just to keep + # the counter bounded) + if self._rand_safety_reset_counter is not None and self._random_reset_active: + self._rand_safety_reset_counter.reset(to_be_reset=self._rand_safety_reset_counter.time_limits_reached()) + + return rm_reset_ok + + def _to_be_reset(self): + # always reset if a termination occurred or if there's a random safety reset + # request + terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu) + to_be_reset=terminated.clone() + if (self._rand_safety_reset_counter is not None) and self._random_reset_active: + to_be_reset=torch.logical_or(to_be_reset, + self._rand_safety_reset_counter.time_limits_reached()) + + return to_be_reset + + def _custom_reset(self): + # can be overridden by child + return None + + def _apply_actions_smoothing(self): + + actions = self._actions.get_torch_mirror(gpu=self._use_gpu) + actual_actions=self.get_actual_actions() # will write smoothed actions here + if self._action_smoother_continuous is not None: + self._action_smoother_continuous.update(new_signal= + actions[:, self._is_continuous_actions]) + actual_actions[:, self._is_continuous_actions]=self._action_smoother_continuous.get() + if self._action_smoother_discrete is not None: + self._action_smoother_discrete.update(new_signal= + actions[:, ~self._is_continuous_actions]) + actual_actions[:, ~self._is_continuous_actions]=self._action_smoother_discrete.get() + + def _update_custom_db_data(self, + episode_finished, + ignore_ep_end): + + # update defaults + self.custom_db_data["RhcRefsFlag"].update(new_data=self._rhc_refs.contact_flags.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) # before potentially resetting the flags, get data + self.custom_db_data["Actions"].update(new_data=self._actions.get_torch_mirror(gpu=False), + 
ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["Obs"].update(new_data=self._obs.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + self.custom_db_data["SubTerminations"].update(new_data=self._sub_terminations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["SubTruncations"].update(new_data=self._sub_truncations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + self.custom_db_data["Terminations"].update(new_data=self._terminations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["Truncations"].update(new_data=self._truncations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + + self._get_custom_db_data(episode_finished=episode_finished, ignore_ep_end=ignore_ep_end) + + def reset_custom_db_data(self, keep_track: bool = False): + # to be called periodically to reset custom db data stat. 
collection + for custom_db_data in self.custom_db_data.values(): + custom_db_data.reset(keep_track=keep_track) + + def _assemble_substep_rewards(self): + # by default assemble substep rewards by averaging + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + + # average over substeps depending on scale + # sub_rewards[:, self._is_substep_rew] = sub_rewards[:, self._is_substep_rew] + \ + # self._substep_rewards[:, self._is_substep_rew]/self._action_repeat + sub_rewards[:, self._is_substep_rew] += self._substep_rewards[:, self._is_substep_rew]/self._action_repeat + + def _assemble_substep_obs(self): + next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu) + next_obs[:, self._is_substep_obs] += self._substep_obs[:, self._is_substep_obs]/self._action_repeat + + def randomize_task_refs(self, + env_indxs: torch.Tensor = None): + + if self._override_agent_refs: + self._override_refs(env_indxs=env_indxs) + else: + self._randomize_task_refs(env_indxs=env_indxs) + + def reset(self): + + self.randomize_task_refs(env_indxs=None) # randomize all refs across envs + + self._obs.reset() + self._actions.reset() + self._next_obs.reset() + self._sub_rewards.reset() + self._tot_rewards.reset() + self._terminations.reset() + self._sub_terminations.reset() + self._truncations.reset() + self._sub_truncations.reset() + + self._ep_timeout_counter.reset(randomize_offsets=True) + self._task_rand_counter.reset() + self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter) + if self._rand_safety_reset_counter is not None: + self._rand_safety_reset_counter.reset() + self._substep_abs_counter.reset() + + if self._act_mem_buffer is not None: + self._act_mem_buffer.reset_all(init_data=self._normalize_actions(self.default_action)) + + if self._action_smoother_continuous is not None: + self._action_smoother_continuous.reset(reset_val=self.default_action[:, self._is_continuous_actions]) + if self._action_smoother_discrete is not None: + 
self._action_smoother_discrete.reset(reset_val=self.default_action[:, ~self._is_continuous_actions]) + + self._synch_state(gpu=self._use_gpu) # read obs from shared mem + + # just calling custom post step to ensure tak refs are updated + terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu) + truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu) + episode_finished = torch.logical_or(terminated, + truncated) + self._custom_post_step(episode_finished=episode_finished) + + obs = self._obs.get_torch_mirror(gpu=self._use_gpu) + next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu) + self._fill_step_obs(obs) # initialize observations + self._clamp_obs(obs) # to avoid bad things + self._fill_step_obs(next_obs) # and next obs + self._clamp_obs(next_obs) + + self.reset_custom_db_data(keep_track=False) + self._episodic_rewards_metrics.reset(keep_track=False) + + self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + def is_ready(self): + return self._ready + + def close(self): + + if not self._closed: + + # close all shared mem. 
clients + self._robot_state.close() + self._rhc_cmds.close() + self._rhc_pred.close() + self._rhc_refs.close() + self._rhc_status.close() + + self._remote_stepper.close() + + self._ep_timeout_counter.close() + self._task_rand_counter.close() + if self._rand_safety_reset_counter is not None: + self._rand_safety_reset_counter.close() + + # closing env.-specific shared data + self._obs.close() + self._next_obs.close() + self._actions.close() + if self._actual_actions is not None: + self._actual_actions.close() + self._sub_rewards.close() + self._tot_rewards.close() + + self._terminations.close() + self._sub_terminations.close() + self._truncations.close() + self._sub_truncations.close() + + self._closed = True + + def get_obs(self, clone:bool=False): + if clone: + return self._obs.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._obs.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_next_obs(self, clone:bool=False): + if clone: + return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_actions(self, clone:bool=False, normalized: bool = False): + actions = self._actions.get_torch_mirror(gpu=self._use_gpu).detach() + if normalized: + normalized_actions = self._normalize_actions(actions) + return normalized_actions.clone() if clone else normalized_actions + return actions.clone() if clone else actions + + def get_actual_actions(self, clone:bool=False, normalized: bool = False): + if self._env_opts["use_action_smoothing"]: + actions = self._actual_actions.get_torch_mirror(gpu=self._use_gpu).detach() + else: # actual action coincides with the one from the agent + possible modif. 
+ actions = self.get_actions(clone=False, normalized=False) + if normalized: + normalized_actions = self._normalize_actions(actions) + return normalized_actions.clone() if clone else normalized_actions + return actions.clone() if clone else actions + + def _normalize_actions(self, actions: torch.Tensor): + scale = torch.where(self._actions_scale == 0.0, + torch.ones_like(self._actions_scale), + self._actions_scale) + normalized = (actions - self._actions_offset)/scale + zero_scale_mask = torch.eq(self._actions_scale, 0.0).squeeze(0) + if torch.any(zero_scale_mask): + normalized[:, zero_scale_mask] = 0.0 + return normalized + + def get_rewards(self, clone:bool=False): + if clone: + return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_terminations(self, clone:bool=False): + if clone: + return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_truncations(self, clone:bool=False): + if clone: + return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach() + + def obs_dim(self): + + return self._obs_dim + + def actions_dim(self): + + return self._actions_dim + + def ep_rewards_metrics(self): + + return self._episodic_rewards_metrics + + def using_gpu(self): + + return self._use_gpu + + def name(self): + + return self._env_name + + def n_envs(self): + + return self._n_envs + + def dtype(self): + + return self._dtype + + def obs_names(self): + return self._get_obs_names() + + def action_names(self): + return self._get_action_names() + + def sub_rew_names(self): + return self._get_rewards_names() + + def sub_term_names(self): + return self._get_sub_term_names() + + def sub_trunc_names(self): + return self._get_sub_trunc_names() + + def 
_get_obs_names(self): + # to be overridden by child class + return None + + def get_robot_jnt_names(self): + return self._robot_state.jnt_names() + + def _get_action_names(self): + # to be overridden by child class + return None + + def _get_rewards_names(self): + # to be overridden by child class + return None + + def _get_sub_term_names(self): + # to be overridden by child class + sub_term_names = [] + sub_term_names.append("rhc_failure") + sub_term_names.append("robot_capsize") + sub_term_names.append("rhc_capsize") + + return sub_term_names + + def _get_sub_trunc_names(self): + # to be overridden by child class + sub_trunc_names = [] + sub_trunc_names.append("ep_timeout") + + return sub_trunc_names + + def _get_custom_db_data(self, episode_finished): + # to be overridden by child class + pass + + def set_observed_joints(self): + # ny default observe all joints available + return self._robot_state.jnt_names() + + def _set_jnts_blacklist_pattern(self): + self._jnt_q_blacklist_patterns=[] + + def get_observed_joints(self): + return self._observed_jnt_names + + def _init_obs(self, obs_dim: int): + + device = "cuda" if self._use_gpu else "cpu" + + obs_threshold_default = 1e3 + self._obs_threshold_lb = -obs_threshold_default # used for clipping observations + self._obs_threshold_ub = obs_threshold_default + + self._obs_ub = torch.full((1, obs_dim), dtype=self._dtype, device=device, + fill_value=1.0) + self._obs_lb = torch.full((1, obs_dim), dtype=self._dtype, device=device, + fill_value=-1.0) + self._obs_scale = (self._obs_ub - self._obs_lb)/2.0 + self._obs_offset = (self._obs_ub + self._obs_lb)/2.0 + + if not self._obs_dim==len(self._get_obs_names()): + error=f"obs dim {self._obs_dim} does not match obs names length {len(self._get_obs_names())}!!" 
+ Journal.log(self.__class__.__name__, + "_init_obs", + error, + LogType.EXCEP, + throw_when_excep = True) + + self._obs = Observations(namespace=self._namespace, + n_envs=self._n_envs, + obs_dim=self._obs_dim, + obs_names=self._get_obs_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._next_obs = NextObservations(namespace=self._namespace, + n_envs=self._n_envs, + obs_dim=self._obs_dim, + obs_names=self._get_obs_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._obs.run() + self._next_obs.run() + + self._is_substep_obs = torch.zeros((self.obs_dim(),), dtype=torch.bool, device=device) + self._is_substep_obs.fill_(False) # default to all step obs + + # not super memory efficient + self._substep_obs=torch.full_like(self._obs.get_torch_mirror(gpu=self._use_gpu), fill_value=0.0) + + def _init_actions(self, actions_dim: int): + + device = "cuda" if self._use_gpu else "cpu" + # action scalings to be applied to agent's output + self._actions_ub = torch.full((1, actions_dim), dtype=self._dtype, device=device, + fill_value=1.0) + self._actions_lb = torch.full((1, actions_dim), dtype=self._dtype, device=device, + fill_value=-1.0) + self._actions_scale = (self._actions_ub - self._actions_lb)/2.0 + self._actions_offset = (self._actions_ub + self._actions_lb)/2.0 + + if not self._actions_dim==len(self._get_action_names()): + error=f"action dim {self._actions_dim} does not match action names length {len(self._get_action_names())}!!" 
+ Journal.log(self.__class__.__name__, + "_init_actions", + error, + LogType.EXCEP, + throw_when_excep = True) + self._actions = Actions(namespace=self._namespace, + n_envs=self._n_envs, + action_dim=self._actions_dim, + action_names=self._get_action_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._actions.run() + + self.default_action = torch.full_like(input=self.get_actions(),fill_value=0.0) + self.safe_action = torch.full_like(input=self.get_actions(),fill_value=0.0) + + if self._env_opts["use_action_history"]: + self._act_mem_buffer=MemBuffer(name="ActionMemBuf", + data_tensor=self._actions.get_torch_mirror(), + data_names=self._get_action_names(), + debug=self._debug, + horizon=self._env_opts["actions_history_size"], + dtype=self._dtype, + use_gpu=self._use_gpu) + + # default to all continuous actions (changes the way noise is added) + self._is_continuous_actions=torch.full((actions_dim, ), + dtype=torch.bool, device=device, + fill_value=True) + + def _init_action_smoothing(self): + + continuous_actions=self.get_actions()[:, self._is_continuous_actions] + discrete_actions=self.get_actions()[:, ~self._is_continuous_actions] + self._action_smoother_continuous=ExponentialSignalSmoother(signal=continuous_actions, + update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent + smoothing_horizon=self._env_opts["smoothing_horizon_c"], + target_smoothing=0.5, + debug=self._debug, + dtype=self._dtype, + use_gpu=self._use_gpu, + name="ActionSmootherContinuous") + self._action_smoother_discrete=ExponentialSignalSmoother(signal=discrete_actions, + update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent + smoothing_horizon=self._env_opts["smoothing_horizon_d"], + target_smoothing=0.5, + debug=self._debug, + dtype=self._dtype, + use_gpu=self._use_gpu, + 
name="ActionSmootherDiscrete") + + # we also need somewhere to keep the actual actions after smoothing + self._actual_actions = Actions(namespace=self._namespace+"_actual", + n_envs=self._n_envs, + action_dim=self._actions_dim, + action_names=self._get_action_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + self._actual_actions.run() + + def _init_rewards(self): + + reward_thresh_default = 1.0 + n_sub_rewards = len(self._get_rewards_names()) + device = "cuda" if self._use_gpu else "cpu" + self._reward_thresh_lb = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=-reward_thresh_default, device=device) # used for clipping rewards + self._reward_thresh_ub = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=reward_thresh_default, device=device) + + self._sub_rewards = SubRewards(namespace=self._namespace, + n_envs=self._n_envs, + n_rewards=n_sub_rewards, + reward_names=self._get_rewards_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._tot_rewards = TotRewards(namespace=self._namespace, + n_envs=self._n_envs, + reward_names=["total_reward"], + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._sub_rewards.run() + self._tot_rewards.run() + + self._substep_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone() + # used to hold substep rewards (not super mem. 
efficient) + self._is_substep_rew = torch.zeros((self._substep_rewards.shape[1],),dtype=torch.bool,device=device) + self._is_substep_rew.fill_(True) # default to all substep rewards + + self._episodic_rewards_metrics = EpisodicRewards(reward_tensor=self._sub_rewards.get_torch_mirror(), + reward_names=self._get_rewards_names(), + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._episodic_rewards_metrics.set_constant_data_scaling(scaling=self._get_reward_scaling()) + + def _get_reward_scaling(self): + # to be overridden by child (default to no scaling) + return 1 + + def _max_ep_length(self): + #.should be overriden by child + return self._env_opts["episode_timeout_ub"] + + def _init_custom_db_data(self): + + self.custom_db_data = {} + # by default always log this contact data + rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror() + contact_names = self._rhc_refs.rob_refs.contact_names() + stepping_data = EpisodicData("RhcRefsFlag", rhc_latest_contact_ref, contact_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._add_custom_db_data(db_data=stepping_data) + + # log also action data + actions = self._actions.get_torch_mirror() + action_names = self._get_action_names() + action_data = EpisodicData("Actions", actions, action_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._add_custom_db_data(db_data=action_data) + + # and observations + observations = self._obs.get_torch_mirror() + observations_names = self._get_obs_names() + obs_data = EpisodicData("Obs", observations, observations_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._add_custom_db_data(db_data=obs_data) + + # log 
sub-term and sub-truncations data + t_scaling=1 # 1 so that we log an interpretable data in terms of why the episode finished + data_scaling = torch.full((self._n_envs, 1), + fill_value=t_scaling, + dtype=torch.int32,device="cpu") + sub_term = self._sub_terminations.get_torch_mirror() + term = self._terminations.get_torch_mirror() + sub_termination_names = self.sub_term_names() + + sub_term_data = EpisodicData("SubTerminations", sub_term, sub_termination_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + sub_term_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=sub_term_data) + term_data = EpisodicData("Terminations", term, ["terminations"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + term_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=term_data) + + sub_trunc = self._sub_truncations.get_torch_mirror() + trunc = self._truncations.get_torch_mirror() + sub_truncations_names = self.sub_trunc_names() + sub_trunc_data = EpisodicData("SubTruncations", sub_trunc, sub_truncations_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + sub_trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=sub_trunc_data) + trunc_data = EpisodicData("Truncations", trunc, ["truncations"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=trunc_data) + + def _add_custom_db_data(self, db_data: EpisodicData): + self.custom_db_data[db_data.name()] = db_data + + def _init_terminations(self): + 
+ # Boolean array indicating whether each environment episode has terminated after + # the current step. An episode termination could occur based on predefined conditions + # in the environment, such as reaching a goal or exceeding a time limit. + + self._terminations = Terminations(namespace=self._namespace, + n_envs=self._n_envs, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + self._terminations.run() + + sub_t_names = self.sub_term_names() + self._sub_terminations = SubTerminations(namespace=self._namespace, + n_envs=self._n_envs, + n_term=len(sub_t_names), + term_names=sub_t_names, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + self._sub_terminations.run() + + device = "cuda" if self._use_gpu else "cpu" + self._is_capsized=torch.zeros((self._n_envs,1), + dtype=torch.bool, device=device) + self._is_rhc_capsized=torch.zeros((self._n_envs,1), + dtype=torch.bool, device=device) + self._max_pitch_angle=60.0*math.pi/180.0 + + def _init_truncations(self): + + self._truncations = Truncations(namespace=self._namespace, + n_envs=self._n_envs, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + + self._truncations.run() + + sub_trc_names = self.sub_trunc_names() + self._sub_truncations = SubTruncations(namespace=self._namespace, + n_envs=self._n_envs, + n_trunc=len(sub_trc_names), + truc_names=sub_trc_names, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + self._sub_truncations.run() + + def _update_jnt_blacklist(self): + device = "cuda" if self._use_gpu else "cpu" + all_available_jnts=self.get_observed_joints() + blacklist=[] + for 
i in range(len(all_available_jnts)): + for pattern in self._jnt_q_blacklist_patterns: + if pattern in all_available_jnts[i]: + # stop at first pattern match + blacklist.append(i) + break + if not len(blacklist)==0: + self._jnt_q_blacklist_idxs=torch.tensor(blacklist, dtype=torch.int, device=device) + + def _attach_to_shared_mem(self): + + # runs shared mem clients for getting observation and setting RHC commands + + # remote stepping data + self._remote_stepper = RemoteStepperSrvr(namespace=self._namespace, + verbose=self._verbose, + vlevel=self._vlevel, + force_reconnection=True) + self._remote_stepper.run() + self._remote_resetter = RemoteResetSrvr(namespace=self._namespace, + verbose=self._verbose, + vlevel=self._vlevel, + force_reconnection=True) + self._remote_resetter.run() + self._remote_reset_req = RemoteResetRequest(namespace=self._namespace, + is_server=False, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True) + self._remote_reset_req.run() + + self._jnts_remapping=None + self._jnt_q_blacklist_idxs=None + + self._robot_state = RobotState(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True, + enable_height_sensor=self._env_opts["add_heightmap_obs"]) + + self._rhc_cmds = RhcCmds(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._rhc_pred = RhcPred(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._rhc_refs = RhcRefs(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._rhc_status = 
RhcStatus(namespace=self._namespace, + is_server=False, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._robot_state.run() + self._n_envs = self._robot_state.n_robots() + self._n_jnts = self._robot_state.n_jnts() + self._n_contacts = self._robot_state.n_contacts() # we assume same n contacts for all rhcs for now + + self._rhc_cmds.run() + self._rhc_pred.run() + self._rhc_refs.run() + self._rhc_status.run() + # we read rhc info now and just this time, since it's assumed to be static + self._check_controllers_registered(retry=True) # blocking + # (we need controllers to be connected to read meaningful data) + + self._rhc_status.rhc_static_info.synch_all(read=True,retry=True) + if self._use_gpu: + self._rhc_status.rhc_static_info.synch_mirror(from_gpu=False,non_blocking=False) + rhc_horizons=self._rhc_status.rhc_static_info.get("horizons",gpu=self._use_gpu) + rhc_nnodes=self._rhc_status.rhc_static_info.get("nnodes",gpu=self._use_gpu) + rhc_dts=self._rhc_status.rhc_static_info.get("dts",gpu=self._use_gpu) + + # height sensor metadata (client side) + if self._env_opts["add_heightmap_obs"]: + self._height_grid_size = self._robot_state.height_sensor.grid_size + self._height_flat_dim = self._robot_state.height_sensor.n_cols + rhc_ncontacts=self._rhc_status.rhc_static_info.get("ncontacts",gpu=self._use_gpu) + robot_mass=self._rhc_status.rhc_static_info.get("robot_mass",gpu=self._use_gpu) + pred_node_idxs_rhc=self._rhc_status.rhc_static_info.get("pred_node_idx",gpu=self._use_gpu) + + self._n_nodes_rhc=torch.round(rhc_nnodes) # we assume nodes are static during an env lifetime + self._rhc_horizons=rhc_horizons + self._rhc_dts=rhc_dts + self._n_contacts_rhc=rhc_ncontacts + self._rhc_robot_masses=robot_mass + if (self._rhc_robot_masses == 0).any(): + zero_indices = torch.nonzero(self._rhc_robot_masses == 0, as_tuple=True) + print(zero_indices) # This will print the indices of zero elements + 
Journal.log(self.__class__.__name__, + "_attach_to_shared_mem", + "Found at least one robot with 0 mass from RHC static info!!", + LogType.EXCEP, + throw_when_excep=True) + + self._rhc_robot_weight=robot_mass*9.81 + self._pred_node_idxs_rhc=pred_node_idxs_rhc + self._pred_horizon_rhc=self._pred_node_idxs_rhc*self._rhc_dts + + # run server for agent commands + self._agent_refs = AgentRefs(namespace=self._namespace, + is_server=True, + n_robots=self._n_envs, + n_jnts=self._robot_state.n_jnts(), + n_contacts=self._robot_state.n_contacts(), + contact_names=self._robot_state.contact_names(), + q_remapping=None, + with_gpu_mirror=self._use_gpu, + force_reconnection=True, + safe=False, + verbose=self._verbose, + vlevel=self._vlevel, + fill_value=0) + self._agent_refs.run() + q_init_agent_refs=torch.full_like(self._robot_state.root_state.get(data_type="q", gpu=self._use_gpu),fill_value=0.0) + q_init_agent_refs[:, 0]=1.0 + self._agent_refs.rob_refs.root_state.set(data_type="q", data=q_init_agent_refs, + gpu=self._use_gpu) + if self._use_gpu: + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=True) + self._agent_refs.rob_refs.root_state.synch_all(read=False, retry=True) + # episode steps counters (for detecting episode truncations for + # time limits) + self._ep_timeout_counter = EpisodesCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["episode_timeout_lb"], + n_steps_ub=self._env_opts["episode_timeout_ub"], + randomize_offsets_at_startup=True, # this has to be randomized + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=self._debug) # handles step counter through episodes and through envs + self._ep_timeout_counter.run() + self._task_rand_counter = TaskRandCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["n_steps_task_rand_lb"], + 
n_steps_ub=self._env_opts["n_steps_task_rand_ub"], + randomize_offsets_at_startup=False, # not necessary since it will be synched with the timeout counter + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=self._debug) # handles step counter through episodes and through envs + self._task_rand_counter.run() + self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter) + if self._env_opts["use_random_trunc"]: + self._rand_trunc_counter=RandomTruncCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["random_trunc_freq"]-self._env_opts["random_trunc_freq_delta"], + n_steps_ub=self._env_opts["random_trunc_freq"], + randomize_offsets_at_startup=True, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=False) + self._rand_trunc_counter.run() + # self._rand_trunc_counter.sync_counters(other_counter=self._ep_timeout_counter) + if self._env_opts["use_random_safety_reset"]: + self._rand_safety_reset_counter=SafetyRandResetsCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["random_reset_freq"], + n_steps_ub=self._env_opts["random_reset_freq"], + randomize_offsets_at_startup=True, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=False) + self._rand_safety_reset_counter.run() + # self._rand_safety_reset_counter.sync_counters(other_counter=self._ep_timeout_counter) + + # timer to track abs time in each env (reset logic to be implemented in child) + self._substep_abs_counter = SubStepAbsCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=1e9, + n_steps_ub=1e9, + randomize_offsets_at_startup=True, # randomizing startup offsets + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + 
safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=self._debug) + self._substep_abs_counter.run() + + # debug data servers + traing_env_param_dict = {} + traing_env_param_dict["use_gpu"] = self._use_gpu + traing_env_param_dict["debug"] = self._is_debug + traing_env_param_dict["n_preinit_steps"] = self._env_opts["n_preinit_steps"] + traing_env_param_dict["n_preinit_steps"] = self._n_envs + + self._training_sim_info = SharedTrainingEnvInfo(namespace=self._namespace, + is_server=True, + training_env_params_dict=traing_env_param_dict, + safe=False, + force_reconnection=True, + verbose=self._verbose, + vlevel=self._vlevel) + self._training_sim_info.run() + + self._observed_jnt_names=self.set_observed_joints() + self._set_jnts_blacklist_pattern() + self._update_jnt_blacklist() + + self._prev_root_p_substep=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone() + self._prev_root_q_substep=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone() + self._prev_root_p_step=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone() + self._prev_root_q_step=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone() + + def _activate_rhc_controllers(self): + self._rhc_status.activation_state.get_torch_mirror()[:, :] = True + self._rhc_status.activation_state.synch_all(read=False, retry=True) # activates all controllers + + def _synch_state(self, + gpu: bool = True): + + # read from shared memory on CPU + # robot state + self._robot_state.root_state.synch_all(read = True, retry = True) + self._robot_state.jnts_state.synch_all(read = True, retry = True) + # rhc cmds + self._rhc_cmds.root_state.synch_all(read = True, retry = True) + self._rhc_cmds.jnts_state.synch_all(read = True, retry = True) + self._rhc_cmds.contact_wrenches.synch_all(read = True, retry = True) + # rhc pred + self._rhc_pred.root_state.synch_all(read = True, retry = True) + # self._rhc_pred.jnts_state.synch_all(read = 
True, retry = True) + # self._rhc_pred.contact_wrenches.synch_all(read = True, retry = True) + # refs for root link and contacts + self._rhc_refs.rob_refs.root_state.synch_all(read = True, retry = True) + self._rhc_refs.contact_flags.synch_all(read = True, retry = True) + self._rhc_refs.flight_info.synch_all(read = True, retry = True) + self._rhc_refs.flight_settings_req.synch_all(read = True, retry = True) + self._rhc_refs.rob_refs.contact_pos.synch_all(read = True, retry = True) + # rhc cost + self._rhc_status.rhc_cost.synch_all(read = True, retry = True) + # rhc constr. violations + self._rhc_status.rhc_constr_viol.synch_all(read = True, retry = True) + # failure states + self._rhc_status.fails.synch_all(read = True, retry = True) + # tot cost and cnstr viol on nodes + step variable + self._rhc_status.rhc_nodes_cost.synch_all(read = True, retry = True) + self._rhc_status.rhc_nodes_constr_viol.synch_all(read = True, retry = True) + self._rhc_status.rhc_fcn.synch_all(read = True, retry = True) + self._rhc_status.rhc_fail_idx.synch_all(read = True, retry = True) + if self._env_opts["add_heightmap_obs"]: + self._robot_state.height_sensor.synch_all(read=True, retry=True) + if gpu: + # copies data to "mirror" on GPU --> we can do it non-blocking since + # in this direction it should be safe + self._robot_state.root_state.synch_mirror(from_gpu=False,non_blocking=True) # copies shared data on GPU + self._robot_state.jnts_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_cmds.root_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_cmds.jnts_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_cmds.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_pred.root_state.synch_mirror(from_gpu=False,non_blocking=True) + # self._rhc_pred.jnts_state.synch_mirror(from_gpu=False,non_blocking=True) + # self._rhc_pred.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True) + 
self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.contact_flags.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.flight_info.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.flight_settings_req.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_cost.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_constr_viol.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.fails.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_nodes_cost.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_nodes_constr_viol.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_fcn.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_fail_idx.synch_mirror(from_gpu=False,non_blocking=True) + if self._env_opts["add_heightmap_obs"]: + self._robot_state.height_sensor.synch_mirror(from_gpu=False, non_blocking=True) + torch.cuda.synchronize() # ensuring that all the streams on the GPU are completed \ + # before the CPU continues execution + + def _override_refs(self, + env_indxs: torch.Tensor = None): + + # just used for setting agent refs externally (i.e. 
from shared mem on CPU) + self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem + if self._use_gpu: + # copies latest refs to GPU + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False) + + def _clamp_obs(self, + obs: torch.Tensor): + if self._is_debug: + self._check_finite(obs, "observations", False) + torch.nan_to_num(input=obs, out=obs, nan=self._obs_threshold_ub, + posinf=self._obs_threshold_ub, + neginf=self._obs_threshold_lb) # prevent nans + + obs.clamp_(self._obs_threshold_lb, self._obs_threshold_ub) + + def _clamp_rewards(self, + rewards: torch.Tensor): + if self._is_debug: + self._check_finite(rewards, "rewards", False) + torch.nan_to_num(input=rewards, out=rewards, nan=0.0, + posinf=None, + neginf=None) # prevent nans + rewards.clamp_(self._reward_thresh_lb, self._reward_thresh_ub) + + def get_actions_lb(self): + return self._actions_lb + + def get_actions_ub(self): + return self._actions_ub + + def get_actions_scale(self): + return self._actions_scale + + def get_actions_offset(self): + return self._actions_offset + + def get_obs_lb(self): + return self._obs_lb + + def get_obs_ub(self): + return self._obs_ub + + def get_obs_scale(self): + self._obs_scale = (self._obs_ub - self._obs_lb)/2.0 + return self._obs_scale + + def get_obs_offset(self): + self._obs_offset = (self._obs_ub + self._obs_lb)/2.0 + return self._obs_offset + + def switch_random_reset(self, on: bool = True): + self._random_reset_active=on + + def set_jnts_remapping(self, + remapping: List = None): + + self._jnts_remapping=remapping + if self._jnts_remapping is not None: + self._robot_state.set_jnts_remapping(jnts_remapping=self._jnts_remapping) + self._rhc_cmds.set_jnts_remapping(jnts_remapping=self._jnts_remapping) + self._rhc_pred.set_jnts_remapping(jnts_remapping=self._jnts_remapping) + # we need to also update the list of observed joints to match + available_joints=self._robot_state.jnt_names() + # the remapping 
ordering + self._observed_jnt_names=[] + for i in range(len(available_joints)): + self._observed_jnt_names.append(available_joints[self._jnts_remapping[i]]) + + self._update_jnt_blacklist() + + updated_obs_names=self._get_obs_names() # get updated obs names (should use get_observed_joints + # internally, so that jnt names are updated) + + # also update jnt obs names on shared memory + names_old=self._obs.get_obs_names() + names_old_next=self._next_obs.get_obs_names() + names_old[:]=updated_obs_names + names_old_next[:]=updated_obs_names + self._obs.update_names() + self._next_obs.update_names() + + # also update + if "Obs" in self.custom_db_data: + db_obs_names=self.custom_db_data["Obs"].data_names() + db_obs_names[:]=updated_obs_names + + def _check_finite(self, + tensor: torch.Tensor, + name: str, + throw: bool = False): + if not torch.isfinite(tensor).all().item(): + exception = f"Found nonfinite elements in {name} tensor!!" + non_finite_idxs=torch.nonzero(~torch.isfinite(tensor)) + n_nonf_elems=non_finite_idxs.shape[0] + + if name=="observations": + for i in range(n_nonf_elems): + db_msg=f"{self.obs_names()[non_finite_idxs[i,1]]} (env. {non_finite_idxs[i,0]}):" + \ + f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}" + print(db_msg) + if name=="rewards": + for i in range(n_nonf_elems): + db_msg=f"{self.sub_rew_names()[non_finite_idxs[i,1]]} (env. 
{non_finite_idxs[i,0]}):" + \ + f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}" + print(db_msg) + print(tensor) + Journal.log(self.__class__.__name__, + "_check_finite", + exception, + LogType.EXCEP, + throw_when_excep = throw) + + def _check_controllers_registered(self, + retry: bool = False): + + if retry: + self._rhc_status.controllers_counter.synch_all(read=True, retry=True) + n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item() + while not (n_connected_controllers == self._n_envs): + warn = f"Expected {self._n_envs} controllers to be connected during training, " + \ + f"but got {n_connected_controllers}. Will wait for all to be connected..." + Journal.log(self.__class__.__name__, + "_check_controllers_registered", + warn, + LogType.WARN, + throw_when_excep = False) + nsecs = int(2 * 1000000000) + PerfSleep.thread_sleep(nsecs) + self._rhc_status.controllers_counter.synch_all(read=True, retry=True) + n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item() + info = f"All {n_connected_controllers} controllers connected!" + Journal.log(self.__class__.__name__, + "_check_controllers_registered", + info, + LogType.INFO, + throw_when_excep = False) + return True + else: + self._rhc_status.controllers_counter.synch_all(read=True, retry=True) + n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item() + if not (n_connected_controllers == self._n_envs): + exception = f"Expected {self._n_envs} controllers to be connected during training, " + \ + f"but got {n_connected_controllers}. Aborting..." 
+ Journal.log(self.__class__.__name__, + "_check_controllers_registered", + exception, + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def _check_truncations(self): + + self._check_sub_truncations() + sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu) + truncations = self._truncations.get_torch_mirror(gpu=self._use_gpu) + truncations[:, :] = torch.any(sub_truncations,dim=1,keepdim=True) + + def _check_terminations(self): + + self._check_sub_terminations() + sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu) + terminations = self._terminations.get_torch_mirror(gpu=self._use_gpu) + terminations[:, :] = torch.any(sub_terminations,dim=1,keepdim=True) + + def _check_sub_truncations(self): + # default behaviour-> to be overriden by child + sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu) + sub_truncations[:, 0:1]=self._ep_timeout_counter.time_limits_reached() + + def _check_sub_terminations(self): + # default behaviour-> to be overriden by child + sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu) + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + robot_q_pred = self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) + + # terminate when either the real robot or the prediction from the MPC are capsized + check_capsize(quat=robot_q_meas,max_angle=self._max_pitch_angle, + output_t=self._is_capsized) + check_capsize(quat=robot_q_pred,max_angle=self._max_pitch_angle, + output_t=self._is_rhc_capsized) + + sub_terminations[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=self._use_gpu) + sub_terminations[:, 1:2] = self._is_capsized + sub_terminations[:, 2:3] = self._is_rhc_capsized + + def is_action_continuous(self): + return self._is_continuous_actions + + def is_action_discrete(self): + return ~self._is_continuous_actions + + @abstractmethod + def _pre_substep(self): + pass + + @abstractmethod + 
def _custom_post_step(self,episode_finished): + pass + + @abstractmethod + def _custom_post_substp_post_rew(self): + pass + + @abstractmethod + def _custom_post_substp_pre_rew(self): + pass + + @abstractmethod + def _apply_actions_to_rhc(self): + pass + + def _override_actions_with_demo(self): + pass + + @abstractmethod + def _compute_substep_rewards(self): + pass + + @abstractmethod + def _set_substep_rew(self): + pass + + @abstractmethod + def _set_substep_obs(self): + pass + + @abstractmethod + def _compute_step_rewards(self): + pass + + @abstractmethod + def _fill_substep_obs(self, + obs: torch.Tensor): + pass + + @abstractmethod + def _fill_step_obs(self, + obs: torch.Tensor): + pass + + @abstractmethod + def _randomize_task_refs(self, + env_indxs: torch.Tensor = None): + pass + + def _custom_post_init(self): + pass + + def _get_avrg_substep_root_twist(self, + out: torch.Tensor, + base_loc: bool = True): + # to be called at each substep + robot_p_meas = self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + root_v_avrg_w=(robot_p_meas-self._prev_root_p_substep)/self._substep_dt + root_omega_avrg_w=quaternion_to_angular_velocity(q_diff=quaternion_difference(self._prev_root_q_substep,robot_q_meas),\ + dt=self._substep_dt) + twist_w=torch.cat((root_v_avrg_w, + root_omega_avrg_w), + dim=1) + if not base_loc: + self._prev_root_p_substep[:, :]=robot_p_meas + self._prev_root_q_substep[:, :]=robot_q_meas + out[:, :]=twist_w + # rotate using the current (end-of-substep) orientation for consistency with other signals + world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out) + self._prev_root_p_substep[:, :]=robot_p_meas + self._prev_root_q_substep[:, :]=robot_q_meas + + def _get_avrg_step_root_twist(self, + out: torch.Tensor, + base_loc: bool = True): + # to be called after substeps of actions repeats + robot_p_meas = 
self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + dt=self._substep_dt*self._action_repeat # accounting for frame skipping + root_v_avrg_w=(robot_p_meas-self._prev_root_p_step)/(dt) + root_omega_avrg_w=quaternion_to_angular_velocity(q_diff=quaternion_difference(self._prev_root_q_step,robot_q_meas),\ + dt=dt) + twist_w=torch.cat((root_v_avrg_w, + root_omega_avrg_w), + dim=1) + if not base_loc: + out[:, :]=twist_w + # rotate using the current (end-of-step) orientation for consistency with other signals + world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out) + + def _get_avrg_rhc_root_twist(self, + out: torch.Tensor, + base_loc: bool = True): + + rhc_root_p =self._rhc_cmds.root_state.get(data_type="p",gpu=self._use_gpu) + rhc_root_q =self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) + rhc_root_p_pred =self._rhc_pred.root_state.get(data_type="p",gpu=self._use_gpu) + rhc_root_q_pred =self._rhc_pred.root_state.get(data_type="q",gpu=self._use_gpu) + + rhc_root_v_avrg_rhc_w=(rhc_root_p_pred-rhc_root_p)/self._pred_horizon_rhc + rhc_root_omega_avrg_rhc_w=quaternion_to_angular_velocity(q_diff=quaternion_difference(rhc_root_q,rhc_root_q_pred),\ + dt=self._pred_horizon_rhc) + + rhc_pred_avrg_twist_rhc_w = torch.cat((rhc_root_v_avrg_rhc_w, + rhc_root_omega_avrg_rhc_w), + dim=1) + if not base_loc: + out[:, :]=rhc_pred_avrg_twist_rhc_w + # to rhc base frame (using first node as reference) + world2base_frame(t_w=rhc_pred_avrg_twist_rhc_w, q_b=rhc_root_q, t_out=out) diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/twist_tracking_env.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/twist_tracking_env.py new file mode 100644 index 0000000000000000000000000000000000000000..5a8f9e4403b6124e26f78086191441feace4386a --- /dev/null +++ 
b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/twist_tracking_env.py @@ -0,0 +1,1396 @@ +from typing import Dict + +import os + +import torch + +from EigenIPC.PyEigenIPC import VLevel, LogType, Journal + +from mpc_hive.utilities.shared_data.rhc_data import RobotState, RhcStatus, RhcRefs +from mpc_hive.utilities.math_utils_torch import world2base_frame, base2world_frame, w2hor_frame + +from aug_mpc.utils.sys_utils import PathsGetter +from aug_mpc.utils.timers import PeriodicTimer +from aug_mpc.utils.episodic_data import EpisodicData +from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother +from aug_mpc.utils.math_utils import check_capsize +from aug_mpc.training_envs.training_env_base import AugMPCTrainingEnvBase + +class TwistTrackingEnv(AugMPCTrainingEnvBase): + """Base AugMPC training env that tracks commanded twists by pushing velocity and contact targets into the RHC controller while handling locomotion rewards/resets.""" + + def __init__(self, + namespace: str, + actions_dim: int = 10, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + debug: bool = True, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + env_name = "LinVelTrack" + device = "cuda" if use_gpu else "cpu" + + self._add_env_opt(env_opts, "srew_drescaling", + False) + + self._add_env_opt(env_opts, "step_thresh", 0.) 
# when step action < thresh, a step is requested + + # counters settings + self._add_env_opt(env_opts, "single_task_ref_per_episode", + True # if True, the task ref is constant over the episode (ie + # episodes are truncated when task is changed) + ) + self._add_env_opt(env_opts, "add_angvel_ref_rand", default=True) # randomize also agular vel ref (just z component) + + self._add_env_opt(env_opts, "episode_timeout_lb", + 1024) + self._add_env_opt(env_opts, "episode_timeout_ub", + 1024) + self._add_env_opt(env_opts, "n_steps_task_rand_lb", + 512) + self._add_env_opt(env_opts, "n_steps_task_rand_ub", + 512) + self._add_env_opt(env_opts, "use_random_safety_reset", + True) + self._add_env_opt(env_opts, "random_reset_freq", + 10) # a random reset once every n-episodes (per env) + self._add_env_opt(env_opts, "use_random_trunc", + True) + self._add_env_opt(env_opts, "random_trunc_freq", + env_opts["episode_timeout_ub"]*5) # to remove temporal correlations between envs + self._add_env_opt(env_opts, "random_trunc_freq_delta", + env_opts["episode_timeout_ub"]*2) # to randomize trunc frequency between envs + + if not env_opts["single_task_ref_per_episode"]: + env_opts["random_reset_freq"]=int(env_opts["random_reset_freq"]/\ + (env_opts["episode_timeout_lb"]/float(env_opts["n_steps_task_rand_lb"]))) + + self._add_env_opt(env_opts, "action_repeat", 1) # frame skipping (different agent action every action_repeat + # env substeps) + + self._add_env_opt(env_opts, "n_preinit_steps", 1) # n steps of the controllers to properly initialize everything + + self._add_env_opt(env_opts, "vec_ep_freq_metrics_db", 1) # n eps over which debug metrics are reported + self._add_env_opt(env_opts, "demo_envs_perc", 0.0) + self._add_env_opt(env_opts, "max_cmd_v", 1.5) # maximum cmd v for lin v actions (single component) + self._add_env_opt(env_opts, "max_cmd_omega", 1.0) # maximum cmd v for omega v actions (single component) + + # action smoothing + self._add_env_opt(env_opts, 
"use_action_smoothing", False) + self._add_env_opt(env_opts, "smoothing_horizon_c", 0.01) + self._add_env_opt(env_opts, "smoothing_horizon_d", 0.03) + + # whether to smooth vel error signal + self._add_env_opt(env_opts, "use_track_reward_smoother", False) + self._add_env_opt(env_opts, "smoothing_horizon_vel_err", 0.08) + self._add_env_opt(env_opts, "track_rew_smoother", None) + + # rewards + self._reward_map={} + self._reward_lb_map={} + + self._add_env_opt(env_opts, "reward_lb_default", -0.5) + self._add_env_opt(env_opts, "reward_ub_default", 1e6) + + self._add_env_opt(env_opts, "task_error_reward_lb", -0.5) + self._add_env_opt(env_opts, "CoT_reward_lb", -0.5) + self._add_env_opt(env_opts, "power_reward_lb", -0.5) + self._add_env_opt(env_opts, "action_rate_reward_lb", -0.5) + self._add_env_opt(env_opts, "jnt_vel_reward_lb", -0.5) + self._add_env_opt(env_opts, "rhc_avrg_vel_reward_lb", -0.5) + + self._add_env_opt(env_opts, "add_power_reward", False) + self._add_env_opt(env_opts, "add_CoT_reward", True) + self._add_env_opt(env_opts, "use_CoT_wrt_ref", False) + self._add_env_opt(env_opts, "add_action_rate_reward", True) + self._add_env_opt(env_opts, "add_jnt_v_reward", False) + + self._add_env_opt(env_opts, "use_rhc_avrg_vel_tracking", False) + + # task tracking + self._add_env_opt(env_opts, "use_relative_error", default=False) # use relative vel error (wrt current task norm) + self._add_env_opt(env_opts, "directional_tracking", default=True) # whether to compute tracking error based on reference direction + # if env_opts["add_angvel_ref_rand"]: + # env_opts["directional_tracking"]=False + + self._add_env_opt(env_opts, "use_L1_norm", default=True) # whether to use L1 norm for the error (otherwise L2) + self._add_env_opt(env_opts, "use_exp_track_rew", default=True) # whether to use a reward of the form A*e^(B*x), + # otherwise A*(1-B*x) + + self._add_env_opt(env_opts, "use_fail_idx_weight", default=False) + self._add_env_opt(env_opts, "task_track_offset_exp", 
default=1.0) + self._add_env_opt(env_opts, "task_track_scale_exp", default=5.0) + self._add_env_opt(env_opts, "task_track_offset", default=1.0) + self._add_env_opt(env_opts, "task_track_scale", default=1.5) + self._add_env_opt(env_opts, "task_track_front_weight", default=1.0) + self._add_env_opt(env_opts, "task_track_lat_weight", default=0.05) + self._add_env_opt(env_opts, "task_track_vert_weight", default=0.05) + self._add_env_opt(env_opts, "task_track_omega_z_weight", default=0.4) + self._add_env_opt(env_opts, "task_track_omega_x_weight", default=0.05) + self._add_env_opt(env_opts, "task_track_omega_y_weight", default=0.05) + # if env_opts["add_angvel_ref_rand"]: + # env_opts["task_track_omega_x_weight"]=0.0 + # env_opts["task_track_omega_y_weight"]=0.0 + # env_opts["task_track_omega_z_weight"]=1.0 + + # task pred tracking + self._add_env_opt(env_opts, "task_pred_track_offset", default=1.0) + self._add_env_opt(env_opts, "task_pred_track_scale", default=3.0) + + # energy penalties + self._add_env_opt(env_opts, "CoT_offset", default=0.3) + self._add_env_opt(env_opts, "CoT_scale", default=0.5) + self._add_env_opt(env_opts, "power_offset", default=0.1) + self._add_env_opt(env_opts, "power_scale", default=8e-4) + + # action rate penalty + self._add_env_opt(env_opts, "action_rate_offset", default=0.1) + self._add_env_opt(env_opts, "action_rate_scale", default=2.0) + self._add_env_opt(env_opts, "action_rate_rew_d_weight", default=0.1) + self._add_env_opt(env_opts, "action_rate_rew_c_weight", default=1.0) + + # jnt vel penalty + self._add_env_opt(env_opts, "jnt_vel_offset", default=0.1) + self._add_env_opt(env_opts, "jnt_vel_scale", default=2.0) + + # terminations + self._add_env_opt(env_opts, "add_term_mpc_capsize", default=False) # add termination based on mpc capsizing prediction + + # observations + self._add_env_opt(env_opts, "rhc_fail_idx_scale", default=1.0) + self._add_env_opt(env_opts, "use_action_history", default=True) # whether to add information on past 
actions to obs + self._add_env_opt(env_opts, "add_prev_actions_stats_to_obs", default=False) # add actions std, mean + last action over a horizon to obs (if self._use_action_history True) + self._add_env_opt(env_opts, "actions_history_size", default=3) + + self._add_env_opt(env_opts, "add_mpc_contact_f_to_obs", default=True) # add estimate vertical contact f to obs + self._add_env_opt(env_opts, "add_fail_idx_to_obs", default=True) # we need to obserse mpc failure idx to correlate it with terminations + + self._add_env_opt(env_opts, "use_linvel_from_rhc", default=True) # no lin vel meas available, we use est. from mpc + self._add_env_opt(env_opts, "add_flight_info", default=True) # add feedback info on pos, remamining duration, length, + # apex and landing height of flight phases + self._add_env_opt(env_opts, "add_flight_settings", default=False) # add feedback info on current flight requests for mpc + + self._add_env_opt(env_opts, "use_prob_based_stepping", default=False) # interpret actions as stepping prob (never worked) + + self._add_env_opt(env_opts, "add_rhc_cmds_to_obs", default=True) # add the rhc cmds which are being applied now to the robot + + if not "add_periodic_clock_to_obs" in env_opts: + # add a sin/cos clock to obs (useful if task is explicitly + # time-dependent) + self._add_env_opt(env_opts, "add_periodic_clock_to_obs", default=False) + + self._add_env_opt(env_opts, "add_heightmap_obs", default=False) + + # temporarily creating robot state client to get some data + robot_state_tmp = RobotState(namespace=namespace, + is_server=False, + safe=False, + verbose=verbose, + vlevel=vlevel, + with_gpu_mirror=False, + with_torch_view=False, + enable_height_sensor=env_opts["add_heightmap_obs"]) + robot_state_tmp.run() + rhc_status_tmp = RhcStatus(is_server=False, + namespace=namespace, + verbose=verbose, + vlevel=vlevel, + with_torch_view=False, + with_gpu_mirror=False) + rhc_status_tmp.run() + rhc_refs_tmp = RhcRefs(namespace=namespace, + is_server=False, + 
safe=False, + verbose=verbose, + vlevel=vlevel, + with_gpu_mirror=False, + with_torch_view=False) + rhc_refs_tmp.run() + n_jnts = robot_state_tmp.n_jnts() + self._contact_names = robot_state_tmp.contact_names() + self._n_contacts = len(self._contact_names) + self._flight_info_size=rhc_refs_tmp.flight_info.n_cols + self._flight_setting_size=rhc_refs_tmp.flight_settings_req.n_cols + # height sensor metadata (if present) + self._height_grid_size = None + self._height_flat_dim = 0 + if env_opts["add_heightmap_obs"]: + self._height_grid_size = robot_state_tmp.height_sensor.grid_size + self._height_flat_dim = robot_state_tmp.height_sensor.n_cols + + robot_state_tmp.close() + rhc_status_tmp.close() + rhc_refs_tmp.close() + + # defining obs dimension + obs_dim=3 # normalized gravity vector in base frame + obs_dim+=6 # meas twist in base frame + obs_dim+=2*n_jnts # joint pos + vel + if env_opts["add_mpc_contact_f_to_obs"]: + obs_dim+=3*self._n_contacts + obs_dim+=6 # twist reference in base frame frame + if env_opts["add_fail_idx_to_obs"]: + obs_dim+=1 # rhc controller failure index + if env_opts["add_term_mpc_capsize"]: + obs_dim+=3 # gravity vec from mpc + if env_opts["use_rhc_avrg_vel_tracking"]: + obs_dim+=6 # mpc avrg twist + if env_opts["add_flight_info"]: # contact pos, remaining duration, length, apex, landing height, landing dx, dy + obs_dim+=self._flight_info_size + if env_opts["add_flight_settings"]: + obs_dim+=self._flight_setting_size + if env_opts["add_rhc_cmds_to_obs"]: + obs_dim+=3*n_jnts + if env_opts["use_action_history"]: + if env_opts["add_prev_actions_stats_to_obs"]: + obs_dim+=3*actions_dim # previous agent actions statistics (mean, std + last action) + else: # full action history + obs_dim+=env_opts["actions_history_size"]*actions_dim + if env_opts["use_action_smoothing"]: + obs_dim+=actions_dim # it's better to also add the smoothed actions as obs + if env_opts["add_periodic_clock_to_obs"]: + obs_dim+=2 + if env_opts["add_heightmap_obs"]: + 
obs_dim+=self._height_flat_dim + # Agent task reference + self._add_env_opt(env_opts, "use_pof0", default=True) # with some prob, references will be null + self._add_env_opt(env_opts, "pof0_linvel", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr) + self._add_env_opt(env_opts, "pof0_omega", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr) + self._add_env_opt(env_opts, "max_linvel_ref", default=0.3) # m/s + self._add_env_opt(env_opts, "max_angvel_ref", default=0.0) # rad/s + if env_opts["add_angvel_ref_rand"]: + env_opts["max_angvel_ref"]=0.4 + + # ready to init base class + self._this_child_path = os.path.abspath(__file__) + AugMPCTrainingEnvBase.__init__(self, + namespace=namespace, + obs_dim=obs_dim, + actions_dim=actions_dim, + env_name=env_name, + verbose=verbose, + vlevel=vlevel, + use_gpu=use_gpu, + dtype=dtype, + debug=debug, + override_agent_refs=override_agent_refs, + timeout_ms=timeout_ms, + env_opts=env_opts) + + def _custom_post_init(self): + + device = "cuda" if self._use_gpu else "cpu" + + self._update_jnt_blacklist() # update blacklist for joints + + # constant base-frame unit vectors (reuse to avoid per-call allocations) + self._base_x_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device) + self._base_x_dir[:, 0] = 1.0 + self._base_y_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device) + self._base_y_dir[:, 1] = 1.0 + + self._twist_ref_lb = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=-1.5) + self._twist_ref_ub = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=1.5) + + # task reference parameters (world frame) + # lin vel + self._twist_ref_lb[0, 0] = -self._env_opts["max_linvel_ref"] + self._twist_ref_lb[0, 1] = -self._env_opts["max_linvel_ref"] + self._twist_ref_lb[0, 2] = 0.0 + self._twist_ref_ub[0, 0] = self._env_opts["max_linvel_ref"] + self._twist_ref_ub[0, 1] = 
self._env_opts["max_linvel_ref"] + self._twist_ref_ub[0, 2] = 0.0 + # angular vel + self._twist_ref_lb[0, 3] = 0.0 + self._twist_ref_lb[0, 4] = 0.0 + self._twist_ref_lb[0, 5] = -self._env_opts["max_angvel_ref"] + self._twist_ref_ub[0, 3] = 0.0 + self._twist_ref_ub[0, 4] = 0.0 + self._twist_ref_ub[0, 5] = self._env_opts["max_angvel_ref"] + + self._twist_ref_offset = (self._twist_ref_ub + self._twist_ref_lb)/2.0 + self._twist_ref_scale = (self._twist_ref_ub - self._twist_ref_lb)/2.0 + + # adding some custom db info + agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=False) + agent_twist_ref_data = EpisodicData("AgentTwistRefs", agent_twist_ref, + ["v_x", "v_y", "v_z", "omega_x", "omega_y", "omega_z"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + rhc_fail_idx = EpisodicData("RhcFailIdx", self._rhc_fail_idx(gpu=False), ["rhc_fail_idx"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + f_names=[] + for contact in self._contact_names: + f_names.append(f"fc_{contact}_x_base_loc") + f_names.append(f"fc_{contact}_y_base_loc") + f_names.append(f"fc_{contact}_z_base_loc") + rhc_contact_f = EpisodicData("RhcContactForces", + self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False), + f_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + self._pow_db_data=torch.full(size=(self._n_envs,2), + dtype=self._dtype, device="cpu", + fill_value=-1.0) + power_db = EpisodicData("Power", + self._pow_db_data, + ["CoT", "W"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + self._track_error_db=torch.full_like(agent_twist_ref, fill_value=0.0) + task_err_db = EpisodicData("TrackingError", + agent_twist_ref, + 
["e_vx", "e_vy", "e_vz", "e_omegax", "e_omegay", "e_omegaz"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + self._add_custom_db_data(db_data=agent_twist_ref_data) + self._add_custom_db_data(db_data=rhc_fail_idx) + self._add_custom_db_data(db_data=rhc_contact_f) + self._add_custom_db_data(db_data=power_db) + self._add_custom_db_data(db_data=task_err_db) + + # rewards + self._task_err_weights = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=0.0) + if self._env_opts["directional_tracking"]: + self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"] # frontal + self._task_err_weights[0, 1] = self._env_opts["task_track_lat_weight"] # lateral + self._task_err_weights[0, 2] = self._env_opts["task_track_vert_weight"] # vertical + self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"] + self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + else: + self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"] + self._task_err_weights[0, 1] = self._env_opts["task_track_front_weight"] + self._task_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"] + self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"] + self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + + self._task_pred_err_weights = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=0.0) + if self._env_opts["directional_tracking"]: + self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 1] = self._env_opts["task_track_lat_weight"] + self._task_pred_err_weights[0, 2] = self._env_opts["task_track_vert_weight"] + self._task_pred_err_weights[0, 3] = 
self._env_opts["task_track_omega_x_weight"] + self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + else: + self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 1] = self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"] + self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + + self._power_penalty_weights = torch.full((1, self._n_jnts), dtype=self._dtype, device=device, + fill_value=1.0) + self._power_penalty_weights_sum = torch.sum(self._power_penalty_weights).item() + subr_names=self._get_rewards_names() # initializes + + # reward clipping + self._reward_thresh_lb[:, :] = self._env_opts["reward_lb_default"] + self._reward_thresh_ub[:, :]= self._env_opts["reward_ub_default"] + + for reward_name, env_opt_key in self._reward_lb_map.items(): + if reward_name in self._reward_map: + self._reward_thresh_lb[:, self._reward_map[reward_name]] = self._env_opts[env_opt_key] + + # obs bounds + self._obs_threshold_lb = -1e3 # used for clipping observations + self._obs_threshold_ub = 1e3 + + # actions + if not self._env_opts["use_prob_based_stepping"]: + self._is_continuous_actions[6:10]=False + + v_cmd_max = self._env_opts["max_cmd_v"] + omega_cmd_max = self._env_opts["max_cmd_omega"] + self._actions_lb[:, 0:3] = -v_cmd_max + self._actions_ub[:, 0:3] = v_cmd_max + self._actions_lb[:, 3:6] = -omega_cmd_max # twist cmds + self._actions_ub[:, 3:6] = omega_cmd_max + if "contact_flag_start" in self._actions_map: + idx=self._actions_map["contact_flag_start"] + if self._env_opts["use_prob_based_stepping"]: + self._actions_lb[:, idx:idx+self._n_contacts] = 0.0 # 
contact flags + self._actions_ub[:, idx:idx+self._n_contacts] = 1.0 + else: + self._actions_lb[:, idx:idx+self._n_contacts] = -1.0 + self._actions_ub[:, idx:idx+self._n_contacts] = 1.0 + + self.default_action[:, :] = (self._actions_ub+self._actions_lb)/2.0 + # self.default_action[:, ~self._is_continuous_actions] = 1.0 + self.safe_action[:, :] = self.default_action + if "contact_flag_start" in self._actions_map: # safe actions for contacts is 1 (keep contact) + idx=self._actions_map["contact_flag_start"] + self.safe_action[:, idx:idx+self._n_contacts] = 1.0 + + # assign obs bounds (useful if not using automatic obs normalization) + obs_names=self._get_obs_names() + obs_patterns=["gn", + "linvel", + "omega", + "q_jnt", + "v_jnt", + "fc", + "rhc_fail", + "rhc_cmd_q", + "rhc_cmd_v", + "rhc_cmd_eff", + "flight_pos" + ] + obs_ubs=[1.0, + 5*v_cmd_max, + 5*omega_cmd_max, + 2*torch.pi, + 30.0, + 2.0, + 1.0, + 2*torch.pi, + 30.0, + 200.0, + self._n_nodes_rhc.mean().item()] + obs_lbs=[-1.0, + -5*v_cmd_max, + -5*omega_cmd_max, + -2*torch.pi, + -30.0, + -2.0, + 0.0, + -2*torch.pi, + -30.0, + -200.0, + 0.0] + obs_bounds = {name: (lb, ub) for name, lb, ub in zip(obs_patterns, obs_lbs, obs_ubs)} + + for i in range(len(obs_names)): + obs_name=obs_names[i] + for pattern in obs_patterns: + if pattern in obs_name: + lb=obs_bounds[pattern][0] + ub=obs_bounds[pattern][1] + self._obs_lb[:, i]=lb + self._obs_ub[:, i]=ub + break + + # handle action memory buffer in obs + if self._env_opts["use_action_history"]: # just history stats + if self._env_opts["add_prev_actions_stats_to_obs"]: + i=0 + prev_actions_idx = next((i for i, s in enumerate(obs_names) if "_prev_act" in s), None) + prev_actions_mean_idx=next((i for i, s in enumerate(obs_names) if "_avrg_act" in s), None) + prev_actions_std_idx=next((i for i, s in enumerate(obs_names) if "_std_act" in s), None) + + # assume actions are always normalized in [-1, 1] by agent + if prev_actions_idx is not None: + self._obs_lb[:, 
prev_actions_idx:prev_actions_idx+self.actions_dim()]=-1.0 + self._obs_ub[:, prev_actions_idx:prev_actions_idx+self.actions_dim()]=1.0 + if prev_actions_mean_idx is not None: + self._obs_lb[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=-1.0 + self._obs_ub[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=1.0 + if prev_actions_std_idx is not None: + self._obs_lb[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=0 + self._obs_ub[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=1.0 + + else: # full history + i=0 + first_action_mem_buffer_idx = next((i for i, s in enumerate(obs_names) if "_m1_act" in s), None) + if first_action_mem_buffer_idx is not None: + action_idx_start_idx_counter=first_action_mem_buffer_idx + for j in range(self._env_opts["actions_history_size"]): + self._obs_lb[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=-1.0 + self._obs_ub[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=1.0 + action_idx_start_idx_counter+=self.actions_dim() + + # some aux data to avoid allocations at training runtime + self._rhc_twist_cmd_rhc_world=self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu).detach().clone() + self._rhc_twist_cmd_rhc_h=self._rhc_twist_cmd_rhc_world.detach().clone() + self._agent_twist_ref_current_w=self._rhc_twist_cmd_rhc_world.detach().clone() + self._agent_twist_ref_current_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._substep_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._step_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._root_twist_avrg_rhc_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._root_twist_avrg_rhc_base_loc_next=self._rhc_twist_cmd_rhc_world.detach().clone() + + self._random_thresh_contacts=torch.rand((self._n_envs,self._n_contacts), device=device) + # aux data + self._task_err_scaling = 
torch.zeros((self._n_envs, 1),dtype=self._dtype,device=device) + + self._pof1_b_linvel= torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_linvel"]) + self._pof1_b_omega = torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_omega"]) + self._bernoulli_coeffs_linvel = self._pof1_b_linvel.clone() + self._bernoulli_coeffs_linvel[:, :] = 1.0 + self._bernoulli_coeffs_omega = self._pof1_b_omega.clone() + self._bernoulli_coeffs_omega[:, :] = 1.0 + + # smoothing + self._track_rew_smoother=None + if self._env_opts["use_track_reward_smoother"]: + sub_reward_proxy=self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)[:, 0:1] + smoothing_dt=self._substep_dt + if not self._is_substep_rew[self._reward_map["task_error"]]: # assuming first reward is tracking + smoothing_dt=self._substep_dt*self._action_repeat + self._track_rew_smoother=ExponentialSignalSmoother( + name=self.__class__.__name__+"VelErrorSmoother", + signal=sub_reward_proxy, # same dimension of vel error + update_dt=smoothing_dt, + smoothing_horizon=self._env_opts["smoothing_horizon_vel_err"], + target_smoothing=0.5, + debug=self._is_debug, + dtype=self._dtype, + use_gpu=self._use_gpu) + + # if we need the action rate, we also need the action history + if self._env_opts["add_action_rate_reward"]: + if not self._env_opts["use_action_history"]: + Journal.log(self.__class__.__name__, + "_custom_post_init", + "add_action_rate_reward is True, but ", + LogType.EXCEP, + throw_when_excep=True) + + history_size=self._env_opts["actions_history_size"] + if history_size < 2: + Journal.log(self.__class__.__name__, + "_custom_post_init", + f"add_action_rate_reward requires actions history ({history_size}) to be >=2!", + LogType.EXCEP, + throw_when_excep=True) + + # add periodic timer if required + self._periodic_clock=None + if self._env_opts["add_periodic_clock_to_obs"]: + self._add_env_opt(self._env_opts, "clock_period", + 
default=int(1.5*self._action_repeat*self.task_rand_timeout_bounds()[1])) # correcting with n substeps + # (we are using the _substep_abs_counter counter) + self._periodic_clock=PeriodicTimer(counter=self._substep_abs_counter, + period=self._env_opts["clock_period"], + dtype=self._dtype, + device=self._device) + + def get_file_paths(self): + paths=AugMPCTrainingEnvBase.get_file_paths(self) + paths.append(self._this_child_path) + return paths + + def get_aux_dir(self): + aux_dirs = [] + path_getter = PathsGetter() + aux_dirs.append(path_getter.RHCDIR) + return aux_dirs + + def _get_reward_scaling(self): + if self._env_opts["single_task_ref_per_episode"]: + return self._env_opts["n_steps_task_rand_ub"] + else: + return self._env_opts["episode_timeout_ub"] + + def _max_ep_length(self): + if self._env_opts["single_task_ref_per_episode"]: + return self._env_opts["n_steps_task_rand_ub"] + else: + return self._env_opts["episode_timeout_ub"] + + def _check_sub_truncations(self): + # overrides parent + sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu) + sub_truncations[:, 0:1] = self._ep_timeout_counter.time_limits_reached() + if self._env_opts["single_task_ref_per_episode"]: + sub_truncations[:, 1:2] = self._task_rand_counter.time_limits_reached() + + def _check_sub_terminations(self): + # default behaviour-> to be overriden by child + sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu) + + # terminate if mpc just failed + sub_terminations[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=self._use_gpu) + + # check if robot is capsizing + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + check_capsize(quat=robot_q_meas,max_angle=self._max_pitch_angle, + output_t=self._is_capsized) + sub_terminations[:, 1:2] = self._is_capsized + + if self._env_opts["add_term_mpc_capsize"]: + # check if robot is about to capsize accordin to MPC + robot_q_pred = 
self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) + check_capsize(quat=robot_q_pred,max_angle=self._max_pitch_angle, + output_t=self._is_rhc_capsized) + sub_terminations[:, 2:3] = self._is_rhc_capsized + + def _custom_reset(self): + return None + + def reset(self): + AugMPCTrainingEnvBase.reset(self) + + def _pre_substep(self): + pass + + def _custom_post_step(self,episode_finished): + # executed after checking truncations and terminations and remote env reset + if self._use_gpu: + time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached().cuda(),episode_finished) + self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten()) + else: + time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached(),episode_finished) + self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten()) + # task refs are randomized in world frame -> we rotate them in base local + # (not super efficient, we should do it just for the finished envs) + self._update_loc_twist_refs() + + if self._track_rew_smoother is not None: # reset smoother + self._track_rew_smoother.reset_all(to_be_reset=episode_finished.flatten(), + value=0.0) + + def _custom_post_substp_pre_rew(self): + self._update_loc_twist_refs() + + def _custom_post_substp_post_rew(self): + pass + + def _update_loc_twist_refs(self): + # get fresh robot orientation + if not self._override_agent_refs: + robot_q = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + # rotate agent ref from world to robot base + world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q, + t_out=self._agent_twist_ref_current_base_loc) + # write it to agent refs tensors + self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc, + gpu=self._use_gpu) + + def _apply_actions_to_rhc(self): + + self._set_rhc_refs() + + self._write_rhc_refs() + + def _set_rhc_refs(self): + + action_to_be_applied 
= self.get_actual_actions() # see _get_action_names() to get + # the meaning of each component of this tensor + + rhc_latest_twist_cmd = self._rhc_refs.rob_refs.root_state.get(data_type="twist", gpu=self._use_gpu) + rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror(gpu=self._use_gpu) + rhc_latest_pos_ref = self._rhc_refs.rob_refs.contact_pos.get(data_type="p_z", gpu=self._use_gpu) + rhc_q=self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) # this is always + # avaialble + + # reference twist for MPC is assumed to always be specified in MPC's + # horizontal frame, while agent actions are interpreted as in MPC's + # base frame -> we need to rotate the actions into the horizontal frame + base2world_frame(t_b=action_to_be_applied[:, 0:6],q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_world) + w2hor_frame(t_w=self._rhc_twist_cmd_rhc_world,q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_h) + + rhc_latest_twist_cmd[:, 0:6] = self._rhc_twist_cmd_rhc_h + + # self._rhc_refs.rob_refs.root_state.set(data_type="p", data=rhc_latest_p_ref, + # gpu=self._use_gpu) + self._rhc_refs.rob_refs.root_state.set(data_type="twist", data=rhc_latest_twist_cmd, + gpu=self._use_gpu) + + # contact flags + idx=self._actions_map["contact_flag_start"] + if self._env_opts["use_prob_based_stepping"]: + # encode actions as probs + self._random_thresh_contacts.uniform_() # random values in-place between 0 and 1 + rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] >= self._random_thresh_contacts # keep contact with + # probability action_to_be_applied[:, 6:10] + else: # just use a threshold + rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] > self._env_opts["step_thresh"] + # actually apply actions to controller + + def _write_rhc_refs(self): + + if self._use_gpu: + # GPU->CPU --> we cannot use asynchronous data transfer since it's unsafe + self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) # 
write from gpu to cpu mirror + self._rhc_refs.contact_flags.synch_mirror(from_gpu=True,non_blocking=False) + self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=True,non_blocking=False) + + self._rhc_refs.rob_refs.root_state.synch_all(read=False, retry=True) # write mirror to shared mem + self._rhc_refs.contact_flags.synch_all(read=False, retry=True) + self._rhc_refs.rob_refs.contact_pos.synch_all(read=False, retry=True) + + def _override_refs(self, + env_indxs: torch.Tensor = None): + + # runs at every post_step + self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem + if self._use_gpu: + # copies latest refs to GPU + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False) + + agent_linvel_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="v", + gpu=self._use_gpu) + + agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega", + gpu=self._use_gpu) + + # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + \ + # agent_p_ref_current[:, 0:2] + self._agent_twist_ref_current_w[:, 0:3]=agent_linvel_ref_current # set linvel target + + self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. 
vel target from shared mem + + def _fill_substep_obs(self, + obs: torch.Tensor): + + # measured stuff + robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu) + robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + + if self._env_opts["use_linvel_from_rhc"]: + # twist estimate from mpc + robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu) + obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_rhc_base_loc_next[:, 0:3] + else: + obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_meas_base_loc[:, 0:3] + obs[:, self._obs_map["omega_meas"]:(self._obs_map["omega_meas"]+3)] = robot_twist_meas_base_loc[:, 3:6] + + obs[:, self._obs_map["v_jnt"]:(self._obs_map["v_jnt"]+self._n_jnts)] = robot_jnt_v_meas + + def _fill_step_obs(self, + obs: torch.Tensor): + + # measured stuff + robot_gravity_norm_base_loc = self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu) + robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu) + robot_jnt_q_meas = self._robot_state.jnts_state.get(data_type="q",gpu=self._use_gpu) + if self._jnt_q_blacklist_idxs is not None: # we don't want to read joint pos from blacklist + robot_jnt_q_meas[:, self._jnt_q_blacklist_idxs]=0.0 + robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + + # twist estimate from mpc + robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu) + # cmds for jnt imp to be applied next + robot_jnt_q_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="q",gpu=self._use_gpu) + robot_jnt_v_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="v",gpu=self._use_gpu) + robot_jnt_eff_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="eff",gpu=self._use_gpu) + + flight_info_now = 
self._rhc_refs.flight_info.get(data_type="all",gpu=self._use_gpu) + flight_settings_now = self._rhc_refs.flight_settings_req.get(data_type="all",gpu=self._use_gpu) + + # refs + agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) + + obs[:, self._obs_map["gn_base"]:(self._obs_map["gn_base"]+3)] = robot_gravity_norm_base_loc # norm. gravity vector in base frame + + obs[:, self._obs_map["q_jnt"]:(self._obs_map["q_jnt"]+self._n_jnts)] = robot_jnt_q_meas # meas jnt pos + obs[:, self._obs_map["twist_ref"]:(self._obs_map["twist_ref"]+6)] = agent_twist_ref # high lev agent refs to be tracked + + if self._env_opts["add_mpc_contact_f_to_obs"]: + n_forces=3*len(self._contact_names) + obs[:, self._obs_map["contact_f_mpc"]:(self._obs_map["contact_f_mpc"]+n_forces)] = self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=self._use_gpu) + if self._env_opts["add_fail_idx_to_obs"]: + obs[:, self._obs_map["rhc_fail_idx"]:(self._obs_map["rhc_fail_idx"]+1)] = self._rhc_fail_idx(gpu=self._use_gpu) + if self._env_opts["add_term_mpc_capsize"]: + obs[:, self._obs_map["gn_base_mpc"]:(self._obs_map["gn_base_mpc"]+3)] = self._rhc_cmds.root_state.get(data_type="gn",gpu=self._use_gpu) + if self._env_opts["use_rhc_avrg_vel_tracking"]: + self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc, base_loc=True) + obs[:, self._obs_map["avrg_twist_mpc"]:(self._obs_map["avrg_twist_mpc"]+6)] = self._root_twist_avrg_rhc_base_loc + if self._env_opts["add_flight_info"]: + obs[:, self._obs_map["flight_info"]:(self._obs_map["flight_info"]+self._flight_info_size)] = flight_info_now + if self._env_opts["add_flight_settings"]: + obs[:, self._obs_map["flight_settings_req"]:(self._obs_map["flight_settings_req"]+self._flight_setting_size)] = \ + flight_settings_now + + if self._env_opts["add_rhc_cmds_to_obs"]: + obs[:, self._obs_map["rhc_cmds_q"]:(self._obs_map["rhc_cmds_q"]+self._n_jnts)] = robot_jnt_q_rhc_applied_next + obs[:, 
self._obs_map["rhc_cmds_v"]:(self._obs_map["rhc_cmds_v"]+self._n_jnts)] = robot_jnt_v_rhc_applied_next + obs[:, self._obs_map["rhc_cmds_eff"]:(self._obs_map["rhc_cmds_eff"]+self._n_jnts)] = robot_jnt_eff_rhc_applied_next + if self._env_opts["use_action_history"]: + if self._env_opts["add_prev_actions_stats_to_obs"]: # just add last, std and mean to obs + obs[:, self._obs_map["action_history_prev"]:(self._obs_map["action_history_prev"]+self.actions_dim())]=self._act_mem_buffer.get(idx=0) + obs[:, self._obs_map["action_history_avrg"]:(self._obs_map["action_history_avrg"]+self.actions_dim())]=self._act_mem_buffer.mean(clone=False) + obs[:, self._obs_map["action_history_std"]:(self._obs_map["action_history_std"]+self.actions_dim())]=self._act_mem_buffer.std(clone=False) + else: # add whole memory buffer to obs + next_idx=self._obs_map["action_history"] + for i in range(self._env_opts["actions_history_size"]): + obs[:, next_idx:(next_idx+self.actions_dim())]=self._act_mem_buffer.get(idx=i) # get all (n_envs x (obs_dim x horizon)) + next_idx+=self.actions_dim() + + if self._env_opts["use_action_smoothing"]: # adding smoothed actions + obs[:, self._obs_map["action_smoothing"]:(self._obs_map["action_smoothing"]+self.actions_dim())]=self.get_actual_actions(normalized=True) + next_idx+=self.actions_dim() + + if self._env_opts["add_periodic_clock_to_obs"]: + obs[:, next_idx:(next_idx+2)]=self._periodic_clock.get() + next_idx+=2 + if self._env_opts["add_heightmap_obs"]: + hm = self._robot_state.height_sensor.get(gpu=self._use_gpu) + obs[:, self._obs_map["heightmap"]:(self._obs_map["heightmap"]+self._height_flat_dim)] = hm + + def _get_custom_db_data(self, + episode_finished, + ignore_ep_end): + episode_finished = episode_finished.cpu() + self.custom_db_data["AgentTwistRefs"].update( + new_data=self._agent_refs.rob_refs.root_state.get(data_type="twist", gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + 
self.custom_db_data["RhcFailIdx"].update(new_data=self._rhc_fail_idx(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["RhcContactForces"].update( + new_data=self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["Power"].update( + new_data=self._pow_db_data, + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["TrackingError"].update( + new_data=self._track_error_db, + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + # reward functions + def _action_rate(self): + continuous_actions=self._is_continuous_actions + discrete_actions=~self._is_continuous_actions + n_c_actions=continuous_actions.sum().item() + n_d_actions=discrete_actions.sum().item() + actions_prev=self._act_mem_buffer.get(idx=1) + actions_now=self._act_mem_buffer.get(idx=0) + actions_rate=(actions_now-actions_prev) # actions already normalized + actions_rate_c=actions_rate[:, continuous_actions] + actions_rate_d=actions_rate[:, discrete_actions] + + actions_rate_sqrd=None # assuming n_c_actions > 0 always + actions_rate_sqrd=self._env_opts["action_rate_rew_c_weight"]*torch.sum(actions_rate_c*actions_rate_c, dim=1, keepdim=True)/n_c_actions + if discrete_actions.any(): + actions_rate_sqrd+=self._env_opts["action_rate_rew_d_weight"]*torch.sum(actions_rate_d*actions_rate_d, dim=1, keepdim=True)/n_d_actions + return actions_rate_sqrd + + def _mech_pow(self, jnts_vel, jnts_effort, autoscaled: bool = False, drained: bool = True): + mech_pow_jnts=(jnts_effort*jnts_vel)*self._power_penalty_weights + if drained: + mech_pow_jnts.clamp_(0.0,torch.inf) # do not account for regenerative power + mech_pow_tot = torch.sum(mech_pow_jnts, dim=1, keepdim=True) + self._pow_db_data[:, 1:2]=mech_pow_tot.cpu() + if autoscaled: + mech_pow_tot=mech_pow_tot/self._power_penalty_weights_sum + return mech_pow_tot + + def _cost_of_transport(self, 
jnts_vel, jnts_effort, v_norm, mass_weight: bool = False): + drained_mech_pow=self._mech_pow(jnts_vel=jnts_vel, + jnts_effort=jnts_effort, + drained=True) + CoT=drained_mech_pow/(v_norm+1e-2) + if mass_weight: + robot_weight=self._rhc_robot_weight + CoT=CoT/robot_weight + + # add to db metrics + self._pow_db_data[:, 0:1]=CoT.cpu() + self._pow_db_data[:, 1:2]=drained_mech_pow.cpu() + + return CoT + + def _jnt_vel_penalty(self, jnts_vel): + weighted_jnt_vel = torch.sum(jnts_vel*jnts_vel, dim=1, keepdim=True)/self._n_jnts + return weighted_jnt_vel + + def _rhc_fail_idx(self, gpu: bool): + rhc_fail_idx = self._rhc_status.rhc_fail_idx.get_torch_mirror(gpu=gpu) + return self._env_opts["rhc_fail_idx_scale"]*rhc_fail_idx + + # basic L1 and L2 error functions + def _track_err_wmse(self, task_ref, task_meas, scaling, weights): + # weighted mean-squared error computation + task_error = (task_meas-task_ref) + # add to db metrics + self._track_error_db[:, :]=torch.abs(task_error) + scaled_error=task_error/scaling + + task_wmse = torch.sum(scaled_error*scaled_error*weights, dim=1, keepdim=True)/torch.sum(weights).item() + return task_wmse # weighted mean square error (along task dimension) + + def _track_err_dir_wmse(self, task_ref, task_meas, scaling, weights): + # weighted DIRECTIONAL mean-squared error computation + task_error = (task_meas-task_ref) + # add to db metrics + self._track_error_db[:, :]=torch.abs(task_error) + task_error=task_error/scaling + + # projection along commanded direction and gravity, matching paper formulation + v_ref=task_ref[:, 0:3] + delta_v=task_error[:, 0:3] + + v_ref_norm=torch.norm(v_ref, dim=1, keepdim=True) + cmd_dir=v_ref/(v_ref_norm+1e-8) + # fallback to measured direction if command is (near) zero to avoid degenerate projection + meas_dir=task_meas[:, 0:3] + meas_dir=meas_dir/(torch.norm(meas_dir, dim=1, keepdim=True)+1e-8) + cmd_dir=torch.where((v_ref_norm>1e-6), cmd_dir, meas_dir) + + gravity_dir = 
self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu) # normalized gravity in base frame + gravity_dir = gravity_dir/(torch.norm(gravity_dir, dim=1, keepdim=True)+1e-8) + + forward_error=torch.sum(delta_v*cmd_dir, dim=1, keepdim=True) + vertical_error=torch.sum(delta_v*gravity_dir, dim=1, keepdim=True) + lateral_vec=delta_v - vertical_error*gravity_dir - forward_error*cmd_dir + lateral_error=torch.norm(lateral_vec, dim=1, keepdim=True) + + # angular directional components: use gravity as vertical, project base x onto the world xy plane for roll, and close the triad with pitch + base_x = self._base_x_dir + base_y = self._base_y_dir + + roll_dir = base_x - torch.sum(base_x*gravity_dir, dim=1, keepdim=True)*gravity_dir + roll_norm = torch.norm(roll_dir, dim=1, keepdim=True) + roll_dir_alt = base_y - torch.sum(base_y*gravity_dir, dim=1, keepdim=True)*gravity_dir # fallback if base x is almost aligned with gravity + roll_norm_alt = torch.norm(roll_dir_alt, dim=1, keepdim=True) + use_alt_roll = roll_norm < 1e-6 + roll_dir = torch.where(use_alt_roll, roll_dir_alt, roll_dir) + roll_norm = torch.where(use_alt_roll, roll_norm_alt, roll_norm) + roll_dir = roll_dir/(roll_norm+1e-8) + + pitch_dir = torch.cross(gravity_dir, roll_dir, dim=1) + pitch_dir = pitch_dir/(torch.norm(pitch_dir, dim=1, keepdim=True)+1e-8) + + delta_omega = task_error[:, 3:6] + omega_roll_error = torch.sum(delta_omega*roll_dir, dim=1, keepdim=True) + omega_pitch_error = torch.sum(delta_omega*pitch_dir, dim=1, keepdim=True) + omega_vertical_error = torch.sum(delta_omega*gravity_dir, dim=1, keepdim=True) + + full_error=torch.cat((forward_error, lateral_error, vertical_error, omega_roll_error, omega_pitch_error, omega_vertical_error), dim=1) + task_wmse_dir = torch.sum(full_error*full_error*weights, dim=1, keepdim=True)/torch.sum(weights).item() + return task_wmse_dir # weighted mean square error (along task dimension) + + # L2 errors + def _tracking_err_rel_wmse(self, task_ref, task_meas, 
weights, directional: bool = False): + ref_norm = task_ref.norm(dim=1,keepdim=True) # norm of the full twist reference + self._task_err_scaling[:, :] = ref_norm+1e-2 + if directional: + task_rel_err_wmse=self._track_err_dir_wmse(task_ref=task_ref, task_meas=task_meas, + scaling=self._task_err_scaling, weights=weights) + else: + task_rel_err_wmse=self._track_err_wmse(task_ref=task_ref, task_meas=task_meas, + scaling=self._task_err_scaling, weights=weights) + return task_rel_err_wmse + + def _tracking_err_wmse(self, task_ref, task_meas, weights, directional: bool = False): + self._task_err_scaling[:, :] = 1 + if directional: + task_err_wmse = self._track_err_dir_wmse(task_ref=task_ref, + task_meas=task_meas, scaling=self._task_err_scaling, weights=weights) + else: + task_err_wmse = self._track_err_wmse(task_ref=task_ref, + task_meas=task_meas, scaling=self._task_err_scaling, weights=weights) + return task_err_wmse + + # L1 errors + def _tracking_err_rel_lin(self, task_ref, task_meas, weights, directional): + task_rel_err_wmse = self._tracking_err_rel_wmse(task_ref=task_ref, + task_meas=task_meas, weights=weights, directional=directional) + return task_rel_err_wmse.sqrt() + + def _tracking_err_lin(self, task_ref, task_meas, weights, directional: bool = False): + self._task_err_scaling[:, :] = 1 + task_err_wmse=self._tracking_err_wmse(task_ref=task_ref, + task_meas=task_meas, weights=weights, directional=directional) + return task_err_wmse.sqrt() + + # reward computation over steps/substeps + def _compute_step_rewards(self): + + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + + # tracking reward + if self._env_opts["use_L1_norm"]: # linear errors + task_error_fun = self._tracking_err_lin + if self._env_opts["use_relative_error"]: + task_error_fun = self._tracking_err_rel_lin + else: # quadratic error + task_error_fun = self._tracking_err_wmse + if self._env_opts["use_relative_error"]: + task_error_fun = self._tracking_err_rel_wmse + + 
agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) # high level agent refs (hybrid twist) + self._get_avrg_step_root_twist(out=self._step_avrg_root_twist_base_loc, base_loc=True) + task_error = task_error_fun(task_meas=self._step_avrg_root_twist_base_loc, + task_ref=agent_task_ref_base_loc, + weights=self._task_err_weights, + directional=self._env_opts["directional_tracking"]) + + idx=self._reward_map["task_error"] + if self._env_opts["use_exp_track_rew"]: + sub_rewards[:, idx:(idx+1)] = \ + self._env_opts["task_track_offset_exp"]*torch.exp(-self._env_opts["task_track_scale_exp"]*task_error) + else: # simple linear reward + sub_rewards[:, idx:(idx+1)] = \ + self._env_opts["task_track_offset"]*(1.0-self._env_opts["task_track_scale"]*task_error) + + if self._env_opts["use_fail_idx_weight"]: # add weight based on fail idx + fail_idx=self._rhc_fail_idx(gpu=self._use_gpu) + sub_rewards[:, idx:(idx+1)]=(1-fail_idx)*sub_rewards[:, idx:(idx+1)] + if self._track_rew_smoother is not None: # smooth reward if required + self._track_rew_smoother.update(new_signal=sub_rewards[:, 0:1]) + sub_rewards[:, idx:(idx+1)]=self._track_rew_smoother.get() + + # action rate + if self._env_opts["add_action_rate_reward"]: + action_rate=self._action_rate() + idx=self._reward_map["action_rate"] + sub_rewards[:, idx:(idx+1)] = self._env_opts["action_rate_offset"]*(1.0-self._env_opts["action_rate_scale"]*action_rate) + + # mpc vel tracking + if self._env_opts["use_rhc_avrg_vel_tracking"]: + self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc_next,base_loc=True) # get estimated avrg vel + # from MPC after stepping + task_pred_error=task_error_fun(task_meas=self._root_twist_avrg_rhc_base_loc_next, + task_ref=agent_task_ref_base_loc, + weights=self._task_pred_err_weights, + directional=self._env_opts["directional_tracking"]) + idx=self._reward_map["rhc_avrg_vel_error"] + sub_rewards[:, idx:(idx+1)] = 
self._env_opts["task_pred_track_offset"]*torch.exp(-self._env_opts["task_pred_track_scale"]*task_pred_error) + + def _compute_substep_rewards(self): + + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + + if self._env_opts["add_CoT_reward"] or self._env_opts["add_power_reward"]: + jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + jnts_effort = self._robot_state.jnts_state.get(data_type="eff",gpu=self._use_gpu) + + if self._env_opts["add_CoT_reward"]: + if self._env_opts["use_CoT_wrt_ref"]: # uses v ref norm for computing cot + agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) + v_norm=torch.norm(agent_task_ref_base_loc, dim=1, keepdim=True) + else: # uses measured velocity + robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu) + v_norm=torch.norm(robot_twist_meas_base_loc[:,0:3], dim=1, keepdim=True) + CoT=self._cost_of_transport(jnts_vel=jnts_vel,jnts_effort=jnts_effort,v_norm=v_norm, + mass_weight=True + ) + idx=self._reward_map["CoT"] + sub_rewards[:, idx:(idx+1)] = self._env_opts["CoT_offset"]*(1-self._env_opts["CoT_scale"]*CoT) + if self._env_opts["add_power_reward"]: + weighted_mech_power=self._mech_pow(jnts_vel=jnts_vel,jnts_effort=jnts_effort, drained=True) + idx=self._reward_map["mech_pow"] + sub_rewards[:, idx:(idx+1)] = self._env_opts["power_offset"]*(1-self._env_opts["power_scale"]*weighted_mech_power) + + if self._env_opts["add_jnt_v_reward"]: + jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + jnt_v=self._jnt_vel_penalty(jnts_vel=jnts_vel) + idx=self._reward_map["jnt_v"] + sub_rewards[:, idx:(idx+1)] = self._env_opts["jnt_vel_offset"]*(1-self._env_opts["jnt_vel_scale"]*jnt_v) + + def _randomize_task_refs(self, + env_indxs: torch.Tensor = None): + + # we randomize the reference in world frame, since it's much more intuitive + # (it will be rotated in base frame when provided to 
the agent and used for rew + # computation) + + if self._env_opts["use_pof0"]: # sample from bernoulli distribution + torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"] + torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega) + if env_indxs is None: + random_uniform=torch.full_like(self._agent_twist_ref_current_w, fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[:, :] = random_uniform*self._twist_ref_scale + self._twist_ref_offset + self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel + self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega + else: + random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, :], fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[env_indxs, :] = random_uniform * self._twist_ref_scale + self._twist_ref_offset + self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :] + self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega + + def _get_obs_names(self): + + obs_names = [""] * self.obs_dim() + + # proprioceptive stream of obs + next_idx=0 + + self._obs_map["gn_base"]=next_idx + obs_names[0] = "gn_x_base_loc" + obs_names[1] = "gn_y_base_loc" + obs_names[2] = "gn_z_base_loc" + next_idx+=3 + + self._obs_map["linvel_meas"]=next_idx + obs_names[next_idx] = "linvel_x_base_loc" + obs_names[next_idx+1] = "linvel_y_base_loc" + obs_names[next_idx+2] = "linvel_z_base_loc" + next_idx+=3 + + self._obs_map["omega_meas"]=next_idx + obs_names[next_idx] = "omega_x_base_loc" + obs_names[next_idx+1] = 
"omega_y_base_loc" + obs_names[next_idx+2] = "omega_z_base_loc" + next_idx+=3 + + jnt_names=self.get_observed_joints() + self._obs_map["q_jnt"]=next_idx + for i in range(self._n_jnts): # jnt obs (pos): + obs_names[next_idx+i] = f"q_jnt_{jnt_names[i]}" + next_idx+=self._n_jnts + + self._obs_map["v_jnt"]=next_idx + for i in range(self._n_jnts): # jnt obs (v): + obs_names[next_idx+i] = f"v_jnt_{jnt_names[i]}" + next_idx+=self._n_jnts + + # references + self._obs_map["twist_ref"]=next_idx + obs_names[next_idx] = "linvel_x_ref_base_loc" + obs_names[next_idx+1] = "linvel_y_ref_base_loc" + obs_names[next_idx+2] = "linvel_z_ref_base_loc" + obs_names[next_idx+3] = "omega_x_ref_base_loc" + obs_names[next_idx+4] = "omega_y_ref_base_loc" + obs_names[next_idx+5] = "omega_z_ref_base_loc" + next_idx+=6 + + # contact forces + if self._env_opts["add_mpc_contact_f_to_obs"]: + i = 0 + self._obs_map["contact_f_mpc"]=next_idx + for contact in self._contact_names: + obs_names[next_idx+i] = f"fc_{contact}_x_base_loc" + obs_names[next_idx+i+1] = f"fc_{contact}_y_base_loc" + obs_names[next_idx+i+2] = f"fc_{contact}_z_base_loc" + i+=3 + next_idx+=3*len(self._contact_names) + + # data directly from MPC + if self._env_opts["add_fail_idx_to_obs"]: + self._obs_map["rhc_fail_idx"]=next_idx + obs_names[next_idx] = "rhc_fail_idx" + next_idx+=1 + if self._env_opts["add_term_mpc_capsize"]: + self._obs_map["gn_base_mpc"]=next_idx + obs_names[next_idx] = "gn_x_rhc_base_loc" + obs_names[next_idx+1] = "gn_y_rhc_base_loc" + obs_names[next_idx+2] = "gn_z_rhc_base_loc" + next_idx+=3 + if self._env_opts["use_rhc_avrg_vel_tracking"]: + self._obs_map["avrg_twist_mpc"]=next_idx + obs_names[next_idx] = "linvel_x_avrg_rhc" + obs_names[next_idx+1] = "linvel_y_avrg_rhc" + obs_names[next_idx+2] = "linvel_z_avrg_rhc" + obs_names[next_idx+3] = "omega_x_avrg_rhc" + obs_names[next_idx+4] = "omega_y_avrg_rhc" + obs_names[next_idx+5] = "omega_z_avrg_rhc" + next_idx+=6 + + if self._env_opts["add_flight_info"]: + 
self._obs_map["flight_info"]=next_idx + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_pos_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_len_remaining_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_len_nominal_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_apex_nominal_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_end_nominal_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + + if self._env_opts["add_flight_settings"]: + self._obs_map["flight_settings_req"]=next_idx + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_len_req_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_apex_req_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + for i in range(len(self._contact_names)): + obs_names[next_idx+i] = "flight_end_req_"+ self._contact_names[i] + next_idx+=len(self._contact_names) + + if self._env_opts["add_rhc_cmds_to_obs"]: + self._obs_map["rhc_cmds_q"]=next_idx + for i in range(self._n_jnts): # jnt obs (pos): + obs_names[next_idx+i] = f"rhc_cmd_q_{jnt_names[i]}" + next_idx+=self._n_jnts + self._obs_map["rhc_cmds_v"]=next_idx + for i in range(self._n_jnts): # jnt obs (pos): + obs_names[next_idx+i] = f"rhc_cmd_v_{jnt_names[i]}" + next_idx+=self._n_jnts + self._obs_map["rhc_cmds_eff"]=next_idx + for i in range(self._n_jnts): # jnt obs (pos): + obs_names[next_idx+i] = f"rhc_cmd_eff_{jnt_names[i]}" + next_idx+=self._n_jnts + + # previous actions info + if self._env_opts["use_action_history"]: + 
self._obs_map["action_history"]=next_idx + action_names = self._get_action_names() + if self._env_opts["add_prev_actions_stats_to_obs"]: + self._obs_map["action_history_prev"]=next_idx + for act_idx in range(self.actions_dim()): + obs_names[next_idx+act_idx] = action_names[act_idx]+f"_prev_act" + next_idx+=self.actions_dim() + self._obs_map["action_history_avrg"]=next_idx + for act_idx in range(self.actions_dim()): + obs_names[next_idx+act_idx] = action_names[act_idx]+f"_avrg_act" + next_idx+=self.actions_dim() + self._obs_map["action_history_std"]=next_idx + for act_idx in range(self.actions_dim()): + obs_names[next_idx+act_idx] = action_names[act_idx]+f"_std_act" + next_idx+=self.actions_dim() + else: + for i in range(self._env_opts["actions_history_size"]): + for act_idx in range(self.actions_dim()): + obs_names[next_idx+act_idx] = action_names[act_idx]+f"_m{i+1}_act" + next_idx+=self.actions_dim() + + if self._env_opts["use_action_smoothing"]: + self._obs_map["action_smoothing"]=next_idx + for smoothed_action in range(self.actions_dim()): + obs_names[next_idx+smoothed_action] = action_names[smoothed_action]+f"_smoothed" + next_idx+=self.actions_dim() + + if self._env_opts["add_periodic_clock_to_obs"]: + self._obs_map["clock"]=next_idx + obs_names[next_idx] = "clock_cos" + obs_names[next_idx+1] = "clock_sin" + next_idx+=2 + if self._env_opts["add_heightmap_obs"] and self._height_grid_size is not None: + self._obs_map["heightmap"]=next_idx + gs = self._height_grid_size + for r in range(gs): + for c in range(gs): + obs_names[next_idx] = f"height_r{r}_c{c}" + next_idx += 1 + + return obs_names + + def _set_substep_obs(self): + # which obs are to be averaged over substeps? 
+ + self._is_substep_obs[self._obs_map["linvel_meas"]:self._obs_map["linvel_meas"]+3]=True + self._is_substep_obs[self._obs_map["omega_meas"]:self._obs_map["omega_meas"]+3]=True + self._is_substep_obs[self._obs_map["v_jnt"]:self._obs_map["v_jnt"]+self._n_jnts]=True # also good for noise + + # self._is_substep_obs[self._obs_map["contact_f_mpc"]:self._obs_map["contact_f_mpc"]+3*len(self._contact_names)]=True + + def _get_action_names(self): + + action_names = [""] * self.actions_dim() + action_names[0] = "vx_cmd" # twist commands from agent to RHC controller + action_names[1] = "vy_cmd" + action_names[2] = "vz_cmd" + action_names[3] = "roll_omega_cmd" + action_names[4] = "pitch_omega_cmd" + action_names[5] = "yaw_omega_cmd" + + next_idx=6 + + self._actions_map["contact_flag_start"]=next_idx + for i in range(len(self._contact_names)): + contact=self._contact_names[i] + action_names[next_idx] = f"contact_flag_{contact}" + next_idx+=1 + + return action_names + + def _set_substep_rew(self): + + # which rewards are to be computed at substeps frequency? 
+ self._is_substep_rew[self._reward_map["task_error"]]=False + if self._env_opts["add_CoT_reward"]: + self._is_substep_rew[self._reward_map["CoT"]]=True + if self._env_opts["add_power_reward"]: + self._is_substep_rew[self._reward_map["mech_pow"]]=True + if self._env_opts["add_action_rate_reward"]: + self._is_substep_rew[self._reward_map["action_rate"]]=False + if self._env_opts["add_jnt_v_reward"]: + self._is_substep_rew[self._reward_map["jnt_v"]]=True + + if self._env_opts["use_rhc_avrg_vel_tracking"]: + self._is_substep_rew[self._reward_map["rhc_avrg_vel_error"]]=False + + def _get_rewards_names(self): + + counter=0 + reward_names = [] + + # adding rewards + reward_names.append("task_error") + self._reward_map["task_error"]=counter + self._reward_lb_map["task_error"]="task_error_reward_lb" + counter+=1 + if self._env_opts["add_power_reward"] and self._env_opts["add_CoT_reward"]: + Journal.log(self.__class__.__name__, + "__init__", + "Only one between CoT and power reward can be used!", + LogType.EXCEP, + throw_when_excep=True) + if self._env_opts["add_CoT_reward"]: + reward_names.append("CoT") + self._reward_map["CoT"]=counter + self._reward_lb_map["CoT"]="CoT_reward_lb" + counter+=1 + if self._env_opts["add_power_reward"]: + reward_names.append("mech_pow") + self._reward_map["mech_pow"]=counter + self._reward_lb_map["mech_pow"]="power_reward_lb" + counter+=1 + if self._env_opts["add_action_rate_reward"]: + reward_names.append("action_rate") + self._reward_map["action_rate"]=counter + self._reward_lb_map["action_rate"]="action_rate_reward_lb" + counter+=1 + if self._env_opts["add_jnt_v_reward"]: + reward_names.append("jnt_v") + self._reward_map["jnt_v"]=counter + self._reward_lb_map["jnt_v"]="jnt_vel_reward_lb" + counter+=1 + if self._env_opts["use_rhc_avrg_vel_tracking"]: + reward_names.append("rhc_avrg_vel_error") + self._reward_map["rhc_avrg_vel_error"]=counter + self._reward_lb_map["rhc_avrg_vel_error"]="rhc_avrg_vel_reward_lb" + counter+=1 + + return 
reward_names + + def _get_sub_trunc_names(self): + sub_trunc_names = [] + sub_trunc_names.append("ep_timeout") + if self._env_opts["single_task_ref_per_episode"]: + sub_trunc_names.append("task_ref_rand") + return sub_trunc_names + + def _get_sub_term_names(self): + # to be overridden by child class + sub_term_names = [] + sub_term_names.append("rhc_failure") + sub_term_names.append("robot_capsize") + if self._env_opts["add_term_mpc_capsize"]: + sub_term_names.append("rhc_capsize") + + return sub_term_names + + def _set_jnts_blacklist_pattern(self): + # used to exclude pos measurement from wheels + self._jnt_q_blacklist_patterns=["wheel"] diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/world_interface_base.py b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/world_interface_base.py new file mode 100644 index 0000000000000000000000000000000000000000..e178652510798c2984aacf23eb1e9e21ed586aad --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/world_interface_base.py @@ -0,0 +1,1718 @@ +from aug_mpc.controllers.rhc.augmpc_cluster_server import AugMpcClusterServer +from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperClnt +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetClnt +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest +from aug_mpc.utils.jnt_imp_control_base import JntImpCntrlBase +from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds +from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf +from aug_mpc.utils.math_utils import quaternion_difference +from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds + +from aug_mpc.utils.filtering import FirstOrderFilter + +from mpc_hive.utilities.homing import RobotHomer +from mpc_hive.utilities.shared_data.jnt_imp_control import JntImpCntrlData + +from 
EigenIPC.PyEigenIPC import VLevel, Journal, LogType, dtype +from EigenIPC.PyEigenIPC import StringTensorServer +from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper + +from typing import List, Dict, TypeVar + +import os +import inspect +import signal +import time + +import numpy as np +import torch + +from abc import ABC, abstractmethod + +JntImpCntrlChild = TypeVar('JntImpCntrlChild', bound='JntImpCntrlBase') + +class AugMPCWorldInterfaceBase(ABC): + + def __init__(self, + robot_names: List[str], + robot_urdf_paths: List[str], + robot_srdf_paths: List[str], + jnt_imp_config_paths: List[str], + n_contacts: List[int], + cluster_dt: List[float], + use_remote_stepping: List[bool], + name: str = "AugMPCWorldInterfaceBase", + num_envs: int = 1, + debug = False, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + n_init_step: int = 0, + timeout_ms: int = 60000, + env_opts: Dict = None, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + dump_basepath: str = "/tmp", + override_low_lev_controller: bool = False): + + # checks on input args + # type checks + if not isinstance(robot_names, List): + exception = "robot_names must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(robot_urdf_paths, List): + exception = "robot_urdf_paths must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(robot_srdf_paths, List): + exception = "robot_srdf_paths must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(cluster_dt, List): + exception = "cluster_dt must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(use_remote_stepping, List): + exception = "use_remote_stepping must be a list!" 
+ Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(n_contacts, List): + exception = "n_contacts must be a list (of integers)!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(jnt_imp_config_paths, List): + exception = "jnt_imp_config_paths must be a list paths!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # dim checks + if not len(robot_urdf_paths) == len(robot_names): + exception = f"robot_urdf_paths has len {len(robot_urdf_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(robot_srdf_paths) == len(robot_names): + exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(cluster_dt) == len(robot_names): + exception = f"cluster_dt has len {len(cluster_dt)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(use_remote_stepping) == len(robot_names): + exception = f"use_remote_stepping has len {len(use_remote_stepping)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(robot_srdf_paths) == len(robot_names): + exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(jnt_imp_config_paths) == len(robot_names): + exception = 
f"jnt_imp_config_paths has len {len(jnt_imp_config_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + + self._remote_exit_flag=None + + self._name=name + self._num_envs=num_envs + self._debug=debug + self._verbose=verbose + self._vlevel=vlevel + self._force_reconnection=True + self._timeout_ms=timeout_ms + self._use_gpu=use_gpu + self._device = "cuda" if self._use_gpu else "cpu" + self._dtype=dtype + self._robot_names=robot_names + self._env_opts={} + self._env_opts["deact_when_failure"]=True + self._env_opts["filter_jnt_vel"]=False + self._env_opts["filter_cutoff_freq"]=10.0 # [Hz] + self._env_opts["filter_sampling_rate"]=100 # rate at which state is filtered [Hz] + self._env_opts["add_remote_exit_flag"]=False # add shared data server to trigger a remote exit + self._env_opts["wheel_joint_patterns"]=["wheel"] + self._env_opts["filter_wheel_pos_ref"]=True + self._env_opts["zero_wheel_eff_ref"]=True + + self._env_opts["enable_height_sensor"]=False + self._env_opts["height_sensor_resolution"]=0.16 + self._env_opts["height_sensor_pixels"]=10 + self._env_opts["height_sensor_lateral_offset"]=0.0 + self._env_opts["height_sensor_forward_offset"]=0.0 + + self._env_opts["run_cluster_bootstrap"] = False + + self._filter_step_ssteps_freq=None + + self._env_opts.update(env_opts) + + self.step_counter = 0 # global step counter + self._n_init_steps = n_init_step # n steps to be performed before applying solutions from control clusters + self._srdf_dump_paths = robot_srdf_paths + self._homers = {} + self._homing = None + self._jnt_imp_cntrl_shared_data = {} + self._jnt_imp_controllers = {} + self._jnt_imp_config_paths = {} + + # control cluster data + self.cluster_sim_step_counters = {} + self.cluster_servers = {} + self._trigger_sol = {} + self._wait_sol = {} + self._cluster_dt = {} + self._robot_urdf_paths={} + self._robot_srdf_paths={} + self._contact_names={} + 
self._num_contacts={} + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + self._cluster_dt[robot_name]=cluster_dt[i] + self._robot_urdf_paths[robot_name]=robot_urdf_paths[i] + self._robot_srdf_paths[robot_name]=robot_srdf_paths[i] + self._contact_names[robot_name]=None + self._num_contacts[robot_name]=n_contacts[i] + self._jnt_imp_config_paths[robot_name]=jnt_imp_config_paths[i] + # db data + self.debug_data = {} + self.debug_data["time_to_step_world"] = np.nan + self.debug_data["time_to_get_states_from_env"] = np.nan + self.debug_data["cluster_sol_time"] = {} + self.debug_data["cluster_state_update_dt"] = {} + self.debug_data["sim_time"] = {} + self.debug_data["cluster_time"] = {} + + self._env_timer = time.perf_counter() + + # remote sim stepping options + self._timeout = timeout_ms # timeout for remote stepping + self._use_remote_stepping = use_remote_stepping + # should use remote stepping + self._remote_steppers = {} + self._remote_resetters = {} + self._remote_reset_requests = {} + self._is_first_trigger = {} + + self._closed = False + + self._this_child_path=os.path.abspath(inspect.getfile(self.__class__)) + self._descr_dump_path=dump_basepath+"/"+f"{self.__class__.__name__}" + self._urdf_dump_paths = {} + self._srdf_dump_paths = {} + self.xrdf_cmd_vals = [] # by default empty, needs to be overriden by + # child class + self._world_iface_files_server=None + + self._override_low_lev_controller=override_low_lev_controller + + self._root_p = {} + self._root_q = {} + self._jnts_q = {} + self._root_p_prev = {} # used for num differentiation + self._root_q_prev = {} # used for num differentiation + self._jnts_q_prev = {} # used for num differentiation + self._root_v_prev = {} # used for num differentiation + self._root_omega_prev = {} # used for num differentiation + self._root_p_default = {} + self._root_q_default = {} + self._jnts_q_default = {} + + self._gravity_normalized = {} + self._gravity_normalized_base_loc = {} + + 
self._root_v = {} + self._root_v_base_loc = {} + self._root_v_default = {} + self._root_omega = {} + self._root_omega_base_loc = {} + self._root_omega_default = {} + self._root_a = {} + self._root_a_base_loc = {} + self._root_alpha = {} + self._root_alpha_base_loc = {} + + self._jnts_v = {} + self._jnt_vel_filter = {} + self._jnts_v_default = {} + self._jnts_eff = {} + self._jnts_eff_default = {} + + self._root_pos_offsets = {} + self._root_q_offsets = {} + self._root_q_offsets_yaw = {} + self._root_q_yaw_rel_ws = {} + + self._parse_env_opts() + + self._enable_height_shared = self._env_opts["enable_height_sensor"] + self._height_sensor_resolution = self._env_opts["height_sensor_resolution"] + self._height_sensor_pixels = self._env_opts["height_sensor_pixels"] + + self._pre_setup() # child's method + + self._init_world() # after this point all info from sim or robot is + # available + + self._publish_world_interface_files() + + setup_ok=self._setup() + if not setup_ok: + self.close() + + self._exit_request=False + signal.signal(signal.SIGINT, self.signal_handler) + + def signal_handler(self, sig, frame): + Journal.log(self.__class__.__name__, + "signal_handler", + "received SIGINT -> cleaning up", + LogType.WARN) + self._exit_request=True + + def __del__(self): + self.close() + + def is_closed(self): + return self._closed + + def close(self) -> None: + if not self._closed: + for i in range(len(self._robot_names)): + if self._robot_names[i] in self.cluster_servers: + self.cluster_servers[self._robot_names[i]].close() + if self._use_remote_stepping[i]: # remote signaling + if self._robot_names[i] in self._remote_reset_requests: + self._remote_reset_requests[self._robot_names[i]].close() + self._remote_resetters[self._robot_names[i]].close() + self._remote_steppers[self._robot_names[i]].close() + if self._robot_names[i] in self._jnt_imp_cntrl_shared_data: + jnt_imp_shared_data=self._jnt_imp_cntrl_shared_data[self._robot_names[i]] + if jnt_imp_shared_data is not None: + 
jnt_imp_shared_data.close() + if self._remote_exit_flag is not None: + self._remote_exit_flag.close() + if self._world_iface_files_server is not None: + self._world_iface_files_server.close() + self._close() + self._closed=True + + def _collect_world_interface_files(self): + files = [self._this_child_path] + # prefer generated URDF/SRDF if available, fallback to provided xacros + if len(self._urdf_dump_paths) > 0: + files.extend(list(self._urdf_dump_paths.values())) + else: + files.extend(list(self._robot_urdf_paths.values())) + if len(self._srdf_dump_paths) > 0: + files.extend(list(self._srdf_dump_paths.values())) + else: + files.extend(list(self._robot_srdf_paths.values())) + files.extend(list(self._jnt_imp_config_paths.values())) + # remove duplicates while preserving order + unique_files=[] + for f in files: + if f not in unique_files: + unique_files.append(f) + return unique_files + + def _publish_world_interface_files(self): + + if not any(self._use_remote_stepping): + return + self._world_iface_files_server=StringTensorServer(length=1, + basename="SharedWorldInterfaceFilesDropDir", + name_space=self._robot_names[0], + verbose=self._verbose, + vlevel=self._vlevel, + force_reconnection=True) + self._world_iface_files_server.run() + combined_paths=", ".join(self._collect_world_interface_files()) + while not self._world_iface_files_server.write_vec([combined_paths], 0): + Journal.log(self.__class__.__name__, + "_publish_world_interface_files", + f"Failed to pub world interface files. 
Retrying...", + LogType.WARN) + time.sleep(0.1) + Journal.log(self.__class__.__name__, + "_publish_world_interface_files", + f"World interface files advertised: {combined_paths}", + LogType.STAT) + + def _setup(self) -> bool: + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + # normalized gravity vector + self._gravity_normalized[robot_name]=torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._gravity_normalized[robot_name][:, 2]=-1.0 + self._gravity_normalized_base_loc[robot_name]=self._gravity_normalized[robot_name].detach().clone() + + # Pre-allocate yaw-related buffers once and reuse them in root_q_yaw_rel(). + q_ref = self._root_q[robot_name] + self._root_q_offsets_yaw[robot_name] = torch.zeros( + (self._num_envs,), dtype=q_ref.dtype, device=q_ref.device) + self._root_q_yaw_rel_ws[robot_name] = { + "yaw_abs": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device), + "yaw_rel": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device), + "yaw_sin": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device), + "yaw_cos": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device), + "q_abs_unit": torch.zeros_like(q_ref), + "q_yaw_abs": torch.zeros_like(q_ref), + "q_yaw_rel": torch.zeros_like(q_ref), + "q_yaw_abs_conj": torch.zeros_like(q_ref), + "q_pr": torch.zeros_like(q_ref), + "q_rel": torch.zeros_like(q_ref), + } + + self.cluster_sim_step_counters[robot_name]=0 + self._is_first_trigger[robot_name] = True + if not isinstance(self._cluster_dt[robot_name], (float)): + exception = f"cluster_dt[{i}] should be a float!" 
+ Journal.log(self.__class__.__name__, + "_setup", + exception, + LogType.EXCEP, + throw_when_excep = False) + return False + self._cluster_dt[robot_name] = self._cluster_dt[robot_name] + self._trigger_sol[robot_name] = True # allow first trigger + self._wait_sol[robot_name] = False + + # initialize a lrhc cluster server for communicating with rhc controllers + self.cluster_servers[robot_name] = AugMpcClusterServer(cluster_size=self._num_envs, + cluster_dt=self._cluster_dt[robot_name], + control_dt=self.physics_dt(), + jnt_names=self._robot_jnt_names(robot_name=robot_name), + n_contacts=self._n_contacts(robot_name=robot_name), + contact_linknames=self._contact_names[robot_name], + verbose=self._verbose, + vlevel=self._vlevel, + debug=self._debug, + robot_name=robot_name, + use_gpu=self._use_gpu, + force_reconnection=self._force_reconnection, + timeout_ms=self._timeout, + enable_height_sensor=self._enable_height_shared, + height_grid_size=self._height_sensor_pixels, + height_grid_resolution=self._height_sensor_resolution) + self.cluster_servers[robot_name].run() + self.debug_data["cluster_sol_time"][robot_name] = np.nan + self.debug_data["cluster_state_update_dt"][robot_name] = np.nan + self.debug_data["sim_time"][robot_name] = np.nan + # remote sim stepping + if self._use_remote_stepping[i]: + self._remote_steppers[robot_name] = RemoteStepperClnt(namespace=robot_name, + verbose=self._debug, + vlevel=self._vlevel) + self._remote_resetters[robot_name] = RemoteResetClnt(namespace=robot_name, + verbose=self._debug, + vlevel=self._vlevel) + self._remote_reset_requests[robot_name] = RemoteResetRequest(namespace=robot_name, + n_env=self._num_envs, + is_server=True, + verbose=self._debug, + vlevel=self._vlevel, + force_reconnection=self._force_reconnection, + safe=False) + self._remote_steppers[robot_name].run() + self._remote_resetters[robot_name].run() + self._remote_reset_requests[robot_name].run() + else: + self._remote_steppers[robot_name] = None + 
self._remote_reset_requests[robot_name] = None + self._remote_resetters[robot_name] = None + + self._homers[robot_name] = RobotHomer(srdf_path=self._srdf_dump_paths[robot_name], + jnt_names=self._robot_jnt_names(robot_name=robot_name), + filter=True) + robot_homing=torch.from_numpy(self._homers[robot_name].get_homing().reshape(1,-1)) + if "cuda" in self._device: + robot_homing=robot_homing.cuda() + self._homing=robot_homing.repeat(self._num_envs, 1) + + self._jnts_q_default[robot_name] = self._homing + self._set_jnts_to_homing(robot_name=robot_name) + self._set_root_to_defconfig(robot_name=robot_name) + self._reset_sim() + + self._init_safe_cluster_actions(robot_name=robot_name) + + Journal.log(self.__class__.__name__, + "_setup", + f"Will use joint impedance config at {self._jnt_imp_config_paths[robot_name]} for {robot_name}", + LogType.STAT) + + self._jnt_imp_controllers[robot_name] = self._generate_jnt_imp_control(robot_name=robot_name) + self._jnt_imp_controllers[robot_name].set_velocity_controlled_joints( + name_patterns=self._env_opts["wheel_joint_patterns"], + filter_pos_ref=self._env_opts["filter_wheel_pos_ref"], + zero_eff_ref=self._env_opts["zero_wheel_eff_ref"]) + self._jnt_imp_cntrl_shared_data[robot_name] = JntImpCntrlData(is_server=True, + n_envs=self._num_envs, + n_jnts=len(self._robot_jnt_names(robot_name=robot_name)), + jnt_names=self._robot_jnt_names(robot_name=robot_name), + namespace=robot_name, + verbose=self._verbose, + force_reconnection=self._force_reconnection, + vlevel=self._vlevel, + use_gpu=self._use_gpu, + safe=False) + self._jnt_imp_cntrl_shared_data[robot_name].run() + + self._jnt_vel_filter[robot_name]=None + if self._env_opts["filter_jnt_vel"]: + self._jnt_vel_filter[robot_name]=FirstOrderFilter(dt=1.0/self._env_opts["filter_sampling_rate"], + filter_BW=self._env_opts["filter_cutoff_freq"], + rows=self._num_envs, + cols=len(self._robot_jnt_names(robot_name=robot_name)), + device=self._device, + dtype=self._dtype) + + 
physics_rate=1.0/self.physics_dt() + self._filter_step_ssteps_freq=int(physics_rate/self._env_opts["filter_sampling_rate"]) + if self._filter_step_ssteps_freq <=0: + Journal.log(self.__class__.__name__, + "_setup", + f"The filter_sampling_rate should be smaller that the physics rate ({physics_rate} Hz)", + LogType.EXCEP, + throw_when_excep=True) + + for n in range(self._n_init_steps): # run some initialization steps + if hasattr(self, "_alter_twist_warmup"): + self._alter_twist_warmup(robot_name=robot_name, env_indxs=None) + self._step_world() + + self._read_jnts_state_from_robot(robot_name=robot_name, + env_indxs=None) + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=None) + # allow child to perform additional warmup validations (e.g., terrain/tilt) + # retry_done = False + if hasattr(self, "_post_warmup_validation"): + failing = self._post_warmup_validation(robot_name=robot_name) + if failing is not None and failing.numel() > 0: + # retry: reset only failing envs, rerun warmup, revalidate once + failing = failing.to(self._device) + Journal.log(self.__class__.__name__, + "_setup", + f"Warmup validation failed for {robot_name}, envs indexes {failing.tolist()}", + LogType.EXCEP, + throw_when_excep=True) + else: + Journal.log(self.__class__.__name__, + "_setup", + f"Warmup validation passed for {robot_name}", + LogType.INFO) + + # write some inits for all robots + # self._update_root_offsets(robot_name) + self._synch_default_root_states(robot_name=robot_name) + epsi=0.03 # adding a bit of height to avoid initial penetration + self._root_p_default[robot_name][:, 2]=self._root_p_default[robot_name][:, 2]+epsi + + reset_ok=self._reset(env_indxs=None, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True, + acquire_offsets=True) # resets everything, updates the cluster with fresh reset states + # and acquire offsets + if not reset_ok: + return False + + # cluster setup here + 
control_cluster=self.cluster_servers[robot_name] + + control_cluster.pre_trigger() + to_be_activated=control_cluster.get_inactive_controllers() + if to_be_activated is not None: + control_cluster.activate_controllers( + idxs=to_be_activated) + + if self._env_opts["run_cluster_bootstrap"]: + cluster_setup_ok=self._setup_mpc_cluster(robot_name) + if not cluster_setup_ok: + return False + self._set_cluster_actions(robot_name=robot_name) # write last cmds + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot + + if self._use_remote_stepping[i]: + step_wait_ok = self._wait_for_remote_step_req(robot_name=robot_name) + if not step_wait_ok: + return False + + self._set_startup_jnt_imp_gains(robot_name=robot_name) # set gains to + # startup config (usually lower) + + control_cluster.pre_trigger() + control_cluster.trigger_solution(bootstrap=False) # trigger first solution (in real-time iteration) before first call to step to ensure that first solution is ready when step is called the first time + + if self._env_opts["add_remote_exit_flag"]: + self._remote_exit_flag=SharedTWrapper(namespace = self._robot_names[0],# use first robot as name + basename = "IbridoRemoteEnvExitFlag", + is_server = True, + n_rows = 1, + n_cols = 1, + verbose = True, + vlevel = self._vlevel, + safe = False, + dtype=dtype.Bool, + force_reconnection=True, + fill_value = False) + self._remote_exit_flag.run() + + self._setup_done=True + + return self._setup_done + + def _setup_mpc_cluster(self, robot_name: str): + + control_cluster = self.cluster_servers[robot_name] + + # self._set_state_to_cluster(robot_name=robot_name) + rhc_state = control_cluster.get_state() + root_twist=rhc_state.root_state.get(data_type="twist", robot_idxs = None, gpu=self._use_gpu) + jnt_v=rhc_state.jnts_state.get(data_type="v", robot_idxs = None, gpu=self._use_gpu) + root_twist[:, :]=0 # override meas state to make sure MPC bootstrap uses zero velocity + jnt_v[:, :]=0 + + 
control_cluster.write_robot_state() + + # trigger bootstrap solution (solvers will run up to convergence) + control_cluster.trigger_solution(bootstrap=True) # this will trigger the bootstrap solver with the initial state, + # which will run until convergence before returning + wait_ok=control_cluster.wait_for_solution() # blocking + if not wait_ok: + return False + failed = control_cluster.get_failed_controllers(gpu=self._use_gpu) + if failed is not None: + failed_idxs = torch.nonzero(failed).squeeze(-1) + if failed_idxs.numel() > 0: + Journal.log(self.__class__.__name__, + "_setup", + f"Bootstrap solution failed for {robot_name} | n_failed: {failed_idxs.numel()}, idxs: {failed_idxs.cpu().tolist()}", + LogType.EXCEP, + throw_when_excep=False) + return False + + return True + + def step(self) -> bool: + + success=False + + if self._remote_exit_flag is not None: + # check for exit request + self._remote_exit_flag.synch_all(read=True, retry = False) + self._exit_request=self._exit_request or \ + bool(self._remote_exit_flag.get_numpy_mirror()[0, 0].item()) + + if self._exit_request: + self.close() + + if self.is_running() and (not self.is_closed()): + if self._debug: + pre_step_ok=self._pre_step_db() + if not pre_step_ok: + return False + self._env_timer=time.perf_counter() + self._step_world() + self.debug_data["time_to_step_world"] = \ + time.perf_counter() - self._env_timer + self._post_world_step_db() + success=True + else: + pre_step_ok=self._pre_step() + if not pre_step_ok: + return False + self._step_world() + self._post_world_step() + success=True + + return success + + def render(self, mode:str="human") -> None: + self._render_sim(mode) + + def reset(self, + env_indxs: torch.Tensor = None, + reset_cluster: bool = False, + reset_cluster_counter = False, + randomize: bool = False, + reset_sim: bool = False) -> None: + + for i in range(len(self._robot_names)): + robot_name=self._robot_names[i] + reset_ok=self._reset(robot_name=robot_name, + env_indxs=env_indxs, + 
randomize=randomize, + reset_cluster=reset_cluster, + reset_cluster_counter=reset_cluster_counter) + if not reset_ok: + return False + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=env_indxs) + + if reset_sim: + self._reset_sim() + + return True + + def _reset_cluster(self, + robot_name: str, + env_indxs: torch.Tensor = None, + reset_cluster_counter: bool = False): + + control_cluster = self.cluster_servers[robot_name] + + reset_ok=control_cluster.reset_controllers(idxs=env_indxs) + if not reset_ok: + return False + + self._set_state_to_cluster(robot_name=robot_name, + env_indxs=env_indxs) + control_cluster.write_robot_state() # writes to shared memory + + if reset_cluster_counter: + self.cluster_sim_step_counters[robot_name] = 0 + + return True + + def _step_jnt_vel_filter(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + self._jnt_vel_filter[robot_name].update(refk=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs), + idxs=env_indxs) + + def _set_state_to_cluster(self, + robot_name: str, + env_indxs: torch.Tensor = None, + base_loc: bool = True): + + if self._debug: + if not isinstance(env_indxs, (torch.Tensor, type(None))): + msg = "Provided env_indxs should be a torch tensor of indexes!" 
+ raise Exception(f"[{self.__class__.__name__}]" + f"[{self.journal.exception}]: " + msg) + + control_cluster = self.cluster_servers[robot_name] + # floating base + rhc_state = control_cluster.get_state() + # configuration + rhc_state.root_state.set(data=self.root_p_rel(robot_name=robot_name, env_idxs=env_indxs), + data_type="p", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.root_state.set(data=self.root_q(robot_name=robot_name, env_idxs=env_indxs), + data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu) + # rhc_state.root_state.set(data=self.root_q_yaw_rel(robot_name=robot_name, env_idxs=env_indxs), + # data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu) + # twist + rhc_state.root_state.set(data=self.root_v(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.root_state.set(data=self.root_omega(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="omega", robot_idxs = env_indxs, gpu=self._use_gpu) + + # angular accc. 
+ rhc_state.root_state.set(data=self.root_a(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="a", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.root_state.set(data=self.root_alpha(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="alpha", robot_idxs = env_indxs, gpu=self._use_gpu) + + # gravity vec + rhc_state.root_state.set(data=self.gravity(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="gn", robot_idxs = env_indxs, gpu=self._use_gpu) + + # joints + rhc_state.jnts_state.set(data=self.jnts_q(robot_name=robot_name, env_idxs=env_indxs), + data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu) + + v_jnts=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs) + if self._jnt_vel_filter[robot_name] is not None: # apply filtering + v_jnts=self._jnt_vel_filter[robot_name].get(idxs=env_indxs) + rhc_state.jnts_state.set(data=v_jnts, + data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.jnts_state.set(data=self.jnts_eff(robot_name=robot_name, env_idxs=env_indxs), + data_type="eff", robot_idxs = env_indxs, gpu=self._use_gpu) + + # height map + if self._enable_height_shared: + hdata = self._height_imgs[robot_name] + if env_indxs is not None: + hdata = hdata[env_indxs] + flat = hdata.reshape(hdata.shape[0], -1) + rhc_state.height_sensor.set(data=flat, data_type=None, robot_idxs=env_indxs, gpu=self._use_gpu) + + # Updating contact state for selected contact links + self._update_contact_state(robot_name=robot_name, env_indxs=env_indxs) + + def _update_contact_state(self, + robot_name: str, + env_indxs: torch.Tensor = None): + for i in range(0, self.cluster_servers[robot_name].n_contact_sensors()): + contact_link = self.cluster_servers[robot_name].contact_linknames()[i] + f_contact = self._get_contact_f(robot_name=robot_name, + contact_link=contact_link, + env_indxs=env_indxs) + if f_contact is not None: + 
self.cluster_servers[robot_name].get_state().contact_wrenches.set(data=f_contact, data_type="f", + contact_name=contact_link, + robot_idxs = env_indxs, + gpu=self._use_gpu) + + def _init_safe_cluster_actions(self, + robot_name: str): + + # this does not actually write on shared memory, + # but it's enough to get safe actions for the simulator before the + # cluster starts to receive data from the controllers + control_cluster = self.cluster_servers[robot_name] + rhc_cmds = control_cluster.get_actions() + n_jnts = rhc_cmds.n_jnts() + + null_action = torch.zeros((self._num_envs, n_jnts), + dtype=self._dtype, + device=self._device) + rhc_cmds.jnts_state.set(data=self._homing, data_type="q", gpu=self._use_gpu) + rhc_cmds.jnts_state.set(data=null_action, data_type="v", gpu=self._use_gpu) + rhc_cmds.jnts_state.set(data=null_action, data_type="eff", gpu=self._use_gpu) + + def _pre_step_db(self) -> None: + + # cluster step logic here + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + if self._override_low_lev_controller: + # if overriding low-lev jnt imp. 
this has to run at the highest + # freq possible + start=time.perf_counter() + self._read_jnts_state_from_robot(robot_name=robot_name) + self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start + + self._write_state_to_jnt_imp(robot_name=robot_name) + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) + + if self._jnt_vel_filter[robot_name] is not None and \ + (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0: + # filter joint vel at a fixed frequency wrt sim steps + if not self._override_low_lev_controller: + # we need a fresh sensor reading + self._read_jnts_state_from_robot(robot_name=robot_name) + self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None) + + control_cluster = self.cluster_servers[robot_name] + if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]): + wait_ok=control_cluster.wait_for_solution() # this is blocking + if not wait_ok: + return False + failed = control_cluster.get_failed_controllers(gpu=self._use_gpu) + self._set_cluster_actions(robot_name=robot_name) # write last cmds to low level control + if not self._override_low_lev_controller: + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot + # we can update the jnt state just at the rate at which the cluster needs it + start=time.perf_counter() + self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None) + else: + # read state necessary for cluster + start=time.perf_counter() + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=None) + self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start + start=time.perf_counter() + self._set_state_to_cluster(robot_name=robot_name, + env_indxs=None) + control_cluster.write_robot_state() + self.debug_data["cluster_state_update_dt"][robot_name] = time.perf_counter()-start + + self._update_jnt_imp_cntrl_shared_data() # only if debug_mode_jnt_imp is enabled + + if 
self._use_remote_stepping[i]: + self._remote_steppers[robot_name].ack() # signal cluster stepping is finished + if failed is not None and self._env_opts["deact_when_failure"]: # deactivate robot completely + self._deactivate(env_indxs=failed, + robot_name=robot_name) + wait_reset_ok=self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking) + wait_step_ok=self._wait_for_remote_step_req(robot_name=robot_name) + if not wait_reset_ok or not wait_step_ok: + return False + else: + if failed is not None: + reset_ok=self._reset(env_indxs=failed, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + if not reset_ok: + return False + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=failed) + + control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers()) + + control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving + # values of some rhc flags on shared memory + control_cluster.trigger_solution() # trigger only active controllers + + return True + + def _pre_step(self) -> None: + + # cluster step logic here + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + if self._override_low_lev_controller: + # if overriding low-lev jnt imp. 
this has to run at the highest + # freq possible + self._read_jnts_state_from_robot(robot_name=robot_name) + self._write_state_to_jnt_imp(robot_name=robot_name) + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) + + if self._jnt_vel_filter[robot_name] is not None and \ + (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0: + # filter joint vel at a fixed frequency wrt sim steps + if not self._override_low_lev_controller: + # we need a fresh sensor reading + self._read_jnts_state_from_robot(robot_name=robot_name) + self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None) + + control_cluster = self.cluster_servers[robot_name] + if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]): + wait_ok=control_cluster.wait_for_solution() # this is blocking + if not wait_ok: + return False + failed = control_cluster.get_failed_controllers(gpu=self._use_gpu) + self._set_cluster_actions(robot_name=robot_name) # set last cmds to low level control + if not self._override_low_lev_controller: + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot + # we can update the jnt state just at the rate at which the cluster needs it + self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None) + # read state necessary for cluster + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=None) + # write last robot state to the cluster of controllers + self._set_state_to_cluster(robot_name=robot_name, + env_indxs=None) + control_cluster.write_robot_state() # write on shared mem + + if self._use_remote_stepping[i]: + self._remote_steppers[robot_name].ack() # signal cluster stepping is finished + if failed is not None and self._env_opts["deact_when_failure"]: + self._deactivate(env_indxs=failed, + robot_name=robot_name) + wait_reset_ok=self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking) + 
wait_step_ok=self._wait_for_remote_step_req(robot_name=robot_name) + if not wait_reset_ok or not wait_step_ok: + return False + else: + if failed is not None: + reset_ok=self._reset(env_indxs=failed, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + if not reset_ok: + return False + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=failed) + control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers()) + + control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving + # values of some rhc flags on shared memory + control_cluster.trigger_solution() # trigger only active controllers + + return True + + def _post_world_step_db(self) -> bool: + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + control_cluster = self.cluster_servers[robot_name] + self.cluster_sim_step_counters[robot_name]+=1 # this has to be update with sim freq + if self._debug: + self.debug_data["sim_time"][robot_name]=self.world_time(robot_name=robot_name) + self.debug_data["cluster_sol_time"][robot_name] = \ + control_cluster.solution_time() + + self.step_counter +=1 + + def _post_world_step(self) -> bool: + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + self.cluster_sim_step_counters[robot_name]+=1 + self.step_counter +=1 + + def _reset(self, + robot_name: str, + env_indxs: torch.Tensor = None, + randomize: bool = False, + reset_cluster: bool = False, + reset_cluster_counter = False, + acquire_offsets: bool = False): + + # resets the state of target robot and env to the defaults + self._reset_state(env_indxs=env_indxs, + robot_name=robot_name, + randomize=randomize) + + # and jnt imp. 
controllers + self._reset_jnt_imp_control(robot_name=robot_name, + env_indxs=env_indxs) + + # read reset state + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=env_indxs) + self._read_jnts_state_from_robot(robot_name=robot_name, + env_indxs=env_indxs) + + if self._jnt_vel_filter[robot_name] is not None: + self._jnt_vel_filter[robot_name].reset(idxs=env_indxs) + + if acquire_offsets: + self._update_root_offsets(robot_name=robot_name, + env_indxs=env_indxs) + + if reset_cluster: # reset controllers remotely + reset_ok=self._reset_cluster(env_indxs=env_indxs, + robot_name=robot_name, + reset_cluster_counter=reset_cluster_counter) + if not reset_ok: + return False + + return True + + def _randomize_yaw(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + root_q_default = self._root_q_default[robot_name] + if env_indxs is None: + env_indxs = torch.arange(root_q_default.shape[0]) + + num_indices = env_indxs.shape[0] + yaw_angles = torch.rand((num_indices,), + device=root_q_default.device) * 2 * torch.pi # uniformly distributed random angles + + # Compute cos and sin once + cos_half = torch.cos(yaw_angles / 2) + root_q_default[env_indxs, :] = torch.stack((cos_half, + torch.zeros_like(cos_half), + torch.zeros_like(cos_half), + torch.sin(yaw_angles / 2)), dim=1).reshape(num_indices, 4) + + def _deactivate(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + # deactivate jnt imp controllers for given robots and envs (makes the robot fall) + self._jnt_imp_controllers[robot_name].deactivate(robot_indxs=env_indxs) + + def _n_contacts(self, robot_name: str) -> List[int]: + return self._num_contacts[robot_name] + + def root_p(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._root_p[robot_name] + else: + return self._root_p[robot_name][env_idxs, :] + + def root_p_rel(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + rel_pos = 
torch.sub(self.root_p(robot_name=robot_name), + self._root_pos_offsets[robot_name]) + else: + rel_pos = torch.sub(self.root_p(robot_name=robot_name, + env_idxs=env_idxs), + self._root_pos_offsets[robot_name][env_idxs, :]) + return rel_pos + + def root_q(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._root_q[robot_name] + else: + return self._root_q[robot_name][env_idxs, :] + + def root_q_rel(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return quaternion_difference(self._root_q_offsets[robot_name], + self.root_q(robot_name=robot_name)) + rel_q = quaternion_difference(self._root_q_offsets[robot_name][env_idxs, :], + self.root_q(robot_name=robot_name, + env_idxs=env_idxs)) + return rel_q + + def _quat_to_yaw_wxyz(self, q: torch.Tensor, out: torch.Tensor = None): + # Quaternion convention is w, x, y, z. + w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3] + num = 2.0 * (w * z + x * y) + den = 1.0 - 2.0 * (y * y + z * z) + if out is None: + return torch.atan2(num, den) + return torch.atan2(num, den, out=out) + + def _yaw_to_quat_wxyz(self, yaw: torch.Tensor, like_q: torch.Tensor, + out: torch.Tensor = None): + q = out + if q is None: + q = torch.zeros((yaw.shape[0], 4), dtype=like_q.dtype, device=like_q.device) + else: + q.zero_() + q[:, 0] = torch.cos(yaw / 2.0) + q[:, 3] = torch.sin(yaw / 2.0) + return q + + def _quat_conjugate_wxyz(self, q: torch.Tensor, out: torch.Tensor = None): + qi = out + if qi is None: + qi = torch.empty_like(q) + qi[:, :] = q + qi[:, 1:] = -qi[:, 1:] + return qi + + def _quat_multiply_wxyz(self, q1: torch.Tensor, q2: torch.Tensor, + out: torch.Tensor = None): + q_out = out + if q_out is None: + q_out = torch.empty_like(q1) + w1, x1, y1, z1 = q1[:, 0], q1[:, 1], q1[:, 2], q1[:, 3] + w2, x2, y2, z2 = q2[:, 0], q2[:, 1], q2[:, 2], q2[:, 3] + q_out[:, 0] = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 + q_out[:, 1] = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 + q_out[:, 2] 
= w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 + q_out[:, 3] = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 + return q_out + + def _normalize_quat_wxyz(self, q: torch.Tensor, out: torch.Tensor = None): + q_norm = out + if q_norm is None: + q_norm = torch.empty_like(q) + q_norm[:, :] = q + q_norm /= torch.clamp(torch.norm(q_norm, dim=1, keepdim=True), min=1e-9) + return q_norm + + def root_q_yaw_rel(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + # Return quaternion with startup yaw removed while preserving current pitch/roll. + if env_idxs is None: + ws = self._root_q_yaw_rel_ws[robot_name] + q_abs = self._root_q[robot_name] + yaw_start = self._root_q_offsets_yaw[robot_name] + + self._normalize_quat_wxyz(q=q_abs, out=ws["q_abs_unit"]) + self._quat_to_yaw_wxyz(q=ws["q_abs_unit"], out=ws["yaw_abs"]) + + torch.sub(ws["yaw_abs"], yaw_start, out=ws["yaw_rel"]) + torch.sin(ws["yaw_rel"], out=ws["yaw_sin"]) + torch.cos(ws["yaw_rel"], out=ws["yaw_cos"]) + torch.atan2(ws["yaw_sin"], ws["yaw_cos"], out=ws["yaw_rel"]) + + # Build pure-yaw quaternions for: + # 1) the current absolute heading and 2) the startup-relative heading. + self._yaw_to_quat_wxyz(yaw=ws["yaw_abs"], like_q=ws["q_abs_unit"], out=ws["q_yaw_abs"]) + self._yaw_to_quat_wxyz(yaw=ws["yaw_rel"], like_q=ws["q_abs_unit"], out=ws["q_yaw_rel"]) + + # Isolate pitch/roll by removing the absolute yaw from the current orientation. + # For unit quaternions q_pr = q_yaw_abs^{-1} * q_abs. + self._quat_conjugate_wxyz(q=ws["q_yaw_abs"], out=ws["q_yaw_abs_conj"]) + self._quat_multiply_wxyz(q1=ws["q_yaw_abs_conj"], q2=ws["q_abs_unit"], out=ws["q_pr"]) + # Recompose orientation with relative yaw + current pitch/roll. 
+ self._quat_multiply_wxyz(q1=ws["q_yaw_rel"], q2=ws["q_pr"], out=ws["q_rel"]) + + return self._normalize_quat_wxyz(q=ws["q_rel"], out=ws["q_rel"]) + + q_abs = self.root_q(robot_name=robot_name, env_idxs=env_idxs) + q_abs = self._normalize_quat_wxyz(q=q_abs, out=q_abs) + + yaw_abs = self._quat_to_yaw_wxyz(q_abs) + yaw_start = self._root_q_offsets_yaw[robot_name][env_idxs] + yaw_rel = yaw_abs - yaw_start + yaw_rel = torch.atan2(torch.sin(yaw_rel), torch.cos(yaw_rel)) + + q_yaw_abs = self._yaw_to_quat_wxyz(yaw_abs, like_q=q_abs) + q_yaw_rel = self._yaw_to_quat_wxyz(yaw_rel, like_q=q_abs) + q_pr = self._quat_multiply_wxyz(self._quat_conjugate_wxyz(q_yaw_abs), q_abs) + q_rel = self._quat_multiply_wxyz(q_yaw_rel, q_pr) + + return self._normalize_quat_wxyz(q_rel) + + def root_v(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_v=self._root_v[robot_name] + if base_loc: + root_v=self._root_v_base_loc[robot_name] + if env_idxs is None: + return root_v + else: + return root_v[env_idxs, :] + + def root_omega(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_omega=self._root_omega[robot_name] + if base_loc: + root_omega=self._root_omega_base_loc[robot_name] + if env_idxs is None: + return root_omega + else: + return root_omega[env_idxs, :] + + def root_a(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_a=self._root_a[robot_name] + if base_loc: + root_a=self._root_a_base_loc[robot_name] + if env_idxs is None: + return root_a + else: + return root_a[env_idxs, :] + + def root_alpha(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_alpha=self._root_alpha[robot_name] + if base_loc: + root_alpha=self._root_alpha_base_loc[robot_name] + if env_idxs is None: + return root_alpha + else: + return root_alpha[env_idxs, :] + + def gravity(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): 
+ + gravity_loc=self._gravity_normalized[robot_name] + if base_loc: + gravity_loc=self._gravity_normalized_base_loc[robot_name] + if env_idxs is None: + return gravity_loc + else: + return gravity_loc[env_idxs, :] + + def jnts_q(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._jnts_q[robot_name] + else: + return self._jnts_q[robot_name][env_idxs, :] + + def jnts_v(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._jnts_v[robot_name] + else: + return self._jnts_v[robot_name][env_idxs, :] + + def jnts_eff(self, + robot_name: str, + env_idxs: torch.Tensor = None): # (measured) efforts + + if env_idxs is None: + return self._jnts_eff[robot_name] + else: + return self._jnts_eff[robot_name][env_idxs, :] + + def _wait_for_remote_step_req(self, + robot_name: str): + if not self._remote_steppers[robot_name].wait(self._timeout): + Journal.log(self.__class__.__name__, + "_wait_for_remote_step_req", + "Didn't receive any remote step req within timeout!", + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def _process_remote_reset_req(self, + robot_name: str): + + if not self._remote_resetters[robot_name].wait(self._timeout): + Journal.log(self.__class__.__name__, + "_process_remote_reset_req", + "Didn't receive any remote reset req within timeout!", + LogType.EXCEP, + throw_when_excep = False) + return False + + reset_requests = self._remote_reset_requests[robot_name] + reset_requests.synch_all(read=True, retry=True) # read reset requests from shared mem + to_be_reset = reset_requests.to_be_reset(gpu=self._use_gpu) + if to_be_reset is not None: + reset_ok=self._reset(env_indxs=to_be_reset, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + if not reset_ok: + return False + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=to_be_reset) # set gains to startup config (usually lower gains) + + 
control_cluster = self.cluster_servers[robot_name] + control_cluster.activate_controllers(idxs=to_be_reset) # activate controllers + # (necessary if failed) + + self._remote_resetters[robot_name].ack() # signal reset performed + + return True + + def _update_jnt_imp_cntrl_shared_data(self): + if self._debug: + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + # updating all the jnt impedance data - > this may introduce some overhead + imp_data = self._jnt_imp_cntrl_shared_data[robot_name].imp_data_view + # set data + imp_data.set(data_type="pos_err", + data=self._jnt_imp_controllers[robot_name].pos_err(), + gpu=self._use_gpu) + imp_data.set(data_type="vel_err", + data=self._jnt_imp_controllers[robot_name].vel_err(), + gpu=self._use_gpu) + imp_data.set(data_type="pos_gains", + data=self._jnt_imp_controllers[robot_name].pos_gains(), + gpu=self._use_gpu) + imp_data.set(data_type="vel_gains", + data=self._jnt_imp_controllers[robot_name].vel_gains(), + gpu=self._use_gpu) + imp_data.set(data_type="eff_ff", + data=self._jnt_imp_controllers[robot_name].eff_ref(), + gpu=self._use_gpu) + imp_data.set(data_type="pos", + data=self._jnt_imp_controllers[robot_name].pos(), + gpu=self._use_gpu) + imp_data.set(data_type="pos_ref", + data=self._jnt_imp_controllers[robot_name].pos_ref(), + gpu=self._use_gpu) + imp_data.set(data_type="vel", + data=self._jnt_imp_controllers[robot_name].vel(), + gpu=self._use_gpu) + imp_data.set(data_type="vel_ref", + data=self._jnt_imp_controllers[robot_name].vel_ref(), + gpu=self._use_gpu) + imp_data.set(data_type="eff", + data=self._jnt_imp_controllers[robot_name].eff(), + gpu=self._use_gpu) + imp_data.set(data_type="imp_eff", + data=self._jnt_imp_controllers[robot_name].imp_eff(), + gpu=self._use_gpu) + # copy from GPU to CPU if using gpu + if self._use_gpu: + imp_data.synch_mirror(from_gpu=True,non_blocking=True) + # even if it's from GPU->CPu we can use non-blocking since it's just for db + # purposes + # write 
copies to shared memory + imp_data.synch_all(read=False, retry=False) + + def _set_startup_jnt_imp_gains(self, + robot_name:str, + env_indxs: torch.Tensor = None): + + startup_p_gains=self._jnt_imp_controllers[robot_name].startup_p_gains() + startup_d_gains=self._jnt_imp_controllers[robot_name].startup_d_gains() + + if env_indxs is not None: + self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs, + pos_gains=startup_p_gains[env_indxs, :], + vel_gains=startup_d_gains[env_indxs, :]) + else: + self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs, + pos_gains=startup_p_gains[:, :], + vel_gains=startup_d_gains[:, :]) + + def _write_state_to_jnt_imp(self, + robot_name: str): + + # always update ,imp. controller internal state (jnt imp control is supposed to be + # always running) + self._jnt_imp_controllers[robot_name].update_state(pos=self.jnts_q(robot_name=robot_name), + vel = self.jnts_v(robot_name=robot_name), + eff = self.jnts_eff(robot_name=robot_name)) + + def _set_cluster_actions(self, + robot_name): + control_cluster = self.cluster_servers[robot_name] + actions=control_cluster.get_actions() + active_controllers=control_cluster.get_active_controllers(gpu=self._use_gpu) + + if active_controllers is not None: + self._jnt_imp_controllers[robot_name].set_refs( + pos_ref=actions.jnts_state.get(data_type="q", gpu=self._use_gpu)[active_controllers, :], + vel_ref=actions.jnts_state.get(data_type="v", gpu=self._use_gpu)[active_controllers, :], + eff_ref=actions.jnts_state.get(data_type="eff", gpu=self._use_gpu)[active_controllers, :], + robot_indxs=active_controllers) + + def _jnt_imp_reset_overrride(self, robot_name:str): + # to be overriden + pass + + def _apply_cmds_to_jnt_imp_control(self, robot_name:str): + + self._jnt_imp_controllers[robot_name].apply_cmds() + + def _update_root_offsets(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + if self._debug: + for_robots = "" + if env_indxs is not None: + if not 
isinstance(env_indxs, torch.Tensor): + msg = "Provided env_indxs should be a torch tensor of indexes!" + Journal.log(self.__class__.__name__, + "update_root_offsets", + msg, + LogType.EXCEP, + throw_when_excep = True) + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist()) + if self._verbose: + Journal.log(self.__class__.__name__, + "update_root_offsets", + f"updating root offsets " + for_robots, + LogType.STAT, + throw_when_excep = True) + + # only planar position used + if env_indxs is None: + self._root_pos_offsets[robot_name][:, 0:2] = self._root_p[robot_name][:, 0:2] + self._normalize_quat_wxyz(q=self._root_q[robot_name], out=self._root_q_offsets[robot_name]) + self._quat_to_yaw_wxyz(q=self._root_q_offsets[robot_name], + out=self._root_q_offsets_yaw[robot_name]) + + else: + self._root_pos_offsets[robot_name][env_indxs, 0:2] = self._root_p[robot_name][env_indxs, 0:2] + q_root_norm=self._normalize_quat_wxyz(self._root_q[robot_name][env_indxs, :]) + self._root_q_offsets[robot_name][env_indxs, :] = q_root_norm + self._root_q_offsets_yaw[robot_name][env_indxs] = self._quat_to_yaw_wxyz(q=q_root_norm) + + def _reset_jnt_imp_control(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + if self._debug: + for_robots = "" + if env_indxs is not None: + if not isinstance(env_indxs, torch.Tensor): + Journal.log(self.__class__.__name__, + "reset_jnt_imp_control", + "Provided env_indxs should be a torch tensor of indexes!", + LogType.EXCEP, + throw_when_excep = True) + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = 
"Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs) + + if self._verbose: + Journal.log(self.__class__.__name__, + "reset_jnt_imp_control", + f"resetting joint impedances " + for_robots, + LogType.STAT, + throw_when_excep = True) + + # resets all internal data, refs to defaults + self._jnt_imp_controllers[robot_name].reset(robot_indxs=env_indxs) + + #restore jnt imp refs to homing + if env_indxs is None: + self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[:, :], + robot_indxs = None) + else: + self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[env_indxs, :], + robot_indxs = env_indxs) + + # self._write_state_to_jnt_imp(robot_name=robot_name) + # actually applies reset commands to the articulation + self._write_state_to_jnt_imp(robot_name=robot_name) + self._jnt_imp_reset_overrride(robot_name=robot_name) + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) + + def _synch_default_root_states(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + if self._debug: + for_robots = "" + if env_indxs is not None: + if not isinstance(env_indxs, torch.Tensor): + msg = "Provided env_indxs should be a torch tensor of indexes!" + Journal.log(self.__class__.__name__, + "synch_default_root_states", + msg, + LogType.EXCEP, + throw_when_excep = True) + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" 
+ Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist()) + if self._verbose: + Journal.log(self.__class__.__name__, + "synch_default_root_states", + f"updating default root states " + for_robots, + LogType.STAT, + throw_when_excep = True) + + if env_indxs is None: + self._root_p_default[robot_name][:, :] = self._root_p[robot_name] + self._root_q_default[robot_name][:, :] = self._root_q[robot_name] + else: + self._root_p_default[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :] + self._root_q_default[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :] + + def _generate_rob_descriptions(self, + robot_name: str, + urdf_path: str, + srdf_path: str): + + custom_xacro_args=extract_custom_xacro_args(self._env_opts) + Journal.log(self.__class__.__name__, + "_generate_rob_descriptions", + "generating URDF for robot "+ f"{robot_name}, from URDF {urdf_path}...", + LogType.STAT, + throw_when_excep = True) + xrdf_cmds=self._xrdf_cmds(robot_name=robot_name) + xrdf_cmds=merge_xacro_cmds(prev_cmds=xrdf_cmds, + new_cmds=custom_xacro_args) + self._urdf_dump_paths[robot_name]=generate_urdf(robot_name=robot_name, + xacro_path=urdf_path, + dump_path=self._descr_dump_path, + xrdf_cmds=xrdf_cmds) + Journal.log(self.__class__.__name__, + "_generate_rob_descriptions", + "generating SRDF for robot "+ f"{robot_name}, from SRDF {srdf_path}...", + LogType.STAT, + throw_when_excep = True) + # we also generate SRDF files, which are useful for control + self._srdf_dump_paths[robot_name]=generate_srdf(robot_name=robot_name, + xacro_path=srdf_path, + dump_path=self._descr_dump_path, + xrdf_cmds=xrdf_cmds) + + def _xrdf_cmds(self, robot_name:str): + urdfpath=self._robot_urdf_paths[robot_name] + # we assume directory tree of the robot package is like + # robot-ros-pkg/robot_urdf/urdf/robot.urdf.xacro + parts = urdfpath.split('/') + 
urdf_descr_root_path = '/'.join(parts[:-2]) + cmds = get_xrdf_cmds(urdf_descr_root_path=urdf_descr_root_path) + return cmds + + @abstractmethod + def current_tstep(self) -> int: + pass + + @abstractmethod + def world_time(self, robot_name: str) -> float: + return self.cluster_sim_step_counters[robot_name]*self.physics_dt() + + @abstractmethod + def is_running(self) -> bool: + pass + + @abstractmethod + def _get_contact_f(self, + robot_name: str, + contact_link: str, + env_indxs: torch.Tensor) -> torch.Tensor: + return None + + @abstractmethod + def physics_dt(self) -> float: + pass + + @abstractmethod + def rendering_dt(self) -> float: + pass + + @abstractmethod + def set_physics_dt(self, physics_dt:float): + pass + + @abstractmethod + def set_rendering_dt(self, rendering_dt:float): + pass + + @abstractmethod + def _robot_jnt_names(self, robot_name: str) -> List[str]: + pass + + @abstractmethod + def _read_root_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + # IMPORTANT: Child interfaces should provide root quaternions in w, x, y, z convention. 
+ pass + + @abstractmethod + def _read_jnts_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + pass + + @abstractmethod + def _init_robots_state(self): + pass + + @abstractmethod + def _reset_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + randomize: bool = False): + pass + + @abstractmethod + def _init_world(self): + pass + + @abstractmethod + def _reset_sim(self) -> None: + pass + + @abstractmethod + def _set_jnts_to_homing(self, robot_name: str): + pass + + @abstractmethod + def _set_root_to_defconfig(self, robot_name: str): + pass + + @abstractmethod + def _parse_env_opts(self): + pass + + @abstractmethod + def _pre_setup(self): + pass + + @abstractmethod + def _generate_jnt_imp_control(self) -> JntImpCntrlChild: + pass + + @abstractmethod + def _render_sim(self, mode:str="human") -> None: + pass + + @abstractmethod + def _close(self) -> None: + pass + + @abstractmethod + def _step_world(self) -> None: + pass diff --git a/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/xbot2_basic.yaml b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/xbot2_basic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..142e3d84db18838f5fa449a9edc97d5b8c76b382 --- /dev/null +++ b/bundles/centauro/d2026_02_21_h14_m01_s10-CentauroCloopPartialUbNoWheels_FakePosTrackingEnv/xbot2_basic.yaml @@ -0,0 +1,86 @@ +XBotInterface: + urdf_path: $PWD/centauro.urdf + srdf_path: $PWD/centauro_old.srdf + # urdf_path: $PWD/centauro_dagana_right.urdf + # srdf_path: $PWD/centauro_dagana_right.srdf + +ModelInterface: + model_type: RBDL + is_model_floating_base: true + +motor_pd: + "j_arm*_1": [500, 10] + "j_arm*_2": [500, 10] + "j_arm*_3": [500, 10] + "j_arm*_4": [500, 10] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [3000, 30] + "hip_pitch_*": [3000, 30] + "knee_pitch_*": [3000, 30] + "ankle_pitch_*": [1000, 10] + 
"ankle_yaw_*": [300, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] + +startup_motor_pd: + "j_arm*_1": [500, 30] + "j_arm*_2": [500, 30] + "j_arm*_3": [500, 30] + "j_arm*_4": [500, 30] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [800, 80] + "hip_pitch_*": [800, 80] + "knee_pitch_*": [800, 80] + "ankle_pitch_*": [800, 80] + "ankle_yaw_*": [480, 50] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [800, 80] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] + +motor_vel: + j_wheel_*: [1] + neck_velodyne: [1] + +# hal +xbotcore_device_configs: + sim: $PWD/hal/centauro_gz.yaml + dummy: $PWD/hal/centauro_dummy.yaml + +# threads +xbotcore_threads: + rt_main: {sched: fifo , prio: 60, period: 0.001} + nrt_main: {sched: other, prio: 0 , period: 0.005} + +# plugins +xbotcore_plugins: + + homing: + thread: rt_main + type: homing + + ros_io: {thread: nrt_main, type: ros_io} + + ros_control: {thread: nrt_main, type: ros_control, params: {autostart: {type: bool, value: true}}} + +# global parameters +xbotcore_param: + /xbot/hal/joint_safety/delta_check_dt: 0.02 + /xbot/hal/joint_safety/filter_autostart: {value: true, type: bool} + # /xbot/hal/joint_safety/filter_cutoff_hz: {value: 1.0, type: double} + /xbot/hal/joint_safety/filter_safe_cutoff_hz: {value: 5.0, type: double} + /xbot/hal/joint_safety/filter_medium_cutoff_hz: {value: 15.0, type: double} + /xbot/hal/joint_safety/filter_fast_cutoff_hz: {value: 25.0, type: double} + /xbot/hal/enable_safety: {value: true, type: bool} \ No newline at end of file diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/bundle.yaml b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/bundle.yaml new file mode 100644 index 
0000000000000000000000000000000000000000..a49c05922ba770c8be569a603756d016faa5e1db --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/bundle.yaml @@ -0,0 +1,107 @@ +bundle_format: augmpc_model_bundle_v1 +bundle_name: d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv +checkpoint_file: d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv_model +preserved_training_cfgs: + - ibrido_run__2026_03_07_19_20_05_ID/training_cfg_centauro_no_yaw_ub_cloop.sh +framework: + repos: + AugMPC: + commit: f2ff28085ea76d2b548841de6f363f7183480f86 + branch: ibrido + remote: git@github.com:AndrePatri/AugMPC.git + dirty: false + AugMPCEnvs: + commit: 46c5d4161cb4b249b3a2e50c93c6bc2aa087f027 + branch: ibrido + remote: git@github.com:AndrePatri/AugMPCEnvs.git + dirty: false + AugMPCModels: + commit: 8b15c800c0f5684c61fdaf4847156ff71df61ebc + branch: main + remote: https://huggingface.co/AndrePatri/AugMPCModels + dirty: true + CentauroHybridMPC: + commit: 640889d4c3b9c8d360b5a3ccde6fc2bd8f139891 + branch: ibrido + remote: git@github.com:ADVRHumanoids/CentauroHybridMPC.git + dirty: false + EigenIPC: + commit: 7c1c3ecd08716e61ed164a6e0bb788aa716705ca + branch: devel + remote: git@github.com:AndrePatri/EigenIPC.git + dirty: false + IBRIDO: + commit: 0ceb5f3e0508b6ecdce12bcc8f0dcbcde8f29a93 + branch: main + remote: git@github.com:AndrePatri/IBRIDO.git + dirty: false + IsaacLab: + commit: a520a883ce996d855cc9d5255d71fd1c1307633f + branch: HEAD + remote: git@github.com:isaac-sim/IsaacLab.git + dirty: true + KyonRLStepping: + commit: 2bea2b8d70078974869df0549e90fc27ff31f851 + branch: ibrido + remote: git@github.com:ADVRHumanoids/KyonRLStepping.git + dirty: false + MPCHive: + commit: 45b4fc692850cef607020dda2e32fd708e7fff62 + branch: devel + remote: git@github.com:AndrePatri/MPCHive.git + dirty: false + MPCViz: + commit: 806d03efcc9d8ab1fb04991a159c19ba89bfb85b + branch: ros2_humble + remote: 
git@github.com:AndrePatri/MPCViz.git + dirty: false + adarl: + commit: f585499e49fa05fdd070a77f3211c0996599b87b + branch: ibrido + remote: git@github.com:c-rizz/adarl.git + dirty: false + casadi: + commit: 79cebe3b416dee22abc87de0076b80a5b97bd345 + branch: optional_float + remote: git@github.com:AndrePatri/casadi.git + dirty: true + horizon: + commit: 3b084317709ff9b88d4a07ac5436f5988b39eece + branch: ibrido + remote: git@github.com:AndrePatri/horizon.git + dirty: true + iit-centauro-ros-pkg: + commit: 6069807ebb37a6d7df39430a02685e09ed9b167a + branch: ibrido_ros2 + remote: git@github.com:AndrePatri/iit-centauro-ros-pkg.git + dirty: false + iit-dagana-ros-pkg: + commit: f7ecd6c84a0b7f3320c1b7de6449cbcd4445d2fe + branch: ibrido_ros2 + remote: git@github.com:AndrePatri/iit-dagana-ros-pkg.git + dirty: false + iit-kyon-description: + commit: 50fd7c8909b3ddfd1ebbe67c61d6b775b86df6b1 + branch: ibrido_ros2 + remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git + dirty: false + iit-kyon-description-mpc: + commit: 3a92bee28405172e8f6c628b4498703724d35bf5 + branch: ibrido_ros2 + remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git + dirty: false + iit-kyon-ros-pkg: + commit: 298917efffb63dbca540e0aedbd21b41bf393fbf + branch: ibrido_ros2_simple + remote: git@github.com:ADVRHumanoids/iit-kyon-ros-pkg.git + dirty: false + phase_manager: + commit: 9925d18c0d7a55d013cbaa4998c61d85a3a8944f + branch: ibrido + remote: git@github.com:AndrePatri/phase_manager.git + dirty: false + unitree_ros: + commit: c75a622b8782d11dd6aa4c2ebd3b0f9c13a56aae + branch: ibrido + remote: git@github.com:AndrePatri/unitree_ros.git + dirty: true diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID.srdf b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID.srdf new file mode 100644 index 
0000000000000000000000000000000000000000..adf957c2d948b8f62f760014fc1b9a95cb3e8cfb --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID.srdf @@ -0,0 +1,147 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID.urdf b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID.urdf new file mode 100644 index 0000000000000000000000000000000000000000..70256a67050070e647bdcf5cf3b142c14b5db50b --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_big_wheels_no_yaw_ub_2026_03_07_19_20_05_ID.urdf @@ -0,0 +1,1569 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_rhc.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..e2c5d1d97a8a91c01688ec97dd78a23ac8d25823 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_rhc.py @@ -0,0 +1,145 @@ +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc + +import numpy as np + +from typing import Dict + +from centaurohybridmpc.utils.sysutils import PathsGetter + +class CentauroRhc(HybridQuadRhc): + + def __init__(self, + srdf_path: str, + urdf_path: str, + robot_name: str, # used for shared memory namespaces + codegen_dir: str, + n_nodes: float = 31, + dt: float = 0.03, + injection_node: int = 10, + max_solver_iter = 1, # defaults to rt-iteration + open_loop: bool = True, + close_loop_all: bool = False, + dtype = np.float32, + verbose = False, + debug = False, + refs_in_hor_frame = True, + timeout_ms: int = 60000, + custom_opts: Dict = {} + ): + + paths = PathsGetter() + self._files_suffix="" + if open_loop: + self._files_suffix="_open" + + self._add_upper_body=False + if ("add_upper_body" in custom_opts) and \ + (custom_opts["add_upper_body"]): + self._add_upper_body=True + self._files_suffix+="_ub" + + config_path=paths.RHCCONFIGPATH_NO_WHEELS+self._files_suffix+".yaml" + + super().__init__(srdf_path=srdf_path, + urdf_path=urdf_path, + config_path=config_path, + robot_name=robot_name, # used for shared memory namespaces + codegen_dir=codegen_dir, + n_nodes=n_nodes, + dt=dt, + injection_node=injection_node, + max_solver_iter=max_solver_iter, # defaults to rt-iteration + open_loop=open_loop, + close_loop_all=close_loop_all, + dtype=dtype, + verbose=verbose, + debug=debug, + refs_in_hor_frame=refs_in_hor_frame, + timeout_ms=timeout_ms, + custom_opts=custom_opts) + + 
self._fail_idx_scale=1e-9 + self._fail_idx_thresh_open_loop=1e0 + self._fail_idx_thresh_close_loop=10 + + if open_loop: + self._fail_idx_thresh=self._fail_idx_thresh_open_loop + else: + self._fail_idx_thresh=self._fail_idx_thresh_close_loop + + # adding some additional config files for jnt imp control + self._rhc_fpaths.append(paths.JNT_IMP_CONFIG_XBOT+".yaml") + self._rhc_fpaths.append(paths.JNT_IMP_CONFIG+".yaml") + + def _set_rhc_pred_idx(self): + self._pred_node_idx=round((self._n_nodes-1)*2/3) + + def _set_rhc_cmds_idx(self): + self._rhc_cmds_node_idx=2 + + def _config_override(self): + paths = PathsGetter() + if ("control_wheels" in self._custom_opts): + if self._custom_opts["control_wheels"]: + self.config_path = paths.RHCCONFIGPATH_WHEELS+self._files_suffix+".yaml" + if ("fix_yaw" in self._custom_opts) and \ + (self._custom_opts["fix_yaw"]): + self.config_path = paths.RHCCONFIGPATH_WHEELS_NO_YAW+self._files_suffix+".yaml" + if ("replace_continuous_joints" in self._custom_opts) and \ + (not self._custom_opts["replace_continuous_joints"]): + # use continuous joints -> different config + self.config_path = paths.RHCCONFIGPATH_WHEELS_CONTINUOUS+self._files_suffix+".yaml" + if ("fix_yaw" in self._custom_opts) and \ + (self._custom_opts["fix_yaw"]): + self.config_path = paths.RHCCONFIGPATH_WHEELS_CONTINUOUS_NO_YAW+self._files_suffix+".yaml" + + else: + self._custom_opts["control_wheels"]=False + + if not self._custom_opts["control_wheels"]: + self._fixed_jnt_patterns=self._fixed_jnt_patterns+\ + ["j_wheel", + "ankle_yaw"] + self._custom_opts["replace_continuous_joints"]=True + + def _init_problem(self): + + if not self._custom_opts["control_wheels"]: + self._yaw_vertical_weight=120.0 + else: + self._yaw_vertical_weight=50.0 + + fixed_jnts_patterns=[ + "d435_head", + "velodyne_joint", + "dagana"] + + if not self._add_upper_body: + fixed_jnts_patterns.append("j_arm") + fixed_jnts_patterns.append("torso") + + if ("fix_yaw" in self._custom_opts) and \ + 
(self._custom_opts["fix_yaw"]): + fixed_jnts_patterns.append("ankle_yaw") + + flight_duration_sec=0.5 # [s] + flight_duration=int(flight_duration_sec/self._dt) + post_flight_duration_sec=0.2 # [s] + post_flight_duration=int(post_flight_duration_sec/self._dt) + + step_height=0.1 + if ("step_height" in self._custom_opts): + step_height=self._custom_opts["step_height"] + + super()._init_problem(fixed_jnt_patterns=fixed_jnts_patterns, + wheels_patterns=["wheel_"], + foot_linkname="wheel_1", + flight_duration=flight_duration, + post_flight_stance=post_flight_duration, + step_height=step_height, + keep_yaw_vert=True, + yaw_vertical_weight=self._yaw_vertical_weight, + vertical_landing=True, + vertical_land_weight=10.0, + phase_force_reg=5e-2, + vel_bounds_weight=1.0) \ No newline at end of file diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_rhc_wheels_continuous_no_yaw_ub.yaml b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_rhc_wheels_continuous_no_yaw_ub.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f662505aa1923ec4f910b7a66a9d2f08440532b --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/centauro_rhc_wheels_continuous_no_yaw_ub.yaml @@ -0,0 +1,344 @@ +solver: + type: ilqr + ipopt.linear_solver: ma57 + ipopt.tol: 0.1 +# ilqr.merit_der_threshold: 1e-3 +# ilqr.defect_norm_threshold: 1e-3 + ipopt.constr_viol_tol: 0.01 + ilqr.constraint_violation_threshold: 1e-2 +# ipopt.hessian_approximation: exact + ipopt.print_level: 5 + ipopt.suppress_all_output: 'yes' + ipopt.sb: 'yes' + ilqr.suppress_all_output: 'yes' + ilqr.codegen_enabled: true + ilqr.codegen_workdir: /tmp/tyhio + ilqr.enable_gn: true + ilqr.hxx_reg_base: 0.0 + ilqr.n_threads: 0 + print_time: 0 + +constraints: + - contact_1 + - contact_2 + - contact_3 + - contact_4 + +costs: + - z_contact_1 + - z_contact_2 + - 
z_contact_3 + - z_contact_4 + # - vz_contact_1 + # - vz_contact_2 + # - vz_contact_3 + # - vz_contact_4 + # - xy_contact_1 + # - xy_contact_2 + # - xy_contact_3 + # - xy_contact_4 + # - vxy_contact_1 + # - vxy_contact_2 + # - vxy_contact_3 + # - vxy_contact_4 + - base_lin_velxy + - base_lin_velz + - base_omega + - base_capture + - joint_posture_capture + - v_regularization + - a_regularization + # - force_regularization + +.define: + - &w_base_omega 15. + - &w_base_vxy 20. + - &w_base_vz 20. + - &w_base_z 15. + - &w_contact_z 225.0 + - &w_contact_vz 250.0 + - &w_contact_xy 60.0 + - &w_contact_vxy 50.0 + - &w_base_capture 200. + - &wheel_radius 0.124 + +base_lin_velxy: + type: Cartesian + distal_link: base_link + indices: [0, 1] + nodes: ${range(1, N-5)} + cartesian_type: velocity + weight: *w_base_vxy + +base_lin_velz: + type: Cartesian + distal_link: base_link + indices: [2] + nodes: ${range(1, N-5)} + cartesian_type: velocity + weight: *w_base_vz + +base_omega: + type: Cartesian + distal_link: base_link + indices: [3, 4, 5] + nodes: ${range(1, N-5)} + cartesian_type: velocity + weight: *w_base_omega + +base_capture: + type: Cartesian + distal_link: base_link + indices: [0, 1, 2, 3, 4, 5] + nodes: ${range(N-5, N+1)} + cartesian_type: velocity + weight: *w_base_capture + +# =============================== + +rolling_contact_1: + type: Rolling + frame: wheel_1 + radius: *wheel_radius + +rolling_contact_2: + type: Rolling + frame: wheel_2 + radius: *wheel_radius + +rolling_contact_3: + type: Rolling + frame: wheel_3 + radius: *wheel_radius + +rolling_contact_4: + type: Rolling + frame: wheel_4 + radius: *wheel_radius + +# ================================== + +interaction_contact_1: + type: VertexForce + frame: contact_1 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_1 + +interaction_contact_2: + type: VertexForce + frame: contact_2 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_2 + 
+interaction_contact_3: + type: VertexForce + frame: contact_3 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_3 + +interaction_contact_4: + type: VertexForce + frame: contact_4 + fn_min: 100.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_4 + +contact_1: + type: Contact + subtask: [interaction_contact_1, rolling_contact_1] + +contact_2: + type: Contact + subtask: [interaction_contact_2, rolling_contact_2] + +contact_3: + type: Contact + subtask: [interaction_contact_3, rolling_contact_3] + +contact_4: + type: Contact + subtask: [interaction_contact_4, rolling_contact_4] + +joint_posture_capture: + type: Postural + weight: [45.0, 10.0, 10.0, 10.0, + 45.0, 10.0, 10.0, 10.0, + 45.0, 10.0, 10.0, 10.0, + 45.0, 10.0, 10.0, 10.0, + 55.0, + 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, + 25.0, 25.0, 25.0, 25.0, 25.0, 25.0] + indices: [0, 1, 2, 3, + 6, 7, 8, 9, + 12, 13, 14, 15, + 18, 19, 20, 21, + 24, + 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36] + nodes: ${range(N-5, N+1)} + +v_regularization: + type: Regularization + variable_name: v + nodes: ${range(0, N+1)} + indices: [0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 21, 23, 24, 25, + 26, + 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38] + weight: [2e-1, 2e-1, 2e-1, 2e-1, 2e-1, 2e-1, + 1e0, 8e-1, 8e-1, 8e-1, 1e0, + 1e0, 8e-1, 8e-1, 8e-1, 1e0, + 1e0, 8e-1, 8e-1, 8e-1, 1e0, + 1e0, 8e-1, 8e-1, 8e-1, 1e0, + 2e0, + 2e0, 2e0, 2e0, 2e0, 2e0, 2e0, + 2e0, 2e0, 2e0, 2e0, 2e0, 2e0] + +a_regularization: + type: Regularization + variable_name: a + nodes: ${range(0, N+1)} + indices: [0, 1, 2, 3, 4, 5, + 6, 7, 8, 9, 10, + 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, + 21, 21, 23, 24, 25, + 26, + 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38] + weight: [4e-1, 4e-1, 4e-1, 4e-1, 4e-1, 4e-1, + 4e-1, 4e-1, 4e-1, 4e-1, 8e-1, + 4e-1, 4e-1, 4e-1, 4e-1, 8e-1, + 4e-1, 4e-1, 4e-1, 4e-1, 8e-1, + 4e-1, 4e-1, 4e-1, 4e-1, 8e-1, + 4e-1, + 4e-1, 4e-1, 4e-1, 
4e-1, 4e-1, 4e-1, + 4e-1, 4e-1, 4e-1, 4e-1, 4e-1, 4e-1] + +# flight phase traj tracking +z_contact_1: + type: Cartesian + distal_link: contact_1 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +z_contact_2: + type: Cartesian + distal_link: contact_2 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +z_contact_3: + type: Cartesian + distal_link: contact_3 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +z_contact_4: + type: Cartesian + distal_link: contact_4 + indices: [2] + cartesian_type: position + weight: *w_contact_z + +xy_contact_1: + type: Cartesian + distal_link: contact_1 + indices: [0, 1] + cartesian_type: position + weight: *w_contact_xy + +xy_contact_2: + type: Cartesian + distal_link: contact_2 + indices: [0, 1] + cartesian_type: position + weight: *w_contact_xy + +xy_contact_3: + type: Cartesian + distal_link: contact_3 + indices: [0, 1] + cartesian_type: position + weight: *w_contact_xy + +xy_contact_4: + type: Cartesian + distal_link: contact_4 + indices: [0, 1] + cartesian_type: position + weight: *w_contact_xy + +vz_contact_1: + type: Cartesian + distal_link: contact_1 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vz_contact_2: + type: Cartesian + distal_link: contact_2 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vz_contact_3: + type: Cartesian + distal_link: contact_3 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vz_contact_4: + type: Cartesian + distal_link: contact_4 + indices: [2] + cartesian_type: velocity + weight: *w_contact_vz + +vxy_contact_1: + type: Cartesian + distal_link: contact_1 + indices: [0, 1] + cartesian_type: velocity + weight: *w_contact_vxy + +vxy_contact_2: + type: Cartesian + distal_link: contact_2 + indices: [0, 1] + cartesian_type: velocity + weight: *w_contact_vxy + +vxy_contact_3: + type: Cartesian + distal_link: contact_3 + indices: [0, 1] + cartesian_type: velocity + weight: *w_contact_vxy + 
+vxy_contact_4: + type: Cartesian + distal_link: contact_4 + indices: [0, 1] + cartesian_type: velocity + weight: *w_contact_vxy \ No newline at end of file diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv_model b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv_model new file mode 100644 index 0000000000000000000000000000000000000000..2f7d9a999d5c99002248fd95fb4e7a6aa0c0eeb0 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv_model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f8a90b01e55d417a703ebfbad4ff339379de5ed0ea39eac7a158d11e88976df +size 4449849 diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/fake_pos_tracking_env.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/fake_pos_tracking_env.py new file mode 100644 index 0000000000000000000000000000000000000000..d6cb165e2eeb89dd9dc87b5895e8628a0f6e5d8a --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/fake_pos_tracking_env.py @@ -0,0 +1,202 @@ +import os + +from typing import Dict + +import torch + +from EigenIPC.PyEigenIPC import VLevel + +from mpc_hive.utilities.math_utils_torch import world2base_frame + +from aug_mpc_envs.training_envs.twist_tracking_env import TwistTrackingEnv + +class FakePosTrackingEnv(TwistTrackingEnv): + """Converts random planar position goals into twist references so the agent learns to drive the robot toward targets while managing contact scheduling.""" + + def __init__(self, + namespace: str, + actions_dim: int = 10, + verbose: bool = False, + vlevel: 
VLevel = VLevel.V1, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + debug: bool = True, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + self._add_env_opt(env_opts, "max_distance", default=5.0) # [m] + self._add_env_opt(env_opts, "min_distance", default=0.0) # [m] + self._add_env_opt(env_opts, "max_vref", default=1.0) # [m/s] + self._add_env_opt(env_opts, "max_dp", default=5.0) # [m] after this, v ref saturates + self._add_env_opt(env_opts, "max_dt", default=env_opts["max_dp"]/ env_opts["max_vref"]) + + TwistTrackingEnv.__init__(self, + namespace=namespace, + actions_dim=actions_dim, # twist + contact flags + verbose=verbose, + vlevel=vlevel, + use_gpu=use_gpu, + dtype=dtype, + debug=debug, + override_agent_refs=override_agent_refs, + timeout_ms=timeout_ms, + env_opts=env_opts) + + def get_file_paths(self): + paths=TwistTrackingEnv.get_file_paths(self) + paths.append(os.path.abspath(__file__)) + return paths + + def _custom_post_init(self): + TwistTrackingEnv._custom_post_init(self) + + # position targets to be reached (wrt robot's pos at ep start) + self._p_trgt_w=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2].detach().clone() + self._p_delta_w=self._p_trgt_w.detach().clone() + self._dp_norm=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device) + self._dp_versor=self._p_trgt_w.detach().clone() + + self._trgt_d=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device) + self._trgt_theta=torch.zeros((self._n_envs, 1),dtype=self._dtype,device=self._device) + + def _update_loc_twist_refs(self): + # this is called at each env substep + + self._compute_twist_ref_w() + + if not self._override_agent_refs: + agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p", + gpu=self._use_gpu) + agent_p_ref_current[:, 0:2]=self._p_trgt_w + + # then convert it to base ref local for the agent + robot_q = 
self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + # rotate agent ref from world to robot base + world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q, + t_out=self._agent_twist_ref_current_base_loc) + # write it to agent refs tensors + self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc, + gpu=self._use_gpu) + + def _compute_twist_ref_w(self, env_indxs: torch.Tensor = None): + + # angular refs are not altered + if env_indxs is None: + # we update the position error using the current base position + self._p_delta_w[:, :]=self._p_trgt_w-\ + self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + + self._dp_norm[:, :]=self._p_delta_w.norm(dim=1,keepdim=True)+1e-6 + self._dp_versor[:, :]=self._p_delta_w/self._dp_norm + + # apply for vref saturation + to_be_saturated=self._dp_norm[:, :]>self._env_opts["max_dp"] + self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"] + + # we compute the twist refs for the agent depending of the position error + self._agent_twist_ref_current_w[:, 0:2]=self._dp_norm*self._dp_versor/self._env_opts["max_dt"] + self._agent_twist_ref_current_w[:, 2:3]=0 # no vertical vel + + # apply pof0 using last value of bernoully coeffs + self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel + self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega + else: + self._p_delta_w[env_indxs, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[env_indxs, 0:2] -\ + self._p_trgt_w[env_indxs, :] + + # apply for vref saturation + to_be_saturated=torch.logical_and((self._dp_norm[:, :]>self._env_opts["max_dp"]).flatten(),env_indxs) + self._dp_norm[to_be_saturated.flatten(), :]=self._env_opts["max_dp"] + + self._dp_norm[env_indxs, :]=self._p_delta_w[env_indxs, :].norm(dim=1,keepdim=True)+1e-6 + 
self._dp_versor[env_indxs, :]=self._p_delta_w[env_indxs, :]/self._dp_norm[env_indxs, :] + + self._agent_twist_ref_current_w[env_indxs, 0:2]=self._dp_norm[env_indxs, :]*self._dp_versor[env_indxs, :]/self._env_opts["max_dt"] + self._agent_twist_ref_current_w[env_indxs, 2:3]=0 # no vertical vel + + # apply pof0 using last value of bernoully coeffs + self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :] + self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega + + def _override_refs(self, + env_indxs: torch.Tensor = None): + + # runs at every post_step + self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem + if self._use_gpu: + # copies latest refs to GPU + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False) + + agent_p_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="p", + gpu=self._use_gpu) + + agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega", + gpu=self._use_gpu) + + # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + \ + # agent_p_ref_current[:, 0:2] + self._p_trgt_w[:, :]=agent_p_ref_current[:, 0:2] # set p target target from shared mem + + self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. 
vel target from shared mem + + def _debug_agent_refs(self): + if self._use_gpu: + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) + self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True) + + def _randomize_task_refs(self, + env_indxs: torch.Tensor = None): + + # we randomize the target position/omega in world frame + if env_indxs is None: + self._trgt_d.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"]) + self._trgt_theta.uniform_(0.0, 2*torch.pi) + + self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] +\ + torch.cat((self._trgt_d*torch.cos(self._trgt_theta) + ,self._trgt_d*torch.sin(self._trgt_theta)), dim=1) + + # randomize just omega + random_uniform=torch.full_like(self._agent_twist_ref_current_w[:, 3:6], fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[:, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6] + + # sample for all envs pof0 + if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients + torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"] + torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega) + + else: + + if env_indxs.any(): + integer_idxs=torch.nonzero(env_indxs).flatten() + + trgt_d_selected=self._trgt_d[integer_idxs, :] + trgt_d_selected.uniform_(self._env_opts["min_distance"], self._env_opts["max_distance"]) + self._trgt_d[integer_idxs, :]=trgt_d_selected + + trgt_theta_selected=self._trgt_theta[integer_idxs, :] + trgt_theta_selected.uniform_(0.0, 2*torch.pi) + self._trgt_theta[integer_idxs, :]=trgt_theta_selected + + self._p_trgt_w[integer_idxs, 0:1]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 0:1] +\ + self._trgt_d[integer_idxs, 
:]*torch.cos(self._trgt_theta[integer_idxs, :]) + self._p_trgt_w[integer_idxs, 1:2]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[integer_idxs, 1:2] +\ + self._trgt_d[integer_idxs, :]*torch.sin(self._trgt_theta[integer_idxs, :]) + + # randomize just omega + random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, 3:6], fill_value=0.0) + torch.nn.init.uniform_(random_uniform, a=-1, b=1) + self._agent_twist_ref_current_w[env_indxs, 3:6] = random_uniform*self._twist_ref_scale[:, 3:6] + self._twist_ref_offset[:, 3:6] + + # sample for all envs pof0, then reset to 1 for envs which are not to be randomized + if self._env_opts["use_pof0"]: # sample from bernoulli distribution and update coefficients + torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"] + torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega) + self._bernoulli_coeffs_linvel[~env_indxs, :]=1 + self._bernoulli_coeffs_omega[~env_indxs, :]=1 + + self._compute_twist_ref_w(env_indxs=env_indxs) # update linear vel twist refs based on pos error + + diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/hybrid_quad_rhc.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/hybrid_quad_rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..5cec4e610e97e3100c1468ee4e828fe2e64a28c4 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/hybrid_quad_rhc.py @@ -0,0 +1,1324 @@ +from mpc_hive.controllers.rhc import RHController + +from aug_mpc.controllers.rhc.horizon_based.horizon_imports import * + +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs +from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager + +from EigenIPC.PyEigenIPC import VLevel +from 
EigenIPC.PyEigenIPC import Journal, LogType + +import numpy as np + +import os + +import time + +from typing import Dict, List + +class HybridQuadRhc(RHController): + + def __init__(self, + srdf_path: str, + urdf_path: str, + config_path: str, + robot_name: str, # used for shared memory namespaces + codegen_dir: str, + n_nodes:float = 25, + injection_node:int = 10, + dt: float = 0.02, + max_solver_iter = 1, # defaults to rt-iteration + open_loop: bool = True, + close_loop_all: bool = False, + dtype = np.float32, + verbose = False, + debug = False, + refs_in_hor_frame = True, + timeout_ms: int = 60000, + custom_opts: Dict = {}): + + self._refs_in_hor_frame = refs_in_hor_frame + + self._injection_node = injection_node + + self._open_loop = open_loop + self._close_loop_all = close_loop_all + + self._codegen_dir = codegen_dir + if not os.path.exists(self._codegen_dir): + os.makedirs(self._codegen_dir) + # else: + # # Directory already exists, delete it and recreate + # shutil.rmtree(self._codegen_dir) + # os.makedirs(self._codegen_dir) + + self.step_counter = 0 + self.sol_counter = 0 + + self.max_solver_iter = max_solver_iter + + self._timer_start = time.perf_counter() + self._prb_update_time = time.perf_counter() + self._phase_shift_time = time.perf_counter() + self._task_ref_update_time = time.perf_counter() + self._rti_time = time.perf_counter() + + self.robot_name = robot_name + + self.config_path = config_path + + self.urdf_path = urdf_path + # read urdf and srdf files + with open(self.urdf_path, 'r') as file: + self.urdf = file.read() + self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + + self._c_timelines = dict() + self._f_reg_timelines = dict() + + self._custom_opts={"replace_continuous_joints": False, + "use_force_feedback": False, + "lin_a_feedback": False, + "is_open_loop": self._open_loop, # fully open (just for db) + "fully_closed": False, # closed loop with full feedback (just for db) + "closed_partial": False, # closed loop with partial 
feedback + "adaptive_is": True, # closed loop with adaptation + "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase + "alpha_from_outside": False, # alpha set ext. from shared memory + "alpha_half": 1.0, + "only_vel_wheels": True, # whether wheels (if present) are just vel controlled + "use_jnt_v_feedback": False + } + + self._custom_opts.update(custom_opts) + + self._alpha_half=self._custom_opts["alpha_half"] + + if self._open_loop: + self._custom_opts["fully_closed"]=False + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + else: + self._custom_opts["is_open_loop"]=False + if self._custom_opts["fully_closed"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["closed_partial"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["fully_closed"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["adaptive_is"]: + self._custom_opts["closed_partial"]=False + self._custom_opts["fully_closed"]=False + + super().__init__(srdf_path=srdf_path, + n_nodes=n_nodes, + dt=dt, + namespace=self.robot_name, + dtype=dtype, + verbose=verbose, + debug=debug, + timeout_ms=timeout_ms) + + self._rhc_fpaths.append(self.config_path) + + def _config_override(self): + pass + + def _post_problem_init(self): + + self.rhc_costs={} + self.rhc_constr={} + + self._fail_idx_scale=0.0 + self._expl_idx_window_size=int(1*self._n_nodes) + self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size)) + self._expl_idx_counter=0 + self._expl_idx_buffer_counter=0 + + self._pred_node_idx=self._n_nodes-1 + + self._nq=self.nq() + self._nq_jnts=self._nq-7# assuming floating base + self._nv=self.nv() + self._nv_jnts=self._nv-6 + + self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype) + self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype) + self._alphas_twist_root=np.zeros((6, 1), 
dtype=self._dtype) + self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype) + self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype) + self._alphas_q_root[:, :]=1.0 # default to all open + self._alphas_q_jnts[:, :]=1.0 + self._alphas_twist_root[:, :]=1.0 + self._alphas_v_jnts[:, :]=1.0 + self._alphas_a[:, :]=1.0 + + def _init_problem(self, + fixed_jnt_patterns: List[str] = None, + wheels_patterns: List[str] = None, + foot_linkname: str = None, + flight_duration: int = 10, + post_flight_stance: int = 3, + step_height: float = 0.12, + keep_yaw_vert: bool = False, + yaw_vertical_weight: float = 2.0, + vertical_landing: bool = False, + vertical_land_weight: float = 1.0, + phase_force_reg: float = 1e-2, + vel_bounds_weight: float = 1.0): + + self._fixed_jnt_patterns=fixed_jnt_patterns + + self._config_override() + + Journal.log(self.__class__.__name__, + "_init_problem", + f" Will use horizon config file at {self.config_path}", + LogType.INFO, + throw_when_excep=True) + + self._vel_bounds_weight=vel_bounds_weight + self._phase_force_reg=phase_force_reg + self._yaw_vertical_weight=yaw_vertical_weight + self._vertical_land_weight=vertical_land_weight + # overrides parent + self._prb = Problem(self._n_intervals, + receding=True, + casadi_type=cs.SX) + self._prb.setDt(self._dt) + + if "replace_continuous_joints" in self._custom_opts: + # continous joints are parametrized in So2 + if self._custom_opts["replace_continuous_joints"]: + self.urdf = self.urdf.replace('continuous', 'revolute') + else: + self.urdf = self.urdf.replace('continuous', 'revolute') + + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + + self._init_robot_homer() + + # handle fixed joints + fixed_joint_map={} + if self._fixed_jnt_patterns is not None: + for jnt_name in self._get_robot_jnt_names(): + for fixed_jnt_pattern in self._fixed_jnt_patterns: + if fixed_jnt_pattern in 
jnt_name: + fixed_joint_map.update({f"{jnt_name}": + self._homer.get_homing_val(jnt_name=jnt_name)}) + break # do not search for other pattern matches + + if not len(fixed_joint_map)==0: # we need to recreate kin dyn and homers + Journal.log(self.__class__.__name__, + "_init_problem", + f"Will fix following joints: \n{str(fixed_joint_map)}", + LogType.INFO, + throw_when_excep=True) + # with the fixed joint map + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map) + # assign again controlled joints names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + # updated robot homer for controlled joints + self._init_robot_homer() + + # handle continuous joints (need to change homing and retrieve + # cont jnts indexes) and homing + self._continuous_joints=self._get_continuous_jnt_names() + # reduced + self._continuous_joints_idxs=[] + self._continuous_joints_idxs_cos=[] + self._continuous_joints_idxs_sin=[] + self._continuous_joints_idxs_red=[] + self._rev_joints_idxs=[] + self._rev_joints_idxs_red=[] + # qfull + self._continuous_joints_idxs_qfull=[] + self._continuous_joints_idxs_cos_qfull=[] + self._continuous_joints_idxs_sin_qfull=[] + self._continuous_joints_idxs_red_qfull=[] + self._rev_joints_idxs_qfull=[] + self._rev_joints_idxs_red_qfull=[] + jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints)) + jnt_names=self._get_robot_jnt_names() + for i in range(len(jnt_names)): + jnt=jnt_names[i] + index=self._get_jnt_id(jnt)# accounting for floating joint + homing_idx=index-7 # homing is only for actuated joints + homing_value=self._homer.get_homing_val(jnt) + if jnt in self._continuous_joints: + jnt_homing[homing_idx]=np.cos(homing_value).item() + jnt_homing[homing_idx+1]=np.sin(homing_value).item() + # just actuated joints + self._continuous_joints_idxs.append(homing_idx) # cos + self._continuous_joints_idxs.append(homing_idx+1) # sin + self._continuous_joints_idxs_cos.append(homing_idx) 
+ self._continuous_joints_idxs_sin.append(homing_idx+1) + self._continuous_joints_idxs_red.append(i) + # q full + self._continuous_joints_idxs_qfull.append(index) # cos + self._continuous_joints_idxs_qfull.append(index+1) # sin + self._continuous_joints_idxs_cos_qfull.append(index) + self._continuous_joints_idxs_sin_qfull.append(index+1) + self._continuous_joints_idxs_red_qfull.append(i+7) + else: + jnt_homing[homing_idx]=homing_value + # just actuated joints + self._rev_joints_idxs.append(homing_idx) + self._rev_joints_idxs_red.append(i) + # q full + self._rev_joints_idxs_qfull.append(index) + self._rev_joints_idxs_red_qfull.append(i+7) + + self._jnts_q_reduced=None + if not len(self._continuous_joints)==0: + cont_joints=", ".join(self._continuous_joints) + + Journal.log(self.__class__.__name__, + "_init_problem", + f"The following continuous joints were found: \n{cont_joints}", + LogType.INFO, + throw_when_excep=True) + # preallocating data + self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype) + self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype) + self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + else: + self._custom_opts["replace_continuous_joints"]=True + Journal.log(self.__class__.__name__, + "_init_problem", + f"No continuous joints were found.", + LogType.INFO, + throw_when_excep=True) + + # retrieve wheels indexes (not considering continuous joints, + # ok just for v, eff, etc..) 
+ self._wheel_patterns=wheels_patterns + self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns) + self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81] + + # we can create an init for the base + init = self._base_init.tolist() + jnt_homing + + if foot_linkname is not None: + FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height + ground_level = FK(q=init)['ee_pos'] + self._base_init[2] = -ground_level[2] # override init + + self._model = FullModelInverseDynamics(problem=self._prb, + kd=self._kin_dyn, + q_init=self._homer.get_homing_map(), + base_init=self._base_init) + + self._ti = TaskInterface(prb=self._prb, + model=self._model, + max_solver_iter=self.max_solver_iter, + debug = self._debug, + verbose = self._verbose, + codegen_workdir = self._codegen_dir) + self._ti.setTaskFromYaml(self.config_path) + + # setting initial base pos ref if exists + base_pos = self._ti.getTask('base_height') + if base_pos is not None: + base_pos.setRef(np.atleast_2d(self._base_init).T) + + self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes????? 
+ + self._gm = GaitManager(self._ti, + self._pm, + self._injection_node, + keep_yaw_vert=keep_yaw_vert, + yaw_vertical_weight=self._yaw_vertical_weight, + vertical_landing=vertical_landing, + landing_vert_weight=self._vertical_land_weight, + phase_force_reg=self._phase_force_reg, + custom_opts=self._custom_opts, + flight_duration=flight_duration, + post_flight_stance=post_flight_stance, + step_height=step_height, + dh=0.0) + + self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0) + self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0) + self._ti.model.q.setInitialGuess(self._ti.model.q0) + self._ti.model.v.setInitialGuess(self._ti.model.v0) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f=len(cforces) + for c in cforces: + c.setInitialGuess(np.array(self._f0)/n_contact_f) + + vel_lims = self._model.kd.velocityLimits() + import horizon.utils as utils + self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:])) + self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:])) + + self._meas_lin_a_par=None + # if self._custom_opts["lin_a_feedback"]: + # # acceleration feedback on first node + # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback", + # dim=3, nodes=0) + # base_lin_a_prb=self._prb.getInput().getVars()[0:3] + # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par, + # nodes=[0]) + + # if not self._open_loop: + # # we create a residual cost to be used as an attractor to the measured state on the first node + # # hard constraints injecting meas. states are pure EVIL! 
+ # prb_state=self._prb.getState() + # full_state=prb_state.getVars() + # state_dim=prb_state.getBounds()[0].shape[0] + # meas_state=self._prb.createParameter(name="measured_state", + # dim=state_dim, nodes=0) + # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state), + # nodes=[0]) + + self._ti.finalize() + self._ti.bootstrap() + + self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes + self._ti.load_initial_guess() + + self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we + # know n_dofs -> we assign it (by default = None) + + self.n_contacts = len(self._model.cmap.keys()) + + # remove variables bounds (before they were necessary to have a good + # quality bootstrap solution) + self._q_inf=np.zeros((self.nq(), 1)) + self._q_inf[:, :]=np.inf + self._v_inf=np.zeros((self.nv(), 1)) + self._v_inf[:, :]=np.inf + self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0) + self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0) + + # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb) + + def get_file_paths(self): + # can be overriden by child + paths = super().get_file_paths() + return paths + + def _get_quat_remap(self): + # overrides parent + return [1, 2, 3, 0] # mapping from robot quat. 
to Horizon's quaternion convention + + def _zmp(self, model): + + num = cs.SX([0, 0]) + den = cs.SX([0]) + pos_contact = dict() + force_val = dict() + + q = cs.SX.sym('q', model.nq) + v = cs.SX.sym('v', model.nv) + a = cs.SX.sym('a', model.nv) + + com = model.kd.centerOfMass()(q=q, v=v, a=a)['com'] + + n = cs.SX([0, 0, 1]) + for c in model.fmap.keys(): + pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos'] + force_val[c] = cs.SX.sym('force_val', 3) + num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n) + den += cs.dot(force_val[c], n) + + zmp = com[0:2] + (num / den) + input_list = [] + input_list.append(q) + input_list.append(v) + input_list.append(a) + + for elem in force_val.values(): + input_list.append(elem) + + f = cs.Function('zmp', input_list, [zmp]) + return f + + def _add_zmp(self): + + input_zmp = [] + + input_zmp.append(self._model.q) + input_zmp.append(self._model.v) + input_zmp.append(self._model.a) + + for f_var in self._model.fmap.values(): + input_zmp.append(f_var) + + c_mean = cs.SX([0, 0, 0]) + for c_name, f_var in self._model.fmap.items(): + fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos'] + c_mean += fk_c_pos + + c_mean /= len(self._model.cmap.keys()) + + zmp_nominal_weight = 10. 
+ zmp_fun = self._zmp(self._model)(*input_zmp) + + if 'wheel_joint_1' in self._model.kd.joint_names(): + zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2])) + + def _quaternion_multiply(self, + q1, q2): + x1, y1, z1, w1 = q1 + x2, y2, z2, w2 = q2 + w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 + x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 + y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 + z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 + return np.array([x, y, z, w]) + + def _get_continuous_jnt_names(self): + import xml.etree.ElementTree as ET + root = ET.fromstring(self.urdf) + continuous_joints = [] + for joint in root.findall('joint'): + joint_type = joint.get('type') + if joint_type == 'continuous': + joint_name = joint.get('name') + continuous_joints.append(joint_name) + return continuous_joints + + def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]): + jnt_names=self._get_robot_jnt_names() + wheels_idxs=[] + for i in range(len(jnt_names)): + jnt_name=jnt_names[i] + for wheel_pattern in wheel_patterns: + if wheel_pattern in jnt_name: + wheels_idxs.append(i) + break + return wheels_idxs + + def _get_jnt_id(self, jnt_name): + return self._kin_dyn.joint_iq(jnt_name) + + def _init_rhc_task_cmds(self): + + rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm, + robot_index_shm=self.controller_index, + robot_index_view=0, # when using optimize_mem the view if always of shape 1x... 
+ namespace=self.namespace, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + use_force_feedback=self._custom_opts["use_force_feedback"], + optimize_mem=True) + + rhc_refs.run() + + rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller) + rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap()) + + rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3], + q_ref=np.atleast_2d(self._base_init)[:, 3:7]) + + return rhc_refs + + def get_vertex_fnames_from_ti(self): + tasks=self._ti.task_list + contact_f_names=[] + for task in tasks: + if isinstance(task, ContactTask): + interaction_task=task.dynamics_tasks[0] + contact_f_names.append(interaction_task.vertex_frames[0]) + return contact_f_names + + def _get_contact_names(self): + # should get contact names from vertex frames + # list(self._ti.model.cmap.keys()) + return self.get_vertex_fnames_from_ti() + + def _get_robot_jnt_names(self): + + joints_names = self._kin_dyn.joint_names() + to_be_removed = ["universe", + "reference", + "world", + "floating", + "floating_base"] + for name in to_be_removed: + if name in joints_names: + joints_names.remove(name) + + return joints_names + + def _get_ndofs(self): + return len(self._model.joint_names) + + def nq(self): + return self._kin_dyn.nq() + + def nv(self): + return self._kin_dyn.nv() + + def _get_robot_mass(self): + + return self._kin_dyn.mass() + + def _get_root_full_q_from_sol(self, node_idx=1): + + root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype) + + np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) + np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full) + + return root_q_full + + def _get_full_q_from_sol(self, node_idx=1): + + return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype) + + def _get_root_twist_from_sol(self, node_idx=1): + # provided in world frame + twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1, 
6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3] + # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6] + return twist_base_local + + def _get_root_a_from_sol(self, node_idx=0): + # provided in world frame + a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3] + # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6] + return a_base_local + + def _get_jnt_q_from_sol(self, node_idx=0, + reduce: bool = True, + clamp: bool = True): + + full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype) + + np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place + np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place + + if self._custom_opts["replace_continuous_joints"] or (not reduce): + if clamp: + return np.fmod(full_jnts_q, 2*np.pi) + else: + return full_jnts_q + else: + cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2) + # copy rev joint vals + self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1) + # and continuous + self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1) + return self._jnts_q_reduced + + def _get_jnt_v_from_sol(self, node_idx=1): + + jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place + # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place + + return jnt_v_sol + + def 
_get_jnt_a_from_sol(self, node_idx=1): + + return self._get_a_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + + def _get_jnt_eff_from_sol(self, node_idx=0): + + efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx) + + # if self._custom_opts["only_vel_wheels"]: + + jnt_efforts=efforts_on_node[6:, 0] + + if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v: + jnt_efforts[self._wheels_idxs_v]=0.0 + + return jnt_efforts.reshape(1, + self._nv_jnts).astype(self._dtype) + + def _get_rhc_cost(self): + + return self._ti.solution["opt_cost"] + + def _get_rhc_constr_viol(self): + + return self._ti.solution["residual_norm"] + + def _get_rhc_nodes_cost(self): + + cost = self._ti.solver_rti.getCostValOnNodes() + return cost.reshape((1, -1)) + + def _get_rhc_nodes_constr_viol(self): + + constr_viol = self._ti.solver_rti.getConstrValOnNodes() + return constr_viol.reshape((1, -1)) + + def _get_rhc_niter_to_sol(self): + + return self._ti.solution["n_iter2sol"] + + def _set_ig_bootstrap(self, + q_state: np.ndarray = None, + v_state: np.ndarray = None): + + xig = self._ti.solution['x_opt'].copy() + uig = self._ti.solution['u_opt'].copy() + + # Normalize and keep quaternion in the same hemisphere as the previous + # solution to avoid artificial 180-deg jumps in the bootstrap warm start. 
+ q_state_boot = q_state.copy() + q_prev = xig[3:7, 0] + q_now = q_state_boot[3:7, 0] + + q_now_norm = np.linalg.norm(q_now) + if q_now_norm > 1e-9: + q_state_boot[3:7, :] /= q_now_norm + else: + q_state_boot[3:7, :] = np.array([[0.0], [0.0], [0.0], [1.0]], dtype=self._dtype) + + q_prev_norm = np.linalg.norm(q_prev) + if q_prev_norm > 1e-9: + q_prev = q_prev / q_prev_norm + + q_now = q_state_boot[3:7, 0] + if np.dot(q_prev, q_now) < 0.0: + q_state_boot[3:7, :] *= -1.0 + + xig[0:self._nq, :] = q_state_boot + xig[self._nq:self._nq + self._nv, :] = 0.0 # 0 velocity on first nodes + uig[0:self._nv, :]=0.0 # 0 acceleration + + # assigning ig + self._prb.getState().setInitialGuess(xig) + self._prb.getInput().setInitialGuess(uig) + # self._prb.getVariables("a").setInitialGuess(np.zeros((self._nv, 1), dtype=self._dtype)) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f = len(cforces) + if n_contact_f == 0: + continue + f_guess = np.array(self._f0, dtype=self._dtype) / n_contact_f + for c in cforces: + c.setInitialGuess(f_guess) + + # print("initial guesses") + # print(self._nq) + # print(self._nv) + # print("q") + # qig=self._ti.model.q.getInitialGuess() + # print(qig.shape) + # print(qig) + # print("v") + # print(self._ti.model.v.getInitialGuess()) + # print("a") + # print(self._ti.model.a.getInitialGuess()) + # for _, cforces in self._ti.model.cmap.items(): + # for c in cforces: + # print("force") + # print(c.getInitialGuess()) + + return xig, uig + + def _set_ig(self): + + shift_num = -1 # shift data by one node + + x_opt = self._ti.solution['x_opt'] + u_opt = self._ti.solution['u_opt'] + + # building ig for state + xig = np.roll(x_opt, + shift_num, axis=1) # rolling state sol. + for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + xig[:, -1 - i] = x_opt[:, -1] + # building ig for inputs + uig = np.roll(u_opt, + shift_num, axis=1) # rolling state sol. 
+ for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + uig[:, -1 - i] = u_opt[:, -1] + + # assigning ig + self._prb.getState().setInitialGuess(xig) + self._prb.getInput().setInitialGuess(uig) + + return xig, uig + + def _update_open_loop(self, + bootstrap: bool = False): + + q_state, v_state, a_state=self._set_is_open() + + if not bootstrap: + self._set_ig() + else: + self._set_ig_bootstrap(q_state=q_state, v_state=v_state) + + # robot_state=xig[:, 0] + # # open loop update: + # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0 + # # is node 1 in the last opt solution) + + return q_state, v_state, a_state + + def _update_closed_loop(self, + bootstrap: bool = False): + + # set initial state + q_state=None + v_state=None + a_state=None + if self._custom_opts["adaptive_is"]: + # adaptive closed loop + q_state, v_state, a_state=self._set_is_adaptive() + elif self._custom_opts["fully_closed"]: + q_state, v_state, a_state=self._set_is_full() + elif self._custom_opts["closed_partial"]: + q_state, v_state, a_state=self._set_is_partial() + else: + Journal.log(self.__class__.__name__, + "_update_closed_loop", + "Neither adaptive_is, fully_closed, or closed_partial.", + LogType.EXCEP, + throw_when_excep = False) + q_state, v_state, a_state=self._set_is() + + # set initial guess for controller + if not bootstrap: + self._set_ig() + else: + self._set_ig_bootstrap(q_state=q_state, v_state=v_state) + + return q_state, v_state, a_state + + def _set_is_open(self): + + # overriding states with rhc data + q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1) + + twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1) + v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_q_full_rhc=q[0:7] # 
root full q + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_twist_rhc=vel[0:6] # lin v. + jnts_v_rhc=vel[6:] # jnts v + + self.rhc_refs.set_alpha(alpha=1.0) # fully open + + # close state on known quantities + root_q_full_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_twist_rhc.setBounds(lb=twist_root, + ub=twist_root, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((twist_root, v_jnts), + axis=0) + + return (q_state, v_state, None) + + def _set_is_full(self): + + # measurements + q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_full_q_rhc=q[0:7] # root p + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_full_q_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) # leaving lin v of the base free (estimated from constraints) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_partial(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", 
robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # overriding states with rhc data (-> all overridden state are open looop) + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc # position is always open loop + + if not self._custom_opts["estimate_v_root"]: + v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1) + # override v jnts with the ones from controller + if not self._custom_opts["use_jnt_v_feedback"]: + v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # root_twist_from_rhc=self._get_root_twist_from_sol(node_idx=1) + # root_v_from_rhc=root_twist_from_rhc[:, 0:3].reshape(-1, 1) + # root_omega_from_rhc=root_twist_from_rhc[:, 3:6].reshape(-1, 1) + # jnt_q_from_rhc=self._get_jnt_q_from_sol(node_idx=1,reduce=False,clamp=False).reshape(-1, 1) + # jnt_v_from_rhc=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + 
q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root, + ub=q_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints) + else: # get it from controller + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_adaptive(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = 
self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + # getting prediction defects + root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # close state on known quantities, estimate some (e.g. lin vel) and + # open loop if thing start to explode + alpha_now=1.0 + delta=0.0 + if self._custom_opts["alpha_from_outside"]: + alpha_now=self.rhc_refs.get_alpha() + else: # "autotuned" alpha + if self._custom_opts["estimate_v_root"]: # we copmute delta based on jnt v (since we use meas.) 
+ delta=np.max(np.abs(jnt_v_delta)) + else: + delta=np.max(np.abs(omega_root_delta)) + # fail_idx=self._get_failure_index() + # fail_idx=self._get_explosion_idx()/self._fail_idx_thresh + alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0 + + bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1] + self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db + self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db + + self._alphas_q_root[:]=alpha_now # for now single alpha for everything + self._alphas_q_jnts[:]=alpha_now + self._alphas_twist_root[:]=alpha_now + self._alphas_v_jnts[:]=alpha_now + self._alphas_a[:]=alpha_now + if not self._custom_opts["estimate_v_root"]: + self._alphas_twist_root[0:3]=1.0 # open + self._alphas_v_jnts[:]=1.0 # open + + # position is always open loop + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc + + # expaning meas q if continuous joints + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + + # continous joints position is always open loop, but we need a delta vector of matching dimension + q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1) + + self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:] + + self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\ + np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.cos(q_jnts[self._continuous_joints_idxs_red, :]) + self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\ + np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.sin(q_jnts[self._continuous_joints_idxs_red, :]) + + q_jnts=self._jnts_q_expanded.reshape(-1,1) # overriting with expanded jnts + jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overriting with expanded jnts + + self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop + self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop + + # self._alphas_v_jnts[self._continuous_joints_idxs_red, :]=0.0 # open loop + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta, + ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta, + ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0) + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) + else: + root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, + ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0) + 
root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, + ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta, + ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0) + if self._custom_opts["lin_a_feedback"]: + lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _solve(self): + + if self._debug: + return self._db_solve(bootstrap=False) + else: + return self._min_solve(bootstrap=False) + + def _bootstrap(self): + + if self._debug: + return self._db_solve(bootstrap=True) + else: + return self._min_solve(bootstrap=True) + + def _min_solve(self, bootstrap: bool = False): + # minimal solve version -> no debug + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using robot measurements + + self._pm.shift() # shifts phases of one dt + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + try: + if not bootstrap: + converged = self._ti.rti() # RTI step + else: + converged = self._ti.bootstrap() # full solve (to convergence) + self.sol_counter = self.sol_counter + 1 + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + return False + + def _db_solve(self, bootstrap: bool = False): + + self._timer_start = time.perf_counter() + + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using robot measurements + + self._prb_update_time = time.perf_counter() + self._pm.shift() # shifts phases of one dt + self._phase_shift_time = time.perf_counter() + + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + self._task_ref_update_time = time.perf_counter() + + try: + if not bootstrap: + converged = self._ti.rti() # RTI step + else: + converged = self._ti.bootstrap() # full solve bootstrap + self._rti_time = time.perf_counter() + self.sol_counter = self.sol_counter + 1 + self._update_db_data() + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + if self._verbose: + solve_mode = "RTI" if not bootstrap else "Bootstrap" + exception = f"{solve_mode}() for controller {self.controller_index} failed" + \ + f" with exception {type(e).__name__}" + Journal.log(self.__class__.__name__, + "solve", + exception, + LogType.EXCEP, + throw_when_excep = False) + self._update_db_data() + return False + + def _get_fail_idx(self): + + self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx() + self._expl_idx_buffer_counter+=1 + self._expl_idx_counter+=1 + if self._expl_idx_counter%self._expl_idx_window_size==0: + self._expl_idx_buffer_counter=0 # restart from 0 + + running_avrg=np.mean(self._explosion_idx_buffer).item() + + return running_avrg + + def _get_explosion_idx(self): + explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale + return explosion_index + + def _update_db_data(self): + + self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start + self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time + self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time 
- self._phase_shift_time + self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time + self.rhc_costs.update(self._ti.solver_rti.getCostsValues()) + self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues()) + + def _reset(self): + + # reset task interface (ig, solvers, etc..) + + # phase manager and sets bootstap as solution + self._gm.reset() + self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution + self._expl_idx_counter=0.0 + self._expl_idx_buffer_counter=0 + + def _get_cost_info(self): + + cost_dict = self._ti.solver_rti.getCostsValues() + cost_names = list(cost_dict.keys()) + cost_dims = [1] * len(cost_names) # costs are always scalar + return cost_names, cost_dims + + def _get_constr_info(self): + + constr_dict = self._ti.solver_rti.getConstraintsValues() + constr_names = list(constr_dict.keys()) + constr_dims = [-1] * len(constr_names) + i = 0 + for constr in constr_dict: + constr_val = constr_dict[constr] + constr_shape = constr_val.shape + constr_dims[i] = constr_shape[0] + i+=1 + return constr_names, constr_dims + + def _get_q_from_sol(self): + full_q=self._ti.solution['q'].astype(self._dtype) + if self._custom_opts["replace_continuous_joints"]: + return full_q + else: + cont_jnts=full_q[self._continuous_joints_idxs_qfull, :] + cos=cont_jnts[::2, :] + sin=cont_jnts[1::2, :] + # copy root + self._full_q_reduced[0:7, :]=full_q[0:7, :] + # copy rev joint vals + self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :] + # and continuous + angle=np.arctan2(sin, cos) + self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle + return self._full_q_reduced + + def _get_v_from_sol(self): + return self._ti.solution['v'].astype(self._dtype) + + def _get_a_from_sol(self): + return self._ti.solution['a'].astype(self._dtype) + + def _get_a_dot_from_sol(self): + return None + + def _get_f_from_sol(self): + # to be overridden by child 
class + contact_names =self._get_contacts() # we use controller-side names + try: + data=[] + for key in contact_names: + contact_f=self._ti.solution["f_" + key].astype(self._dtype) + np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False) + np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f) + data.append(contact_f) + return np.concatenate(data, axis=0) + except: + return None + + def _get_f_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_eff_from_sol(self): + # to be overridden by child class + return None + + def _get_cost_from_sol(self, + cost_name: str): + return self.rhc_costs[cost_name] + + def _get_constr_from_sol(self, + constr_name: str): + return self.rhc_constr[constr_name] diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/isaac_world_interface.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/isaac_world_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..44d8c26f73cc9308ee4eb14a00d47a6488b73563 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/isaac_world_interface.py @@ -0,0 +1,2355 @@ +# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com) +# +# This file is part of AugMPCEnvs and distributed under the General Public License version 2 license. +# +# AugMPCEnvs is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# AugMPCEnvs is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with AugMPCEnvs. If not, see . +# +from isaacsim import SimulationApp + +import carb + +import os +import math + +import torch +import numpy as np + +from typing import Dict, List +from typing_extensions import override + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal + +from aug_mpc_envs.utils.math_utils import quat_to_omega +from aug_mpc_envs.utils.height_grid_visualizer import HeightGridVisualizer +from aug_mpc_envs.utils.height_sensor import HeightGridSensor + +from aug_mpc.world_interfaces.world_interface_base import AugMPCWorldInterfaceBase +from mpc_hive.utilities.math_utils_torch import world2base_frame,world2base_frame3D + +class IsaacSimEnv(AugMPCWorldInterfaceBase): + + def __init__(self, + robot_names: List[str], + robot_urdf_paths: List[str], + robot_srdf_paths: List[str], + jnt_imp_config_paths: List[str], + n_contacts: List[int], + cluster_dt: List[float], + use_remote_stepping: List[bool], + name: str = "IsaacSimEnv", + num_envs: int = 1, + debug = False, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + n_init_step: int = 0, + timeout_ms: int = 60000, + env_opts: Dict = None, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + override_low_lev_controller: bool = False): + + self._render_step_counter = 0 + + super().__init__(name=name, + robot_names=robot_names, + robot_urdf_paths=robot_urdf_paths, + robot_srdf_paths=robot_srdf_paths, + jnt_imp_config_paths=jnt_imp_config_paths, + n_contacts=n_contacts, + cluster_dt=cluster_dt, + use_remote_stepping=use_remote_stepping, + num_envs=num_envs, + debug=debug, + verbose=verbose, + vlevel=vlevel, + n_init_step=n_init_step, + timeout_ms=timeout_ms, + env_opts=env_opts, + use_gpu=use_gpu, + dtype=dtype, + override_low_lev_controller=override_low_lev_controller) + + def is_running(self): + return self._simulation_app.is_running() + + 
def _pre_setup(self): + self._backend="torch" + enable_livestream = self._env_opts["enable_livestream"] + enable_viewport = self._env_opts["enable_viewport"] + base_isaac_exp = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.kit' + base_isaac_exp_headless = f'{os.environ["EXP_PATH"]}/omni.isaac.sim.python.aug_mpc_envs.headless.kit' + + experience=base_isaac_exp + if self._env_opts["headless"]: + info = f"Will run in headless mode." + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + if enable_livestream: + experience = "" + elif enable_viewport: + exception = f"Using viewport is not supported yet." + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + else: + experience=base_isaac_exp_headless + + self._simulation_app = SimulationApp({"headless": self._env_opts["headless"]}, + experience=experience) + # all imports depending on isaac sim kits have to be done after simulationapp + # from omni.isaac.core.tasks.base_task import BaseTask + self._import_isaac_pkgs() + info = "Using IsaacSim experience file @ " + experience + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + # carb.settings.get_settings().set("/persistent/omnihydra/useSceneGraphInstancing", True) + + if enable_livestream: + info = "Livestream enabled" + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + + self._simulation_app.set_setting("/app/livestream/enabled", True) + self._simulation_app.set_setting("/app/window/drawMouse", True) + self._simulation_app.set_setting("/app/livestream/proto", "ws") + self._simulation_app.set_setting("/app/livestream/websocket/framerate_limit", 120) + self._simulation_app.set_setting("/ngx/enabled", False) + enable_extension("omni.kit.livestream.native") + enable_extension("omni.services.streaming.manager") + self._render = ((not 
self._env_opts["headless"]) or self._env_opts["render_to_file"]) or enable_livestream or enable_viewport + + self._record = False + self._world = None + self._physics_context = None + self._scene = None + self._task = None + self._metadata = None + + self._robots_art_views = {} + self._blink_rigid_prim_views = {} + self._robots_articulations = {} + self._robots_geom_prim_views = {} + self.omni_contact_sensors = {} + + self._solver_position_iteration_count=self._env_opts["solver_position_iteration_count"] + self._solver_velocity_iteration_count=self._env_opts["solver_velocity_iteration_count"] + self._solver_stabilization_thresh=self._env_opts["stabilization_threshold"] + self._solver_position_iteration_counts={} + self._solver_velocity_iteration_counts={} + self._solver_stabilization_threshs={} + self._robot_bodynames={} + self._robot_n_links={} + self._robot_n_dofs={} + self._robot_dof_names={} + self._distr_offset={} # decribed how robots within each env are distributed + self._spawning_radius=self._env_opts["spawning_radius"] # [m] -> default distance between roots of robots in a single + self._height_sensors={} + self._height_imgs={} + self._height_vis={} + self._height_vis_step={} + self._height_vis_step={} + self._height_vis={} + self._terrain_hit_margin = 1.0 + self._terrain_hit_log_period = 1000 + self._terrain_hit_active = {} + self._terrain_hit_counts = {} + self._terrain_hit_counts_last_logged = {} + for robot_name in self._robot_names: + self._terrain_hit_active[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.bool) + self._terrain_hit_counts[robot_name] = torch.zeros((self._num_envs,), device=self._device, dtype=torch.int32) + self._terrain_hit_counts_last_logged[robot_name] = None + + def _import_isaac_pkgs(self): + # we use global, so that we can create the simulation app inside (and so + # access Isaac's kit) and also expose to all methods the imports + global World, omni_kit, get_context, UsdLux, Sdf, Gf, UsdPhysics, 
PhysicsSchemaTools, UsdShade, Vt + global enable_extension, set_camera_view, _urdf, move_prim, GridCloner, prim_utils + global get_current_stage, Scene, ArticulationView, RigidPrimView, rep + global OmniContactSensors, RlTerrains,OmniJntImpCntrl + global PhysxSchema, UsdGeom + global _sensor, _dynamic_control + global get_prim_at_path + + from pxr import PhysxSchema, UsdGeom, UsdShade, Vt + + from omni.isaac.core.world import World + from omni.usd import get_context + from pxr import UsdLux, Sdf, Gf, UsdPhysics, PhysicsSchemaTools + from omni.isaac.core.utils.extensions import enable_extension + from omni.isaac.core.utils.viewports import set_camera_view + import omni.kit as omni_kit + from omni.importer.urdf import _urdf + from omni.isaac.core.utils.prims import move_prim + from omni.isaac.cloner import GridCloner + import omni.isaac.core.utils.prims as prim_utils + from omni.isaac.core.utils.stage import get_current_stage + from omni.isaac.core.scenes.scene import Scene + from omni.isaac.core.articulations import ArticulationView + from omni.isaac.core.prims import RigidPrimView + + import omni.replicator.core as rep + + from omni.isaac.core.utils.prims import get_prim_at_path + + from omni.isaac.sensor import _sensor + + from omni.isaac.dynamic_control import _dynamic_control + + from aug_mpc_envs.utils.contact_sensor import OmniContactSensors + from aug_mpc_envs.utils.omni_jnt_imp_cntrl import OmniJntImpCntrl + from aug_mpc_envs.utils.terrains import RlTerrains + + def _parse_env_opts(self): + isaac_opts={} + isaac_opts["envs_ns"]="/World/envs" + isaac_opts["template_env_ns"]=isaac_opts["envs_ns"] + "/env_0" + isaac_opts["base_linkname"]="base_link" + isaac_opts["deduce_base_link"]=False + isaac_opts["ground_plane_prim_path"]="/World/terrain" + isaac_opts["physics_prim_path"]="/physicsScene" + isaac_opts["use_gpu"]=True + isaac_opts["use_gpu_pipeline"]=True + isaac_opts["device"]="cuda" + isaac_opts["is_fixed_base"]=False + isaac_opts["merge_fixed_jnts"]=True + 
isaac_opts["self_collide"]=True + isaac_opts["sim_device"]="cuda" if isaac_opts["use_gpu"] else "cpu" + isaac_opts["physics_dt"]=1e-3 + isaac_opts["gravity"] = np.array([0.0, 0.0, -9.81]) + isaac_opts["use_fabric"]=True# Enable/disable reading of physics buffers directly. Default is True. + isaac_opts["replicate_physics"]=True + # isaac_opts["worker_thread_count"]=4 + isaac_opts["solver_type"]=1 # 0: PGS, 1:TGS, defaults to TGS. PGS faster but TGS more stable + isaac_opts["enable_stabilization"]=True + # isaac_opts["bounce_threshold_velocity"] = 0.2 + # isaac_opts["friction_offset_threshold"] = 0.04 + # isaac_opts["friction_correlation_distance"] = 0.025 + # isaac_opts["enable_sleeping"] = True + # Per-actor settings ( can override in actor_options ) + isaac_opts["solver_position_iteration_count"] = 4 # defaults to 4 + isaac_opts["solver_velocity_iteration_count"] = 3 # defaults to 1 + isaac_opts["sleep_threshold"] = 1e-5 # Mass-normalized kinetic energy threshold below which an actor may go to sleep. + # Allowed range [0, max_float). 
+ isaac_opts["stabilization_threshold"] = 1e-4 + # Per-body settings ( can override in actor_options ) + # isaac_opts["enable_gyroscopic_forces"] = True + # isaac_opts["density"] = 1000 # density to be used for bodies that do not specify mass or density + # isaac_opts["max_depenetration_velocity"] = 100.0 + # isaac_opts["solver_velocity_iteration_count"] = 1 + # GPU buffers settings + isaac_opts["gpu_max_rigid_contact_count"] = 512 * 2048 + isaac_opts["gpu_max_rigid_patch_count"] = 80 * 2048 + isaac_opts["gpu_found_lost_pairs_capacity"] = 102400 + isaac_opts["gpu_found_lost_aggregate_pairs_capacity"] = 102400 + isaac_opts["gpu_total_aggregate_pairs_capacity"] = 102400 + # isaac_opts["gpu_max_soft_body_contacts"] = 1024 * 1024 + # isaac_opts["gpu_max_particle_contacts"] = 1024 * 1024 + # isaac_opts["gpu_heap_capacity"] = 64 * 1024 * 1024 + # isaac_opts["gpu_temp_buffer_capacity"] = 16 * 1024 * 1024 + # isaac_opts["gpu_max_num_partitions"] = 8 + + isaac_opts["env_spacing"]=15.0 + isaac_opts["spawning_height"]=1.0 + isaac_opts["spawning_radius"]=1.0 + isaac_opts["spawn_height_check_half_extent"]=0.2 + isaac_opts["spawn_height_cushion"]=0.03 + + isaac_opts["enable_height_vis"]=False + isaac_opts["height_vis_radius"]=0.03 + isaac_opts["height_vis_update_period"]=1 + isaac_opts["collision_refinement_level"]=3 # increase cylinder tesselation for smoother wheel contacts + + # rendering helpers + isaac_opts["render_to_file"]=False + isaac_opts["use_follow_camera"]=False # if True, follow robot during rendering in human mode + isaac_opts["render_follow_env_idx"]=0 + isaac_opts["render_follow_robot_idx"]=0 + isaac_opts["render_follow_offset"]=[-0.2, 3.0, 0.1] + isaac_opts["render_follow_target_offset"]=[-0.2, -1.0, 0.0] + isaac_opts["rendering_dt"]=15*isaac_opts["physics_dt"] + isaac_opts["camera_prim_path"]="/OmniverseKit_Persp" + isaac_opts["render_resolution"]=[1280, 720] # [1024, 576] + + isaac_opts["render_panoramic_cam"]=True + 
isaac_opts["render_panoramic_cam_height"]=2.0 + isaac_opts["render_panoramic_cam_target_xy"]=[10.0, 14.] + isaac_opts["render_panoramic_cam_target_z"]=1.2 + + # ground opts + isaac_opts["use_flat_ground"]=True + isaac_opts["static_friction"]=0.5 + isaac_opts["dynamic_friction"]=0.5 + isaac_opts["restitution"]=0.1 + isaac_opts["ground_type"]="random" + isaac_opts["ground_size"]=800 + isaac_opts["terrain_border"]=isaac_opts["ground_size"]/2 + isaac_opts["dh_ground"]=0.03 + isaac_opts["step_height_lb"]=0.08 + isaac_opts["step_height_ub"]=0.12 + isaac_opts["step_width_lb"]=1.0 + isaac_opts["step_width_ub"]= 1.5 + isaac_opts["contact_prims"] = [] + isaac_opts["sensor_radii"] = 0.1 + isaac_opts["contact_offsets"] = {} + + isaac_opts["enable_livestream"] = False + isaac_opts["enable_viewport"] = False + + isaac_opts["use_diff_vels"] = False + + # random perturbations (impulse, durations, directions are sampled uniformly, force/torque computed accordinly) + isaac_opts["use_random_pertub"]=False + isaac_opts["pert_planar_only"]=True # if True, linear pushes only in xy plane and no torques + + isaac_opts["pert_wrenches_rate"]=30.0 # on average 1 pert every pert_wrenches_rate seconds + isaac_opts["pert_wrenches_min_duration"]=0.6 + isaac_opts["pert_wrenches_max_duration"]=3.5 # [s] + isaac_opts["pert_force_max_weight_scale"]=0.4 # clip force norm to scale*weight + isaac_opts["pert_force_min_weight_scale"]=0.1 # optional min force norm as scale*weight + isaac_opts["pert_torque_max_weight_scale"]=1.0 # clip torque norm to scale*weight*max_ang_impulse_lever + + isaac_opts["pert_target_delta_v"]=2.0 # [m/s] desired max impulse = m*delta_v + isaac_opts["det_pert_rate"]=True + + # max impulse (unitless scale multiplied by weight to get N*s): delta_v/g + isaac_opts["max_lin_impulse_norm"]=isaac_opts["pert_target_delta_v"]/9.81 + isaac_opts["lin_impulse_mag_min"]=1.0 # [0, 1] -> min fraction of max_lin_impulse_norm when sampling + isaac_opts["lin_impulse_mag_max"]=1.0 # [0, 1] -> max 
fraction of max_lin_impulse_norm when sampling + + isaac_opts["max_ang_impulse_lever"]=0.2 # [m] + isaac_opts["max_ang_impulse_norm"]=isaac_opts["max_lin_impulse_norm"]*isaac_opts["max_ang_impulse_lever"] + isaac_opts["terrain_hit_log_period"]=1000 + + # opts definition end + + isaac_opts.update(self._env_opts) # update defaults with provided opts + isaac_opts["rendering_freq"]=int(isaac_opts["rendering_dt"]/isaac_opts["physics_dt"]) + # isaac_opts["rendering_dt"]=isaac_opts["physics_dt"] # forcing rendering_dt==physics_dt + # for some mystic reason simulation is infuenced by the rendering dt (why ??????) + + # modify things + isaac_opts["cloning_offset"] = np.array([[0.0, 0.0, isaac_opts["spawning_height"]]]*self._num_envs) + if not isaac_opts["use_gpu"]: # don't use GPU at all + isaac_opts["use_gpu_pipeline"]=False + isaac_opts["device"]="cpu" + isaac_opts["sim_device"]="cpu" + else: # use GPU + if isaac_opts["use_gpu_pipeline"]: + isaac_opts["device"]="cuda" + isaac_opts["sim_device"]="cuda" + else: # cpu pipeline + isaac_opts["device"]="cpu" + isaac_opts["sim_device"]="cpu" + isaac_opts["use_gpu"]=False + # isaac_opts["sim_device"]=isaac_opts["device"] + # overwrite env opts in case some sim params were missing + self._env_opts=isaac_opts + + # update device flag based on sim opts + self._device=isaac_opts["device"] + self._use_gpu=isaac_opts["use_gpu"] + + def _calc_robot_distrib(self): + + import math + # we distribute robots in a single env. 
along the + # circumference of a circle of given radius + n_robots = len(self._robot_names) + offset_baseangle = 2 * math.pi / n_robots + for i in range(n_robots): + offset_angle = offset_baseangle * (i + 1) + robot_offset_wrt_center = torch.tensor([self._spawning_radius * math.cos(offset_angle), + self._spawning_radius * math.sin(offset_angle), 0], + device=self._device, + dtype=self._dtype) + # list with n references to the original tensor + tensor_list = [robot_offset_wrt_center] * self._num_envs + self._distr_offset[self._robot_names[i]] = torch.stack(tensor_list, dim=0) + + def _init_world(self): + + self._cloner = GridCloner(spacing=self._env_opts["env_spacing"]) + self._cloner.define_base_env(self._env_opts["envs_ns"]) + prim_utils.define_prim(self._env_opts["template_env_ns"]) + self._envs_prim_paths = self._cloner.generate_paths(self._env_opts["envs_ns"] + "/env", + self._num_envs) + + # parse device based on sim_param settings + + info = "Using sim device: " + str(self._env_opts["sim_device"]) + Journal.log(self.__class__.__name__, + "__init__", + info, + LogType.STAT, + throw_when_excep = True) + + self._world = World( + physics_dt=self._env_opts["physics_dt"], + rendering_dt=self._env_opts["physics_dt"], # == physics dt (rendering is actually done manually by this env) + backend=self._backend, + device=str(self._env_opts["sim_device"]), + physics_prim_path=self._env_opts["physics_prim_path"], + set_defaults = False, # set to True to use the defaults settings [physics_dt = 1.0/ 60.0, + # stage units in meters = 0.01 (i.e in cms), rendering_dt = 1.0 / 60.0, gravity = -9.81 m / s + # ccd_enabled, stabilization_enabled, gpu dynamics turned off, + # broadcast type is MBP, solver type is TGS] + sim_params=self._env_opts + ) + + big_info = "[World] Creating Isaac simulation " + self._name + "\n" + \ + "use_gpu_pipeline: " + str(self._env_opts["use_gpu_pipeline"]) + "\n" + \ + "device: " + str(self._env_opts["sim_device"]) + "\n" +\ + "backend: " + 
str(self._backend) + "\n" +\ + "integration_dt: " + str(self.physics_dt()) + "\n" + \ + "rendering_dt: " + str(self.rendering_dt()) + "\n" + Journal.log(self.__class__.__name__, + "_init_world", + big_info, + LogType.STAT, + throw_when_excep = True) + + # we get the physics context to expose additional low-level ## + # settings of the simulation + self._physics_context = self._world.get_physics_context() + self._physics_scene_path = self._physics_context.prim_path + # self._physics_context.enable_gpu_dynamics(True) + self._physics_context.enable_stablization(True) + self._physics_scene_prim = self._physics_context.get_current_physics_scene_prim() + self._solver_type = self._physics_context.get_solver_type() + + if "gpu_max_rigid_contact_count" in self._env_opts: + self._physics_context.set_gpu_max_rigid_contact_count(self._env_opts["gpu_max_rigid_contact_count"]) + if "gpu_max_rigid_patch_count" in self._env_opts: + self._physics_context.set_gpu_max_rigid_patch_count(self._env_opts["gpu_max_rigid_patch_count"]) + if "gpu_found_lost_pairs_capacity" in self._env_opts: + self._physics_context.set_gpu_found_lost_pairs_capacity(self._env_opts["gpu_found_lost_pairs_capacity"]) + if "gpu_found_lost_aggregate_pairs_capacity" in self._env_opts: + self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(self._env_opts["gpu_found_lost_aggregate_pairs_capacity"]) + if "gpu_total_aggregate_pairs_capacity" in self._env_opts: + self._physics_context.set_gpu_total_aggregate_pairs_capacity(self._env_opts["gpu_total_aggregate_pairs_capacity"]) + if "gpu_max_soft_body_contacts" in self._env_opts: + self._physics_context.set_gpu_max_soft_body_contacts(self._env_opts["gpu_max_soft_body_contacts"]) + if "gpu_max_particle_contacts" in self._env_opts: + self._physics_context.set_gpu_max_particle_contacts(self._env_opts["gpu_max_particle_contacts"]) + if "gpu_heap_capacity" in self._env_opts: + self._physics_context.set_gpu_heap_capacity(self._env_opts["gpu_heap_capacity"]) + if 
"gpu_temp_buffer_capacity" in self._env_opts: + self._physics_context.set_gpu_temp_buffer_capacity(self._env_opts["gpu_temp_buffer_capacity"]) + if "gpu_max_num_partitions" in self._env_opts: + self._physics_context.set_gpu_max_num_partitions(self._env_opts["gpu_max_num_partitions"]) + + # overwriting defaults + # self._physics_context.set_gpu_max_rigid_contact_count(2 * self._physics_context.get_gpu_max_rigid_contact_count()) + # self._physics_context.set_gpu_max_rigid_patch_count(2 * self._physics_context.get_gpu_max_rigid_patch_count()) + # self._physics_context.set_gpu_found_lost_pairs_capacity(2 * self._physics_context.get_gpu_found_lost_pairs_capacity()) + # self._physics_context.set_gpu_found_lost_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity()) + # self._physics_context.set_gpu_total_aggregate_pairs_capacity(20 * self._physics_context.get_gpu_total_aggregate_pairs_capacity()) + # self._physics_context.set_gpu_heap_capacity(2 * self._physics_context.get_gpu_heap_capacity()) + # self._physics_context.set_gpu_temp_buffer_capacity(20 * self._physics_context.get_gpu_heap_capacity()) + # self._physics_context.set_gpu_max_num_partitions(20 * self._physics_context.get_gpu_temp_buffer_capacity()) + + # GPU buffers + self._gpu_max_rigid_contact_count = self._physics_context.get_gpu_max_rigid_contact_count() + self._gpu_max_rigid_patch_count = self._physics_context.get_gpu_max_rigid_patch_count() + self._gpu_found_lost_pairs_capacity = self._physics_context.get_gpu_found_lost_pairs_capacity() + self._gpu_found_lost_aggregate_pairs_capacity = self._physics_context.get_gpu_found_lost_aggregate_pairs_capacity() + self._gpu_total_aggregate_pairs_capacity = self._physics_context.get_gpu_total_aggregate_pairs_capacity() + self._gpu_max_soft_body_contacts = self._physics_context.get_gpu_max_soft_body_contacts() + self._gpu_max_particle_contacts = self._physics_context.get_gpu_max_particle_contacts() + self._gpu_heap_capacity 
= self._physics_context.get_gpu_heap_capacity() + self._gpu_temp_buffer_capacity = self._physics_context.get_gpu_temp_buffer_capacity() + # self._gpu_max_num_partitions = physics_context.get_gpu_max_num_partitions() # BROKEN->method does not exist + + big_info2 = "[physics context]:" + "\n" + \ + "gpu_max_rigid_contact_count: " + str(self._gpu_max_rigid_contact_count) + "\n" + \ + "gpu_max_rigid_patch_count: " + str(self._gpu_max_rigid_patch_count) + "\n" + \ + "gpu_found_lost_pairs_capacity: " + str(self._gpu_found_lost_pairs_capacity) + "\n" + \ + "gpu_found_lost_aggregate_pairs_capacity: " + str(self._gpu_found_lost_aggregate_pairs_capacity) + "\n" + \ + "gpu_total_aggregate_pairs_capacity: " + str(self._gpu_total_aggregate_pairs_capacity) + "\n" + \ + "gpu_max_soft_body_contacts: " + str(self._gpu_max_soft_body_contacts) + "\n" + \ + "gpu_max_particle_contacts: " + str(self._gpu_max_particle_contacts) + "\n" + \ + "gpu_heap_capacity: " + str(self._gpu_heap_capacity) + "\n" + \ + "gpu_temp_buffer_capacity: " + str(self._gpu_temp_buffer_capacity) + "\n" + \ + "use_gpu_sim: " + str(self._world.get_physics_context().use_gpu_sim) + "\n" + \ + "use_gpu_pipeline: " + str(self._world.get_physics_context().use_gpu_pipeline) + "\n" + \ + "use_fabric: " + str(self._world.get_physics_context().use_fabric) + "\n" + \ + "world device: " + str(self._world.get_physics_context().device) + "\n" + \ + "physics context device: " + str(self._world.get_physics_context().device) + "\n" + + Journal.log(self.__class__.__name__, + "set_task", + big_info2, + LogType.STAT, + throw_when_excep = True) + + self._scene = self._world.scene + self._physics_context = self._world.get_physics_context() + + self._stage = get_context().get_stage() + + # strong, uniform lighting: bright sun + dome fill to cover the whole terrain + prim_utils.define_prim("/World/Lighting", "Xform") + sun_path = "/World/Lighting/SunLight" + dome_path = "/World/Lighting/AmbientDome" + + distantLight = 
UsdLux.DistantLight.Define(self._stage, Sdf.Path(sun_path)) + distantLight.CreateIntensityAttr(450.0) + distantLight.CreateAngleAttr(0.5) # soften shadows a bit + distantLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0)) + # Shadow attr naming differs across versions; set the underlying USD attribute directly. + distantLight.GetPrim().CreateAttribute("shadow:enable", Sdf.ValueTypeNames.Bool).Set(True) + + domeLight = UsdLux.DomeLight.Define(self._stage, Sdf.Path(dome_path)) + domeLight.CreateIntensityAttr(200.0) + domeLight.CreateExposureAttr(1.0) + domeLight.CreateColorAttr(Gf.Vec3f(1.0, 1.0, 1.0)) + + self._configure_scene() + + # if "enable_viewport" in sim_params: + # self._render = sim_params["enable_viewport"] + + def _get_baselink_candidate(self, + robot_name: str): + + stage=get_current_stage() + all_prims = [prim.GetPath().pathString for prim in stage.Traverse()] + filtered_prims = [prim for prim in all_prims if f"/{robot_name}/" in prim] + + matching=min(filtered_prims, key=len) if filtered_prims else None + + return matching.split('/')[-1] + + def _configure_scene(self): + + # environment + self._fix_base = [self._env_opts["is_fixed_base"]] * len(self._robot_names) + self._self_collide = [self._env_opts["self_collide"]] * len(self._robot_names) + self._merge_fixed = [self._env_opts["merge_fixed_jnts"]] * len(self._robot_names) + + Journal.log(self.__class__.__name__, + "_configure_scene", + "cloning environments...", + LogType.STAT, + throw_when_excep = True) + + self._ground_plane_prim_paths=[] + self._ground_plane=None + self.terrain_generator = None + if not self._env_opts["use_flat_ground"]: + # ensure terrain is large enough to contain all env clones + spacing = float(self._env_opts["env_spacing"]) + num_envs = self._num_envs + num_per_row = max(1, int(math.sqrt(num_envs))) + num_rows = int(math.ceil(num_envs / num_per_row)) + num_cols = int(math.ceil(num_envs / num_rows)) + row_offset = 0.5 * spacing * (num_rows - 1) + col_offset = 0.5 * spacing * 
(num_cols - 1) + margin = spacing # leave a full spacing as border cushion + required_size = 2.0 * (max(row_offset, col_offset) + margin) + + if required_size > self._env_opts["ground_size"]: + old_size = self._env_opts["ground_size"] + self._env_opts["ground_size"] = required_size + self._env_opts["terrain_border"] = self._env_opts["ground_size"] / 2.0 + Journal.log(self.__class__.__name__, + "_configure_scene", + f"Ground size increased from {old_size} to {required_size} to fit {num_envs} envs (spacing {spacing}).", + LogType.WARN, + throw_when_excep = True) + + min_height=-self._env_opts["dh_ground"] + max_height=self._env_opts["dh_ground"] + step=max_height-min_height + if self._env_opts["ground_type"]=="random": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_random_uniform_terrain(terrain_size=self._env_opts["ground_size"], + min_height=min_height, + max_height=max_height, + step=step, + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"]) + elif self._env_opts["ground_type"]=="random_patches": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_random_unif_patches" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_random_patched_terrain(terrain_size=self._env_opts["ground_size"], + min_height=min_height, + max_height=max_height, + step=step, + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + patch_ratio=0.8, + 
patch_size=10 + ) + elif self._env_opts["ground_type"]=="slopes": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_slopes" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_sloped_terrain(terrain_size=self._env_opts["ground_size"], + slope=-0.5, + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"] + ) + elif self._env_opts["ground_type"]=="stairs": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stairs" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_stairs_terrain(terrain_size=self._env_opts["ground_size"], + position=np.array([0.0, 0.0,0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + ) + elif self._env_opts["ground_type"]=="stepup": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup" + self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_stepup_terrain( + terrain_size=self._env_opts["ground_size"], + stairs_ratio=0.3, + min_steps=1, + max_steps=1, + pyramid_platform_size=15.0, + position=np.array([0.0, 0.0, 0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + step_height=0.15 + ) + elif self._env_opts["ground_type"]=="stepup_prim": + terrain_prim_path=self._env_opts["ground_plane_prim_path"]+"_stepup_prim" + 
self._ground_plane_prim_paths.append(terrain_prim_path) + self.terrain_generator = RlTerrains(get_current_stage(), prim_path=terrain_prim_path) + self._ground_plane=self.terrain_generator.create_stepup_prim_terrain( + terrain_size=self._env_opts["ground_size"], + stairs_ratio=0.9, + platform_size=50.0, + step_height_lb=self._env_opts["step_height_lb"], + step_height_ub=self._env_opts["step_height_ub"], + min_step_width=self._env_opts.get("step_width_lb", None), + max_step_width=self._env_opts.get("step_width_ub", None), + position=np.array([0.0, 0.0, 0.0]), + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"], + n_steps=25, + area_factor=0.7, + random_n_steps=False + ) + # apply a custom checker material to the terrain primitives + mat_path = self._ensure_lightblue_checker_material() + self._apply_checker_material_to_terrain(terrain_root_path=terrain_prim_path, material_path=mat_path) + self._add_checker_overlay_plane(terrain_root_path=terrain_prim_path, material_path=mat_path) + self._add_checker_overlays_on_tiles(terrain_root_path=terrain_prim_path, material_path=mat_path) + else: + ground_type=self._env_opts["ground_type"] + Journal.log(self.__class__.__name__, + "_configure_scene", + f"Terrain type {ground_type} not supported. 
Will default to flat ground.", + LogType.EXCEP, + throw_when_excep = True) + + # add offsets to intial height depending on the terrain heightmap + if self.terrain_generator is not None: + stage = get_current_stage() + up_axis = UsdGeom.GetStageUpAxis(stage) + + spacing = self._env_opts["env_spacing"] + num_envs = self._num_envs + + num_per_row = max(1, int(np.sqrt(num_envs))) + num_rows = int(np.ceil(num_envs / num_per_row)) + num_cols = int(np.ceil(num_envs / num_rows)) + + row_offset = 0.5 * spacing * (num_rows - 1) + col_offset = 0.5 * spacing * (num_cols - 1) + + offsets = np.array(self._env_opts["cloning_offset"], dtype=float) + + for env_idx in range(num_envs): + row = env_idx // num_cols + col = env_idx % num_cols + x = row_offset - row * spacing + y = col * spacing - col_offset + + half_extent = self._env_opts["spawn_height_check_half_extent"] + if up_axis == UsdGeom.Tokens.z: + height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent) + else: + height = self.terrain_generator.get_max_height_in_rect(x, y, half_extent=half_extent) + + offsets[env_idx][2] += height + self._env_opts["spawn_height_cushion"] + + self._env_opts["cloning_offset"] = offsets + + else: + defaul_prim_path=self._env_opts["ground_plane_prim_path"]+"_default" + self._ground_plane_prim_paths.append(defaul_prim_path) + self._ground_plane=self._scene.add_default_ground_plane(z_position=0, + name="terrain", + prim_path=defaul_prim_path, + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"]) + self._terrain_collision=None + + physics_material=UsdPhysics.MaterialAPI.Apply(self._ground_plane.prim) + physics_material.CreateDynamicFrictionAttr(self._env_opts["dynamic_friction"]) + physics_material.CreateStaticFrictionAttr(self._env_opts["static_friction"]) + physics_material.CreateRestitutionAttr(self._env_opts["restitution"]) + # 
self._ground_plane.apply_physics_material(physics_material) + + self._terrain_collision=PhysxSchema.PhysxCollisionAPI.Get(get_current_stage(), self._ground_plane.prim_path) + self._terrain_material=UsdPhysics.MaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path) + self._terrain_physix_material=PhysxSchema.PhysxMaterialAPI.Get(get_current_stage(), self._ground_plane.prim_path) + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + urdf_path = self._robot_urdf_paths[robot_name] + srdf_path = self._robot_srdf_paths[robot_name] + fix_base = self._fix_base[i] + self_collide = self._self_collide[i] + merge_fixed = self._merge_fixed[i] + + self._generate_rob_descriptions(robot_name=robot_name, + urdf_path=urdf_path, + srdf_path=srdf_path) + self._import_urdf(robot_name, + fix_base=fix_base, + self_collide=self_collide, + merge_fixed=merge_fixed) + + self._cloner.clone( + source_prim_path=self._env_opts["template_env_ns"], + prim_paths=self._envs_prim_paths, + replicate_physics=self._env_opts["replicate_physics"], + position_offsets=self._env_opts["cloning_offset"] + ) # we can clone the environment in which all the robos are + + base_link_name=self._env_opts["base_linkname"] + if self._env_opts["deduce_base_link"]: + base_link_name=self._get_baselink_candidate(robot_name=robot_name) + + self._robots_art_views[robot_name] = ArticulationView(name = robot_name + "ArtView", + prim_paths_expr = self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name, + reset_xform_properties=False) + self._robots_articulations[robot_name] = self._scene.add(self._robots_art_views[robot_name]) + + # height grid sensor (terrain may be None if using flat ground) + self._height_sensors[robot_name] = HeightGridSensor( + terrain_utils=self.terrain_generator if not self._env_opts["use_flat_ground"] else None, + grid_size=int(self._env_opts["height_sensor_pixels"]), + resolution=float(self._env_opts["height_sensor_resolution"]), + 
forward_offset=float(self._env_opts["height_sensor_forward_offset"]), + lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]), + n_envs=self._num_envs, + device=self._device, + dtype=self._dtype) + # ensure shared-data flags are set if a height sensor is active + self._env_opts["height_sensor_pixels"] = int(self._env_opts["height_sensor_pixels"]) + self._env_opts["height_sensor_resolution"] = float(self._env_opts["height_sensor_resolution"]) + self._enable_height_shared = True + self._height_sensor_pixels = self._env_opts["height_sensor_pixels"] + self._height_sensor_resolution = self._env_opts["height_sensor_resolution"] + self._height_vis_step[robot_name] = 0 + if self._env_opts["enable_height_vis"]: + self._height_vis[robot_name] = HeightGridVisualizer( + robot_name=robot_name, + num_envs=self._num_envs, + grid_size=int(self._env_opts["height_sensor_pixels"]), + resolution=float(self._env_opts["height_sensor_resolution"]), + marker_radius=float(self._env_opts["height_vis_radius"]), + forward_offset=float(self._env_opts["height_sensor_forward_offset"]), + lateral_offset=float(self._env_opts["height_sensor_lateral_offset"]), + device=self._device, + dtype=self._dtype) + + self._blink_rigid_prim_views[robot_name] = RigidPrimView(prim_paths_expr=self._env_opts["envs_ns"] + "/env_.*"+ "/" + robot_name + "/" + base_link_name, + name = robot_name + "RigidPrimView") # base link prim views + self._scene.add(self._blink_rigid_prim_views[robot_name]) # need to add so it is properly initialized when resetting world + + # self._robots_geom_prim_views[robot_name] = GeometryPrimView(name = robot_name + "GeomView", + # prim_paths_expr = self._env_ns + "/env*"+ "/" + robot_name, + # # prepare_contact_sensors = True + # ) + # self._robots_geom_prim_views[robot_name].apply_collision_apis() # to be able to apply contact sensors + + # init contact sensors + self._init_contact_sensors(robot_name=robot_name) # IMPORTANT: this has to be called + # after calling the 
clone() method and initializing articulation views!! + + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_1/collisions/mesh_1") + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_2/collisions/mesh_1") + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_3/collisions/mesh_1") + # self._set_contact_links_material(prim_path="/World/envs/env_0/kyon_no_wheels/lower_leg_4/collisions/mesh_1") + + # filter collisions between default ground plane and custom terrains + # self._cloner.filter_collisions(physicsscene_path = self._physics_context.prim_path, + # collision_root_path = "/World/terrain_collisions", + # prim_paths=[self._ground_plane_prim_paths[1]], + # global_paths=[self._ground_plane_prim_paths[0]] + # ) + + # delete_prim(self._env_opts["ground_plane_prim_path"] + "/SphereLight") # we remove the default spherical light + + # set default camera viewport position and target + camera_position=[4.2, 4.2, 1.5] + camera_target=[0, 0, 0] + # use a single camera prim (configurable) for both viewport and render products + camera_prim = self._env_opts["camera_prim_path"] + self._set_initial_camera_params(camera_position=camera_position, + camera_target=camera_target, + camera_prim_path=camera_prim) + + if self._env_opts["render_to_file"]: + # base output dir + from datetime import datetime + timestamp = datetime.now().strftime("h%H_m%M_s%S_%d_%m_%Y") + self._render_output_dir = f"/tmp/IsaacRenderings/{timestamp}" + res = tuple(int(x) for x in self._env_opts["render_resolution"]) + + # create render product from chosen camera prim + self._render_product = rep.create.render_product(camera_prim, + res, name='rendering_camera') + self._render_writer = rep.WriterRegistry.get("BasicWriter") + self._render_writer.initialize(output_dir=self._render_output_dir, + rgb=True) + self._render_writer.attach([self._render_product]) + # optional top-down capture + if 
self._env_opts["render_panoramic_cam"]: + td_height = float(self._env_opts["render_panoramic_cam_height"]) + td_dir = self._render_output_dir + "/panoramic_cam" + td_offset = self._env_opts["render_panoramic_cam_target_xy"] + td_target_z = float(self._env_opts["render_panoramic_cam_target_z"]) + pos = [8.0, 11.0, td_height] + self._panoramic_cam_camera = rep.create.camera(focal_length=12, + name='rendering_camera_panoramic_cam', + clipping_range = (1, 200), + position = pos, + look_at = [td_offset[0], td_offset[1], td_target_z]) + self._panoramic_cam_render_product = rep.create.render_product(self._panoramic_cam_camera, + res, name='rendering_camera_panoramic_cam') + self._panoramic_cam_writer = rep.WriterRegistry.get("BasicWriter") + self._panoramic_cam_writer.initialize(output_dir=td_dir, rgb=True) + self._panoramic_cam_writer.attach([self._panoramic_cam_render_product]) + + self.apply_collision_filters(self._physics_context.prim_path, + "/World/collisions") + + self._reset_sim() + + self._fill_robot_info_from_world() + # initializes robot state data + + # update solver options + self._update_art_solver_options() + self._get_solver_info() # get again solver option before printing everything + self._print_envs_info() # debug print + + # for n in range(self._n_init_steps): # run some initialization steps + # self._step_sim() + + self._init_robots_state() + + self.scene_setup_completed = True + + Journal.log(self.__class__.__name__, + "set_up_scene", + "finished scene setup...", + LogType.STAT, + throw_when_excep = True) + + self._is = _sensor.acquire_imu_sensor_interface() + self._dyn_control=_dynamic_control.acquire_dynamic_control_interface() + + def _set_contact_links_material(self, prim_path: str): + prim=get_prim_at_path(prim_path) + physics_material=UsdPhysics.MaterialAPI.Apply(prim) + physics_material.CreateDynamicFrictionAttr(0) + physics_material.CreateStaticFrictionAttr(0) + physics_material.CreateRestitutionAttr(1.0) + 
physxMaterialAPI=PhysxSchema.PhysxMaterialAPI.Apply(prim) + physxMaterialAPI.CreateFrictionCombineModeAttr().Set("multiply") # average, min, multiply, max + physxMaterialAPI.CreateRestitutionCombineModeAttr().Set("multiply") + + def _get_lightblue_checker_texture_path(self) -> str: + tex_rel_path = os.path.join(os.path.dirname(__file__), "..", "assets", "textures", "ibrido_terrain_texture.png") + return os.path.abspath(tex_rel_path) + + def _ensure_lightblue_checker_material(self): + """Create (or reuse) a light-blue checker material for primitive terrains.""" + stage = get_current_stage() + mat_path = "/World/Looks/IbridoCheckerMaterial" + mat_prim = stage.GetPrimAtPath(mat_path) + if mat_prim.IsValid(): + return mat_path + + texture_path = self._get_lightblue_checker_texture_path() + + material = UsdShade.Material.Define(stage, mat_path) + + st_reader = UsdShade.Shader.Define(stage, f"{mat_path}/PrimvarReader_st") + st_reader.CreateIdAttr("UsdPrimvarReader_float2") + st_reader.CreateInput("varname", Sdf.ValueTypeNames.Token).Set("st") + st_reader.CreateOutput("result", Sdf.ValueTypeNames.Float2) + + uv_transform = UsdShade.Shader.Define(stage, f"{mat_path}/UVTransform") + uv_transform.CreateIdAttr("UsdTransform2d") + # keep UV scale at 1 here; tiling is controlled via mesh UVs + uv_transform.CreateInput("in", Sdf.ValueTypeNames.Float2).ConnectToSource(st_reader.GetOutput("result")) + uv_transform.CreateInput("scale", Sdf.ValueTypeNames.Float2).Set(Gf.Vec2f(1.0, 1.0)) + uv_transform.CreateOutput("result", Sdf.ValueTypeNames.Float2) + + tex = UsdShade.Shader.Define(stage, f"{mat_path}/CheckerTex") + tex.CreateIdAttr("UsdUVTexture") + tex.CreateInput("file", Sdf.ValueTypeNames.Asset).Set(Sdf.AssetPath(texture_path)) + tex.CreateInput("st", Sdf.ValueTypeNames.Float2).ConnectToSource(uv_transform.GetOutput("result")) + tex.CreateInput("wrapS", Sdf.ValueTypeNames.Token).Set("repeat") + tex.CreateInput("wrapT", Sdf.ValueTypeNames.Token).Set("repeat") + 
tex.CreateInput("minFilter", Sdf.ValueTypeNames.Token).Set("nearest") + tex.CreateInput("magFilter", Sdf.ValueTypeNames.Token).Set("nearest") + # disable mipmaps to avoid blurring sharp edges + tex.CreateInput("mipFilter", Sdf.ValueTypeNames.Token).Set("nearest") + tex.CreateInput("enableMipMap", Sdf.ValueTypeNames.Bool).Set(False) + tex.CreateInput("fallback", Sdf.ValueTypeNames.Color4f).Set(Gf.Vec4f(0.69, 0.85, 1.0, 1.0)) + tex.CreateOutput("rgb", Sdf.ValueTypeNames.Float3) + tex.CreateOutput("a", Sdf.ValueTypeNames.Float) + + pbr = UsdShade.Shader.Define(stage, f"{mat_path}/PBRShader") + pbr.CreateIdAttr("UsdPreviewSurface") + pbr.CreateInput("diffuseColor", Sdf.ValueTypeNames.Color3f).ConnectToSource(tex.GetOutput("rgb")) + pbr.CreateInput("roughness", Sdf.ValueTypeNames.Float).Set(0.45) + pbr.CreateInput("metallic", Sdf.ValueTypeNames.Float).Set(0.0) + pbr.CreateOutput("surface", Sdf.ValueTypeNames.Token) + + material.CreateSurfaceOutput().ConnectToSource(pbr.GetOutput("surface")) + + return mat_path + + def _ensure_groundplane_material(self): + """Guarantee a ground-plane material exists (default checker) and return its path.""" + stage = get_current_stage() + mat_path = "/World/Looks/groundPlaneMaterial" + mat_prim = stage.GetPrimAtPath(mat_path) + if mat_prim.IsValid(): + return mat_path + + # create a temporary default ground plane to spawn the checker material, then delete the geom + tmp_gp_path = "/World/tmp_ground_for_material" + self._scene.add_default_ground_plane(z_position=-1000.0, + name="tmp_ground_for_material", + prim_path=tmp_gp_path, + static_friction=self._env_opts["static_friction"], + dynamic_friction=self._env_opts["dynamic_friction"], + restitution=self._env_opts["restitution"]) + + mat_prim = stage.GetPrimAtPath(mat_path) + prim_utils.delete_prim(tmp_gp_path) + + return mat_path if mat_prim.IsValid() else None + + def _apply_checker_material_to_terrain(self, terrain_root_path: str, material_path: str): + """Bind the checker material to 
all terrain prims (visual only).""" + + stage = get_current_stage() + mat_prim = stage.GetPrimAtPath(material_path) + if not mat_prim.IsValid(): + return + material = UsdShade.Material(mat_prim) + for prim in stage.Traverse(): + path = prim.GetPath().pathString + if not path.startswith(terrain_root_path): + continue + binding = UsdShade.MaterialBindingAPI.Apply(prim) + binding.Bind(material, UsdShade.Tokens.strongerThanDescendants) + + def _add_checker_overlay_plane(self, terrain_root_path: str, material_path: str): + """Create a thin visual-only mesh with UVs so the checker pattern shows up even on cube prims.""" + stage = get_current_stage() + plane_path = f"{terrain_root_path}/visual_checker" + plane_prim = stage.GetPrimAtPath(plane_path) + if plane_prim.IsValid(): + # if already exists, just (re)bind material + mat_prim = stage.GetPrimAtPath(material_path) + if mat_prim.IsValid(): + UsdShade.MaterialBindingAPI.Apply(plane_prim).Bind(UsdShade.Material(mat_prim), UsdShade.Tokens.strongerThanDescendants) + return + + # try to read base slab dimensions/position to size the overlay + slab_path = terrain_root_path + "_slab" + slab_prim = stage.GetPrimAtPath(slab_path) + center = Gf.Vec3f(0.0, 0.0, 0.0) + width = float(self._env_opts.get("ground_size", 50.0)) + length = width + thickness = 0.1 + if slab_prim.IsValid(): + xformable = UsdGeom.Xformable(slab_prim) + for op in xformable.GetOrderedXformOps(): + if op.GetOpType() == UsdGeom.XformOp.TypeTranslate: + center = Gf.Vec3f(op.Get()) + elif op.GetOpType() == UsdGeom.XformOp.TypeScale: + scale = op.Get() + width = float(scale[0]) + length = float(scale[1]) + thickness = float(scale[2]) + + half_w = 0.5 * width + half_l = 0.5 * length + z = center[2] + 0.5 * thickness + 1e-3 # slightly above the slab to avoid z-fighting + + plane = UsdGeom.Mesh.Define(stage, plane_path) + plane.CreatePointsAttr([ + Gf.Vec3f(center[0] - half_w, center[1] - half_l, z), + Gf.Vec3f(center[0] + half_w, center[1] - half_l, z), + 
Gf.Vec3f(center[0] + half_w, center[1] + half_l, z), + Gf.Vec3f(center[0] - half_w, center[1] + half_l, z), + ]) + plane.CreateFaceVertexCountsAttr([4]) + plane.CreateFaceVertexIndicesAttr([0, 1, 2, 3]) + plane.CreateDoubleSidedAttr(True) + + # increase tiling density; adjustable via env opt + uv_repeats = max(1, int((width / 10.0) * 3.0)) + primvars = UsdGeom.PrimvarsAPI(plane) + st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex) + st.Set(Vt.Vec2fArray([ + Gf.Vec2f(0.0, 0.0), + Gf.Vec2f(uv_repeats, 0.0), + Gf.Vec2f(uv_repeats, uv_repeats), + Gf.Vec2f(0.0, uv_repeats), + ])) + st.SetInterpolation(UsdGeom.Tokens.vertex) + + mat_prim = stage.GetPrimAtPath(material_path) + if mat_prim.IsValid(): + material = UsdShade.Material(mat_prim) + UsdShade.MaterialBindingAPI.Apply(plane.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants) + + def _add_checker_overlays_on_tiles(self, terrain_root_path: str, material_path: str): + """Add visual quads on top of each tile cube so the checker texture appears on raised steps.""" + stage = get_current_stage() + mat_prim = stage.GetPrimAtPath(material_path) + if not mat_prim.IsValid(): + return + material = UsdShade.Material(mat_prim) + for prim in stage.Traverse(): + path = prim.GetPath().pathString + if not path.startswith(terrain_root_path): + continue + if prim.GetTypeName() != "Cube": + continue + name = path.split("/")[-1] + if "wall" in name or name.endswith("_slab"): + continue + xformable = UsdGeom.Xformable(prim) + center = Gf.Vec3f(0.0, 0.0, 0.0) + scale = Gf.Vec3f(1.0, 1.0, 1.0) + for op in xformable.GetOrderedXformOps(): + if op.GetOpType() == UsdGeom.XformOp.TypeTranslate: + center = Gf.Vec3f(op.Get()) + elif op.GetOpType() == UsdGeom.XformOp.TypeScale: + scale = Gf.Vec3f(op.Get()) + width, length, height = float(scale[0]), float(scale[1]), float(scale[2]) + half_w = 0.5 * width + half_l = 0.5 * length + z = center[2] + 0.5 * height + 1e-3 + overlay_path = 
f"{path}_checker" + if stage.GetPrimAtPath(overlay_path).IsValid(): + UsdShade.MaterialBindingAPI.Apply(stage.GetPrimAtPath(overlay_path)).Bind(material, UsdShade.Tokens.strongerThanDescendants) + continue + mesh = UsdGeom.Mesh.Define(stage, overlay_path) + mesh.CreatePointsAttr([ + Gf.Vec3f(center[0] - half_w, center[1] - half_l, z), + Gf.Vec3f(center[0] + half_w, center[1] - half_l, z), + Gf.Vec3f(center[0] + half_w, center[1] + half_l, z), + Gf.Vec3f(center[0] - half_w, center[1] + half_l, z), + ]) + mesh.CreateFaceVertexCountsAttr([4]) + mesh.CreateFaceVertexIndicesAttr([0, 1, 2, 3]) + mesh.CreateDoubleSidedAttr(True) + uv_repeats = max(1, int((width / 10.0) * float(self._env_opts.get("checker_uv_density", 3.0)))) + primvars = UsdGeom.PrimvarsAPI(mesh) + st = primvars.CreatePrimvar("st", Sdf.ValueTypeNames.TexCoord2fArray, UsdGeom.Tokens.vertex) + st.Set(Vt.Vec2fArray([ + Gf.Vec2f(0.0, 0.0), + Gf.Vec2f(uv_repeats, 0.0), + Gf.Vec2f(uv_repeats, uv_repeats), + Gf.Vec2f(0.0, uv_repeats), + ])) + st.SetInterpolation(UsdGeom.Tokens.vertex) + UsdShade.MaterialBindingAPI.Apply(mesh.GetPrim()).Bind(material, UsdShade.Tokens.strongerThanDescendants) + + def _is_link(self, prim): + return prim.GetTypeName() == 'Xform' + + def _is_joint(self, prim): + return prim.GetTypeName() == 'PhysicsRevoluteJoint' + + def _create_collision_group(self, group_path, link_paths): + """ + Create a collision group under the given group_path that contains the links. + Args: + group_path (str): Path to create the collision group. + link_paths (List[str]): List of link paths to include in this group. 
+ """ + collision_group = Sdf.PrimSpec( + self._stage.GetRootLayer().GetPrimAtPath(group_path), + group_path.split("/")[-1], + Sdf.SpecifierDef, + "PhysicsCollisionGroup" + ) + # Add the links to the collision group + for link_path in link_paths: + includes_rel = Sdf.RelationshipSpec(collision_group, "collection:colliders:includes", False) + includes_rel.targetPathList.Append(link_path) + + def _add_collision_filter(self, group_path, link1, link2): + """ + Filters collision between two successive links. + + Args: + group_path (str): Path of the collision group. + link1 (str): Path of the first link. + link2 (str): Path of the second link. + """ + # Create a relationship to filter collisions between the two links + filtered_groups = Sdf.RelationshipSpec( + self._stage.GetPrimAtPath(group_path), + "physics:filteredGroups", + False + ) + filtered_groups.targetPathList.Append(link1) + filtered_groups.targetPathList.Append(link2) + + def _render_sim(self, mode="human"): + + if mode == "human": + # follow requested robot/env + if self._env_opts["use_follow_camera"]: + ridx = int(self._env_opts["render_follow_robot_idx"]) + eidx = int(self._env_opts["render_follow_env_idx"]) + if ridx < len(self._robot_names) and eidx < self._num_envs: + rname = self._robot_names[ridx] + pos = self._root_p.get(rname, None) + if pos is not None and pos.shape[0] > eidx: + base = pos[eidx].detach().cpu() + offset = torch.as_tensor(self._env_opts["render_follow_offset"], + device=base.device, dtype=base.dtype) + target_offset = torch.as_tensor(self._env_opts["render_follow_target_offset"], + device=base.device, dtype=base.dtype) + quat = self._root_q.get(rname, None) + if quat is not None and quat.shape[0] > eidx: + q = quat[eidx].detach().cpu() + w, x, y, z = q.unbind(-1) + yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z)) + cy, sy = torch.cos(yaw), torch.sin(yaw) + rot = torch.tensor([[cy, -sy], [sy, cy]], device=base.device, dtype=base.dtype) + offset_xy = torch.matmul(rot, 
offset[:2]) + target_offset_xy = torch.matmul(rot, target_offset[:2]) + offset = torch.stack((offset_xy[0], offset_xy[1], offset[2])) + target_offset = torch.stack((target_offset_xy[0], target_offset_xy[1], target_offset[2])) + eye = (base + offset).tolist() + target = (base + target_offset).tolist() + set_camera_view(eye=eye, target=target, camera_prim_path=self._env_opts["camera_prim_path"]) + + self._world.render() + # optional height grid visualization + if self._env_opts["enable_height_vis"]: + for robot_name, vis in self._height_vis.items(): + # use latest stored states + if robot_name not in self._height_imgs or robot_name not in self._height_sensors: + continue + heights = self._height_imgs.get(robot_name, None) + if heights is None or heights.numel() == 0: + continue + pos_src = self._root_p.get(robot_name, None) + quat_src = self._root_q.get(robot_name, None) + if pos_src is None or quat_src is None: + continue + step = self._height_vis_step.get(robot_name, 0) + period = max(1, int(self._env_opts["height_vis_update_period"])) + if step % period == 0: + try: + vis.update( + base_positions=pos_src, + base_quats=quat_src, + heights=heights) + except Exception as exc: + print(f"[height_vis] update failed for {robot_name}: {exc}") + self._height_vis_step[robot_name] = step + 1 + return None + elif mode == "rgb_array": + # check if viewport is enabled -- if not, then complain because we won't get any data + if not self._render or not self._record: + exception = f"Cannot render '{mode}' when rendering is not enabled. Please check the provided" + \ + "arguments to the environment class at initialization." 
+ Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + # obtain the rgb data + rgb_data = self._rgb_annotator.get_data() + # convert to numpy array + rgb_data = np.frombuffer(rgb_data, dtype=np.uint8).reshape(*rgb_data.shape) + # return the rgb data + return rgb_data[:, :, :3] + else: + return None + + def _create_viewport_render_product(self, resolution=None): + """Create a render product of the viewport for rendering.""" + + try: + + # create render product + camera_prim = self._env_opts["camera_prim_path"] + res = resolution + if res is None: + res = tuple(int(x) for x in self._env_opts["render_resolution"]) + self._render_product = rep.create.render_product(camera_prim, res) + # create rgb annotator -- used to read data from the render product + self._rgb_annotator = rep.AnnotatorRegistry.get_annotator("rgb", device="cpu") + self._rgb_annotator.attach([self._render_product]) + self._record = True + except Exception as e: + carb.log_info("omni.replicator.core could not be imported. 
Skipping creation of render product.") + carb.log_info(str(e)) + + def _close(self): + if self._simulation_app.is_running(): + self._simulation_app.close() + + def _step_world(self): + self._world.step(render=False, step_sim=True) + + if (self._render) and (self._render_step_counter%self._env_opts["rendering_freq"]==0): + # if self._env_opts["render_to_file"]: + # rep.orchestrator.step() + self._render_sim() # manually trigger rendering (World.step(render=True) for some reason + # will step the simulation for a dt==rendering_dt) + self._render_step_counter += 1 + + def _generate_jnt_imp_control(self, robot_name: str): + + jnt_imp_controller = OmniJntImpCntrl(articulation=self._robots_articulations[robot_name], + device=self._device, + dtype=self._dtype, + enable_safety=True, + urdf_path=self._urdf_dump_paths[robot_name], + config_path=self._jnt_imp_config_paths[robot_name], + enable_profiling=False, + debug_checks=self._debug, + override_art_controller=self._override_low_lev_controller) + + return jnt_imp_controller + + def _reset_sim(self): + self._world.reset(soft=False) + + def _reset_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + randomize: bool = False): + + if env_indxs is not None: + if self._debug: + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" 
+ Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + if randomize: + self._randomize_yaw(robot_name=robot_name,env_indxs=env_indxs) + + # root q + self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][env_indxs, :], + orientations=self._root_q_default[robot_name][env_indxs, :], + indices = env_indxs) + # jnts q + self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][env_indxs, :], + indices = env_indxs) + # root v and omega + self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][env_indxs, :], + indices = env_indxs) + # jnts v + concatenated_vel = torch.cat((self._root_v_default[robot_name][env_indxs, :], + self._root_omega_default[robot_name][env_indxs, :]), dim=1) + self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel, + indices = env_indxs) + # jnts eff + self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][env_indxs, :], + indices = env_indxs) + self._reset_perturbations(robot_name=robot_name, env_indxs=env_indxs) + else: + + if randomize: + self._randomize_yaw(robot_name=robot_name,env_indxs=None) + + # root q + self._robots_art_views[robot_name].set_world_poses(positions = self._root_p_default[robot_name][:, :], + orientations=self._root_q_default[robot_name][:, :], + indices = None) + # jnts q + self._robots_art_views[robot_name].set_joint_positions(positions = self._jnts_q_default[robot_name][:, :], + indices = None) + # root v and omega + self._robots_art_views[robot_name].set_joint_velocities(velocities = self._jnts_v_default[robot_name][:, :], + indices = None) + # jnts v + concatenated_vel = torch.cat((self._root_v_default[robot_name][:, :], + self._root_omega_default[robot_name][:, :]), dim=1) + self._robots_art_views[robot_name].set_velocities(velocities = concatenated_vel, + indices = None) + # 
jnts eff + self._robots_art_views[robot_name].set_joint_efforts(efforts = self._jnts_eff_default[robot_name][:, :], + indices = None) + self._reset_perturbations(robot_name=robot_name, env_indxs=None) + + # we update the robots state + self._read_root_state_from_robot(env_indxs=env_indxs, + robot_name=robot_name) + self._read_jnts_state_from_robot(env_indxs=env_indxs, + robot_name=robot_name) + + def _reset_perturbations(self, robot_name: str, env_indxs: torch.Tensor = None): + """Clear perturbation state and wrenches for selected envs.""" + if robot_name not in self._pert_active: + return + if env_indxs is None: + self._pert_active[robot_name].zero_() + self._pert_steps_remaining[robot_name].zero_() + self._pert_forces_world[robot_name].zero_() + self._pert_torques_world[robot_name].zero_() + self._pert_det_counter[robot_name].zero_() + else: + self._pert_active[robot_name][env_indxs] = False + self._pert_steps_remaining[robot_name][env_indxs] = 0 + self._pert_forces_world[robot_name][env_indxs, :] = 0 + self._pert_torques_world[robot_name][env_indxs, :] = 0 + self._pert_det_counter[robot_name][env_indxs] = 0 + + def _process_perturbations(self): + + # Iterate over each robot view + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + # Pre-fetch views for code clarity (references, not copies) + active = self._pert_active[robot_name] + steps_rem = self._pert_steps_remaining[robot_name] + forces_world = self._pert_forces_world[robot_name] + torques_world = self._pert_torques_world[robot_name] + planar_only = self._env_opts["pert_planar_only"] if "pert_planar_only" in self._env_opts else False + + # --- 1. Update Active Counters (In-Place) --- + if active.any(): + # In-place subtraction + steps_rem[active] -= 1 + + # --- 2. 
Reset Finished Perturbations (In-Place) --- + # Logic: Active AND (Steps <= 0) + # Note: Creating 'newly_ended' boolean mask is a tiny unavoidable allocation + newly_ended = active & (steps_rem <= 0) + + if newly_ended.any(): + # Use masked_fill_ for in-place zeroing + active.masked_fill_(newly_ended, False) + forces_world[newly_ended, :]=0 + torques_world[newly_ended, :]=0 + steps_rem.masked_fill_(newly_ended, 0) + + # --- 3. Trigger New Perturbations --- + + det_rate = self._env_opts["det_pert_rate"] + if det_rate: + # deterministic spacing: count physics steps and trigger when threshold reached (if not already active) + det_counter = self._pert_det_counter[robot_name] + det_counter += 1 + trigger_mask = (det_counter >= self._pert_det_steps) & (~active) + else: + # Reuse scratch buffer for probability check + # Assumes self._pert_scratch is (num_envs, 1) pre-allocated + self._pert_scratch[robot_name].uniform_(0.0, 1.0) # used for triggering new perturbations + # Check probs against threshold + # Flatten scratch to (N,) to match 'active' mask + trigger_mask = (self._pert_scratch[robot_name].flatten() < self._pert_wrenches_prob) & (~active) + + if trigger_mask.any(): + + # Cache weights (references) + weight = self._weights[robot_name] + + # we now treat the configured "max_*_impulse_*" as the maximum **impulse** (N·s or N·m·s), + # and convert impulse -> force/torque by dividing by the sampled duration (seconds). + # Use per-robot weight scaling as before. 
+ lin_impulse_max = self._env_opts["max_lin_impulse_norm"] * weight + ang_impulse_max = self._env_opts["max_ang_impulse_norm"] * weight + + # --- Force (Impulse) Direction Generation (Reuse _pert_lindir buffer) --- + lindir = self._pert_lindir[robot_name] # (N, 3) + + if planar_only: + # planar push direction from random yaw + yaw_angles = self._pert_scratch[robot_name].uniform_(0.0, 2*math.pi).flatten() + lindir[:, 0] = torch.cos(yaw_angles) + lindir[:, 1] = torch.sin(yaw_angles) + lindir[:, 2] = 0.0 + else: + # 1. Fill with Standard Normal noise in-place + lindir.normal_() + # 2. Normalize in-place + norms = torch.norm(lindir, dim=1, keepdim=True).clamp_min_(1e-6) + lindir.div_(norms) + + # 3. Sample linear impulse magnitudes (reuse scratch) + # scratch has shape (N,1) - uniform [0,1] + self._pert_scratch[robot_name].uniform_(self._env_opts["lin_impulse_mag_min"], self._env_opts["lin_impulse_mag_max"]) + # impulse vectors = unit_dir * (rand * lin_impulse_max) + + lindir.mul_(self._pert_scratch[robot_name] * lin_impulse_max) # now contains linear impulses (N,3) + + # --- Angular (Impulse) Direction Generation (Reuse _pert_angdir buffer) --- + angdir = self._pert_angdir[robot_name] # (N, 3) + + if planar_only: + angdir.zero_() # no torque when planar-only is requested + else: + # 1. Fill with Standard Normal noise + angdir.normal_() + # 2. Normalize + norms = torch.norm(angdir, dim=1, keepdim=True).clamp_min_(1e-6) + angdir.div_(norms) + + # 3. 
Sample angular impulse magnitudes (reuse scratch) + self._pert_scratch[robot_name].uniform_(0.0, 1.0) + angdir.mul_(self._pert_scratch[robot_name] * ang_impulse_max) # now contains angular impulses (N,3) + + # --- Duration Generation (Reuse _pert_durations) --- + # Keep integer steps sampling (same shape/device/dtype) + self._pert_durations[robot_name] = torch.randint_like( + self._pert_durations[robot_name], + low=self._pert_min_steps, + high=self._pert_max_steps + 1 + ) + + # --- convert to float + duration_steps = self._pert_durations[robot_name].to(dtype=lindir.dtype, device=lindir.device) + # duration in seconds (shape (N,)) + duration_seconds = duration_steps * self.physics_dt() + # avoid divide-by-zero + duration_seconds = duration_seconds.clamp_min_(1e-6) + + # compute per-step forces/torques = impulse / duration_seconds + # lindir currently holds linear impulses, angdir holds angular impulses + forces_to_apply = lindir / duration_seconds + torques_to_apply = angdir / duration_seconds + + # Optional clipping based on robot weight (min/max) + f_norm = torch.norm(forces_to_apply, dim=1, keepdim=True).clamp_min_(1e-9) + target_norm = f_norm + + f_clip_scale_max = self._env_opts["pert_force_max_weight_scale"] + if f_clip_scale_max > 0.0: + force_max = self._weights[robot_name] * f_clip_scale_max # (N,1) + target_norm = torch.minimum(target_norm, force_max) + + f_clip_scale_min = self._env_opts["pert_force_min_weight_scale"] + if f_clip_scale_min > 0.0: + force_min = self._weights[robot_name] * f_clip_scale_min + target_norm = torch.maximum(target_norm, force_min) + + forces_to_apply = forces_to_apply * (target_norm / f_norm) + + t_clip_scale = self._env_opts["pert_torque_max_weight_scale"] + if t_clip_scale > 0.0: + torque_max = self._weights[robot_name] * self._env_opts["max_ang_impulse_lever"] * t_clip_scale + t_norm = torch.norm(torques_to_apply, dim=1, keepdim=True).clamp_min_(1e-9) + t_scale = torch.minimum(torch.ones_like(t_norm), torque_max / t_norm) + 
torques_to_apply = torques_to_apply * t_scale + + # --- Update State Buffers --- + # Use boolean indexing to scatter only triggered values + active[trigger_mask] = True + steps_rem[trigger_mask] = self._pert_durations[robot_name][trigger_mask, :].flatten() + forces_world[trigger_mask, :] = forces_to_apply[trigger_mask, :] + torques_world[trigger_mask, :] = torques_to_apply[trigger_mask, :] + if det_rate: + det_counter[trigger_mask] = 0 + + # --- 4. Apply Wrenches (Vectorized) --- + # Only call API if there are active perturbations to minimize overhead + + forces_world[~active, :]=0 + torques_world[~active, :]=0 + + self._blink_rigid_prim_views[robot_name].apply_forces_and_torques_at_pos( + forces=forces_world, + torques=torques_world, + positions=None, # body frame origin + is_global=True + ) + + @override + def _pre_step(self): + + if self._env_opts["use_random_pertub"]: + self._process_perturbations() + + success=super()._pre_step() + + return success + + @override + def _pre_step_db(self): + + if self._env_opts["use_random_pertub"]: + self._process_perturbations() + + success=super()._pre_step_db() + + return success + + @override + def _update_contact_state(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + super()._update_contact_state(robot_name, env_indxs) + + if self._env_opts["use_random_pertub"]: + # write APPLIED perturbations to root wrench (mainly for debug) + self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_forces_world[robot_name][env_indxs, :], + data_type="f", + contact_name="root", + robot_idxs = env_indxs, + gpu=self._use_gpu) + self.cluster_servers[robot_name].get_state().contact_wrenches_root.set(data=self._pert_torques_world[robot_name][env_indxs, :], + data_type="t", + contact_name="root", + robot_idxs = env_indxs, + gpu=self._use_gpu) + + def _post_warmup_validation(self, robot_name: str): + """Validate warmup state: base height, tilt, and (if available) contacts.""" + envs = 
torch.arange(self._num_envs, device=self._device) + + # terrain height query + def _terrain_height(xy: torch.Tensor): + if self._env_opts["use_flat_ground"] or self.terrain_generator is None: + return torch.zeros((xy.shape[0],), device=xy.device, dtype=self._dtype) + heights = [] + half_extent = self._env_opts["spawn_height_check_half_extent"] + for k in range(xy.shape[0]): + h = self.terrain_generator.get_max_height_in_rect( + float(xy[k, 0]), float(xy[k, 1]), half_extent=half_extent) + heights.append(h) + return torch.as_tensor(heights, device=xy.device, dtype=self._dtype) + + # base height check + base_xy = self._root_p[robot_name][:, 0:2] + base_z = self._root_p[robot_name][:, 2] + ground_z = _terrain_height(base_xy) + margin = float(self._env_opts["spawn_height_cushion"]) + bad_z = base_z < (ground_z + margin) + + # tilt check (angle between base up and world up) + q = self._root_q[robot_name] + # quaternion to up vector + w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3] + up = torch.stack([ + 2 * (x * z - w * y), + 2 * (y * z + w * x), + 1 - 2 * (x * x + y * y) + ], dim=1) + cos_tilt = up[:, 2].clamp(-1.0, 1.0) + tilt_thresh_deg = 35.0 + bad_tilt = cos_tilt < math.cos(math.radians(tilt_thresh_deg)) + + # contact check (only if sensors exist) + # bad_contact = torch.zeros_like(base_z, dtype=torch.bool) + # if robot_name in self.omni_contact_sensors and self.omni_contact_sensors[robot_name] is not None: + # counts = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device) + # for link in self._contact_names.get(robot_name, []): + # f_contact = self._get_contact_f(robot_name=robot_name, + # contact_link=link, + # env_indxs=None) + # if f_contact is None: + # continue + # # use normal component (assume z-up); ignore tangential forces + # active = f_contact[:, 2] > 1e-3 + # counts += active.int() + # bad_contact = counts < 3 + + failing = torch.nonzero(bad_z | bad_tilt, as_tuple=False).flatten() + if failing.numel() > 0: + # remediate: lift to 
terrain+margin, upright (preserve yaw), zero root velocities + # yaw = torch.atan2(2 * (w * z + x * y), 1 - 2 * (y * y + z * z)) + # safe_z = (ground_z + margin)[failing] + # self._root_p[robot_name][failing, 2] = safe_z + # cos_h = torch.cos(yaw[failing] / 2) + # sin_h = torch.sin(yaw[failing] / 2) + # upright = torch.zeros((failing.shape[0], 4), device=self._device, dtype=self._dtype) + # upright[:, 0] = cos_h + # upright[:, 3] = sin_h + # self._root_q[robot_name][failing, :] = upright + # self._root_v[robot_name][failing, :] = 0.0 + # self._root_omega[robot_name][failing, :] = 0.0 + + msgs = [] # print db message + if bad_z.any(): + msgs.append(f"low_z envs {torch.nonzero(bad_z, as_tuple=False).flatten().tolist()}") + if bad_tilt.any(): + msgs.append(f"tilt envs {torch.nonzero(bad_tilt, as_tuple=False).flatten().tolist()}") + Journal.log(self.__class__.__name__, + "_post_warmup_validation", + f"Warmup validation failures for {robot_name}: " + "; ".join(msgs), + LogType.WARN, + throw_when_excep=False) + return failing + + def _import_urdf(self, + robot_name: str, + fix_base = False, + self_collide = False, + merge_fixed = True): + + import_config=_urdf.ImportConfig() + # status,import_config=omni_kit.commands.execute("URDFCreateImportConfig") + + Journal.log(self.__class__.__name__, + "update_root_offsets", + "importing robot URDF", + LogType.STAT, + throw_when_excep = True) + _urdf.acquire_urdf_interface() + # we overwrite some settings which are bound to be fixed + import_config.merge_fixed_joints = merge_fixed # makes sim more stable + # in case of fixed joints with light objects + import_config.import_inertia_tensor = True + # import_config.convex_decomp = False + import_config.fix_base = fix_base + import_config.self_collision = self_collide + # import_config.distance_scale = 1 + # import_config.make_default_prim = True + # import_config.create_physics_scene = True + # import_config.default_drive_strength = 1047.19751 + # 
import_config.default_position_drive_damping = 52.35988 + # import_config.default_drive_type = _urdf.UrdfJointTargetType.JOINT_DRIVE_POSITION + # import URDF + success, robot_prim_path_default = omni_kit.commands.execute( + "URDFParseAndImportFile", + urdf_path=self._urdf_dump_paths[robot_name], + import_config=import_config, + # get_articulation_root=True, + ) + + robot_base_prim_path = self._env_opts["template_env_ns"] + "/" + robot_name + + if success: + Journal.log(self.__class__.__name__, + "_import_urdf", + "Successfully importedf URDF into IsaacSim", + LogType.STAT) + else: + Journal.log(self.__class__.__name__, + "_import_urdf", + "Failed to import URDF into IsaacSim", + LogType.EXCEP, + throw_when_excep = True) + + # moving default prim to base prim path for cloning + move_prim(robot_prim_path_default, # from + robot_base_prim_path) # to + + robot_base_prim = prim_utils.get_prim_at_path(robot_base_prim_path) + children = prim_utils.get_prim_children(robot_base_prim) + # log imported prim children to the journal (print was getting truncated in logs) + Journal.log(self.__class__.__name__, + "_import_urdf", + f"Imported robot URDF children: {children}", + LogType.STAT) + + # improve collision tesselation for cylinders (e.g., wheels) if requested + # self._apply_collision_refinement(robot_base_prim_path, + # self._env_opts["collision_refinement_level"]) + + return success + + def _apply_collision_refinement(self, robot_base_prim_path: str, refinement_level: int): + """Set refinement level on collision cylinders to avoid coarse faceting.""" + if refinement_level is None: + return + stage = get_current_stage() + coll_prefix = robot_base_prim_path + "/collisions" + count = 0 + for prim in stage.Traverse(): + if not prim.IsValid(): + continue + path_str = prim.GetPath().pathString + if not path_str.startswith(coll_prefix): + continue + if prim.GetTypeName() == "Cylinder": + attr = prim.GetAttribute("refinementLevel") + if not attr.IsValid(): + attr = 
prim.CreateAttribute("refinementLevel", Sdf.ValueTypeNames.Int) + attr.Set(int(refinement_level)) + count += 1 + Journal.log(self.__class__.__name__, + "_apply_collision_refinement", + f"Applied refinement level {refinement_level} to {count} cylinder collision prims under {coll_prefix}", + LogType.STAT) + + def apply_collision_filters(self, + physicscene_path: str, + coll_root_path: str): + + self._cloner.filter_collisions(physicsscene_path = physicscene_path, + collision_root_path = coll_root_path, + prim_paths=self._envs_prim_paths, + global_paths=self._ground_plane_prim_paths # can collide with these prims + ) + + def _read_root_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + self._get_root_state(numerical_diff=self._env_opts["use_diff_vels"], + env_indxs=env_indxs, + robot_name=robot_name) + + # height grid sensor readout + if robot_name in self._height_sensors: + pos_src = self._root_p[robot_name] if env_indxs is None else self._root_p[robot_name][env_indxs] + quat_src = self._root_q[robot_name] if env_indxs is None else self._root_q[robot_name][env_indxs] + heights = self._height_sensors[robot_name].read(pos_src, quat_src) + if self._env_opts["use_flat_ground"]: + heights.zero_() + if env_indxs is None: + self._height_imgs[robot_name] = heights + else: + # clone to avoid overlapping write/read views + self._height_imgs[robot_name][env_indxs] = heights.clone() + + # print("height image") + # print(self._height_imgs[robot_name][0, :, : ]) + + def _read_jnts_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + self._get_robots_jnt_state( + numerical_diff=self._env_opts["use_diff_vels"], + env_indxs=env_indxs, + robot_name=robot_name) + + def _get_root_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + numerical_diff: bool = False, + base_loc: bool = True): + + # reading = self._is.get_sensor_reading("/World/Cube/Imu_Sensor", + # use_latest_data = True) + + 
dt=self._cluster_dt[robot_name] # getting diff state always at cluster rate + + # measurements from simulator are in world frame + if env_indxs is not None: + + pose = self._robots_art_views[robot_name].get_world_poses( + clone = True, + indices=env_indxs) # tuple: (pos, quat), quat is [w, i, j, k] in Isaac4.2 + + self._root_p[robot_name][env_indxs, :] = pose[0] + + going_to_fly=self._root_p[robot_name][env_indxs, 0:2]>(self._env_opts["terrain_border"]-0.1) + if going_to_fly.any(): + flying=going_to_fly.sum().item() + warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!" + Journal.log(self.__class__.__name__, + "_get_root_state", + warn, + LogType.WARN, + throw_when_excep = True) + self._root_q[robot_name][env_indxs, :] = pose[1] # root orientation + if not numerical_diff: + # we get velocities from the simulation. This is not good since + # these can actually represent artifacts which do not have physical meaning. + # It's better to obtain them by differentiation to avoid issues with controllers, etc... + self._root_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_linear_velocities( + clone = True, + indices=env_indxs) # root lin. velocity + self._root_omega[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_angular_velocities( + clone = True, + indices=env_indxs) # root ang. 
velocity + + # for now obtain root a numerically + self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \ + self._root_v_prev[robot_name][env_indxs, :]) / dt + self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \ + self._root_omega_prev[robot_name][env_indxs, :]) / dt + + self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :] + self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :] + else: + # differentiate numerically + self._root_v[robot_name][env_indxs, :] = (self._root_p[robot_name][env_indxs, :] - \ + self._root_p_prev[robot_name][env_indxs, :]) / dt + self._root_omega[robot_name][env_indxs, :] = quat_to_omega( + self._root_q_prev[robot_name][env_indxs, :], + self._root_q[robot_name][env_indxs, :], + dt) + + self._root_a[robot_name][env_indxs, :] = (self._root_v[robot_name][env_indxs, :] - \ + self._root_v_prev[robot_name][env_indxs, :]) / dt + self._root_alpha[robot_name][env_indxs, :] = (self._root_omega[robot_name][env_indxs, :] - \ + self._root_omega_prev[robot_name][env_indxs, :]) / dt + + # update "previous" data for numerical differentiation + self._root_p_prev[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :] + self._root_q_prev[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :] + self._root_v_prev[robot_name][env_indxs, :] = self._root_v[robot_name][env_indxs, :] + self._root_omega_prev[robot_name][env_indxs, :] = self._root_omega[robot_name][env_indxs, :] + self._track_terrain_hits(robot_name=robot_name, env_indxs=env_indxs) + + else: + # updating data for all environments + pose = self._robots_art_views[robot_name].get_world_poses( + clone = True) # tuple: (pos, quat) + self._root_p[robot_name][:, :] = pose[0] + self._root_q[robot_name][:, :] = pose[1] # root orientation + + going_to_fly=self._root_p[robot_name][:, 0:2]>(self._env_opts["terrain_border"]-0.1) + if 
going_to_fly.any(): + flying=going_to_fly.sum().item() + warn = f"N. {flying} robots ({robot_name}) are about to go out of the terrain!!" + Journal.log(self.__class__.__name__, + "_get_root_state", + warn, + LogType.WARN, + throw_when_excep = True) + + if not numerical_diff: + # we get velocities from the simulation. This is not good since + # these can actually represent artifacts which do not have physical meaning. + # It's better to obtain them by differentiation to avoid issues with controllers, etc... + self._root_v[robot_name][:, :] = self._robots_art_views[robot_name].get_linear_velocities( + clone = True) # root lin. velocity + self._root_omega[robot_name][:, :] = self._robots_art_views[robot_name].get_angular_velocities( + clone = True) # root ang. velocity + + self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \ + self._root_v_prev[robot_name][:, :]) / dt + self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \ + self._root_omega_prev[robot_name][:, :]) / dt + + self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :] + self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :] + else: + # differentiate numerically + self._root_v[robot_name][:, :] = (self._root_p[robot_name][:, :] - \ + self._root_p_prev[robot_name][:, :]) / dt + self._root_omega[robot_name][:, :] = quat_to_omega(self._root_q_prev[robot_name][:, :], + self._root_q[robot_name][:, :], + dt) + + self._root_a[robot_name][:, :] = (self._root_v[robot_name][:, :] - \ + self._root_v_prev[robot_name][:, :]) / dt + self._root_alpha[robot_name][:, :] = (self._root_omega[robot_name][:, :] - \ + self._root_omega_prev[robot_name][:, :]) / dt + + # update "previous" data for numerical differentiation + self._root_p_prev[robot_name][:, :] = self._root_p[robot_name][:, :] + self._root_q_prev[robot_name][:, :] = self._root_q[robot_name][:, :] + self._root_v_prev[robot_name][:, :] = self._root_v[robot_name][:, :] + 
self._root_omega_prev[robot_name][:, :] = self._root_omega[robot_name][:, :] + self._track_terrain_hits(robot_name=robot_name, env_indxs=None) + + if base_loc: + # rotate robot twist in base local + twist_w=torch.cat((self._root_v[robot_name], + self._root_omega[robot_name]), + dim=1) + twist_base_loc=torch.cat((self._root_v_base_loc[robot_name], + self._root_omega_base_loc[robot_name]), + dim=1) + world2base_frame(t_w=twist_w,q_b=self._root_q[robot_name],t_out=twist_base_loc) + self._root_v_base_loc[robot_name]=twist_base_loc[:, 0:3] + self._root_omega_base_loc[robot_name]=twist_base_loc[:, 3:6] + + # rotate robot a in base local + a_w=torch.cat((self._root_a[robot_name], + self._root_alpha[robot_name]), + dim=1) + a_base_loc=torch.cat((self._root_a_base_loc[robot_name], + self._root_alpha_base_loc[robot_name]), + dim=1) + world2base_frame(t_w=a_w,q_b=self._root_q[robot_name],t_out=a_base_loc) + self._root_a_base_loc[robot_name]=a_base_loc[:, 0:3] + self._root_alpha_base_loc[robot_name]=a_base_loc[:, 3:6] + + # rotate gravity in base local + world2base_frame3D(v_w=self._gravity_normalized[robot_name],q_b=self._root_q[robot_name], + v_out=self._gravity_normalized_base_loc[robot_name]) + + def _get_robots_jnt_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + numerical_diff: bool = False): + + dt= self.physics_dt() if self._override_low_lev_controller else self._cluster_dt[robot_name] + + # measurements from simulator are in world frame + if env_indxs is not None: + + self._jnts_q[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_positions( + clone = True, + indices=env_indxs) # joint positions + if not numerical_diff: + self._jnts_v[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_joint_velocities( + clone = True, + indices=env_indxs) # joint velocities + else: + # differentiate numerically + self._jnts_v[robot_name][env_indxs, :] = (self._jnts_q[robot_name][env_indxs, :] - \ + 
self._jnts_q_prev[robot_name][env_indxs, :]) / dt + # update "previous" data for numerical differentiation + self._jnts_q_prev[robot_name][env_indxs, :] = self._jnts_q[robot_name][env_indxs, :] + + self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts( + clone = True, + joint_indices=None, + indices=env_indxs) # measured joint efforts (computed by joint force solver) + + else: + self._jnts_q[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_positions( + clone = True) # joint positions + if not numerical_diff: + self._jnts_v[robot_name][:, :] = self._robots_art_views[robot_name].get_joint_velocities( + clone = True) # joint velocities + else: + self._jnts_v[robot_name][:, :] = (self._jnts_q[robot_name][:, :] - \ + self._jnts_q_prev[robot_name][:, :]) / dt + + self._jnts_q_prev[robot_name][:, :] = self._jnts_q[robot_name][:, :] + + self._jnts_eff[robot_name][env_indxs, :] = self._robots_art_views[robot_name].get_measured_joint_efforts( + clone = True) # measured joint efforts (computed by joint force solver) + + def _get_contact_f(self, + robot_name: str, + contact_link: str, + env_indxs: torch.Tensor) -> torch.Tensor: + + if self.omni_contact_sensors[robot_name] is not None: + return self.omni_contact_sensors[robot_name].get(dt=self.physics_dt(), + contact_link=contact_link, + env_indxs=env_indxs, + clone=False) + + def _set_jnts_to_homing(self, robot_name: str): + self._robots_art_views[robot_name].set_joints_default_state(positions=self._homing, + velocities = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \ + dtype=self._dtype, device=self._device), + efforts = torch.zeros((self._homing.shape[0], self._homing.shape[1]), \ + dtype=self._dtype, device=self._device)) + + def _set_root_to_defconfig(self, robot_name: str): + self._robots_art_views[robot_name].set_default_state(positions=self._root_p_default[robot_name], + orientations=self._root_q_default[robot_name]) + + def 
_alter_twist_warmup(self, robot_name: str, env_indxs: torch.Tensor = None): + """Zero angular velocities and joint velocities for the given robot/envs.""" + if env_indxs is None: + twist = self._robots_art_views[robot_name].get_velocities(clone=True) + twist[:, 0] = 0.0 + twist[:, 1] = 0.0 + twist[:, 3:] = 0.0 # zero angular part, preserve current linear + self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=None) + + # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities( + # indices = None, clone=True) + # jnt_vel[:, :] = 0.0 + + # self._robots_art_views[robot_name].set_joint_velocities( + # velocities = jnt_vel, + # indices = None) + + else: + twist = self._robots_art_views[robot_name].get_velocities(clone=True, indices=env_indxs) + # twist[:, 0:2] = 0.0 + twist[:, 0] = 0.0 + twist[:, 1] = 0.0 + twist[:, 3:] = 0.0 + self._robots_art_views[robot_name].set_velocities(velocities=twist, indices=env_indxs) + + # jnt_vel=self._robots_art_views[robot_name].get_joint_velocities( + # indices = env_indxs, clone=True) + # jnt_vel[:, :] = 0.0 + + # self._robots_art_views[robot_name].set_joint_velocities( + # velocities = jnt_vel, + # indices = env_indxs) + + def _get_solver_info(self): + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + self._solver_position_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_position_iteration_counts() + self._solver_velocity_iteration_counts[robot_name] = self._robots_art_views[robot_name].get_solver_velocity_iteration_counts() + self._solver_stabilization_threshs[robot_name] = self._robots_art_views[robot_name].get_stabilization_thresholds() + + def _update_art_solver_options(self): + + # sets new solver iteration options for specifc articulations + self._get_solver_info() # gets current solver info for the articulations of the + # environments, so that dictionaries are filled properly + + for i in range(0, len(self._robot_names)): + robot_name = 
self._robot_names[i] + # increase by a factor + self._solver_position_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_position_iteration_count) + self._solver_velocity_iteration_counts[robot_name] = torch.full((self._num_envs,), self._solver_velocity_iteration_count) + self._solver_stabilization_threshs[robot_name] = torch.full((self._num_envs,), self._solver_stabilization_thresh) + self._robots_art_views[robot_name].set_solver_position_iteration_counts(self._solver_position_iteration_counts[robot_name]) + self._robots_art_views[robot_name].set_solver_velocity_iteration_counts(self._solver_velocity_iteration_counts[robot_name]) + self._robots_art_views[robot_name].set_stabilization_thresholds(self._solver_stabilization_threshs[robot_name]) + self._get_solver_info() # gets again solver info for articulation, so that it's possible to debug if + # the operation was successful + + def _print_envs_info(self): + + ground_info = f"[Ground info]" + "\n" + \ + "static friction coeff.: " + str(self._terrain_material.GetStaticFrictionAttr().Get()) + "\n" + \ + "dynamics friction coeff.: " + str(self._terrain_material.GetDynamicFrictionAttr().Get()) + "\n" + \ + "restitution coeff.: " + str(self._terrain_material.GetRestitutionAttr().Get()) + "\n" +\ + "friction comb. mode: " + str(self._terrain_physix_material.GetFrictionCombineModeAttr().Get()) + "\n" + \ + "damping comb. mode: " + str(self._terrain_physix_material.GetDampingCombineModeAttr().Get()) + "\n" + \ + "restitution comb. mode: " + str(self._terrain_physix_material.GetRestitutionCombineModeAttr().Get()) + "\n" + + Journal.log(self.__class__.__name__, + "_print_envs_info", + ground_info, + LogType.STAT, + throw_when_excep = True) + + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + task_info = f"[{robot_name}]" + "\n" + \ + "bodies: " + str(self._robots_art_views[robot_name].body_names) + "\n" + \ + "n. 
prims: " + str(self._robots_art_views[robot_name].count) + "\n" + \ + "prims names: " + str(self._robots_art_views[robot_name].prim_paths) + "\n" + \ + "n. bodies: " + str(self._robots_art_views[robot_name].num_bodies) + "\n" + \ + "n. dofs: " + str(self._robots_art_views[robot_name].num_dof) + "\n" + \ + "dof names: " + str(self._robots_art_views[robot_name].dof_names) + "\n" + \ + "solver_position_iteration_counts: " + str(self._solver_position_iteration_counts[robot_name]) + "\n" + \ + "solver_velocity_iteration_counts: " + str(self._solver_velocity_iteration_counts[robot_name]) + "\n" + \ + "stabiliz. thresholds: " + str(self._solver_stabilization_threshs[robot_name]) + # print("dof limits: " + str(self._robots_art_views[robot_name].get_dof_limits())) + # print("effort modes: " + str(self._robots_art_views[robot_name].get_effort_modes())) + # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains())) + # print("dof max efforts: " + str(self._robots_art_views[robot_name].get_max_efforts())) + # print("dof gains: " + str(self._robots_art_views[robot_name].get_gains())) + # print("physics handle valid: " + str(self._robots_art_views[robot_name].is_physics_handle_valid()) + Journal.log(self.__class__.__name__, + "_print_envs_info", + task_info, + LogType.STAT, + throw_when_excep = True) + + def _fill_robot_info_from_world(self): + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + self._robot_bodynames[robot_name] = self._robots_art_views[robot_name].body_names + self._robot_n_links[robot_name] = self._robots_art_views[robot_name].num_bodies + self._robot_n_dofs[robot_name] = self._robots_art_views[robot_name].num_dof + self._robot_dof_names[robot_name] = self._robots_art_views[robot_name].dof_names + + def _set_initial_camera_params(self, + camera_position=[10, 10, 3], + camera_target=[0, 0, 0], + camera_prim_path="/OmniverseKit_Persp"): + set_camera_view(eye=camera_position, + target=camera_target, + 
camera_prim_path=camera_prim_path) + + def _init_contact_sensors(self, robot_name: str): + self.omni_contact_sensors[robot_name]=None + sensor_radii={} + contact_offsets={} + self._contact_names[robot_name]=self._env_opts["contact_prims"] + for contact_prim in self._env_opts["contact_prims"]: + sensor_radii[contact_prim]=self._env_opts["sensor_radii"] + contact_offsets[contact_prim]=np.array([0.0, 0.0, 0.0]) + if not (len(self._env_opts["contact_prims"])==0): + self.omni_contact_sensors[robot_name]=OmniContactSensors( + name=robot_name, + n_envs=self._num_envs, + contact_prims=self._env_opts["contact_prims"], + contact_offsets=contact_offsets, + sensor_radii=sensor_radii, + device=self._device, + dtype=self._dtype, + enable_debug=self._debug, + filter_paths=self._ground_plane_prim_paths) + self.omni_contact_sensors[robot_name].create_contact_sensors( + self._world, + envs_namespace=self._env_opts["envs_ns"]) + + def _init_robots_state(self): + + self._masses = {} + self._weights = {} + + self._pert_active = {} # bool mask: (num_envs,) + self._pert_steps_remaining = {} # int steps: (num_envs,) + self._pert_forces_world = {} # (num_envs,3) + self._pert_torques_world = {} # (num_envs,3) + self._pert_force_local = {} # (num_envs,3) (if needed) + self._pert_torque_local = {} # (num_envs,3) + self._pert_lindir = {} + self._pert_angdir = {} + self._pert_durations = {} + self._pert_scratch = {} + self._pert_det_counter = {} + + # convert durations in seconds to integer physics steps (min 1 step) + self._pert_min_steps = max(1, int(math.ceil(self._env_opts["pert_wrenches_min_duration"] / self.physics_dt()))) + self._pert_max_steps = max(self._pert_min_steps, int(math.ceil(self._env_opts["pert_wrenches_max_duration"] / self.physics_dt()))) + + pert_wrenches_step_rate=self._env_opts["pert_wrenches_rate"]/self.physics_dt() # 1 pert every n physics steps + self._pert_det_steps = max(1, int(round(pert_wrenches_step_rate))) + self._pert_wrenches_prob=min(1.0, 
1.0/pert_wrenches_step_rate) # sampling prob to be used when not deterministic + + self._calc_robot_distrib() + + for i in range(0, len(self._robot_names)): + + robot_name = self._robot_names[i] + + pose = self._robots_art_views[robot_name].get_world_poses( + clone = True) # tuple: (pos, quat) + + # root p (measured, previous, default) + self._root_p[robot_name] = pose[0] + self._root_p_prev[robot_name] = torch.clone(pose[0]) + # print(self._root_p_default[robot_name].device) + self._root_p_default[robot_name] = torch.clone(pose[0]) + self._distr_offset[robot_name] + # root q (measured, previous, default) + self._root_q[robot_name] = pose[1] # root orientation + self._root_q_prev[robot_name] = torch.clone(pose[1]) + self._root_q_default[robot_name] = torch.clone(pose[1]) + # jnt q (measured, previous, default) + self._jnts_q[robot_name] = self._robots_art_views[robot_name].get_joint_positions( + clone = True) # joint positions + self._jnts_q_prev[robot_name] = self._robots_art_views[robot_name].get_joint_positions( + clone = True) + self._jnts_q_default[robot_name] = torch.full((self._jnts_q[robot_name].shape[0], + self._jnts_q[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + + # root v (measured, default) + self._root_v[robot_name] = self._robots_art_views[robot_name].get_linear_velocities( + clone = True) # root lin. velocityù + self._root_v_base_loc[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_v_prev[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_v_default[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + # root omega (measured, default) + self._root_omega[robot_name] = self._robots_art_views[robot_name].get_angular_velocities( + clone = True) # root ang. 
velocity + self._root_omega_prev[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0) + + self._root_omega_base_loc[robot_name] = torch.full_like(self._root_omega[robot_name], fill_value=0.0) + self._root_omega_default[robot_name] = torch.full((self._root_omega[robot_name].shape[0], self._root_omega[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + # root a (measured,) + self._root_a[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_a_base_loc[robot_name] = torch.full_like(self._root_a[robot_name], fill_value=0.0) + self._root_alpha[robot_name] = torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._root_alpha_base_loc[robot_name] = torch.full_like(self._root_alpha[robot_name], fill_value=0.0) + + # height grid sensor storage + grid_sz = int(self._env_opts["height_sensor_pixels"]) + self._height_imgs[robot_name] = torch.zeros((self._num_envs, grid_sz, grid_sz), + dtype=self._dtype, + device=self._device) + + # joints v (measured, default) + self._jnts_v[robot_name] = self._robots_art_views[robot_name].get_joint_velocities( + clone = True) # joint velocities + self._jnts_v_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + + # joints efforts (measured, default) + self._jnts_eff[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + self._jnts_eff_default[robot_name] = torch.full((self._jnts_v[robot_name].shape[0], self._jnts_v[robot_name].shape[1]), + 0.0, + dtype=self._dtype, + device=self._device) + self._root_pos_offsets[robot_name] = torch.zeros((self._num_envs, 3), + device=self._device) # reference position offses + + self._root_q_offsets[robot_name] = torch.zeros((self._num_envs, 4), + device=self._device) + self._root_q_offsets[robot_name][:, 0] = 1.0 # init to 
valid identity quaternion + + # boolean active flag per env + self._pert_active[robot_name] = torch.zeros((self._num_envs,), dtype=torch.bool, device=self._device) + # remaining steps as integer tensor + self._pert_steps_remaining[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device) + # world force & torque (N and N*m) stored as floats + self._pert_forces_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + self._pert_torques_world[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + # local frame copies (if you want to store local-frame versions) + self._pert_force_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + self._pert_torque_local[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + + self._pert_lindir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + self._pert_angdir[robot_name] = torch.zeros((self._num_envs, 3), dtype=self._dtype, device=self._device) + + self._pert_durations[robot_name] = torch.zeros((self._num_envs, 1), dtype=torch.int32, device=self._device) + + self._pert_scratch[robot_name] = torch.zeros((self._num_envs, 1), dtype=self._dtype, device=self._device) + self._pert_det_counter[robot_name] = torch.zeros((self._num_envs,), dtype=torch.int32, device=self._device) + + self._masses[robot_name] = torch.sum(self._robots_art_views[robot_name].get_body_masses(clone=True), dim=1).to(dtype=self._dtype, device=self._device) + + self._weights[robot_name] = (self._masses[robot_name] * abs(self._env_opts["gravity"][2].item())).reshape((self._num_envs, 1)) + + def _track_terrain_hits(self, robot_name: str, env_indxs: torch.Tensor = None): + """Track transitions into the terrain boundary margin (1 m) and count hits per env.""" + if self._env_opts["use_flat_ground"]: + return + border = float(self._env_opts["terrain_border"]) + 
threshold = max(0.0, border - self._terrain_hit_margin) + state = self._terrain_hit_active[robot_name] + counts = self._terrain_hit_counts[robot_name] + if env_indxs is None: + pos_xy = self._root_p[robot_name][:, 0:2] + hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1) + new_hits = (~state) & hitting + if new_hits.any(): + counts[new_hits] += 1 + state.copy_(hitting) + else: + pos_xy = self._root_p[robot_name][env_indxs, 0:2] + hitting = torch.any(torch.abs(pos_xy) > threshold, dim=1) + prev_state = state[env_indxs] + new_hits = (~prev_state) & hitting + if new_hits.any(): + counts[env_indxs[new_hits]] += 1 + state[env_indxs] = hitting + + def _maybe_log_terrain_hits(self): + """Log boundary hits at low frequency only when counters change.""" + if self._env_opts["use_flat_ground"]: + return + period = int(self._env_opts["terrain_hit_log_period"]) + if period <= 0 or (self.step_counter % period != 0): + return + for robot_name in self._robot_names: + active = self._terrain_hit_active.get(robot_name, None) + if active is None: + continue + active_now = int(active.sum().item()) + if active_now == 0: + continue + counts = self._terrain_hit_counts[robot_name] + last = self._terrain_hit_counts_last_logged.get(robot_name, None) + if last is not None and torch.equal(counts, last): + continue + total_hits = int(counts.sum().item()) + msg = f"{active_now} {robot_name} robots within {self._terrain_hit_margin}m of terrain border. Total hits: {total_hits}." 
+ Journal.log(self.__class__.__name__, + "_terrain_hits", + msg, + LogType.WARN, + throw_when_excep = True) + self._terrain_hit_counts_last_logged[robot_name] = counts.clone() + + @override + def _post_world_step(self) -> bool: + res = super()._post_world_step() + self._maybe_log_terrain_hits() + return res + + @override + def _post_world_step_db(self) -> bool: + res = super()._post_world_step_db() + self._maybe_log_terrain_hits() + return res + + def current_tstep(self): + self._world.current_time_step_index + + def world_time(self, robot_name: str): + return self._world.current_time + + def physics_dt(self): + return self._world.get_physics_dt() + + def rendering_dt(self): + return self._env_opts["rendering_dt"] + + def set_physics_dt(self, physics_dt:float): + self._world.set_simulation_dt(physics_dt=physics_dt,rendering_dt=None) + + def set_rendering_dt(self, rendering_dt:float): + self._world.set_simulation_dt(physics_dt=None,rendering_dt=rendering_dt) + + def _robot_jnt_names(self, robot_name: str): + return self._robots_art_views[robot_name].dof_names diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/jnt_imp_config.yaml b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/jnt_imp_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..88357f457e8a8de67d698e27491ce07fd2bcafa0 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/jnt_imp_config.yaml @@ -0,0 +1,46 @@ +XBotInterface: # just used for retrieving homing in sim with xbot_mujoco + urdf_path: $PWD/xmj_env_files/centauro.urdf + srdf_path: $PWD/xmj_env_files/centauro.srdf + +# defaults +motor_pd: + "j_arm*_1": [500, 10] + "j_arm*_2": [500, 10] + "j_arm*_3": [500, 10] + "j_arm*_4": [500, 10] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [3000, 30] + "hip_pitch_*": [3000, 30] + "knee_pitch_*": [3000, 30] 
+ "ankle_pitch_*": [1000, 10] + "ankle_yaw_*": [300, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] + +startup_motor_pd: # iannis combo + "j_arm*_1": [500, 30] + "j_arm*_2": [500, 30] + "j_arm*_3": [500, 30] + "j_arm*_4": [500, 30] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [200, 60] + "hip_pitch_*": [200, 60] + "knee_pitch_*": [200, 60] + "ankle_pitch_*": [200, 60] + "ankle_yaw_*": [120, 30] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/jnt_imp_config_no_yaw_ub.yaml b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/jnt_imp_config_no_yaw_ub.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d51f7a360e83a169cdfd4754a126c6999e4e8eb1 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/jnt_imp_config_no_yaw_ub.yaml @@ -0,0 +1,46 @@ +XBotInterface: # just used for retrieving homing in sim with xbot_mujoco + urdf_path: $PWD/xmj_env_files/centauro.urdf + srdf_path: $PWD/xmj_env_files/centauro.srdf + +# defaults +motor_pd: + "j_arm*_1": [500, 10] + "j_arm*_2": [500, 10] + "j_arm*_3": [500, 10] + "j_arm*_4": [500, 10] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [3000, 30] + "hip_pitch_*": [3000, 30] + "knee_pitch_*": [3000, 30] + "ankle_pitch_*": [1000, 10] + "ankle_yaw_*": [300, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] + +startup_motor_pd: # iannis combo + "j_arm*_1": [100, 25] + "j_arm*_2": [100, 25] + "j_arm*_3": [100, 25] + 
"j_arm*_4": [100, 25] + "j_arm*_5": [20, 8] + "j_arm*_6": [20, 8] + "j_arm*_7": [20, 8] + "hip_yaw_*": [200, 60] + "hip_pitch_*": [200, 60] + "knee_pitch_*": [200, 60] + "ankle_pitch_*": [200, 60] + "ankle_yaw_*": [600, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [400, 60] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/launch_control_cluster.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/launch_control_cluster.py new file mode 100644 index 0000000000000000000000000000000000000000..9c484f45d52cd3cff4da3401453a30258d1e53ab --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/launch_control_cluster.py @@ -0,0 +1,106 @@ +import os +import argparse +import multiprocessing as mp +import importlib.util +import inspect + +from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict + +from EigenIPC.PyEigenIPC import Journal, LogType + +this_script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0] + +# Function to dynamically import a module from a specific file path +def import_env_module(env_path): + spec = importlib.util.spec_from_file_location("env_module", env_path) + env_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(env_module) + return env_module + +if __name__ == "__main__": + + # Parse command line arguments for CPU affinity + parser = argparse.ArgumentParser(description="Set CPU affinity for the script.") + parser.add_argument('--ns', type=str, help='Namespace to be used for cluster shared memory') + parser.add_argument('--urdf_path', type=str, help='Robot description package path for URDF ') + parser.add_argument('--srdf_path', type=str, help='Robot description package path for SRDF ') + parser.add_argument('--size', type=int, help='cluster 
size', default=1) + + # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8 + parser.add_argument('--cloop',action='store_true', help='whether to use RHC controllers in closed loop mode') + parser.add_argument('--cluster_dt', type=float, default=0.05, help='dt used by MPC controllers for discretization') + parser.add_argument('--n_nodes', type=int, default=31, help='n nodes used by MPC controllers') + + parser.add_argument('--verbose',action='store_true', help='run in verbose mode') + + parser.add_argument('--enable_debug',action='store_true', help='enable debug mode for cluster client and all controllers') + + parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data") + + parser.add_argument('--no_mp_fork',action='store_true', help='whether to multiprocess with forkserver context') + + parser.add_argument('--set_affinity',action='store_true', help='set affinity to a core for each controller') + + parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="") + parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000) + parser.add_argument('--codegen_override_dir', type=str, help='Path to base dir where codegen is to be loaded', default="") + + parser.add_argument('--custom_args_names', nargs='+', default=None, + help='list of custom arguments names') + parser.add_argument('--custom_args_vals', nargs='+', default=None, + help='list of custom arguments values') + parser.add_argument('--custom_args_dtype', nargs='+', default=None, + help='list of custom arguments data types') + + parser.add_argument('--cluster_client_fname', type=str, + default="aug_mpc.controllers.rhc.hybrid_quad_client", + help="cluster client file import pattern (without extension)") + + args = parser.parse_args() + + # Ensure custom_args_names and custom_args_vals have the same 
length + custom_opts = generate_custom_arg_dict(args=args) + custom_opts.update({"cloop": args.cloop, + "cluster_dt": args.cluster_dt, + "n_nodes": args.n_nodes, + "codegen_override_dir": args.codegen_override_dir}) + if args.no_mp_fork: # this needs to be in the main + mp.set_start_method('spawn') + else: + # mp.set_start_method('forkserver') + mp.set_start_method('fork') + + cluster_module=importlib.import_module(args.cluster_client_fname) + # Get all classes defined in the module + classes_in_module = [name for name, obj in inspect.getmembers(cluster_module, inspect.isclass) + if obj.__module__ == cluster_module.__name__] + if len(classes_in_module) == 1: + cluster_classname=classes_in_module[0] + ClusterClient = getattr(cluster_module, cluster_classname) + cluster_client = ClusterClient(namespace=args.ns, + cluster_size=args.size, + urdf_xacro_path=args.urdf_path, + srdf_xacro_path=args.srdf_path, + open_loop=not args.cloop, + use_mp_fork = not args.no_mp_fork, + verbose=args.verbose, + debug=args.enable_debug, + base_dump_dir=args.dmpdir, + timeout_ms=args.timeout_ms, + custom_opts=custom_opts, + codegen_override=args.codegen_override_dir, + set_affinity=args.set_affinity) + cluster_client.run() + + else: + class_list_str = ", ".join(classes_in_module) + Journal.log("launch_control_cluster.py", + "", + f"Found more than one class in cluster client file {args.cluster_client_fname}. 
Found: {class_list_str}", + LogType.EXCEP, + throw_when_excep = False) + exit() + + # control_cluster_client = + # control_cluster_client.run() # spawns the controllers on separate processes (blocking) + diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/launch_train_env.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/launch_train_env.py new file mode 100644 index 0000000000000000000000000000000000000000..1864f2e4f20d85297c2b5568666ea16d9bd8f644 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/launch_train_env.py @@ -0,0 +1,358 @@ +from aug_mpc.utils.determinism import deterministic_run + +from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo +from mpc_hive.utilities.shared_data.cluster_data import SharedClusterInfo + +from EigenIPC.PyEigenIPC import VLevel, Journal, LogType +from EigenIPC.PyEigenIPC import StringTensorServer + +import os, argparse, sys, types, inspect + +from perf_sleep.pyperfsleep import PerfSleep + +import importlib.util +import torch +import signal + +algo = None # global to make it accessible by signal handler +exit_request=False +dummy_step_exit_req=False + +def handle_sigint(signum, frame): + global exit_request, dummy_step_exit_req + Journal.log("launch_train_env.py", + "", + f"Received sigint. 
Will stop training.", + LogType.WARN) + exit_request=True + dummy_step_exit_req=True # in case dummy_step_loop was used + +# Function to dynamically import a module from a specific file path +# def import_env_module(env_path): +# spec = importlib.util.spec_from_file_location("env_module", env_path) +# env_module = importlib.util.module_from_spec(spec) +# spec.loader.exec_module(env_module) +# return env_module + +def import_env_module(env_path, local_env_root: str = None): + """ + env_path: full path to the child env .py file to exec + local_env_root: directory where local copies of aug_mpc_envs.training_envs modules live + """ + if local_env_root is not None: + local_env_root = os.path.abspath(local_env_root) + # override aug_mpc_envs.training_envs package to point to the local_env_root + pkg_name = "aug_mpc_envs.training_envs" + if pkg_name not in sys.modules: + mod = types.ModuleType(pkg_name) + mod.__path__ = [local_env_root] # tell Python to look here first + sys.modules[pkg_name] = mod + else: + existing = getattr(sys.modules[pkg_name], "__path__", None) + if existing is None: + sys.modules[pkg_name].__path__ = [local_env_root] + elif local_env_root not in existing: + existing.insert(0, local_env_root) + + # load the module as usual + spec = importlib.util.spec_from_file_location("env_module", env_path) + env_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(env_module) + return env_module + +def log_env_hierarchy(env_class, env_path, env_type="training"): + """ + Logs the env class, its file, and full inheritance hierarchy with file paths. 
+ env_class: the child env class + env_path: file path where the child class was loaded from + env_type: string label, e.g., "training", "evaluation", "resumed_training" + """ + def get_bases_recursive(cls): + """Recursively get all base classes with their file paths.""" + info = [] + for base in cls.__bases__: + try: + file = inspect.getfile(base) + except TypeError: + file = "built-in or unknown" + info.append(f"{base.__name__} (from {file})") + # Recurse unless it's object + if base is not object: + info.extend(get_bases_recursive(base)) + return info + + hierarchy_info = get_bases_recursive(env_class) + hierarchy_str = " -> ".join(hierarchy_info) if hierarchy_info else "No parents" + + Journal.log( + "launch_train_env.py", + "", + f"loading {env_type} env {env_class.__name__} (from {env_path}) " + f"with hierarchy: {hierarchy_str}", + LogType.INFO, + throw_when_excep=True + ) + +def dummy_step_loop(env): + global dummy_step_exit_req + while True: + if dummy_step_exit_req: + return True + step_ok=env.step(action=env.safe_action) # not a busy loop because of MPC in the step + if not step_ok: + return False + +if __name__ == "__main__": + + signal.signal(signal.SIGINT, handle_sigint) + + # Parse command line arguments for CPU affinity + parser = argparse.ArgumentParser(description="Set CPU affinity for the script.") + + parser.add_argument('--run_name', type=str, default=None, help='Name of training run') + parser.add_argument('--ns', type=str, help='Namespace to be used for shared memory') + parser.add_argument('--timeout_ms', type=int, help='Connection timeout after which the script self-terminates', default=60000) + parser.add_argument('--drop_dir', type=str, help='Directory root where all run data will be dumped') + parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="") + parser.add_argument('--seed', type=int, help='Seed', default=1) + parser.add_argument('--use_cpu',action='store_true', help='If set, all 
the training (data included) will be performed on CPU') + + parser.add_argument('--db',action='store_true', help='Whether to enable local data logging for the algorithm (reward metrics, etc.)') + parser.add_argument('--env_db',action='store_true', help='Whether to enable env db data logging on shared mem (e.g. reward metrics are not available for reading anymore)') + parser.add_argument('--full_env_db',action='store_true', help='Whether to enable detailed episodic data storage (data over single transitions)') + parser.add_argument('--rmdb',action='store_true', help='Whether to enable remote debug (e.g. data logging on remote servers)') + + parser.add_argument('--tot_tsteps', type=int, help='Total number of timesteps to be collected', default=int(30e6)) + parser.add_argument('--action_repeat', type=int, help='Frame skipping (1-> no skip)', default=1) + parser.add_argument('--discount_factor', type=float, help='', default=0.99) + parser.add_argument('--obs_norm',action='store_true', help='Whether to enable the use of running normalizer in agent') + parser.add_argument('--obs_rescale',action='store_true', help='Whether to rescale observation depending on their expected range') + parser.add_argument('--add_weight_norm',action='store_true', help='Whether to add weight normalization to agent interal llayers') + parser.add_argument('--add_layer_norm',action='store_true', help='Whether to add layer normalization to agent internal llayers') + parser.add_argument('--add_batch_norm',action='store_true', help='Whether to add batch normalization to agent internal llayers') + + parser.add_argument('--act_rescale_critic',action='store_true', help='Whether to rescale actions provided to critic (if SAC) to be in range [-1, 1]') + parser.add_argument('--use_period_resets',action='store_true', help='') + + parser.add_argument('--sac',action='store_true', help='Use SAC, otherwise PPO, unless dummy is set') + parser.add_argument('--dummy',action='store_true', help='Use dummy agent 
(useful for testing and debugging environments)') + + parser.add_argument('--dump_checkpoints',action='store_true', help='Whether to dump model checkpoints during training') + + parser.add_argument('--demo_envs_perc', type=float, help='[0, 1]', default=0.0) + parser.add_argument('--demo_stop_thresh', type=float, default=None, + help='Performance hreshold above which demonstration envs should be deactivated.') + + parser.add_argument('--expl_envs_perc', type=float, help='[0, 1]', default=0) + + parser.add_argument('--use_rnd',action='store_true', help='Whether to use RND for exploration') + + parser.add_argument('--eval',action='store_true', help='Whether to perform an evaluation run') + parser.add_argument('--n_eval_timesteps', type=int, help='Total number of timesteps to be evaluated', default=int(1e6)) + parser.add_argument('--det_eval',action='store_true', help='Whether to perform a deterministic eval (only action mean is used). Only valid if --eval.') + parser.add_argument('--allow_expl_during_eval',action='store_true', help='Whether to allow expl envs during evaluation (useful to tune exploration)') + + parser.add_argument('--resume',action='store_true', help='Resume a previous training using a checkpoint') + parser.add_argument('--mpath', type=str, help='Model path to be used for policy evaluation', default=None) + parser.add_argument('--mname', type=str, help='Model name', default=None) + parser.add_argument('--override_env',action='store_true', help='Whether to override env when running evaluation') + + parser.add_argument('--anomaly_detect',action='store_true', help='Whether to enable anomaly detection (useful for debug)') + + parser.add_argument('--compression_ratio', type=float, + help='If e.g. 
0.8, the fist layer will be of dimension [input_features_size x (input_features_size*compression_ratio)]', default=-1.0) + parser.add_argument('--actor_lwidth', type=int, help='Actor network layer width', default=128) + parser.add_argument('--critic_lwidth', type=int, help='Critic network layer width', default=256) + parser.add_argument('--actor_n_hlayers', type=int, help='Actor network size', default=3) + parser.add_argument('--critic_n_hlayers', type=int, help='Critic network size', default=4) + + parser.add_argument('--env_fname', type=str, default="twist_tracking_env", help='Training env file name (without extension)') + parser.add_argument('--env_classname', type=str, default="TwistTrackingEnv", help='Training env class name') + parser.add_argument('--override_agent_actions',action='store_true', help='Whether to override agent actions with custom ones from shared mem (useful for db)') + parser.add_argument('--override_agent_refs',action='store_true', help='Whether to override automatically generated agent refs (useful for debug)') + + parser.add_argument('--step_while_setup',action='store_true', help='Continue stepping env with default actions while setting up agent, etc..') + parser.add_argument('--reset_on_init',action='store_true', help='Whether to reset the environment on initialization') + + args = parser.parse_args() + args_dict = vars(args) + + if args.eval and args.resume: + Journal.log("launch_train_env.py", + "", + f"Cannot set both --eval and --resume flags. 
Exiting.", + LogType.EXCEP, + throw_when_excep = True) + + deterministic_run(seed=args.seed, torch_det_algos=False) + + anomaly_detect=False + if args.anomaly_detect: + torch.autograd.set_detect_anomaly(True) + + if (not args.mpath is None) and (not args.mname is None): + mpath_full = os.path.join(args.mpath, args.mname) + else: + mpath_full=None + + env_fname=args.env_fname + env_classname = args.env_classname + env_path="" + env_module=None + if (not args.eval and not args.resume) or (args.override_env): + # if starting a fresh traning or overriding env, load from a fresh env from aug_mpc + env_path = f"aug_mpc_envs.training_envs.{env_fname}" + env_module = importlib.import_module(env_path) + else: + if args.mpath is None: + Journal.log("launch_train_env.py", + "", + f"no mpath provided! Cannot load env. Either provide a mpath or run with --override_env", + LogType.EXCEP, + throw_when_excep = True) + + env_path = os.path.join(args.mpath, env_fname + ".py") + env_module = import_env_module(env_path, local_env_root=args.mpath) + + EnvClass = getattr(env_module, env_classname) + env_type = "training" if not args.eval else "evaluation" + if args.resume: + env_type = "resumed_training" + log_env_hierarchy(EnvClass, env_path, env_type) # db print of env class + + env = EnvClass(namespace=args.ns, + verbose=True, + vlevel=VLevel.V2, + use_gpu=not args.use_cpu, + debug=args.env_db, + override_agent_refs=args.override_agent_refs, + timeout_ms=args.timeout_ms, + env_opts=args_dict) + if not env.is_ready(): # something went wrong + exit() + + dummy_step_thread = None + if args.step_while_setup: + import threading + # spawn step thread (we don't true parallelization, thread is fine) + # start the dummy stepping in a separate thread so setup can continue concurrently + dummy_step_thread = threading.Thread(target=dummy_step_loop, args=(env,), daemon=True) + dummy_step_thread.start() + + # getting some sim info for debugging + sim_data = {} + sim_info_shared = 
SharedEnvInfo(namespace=args.ns, + is_server=False, + safe=False) + sim_info_shared.run() + sim_info_keys = sim_info_shared.param_keys + sim_info_data = sim_info_shared.get().flatten() + for i in range(len(sim_info_keys)): + sim_data[sim_info_keys[i]] = sim_info_data[i] + + # getting come cluster info for debugging + cluster_data={} + cluste_info_shared = SharedClusterInfo(namespace=args.ns, + is_server=False, + safe=False) + cluste_info_shared.run() + cluster_info_keys = cluste_info_shared.param_keys + cluster_info_data = cluste_info_shared.get().flatten() + for i in range(len(cluster_info_keys)): + cluster_data[cluster_info_keys[i]] = cluster_info_data[i] + + custom_args={} + custom_args["uname_host"]="user_host" + try: + username = os.getlogin() # add machine info to db data + hostname = os.uname().nodename + user_host = f"{username}@{hostname}" + custom_args["uname_host"]=user_host + except: + pass + + algo=None + if not args.dummy: + if args.sac: + from aug_mpc.training_algs.sac.sac import SAC + + algo = SAC(env=env, + debug=args.db, + remote_db=args.rmdb, + seed=args.seed) + else: + from aug_mpc.training_algs.ppo.ppo import PPO + + algo = PPO(env=env, + debug=args.db, + remote_db=args.rmdb, + seed=args.seed) + else: + from aug_mpc.training_algs.dummy.dummy import Dummy + + algo=Dummy(env=env, + debug=args.db, + remote_db=args.rmdb, + seed=args.seed) + + custom_args.update(args_dict) + custom_args.update(cluster_data) + custom_args.update(sim_data) + + run_name=env_classname if args.run_name is None else args.run_name + algo.setup(run_name=run_name, + ns=args.ns, + verbose=True, + drop_dir_name=args.drop_dir, + custom_args=custom_args, + comment=args.comment, + eval=args.eval, + resume=args.resume, + model_path=mpath_full, + n_eval_timesteps=args.n_eval_timesteps, + dump_checkpoints=args.dump_checkpoints, + norm_obs=args.obs_norm, + rescale_obs=args.obs_rescale) + + full_drop_dir=algo.drop_dir() + shared_drop_dir = StringTensorServer(length=1, + 
basename="SharedTrainingDropDir", + name_space=args.ns, + verbose=True, + vlevel=VLevel.V2, + force_reconnection=True) + shared_drop_dir.run() + + while True: + if not shared_drop_dir.write_vec([full_drop_dir], 0): + ns=1000000000 + PerfSleep.thread_sleep(ns) + continue + else: + break + + if args.step_while_setup: + # stop dummy step thread and give algo authority on step + dummy_step_exit_req=True + # wait for thread to join + if dummy_step_thread is not None: + dummy_step_thread.join() + Journal.log("launch_train_env.py", + "", + f"Dummy env step thread joined. Moving step authority to algo.", + LogType.INFO) + + eval=args.eval + if args.override_agent_actions: + eval=True + if not eval: + while not exit_request: + if not algo.learn(): + break + else: # eval phase + with torch.no_grad(): # no need for grad computation + while not exit_request: + if not algo.eval(): + break + + algo.done() # make sure to terminate training properly diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/launch_world_interface.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/launch_world_interface.py new file mode 100644 index 0000000000000000000000000000000000000000..1836cedcdd2191f40b35db291e86f4ff4a7b8295 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/launch_world_interface.py @@ -0,0 +1,207 @@ +import os +import argparse +import importlib.util +import inspect + +from aug_mpc.utils.rt_factor import RtFactor +from aug_mpc.utils.custom_arg_parsing import generate_custom_arg_dict +from aug_mpc.utils.determinism import deterministic_run + +from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +script_name = os.path.splitext(os.path.basename(os.path.abspath(__file__)))[0] + +# Function to dynamically import a module from a specific 
file path +def import_world_module(env_path): + spec = importlib.util.spec_from_file_location("world_module", env_path) + world_module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(world_module) + return world_module + +if __name__ == '__main__': + + parser = argparse.ArgumentParser(description="Sim. env launcher") + # Add arguments + parser.add_argument('--robot_name', type=str, help='Alias to be used for the robot and also shared memory') + parser.add_argument('--urdf_path', type=str, help='path to the URDF file description for each robot') + parser.add_argument('--srdf_path', type=str, help='path to the SRDF file description for each robot (used for homing)') + parser.add_argument('--jnt_imp_config_path', type=str, help='path to a valid YAML file containing information on jnt impedance gains') + parser.add_argument('--num_envs', type=int, default=1) + parser.add_argument('--n_contacts', type=int, default=4) + parser.add_argument('--cluster_dt', type=float, default=0.03, help='dt at which the control cluster runs') + parser.add_argument('--dmpdir', type=str, help='directory where data is dumped', default="/root/aux_data") + parser.add_argument('--remote_stepping',action='store_true', + help='Whether to use remote stepping for cluster triggering (to be set during training)') + + # Replacing argparse.BooleanOptionalAction with 'store_true' and 'store_false' for compatibility with Python 3.8 + parser.add_argument('--use_gpu',action='store_true', help='Whether to use gpu simulation') + + parser.add_argument('--enable_debug',action='store_true', help='Whether to enable debug mode (may introduce significant overhead)') + + parser.add_argument('--headless',action='store_true', help='Whether to run simulation in headless mode') + + parser.add_argument('--verbose',action='store_true', help='Enable verbose mode') + + parser.add_argument('--comment', type=str, help='Any useful comment associated with this run', default="") + 
parser.add_argument('--timeout_ms', type=int, help='connection timeout after which the script self-terminates', default=60000) + parser.add_argument('--physics_dt', type=float, default=5e-4, help='') + + parser.add_argument('--use_custom_jnt_imp',action='store_true', + help='Whether to override the default PD controller with a custom one') + + parser.add_argument('--diff_vels',action='store_true', + help='Whether to obtain velocities by differentiation or not') + + parser.add_argument('--init_timesteps', type=int, help='initialization timesteps', default=None) + parser.add_argument('--seed', type=int, help='seed', default=0) + + parser.add_argument('--custom_args_names', nargs='+', default=None, + help='list of custom arguments names') + parser.add_argument('--custom_args_vals', nargs='+', default=None, + help='list of custom arguments values') + parser.add_argument('--custom_args_dtype', nargs='+', default=None, + help='list of custom arguments data types') + + parser.add_argument('--world_iface_fname', type=str, + default="aug_mpc_envs.world_interfaces.isaac_world_interface", + help="world interface file import pattern (without extension)") + + args = parser.parse_args() + + deterministic_run(seed=args.seed, torch_det_algos=False) + + default_init_duration=3.0 # [s] + default_init_tsteps=int(default_init_duration/args.physics_dt) + init_tsteps=args.init_timesteps + if init_tsteps is None: + init_tsteps=default_init_tsteps + # Ensure custom_args_names, custom_args_vals, and custom_args_dtype have the same length + custom_opt = generate_custom_arg_dict(args=args) + + Journal.log("launch_world_interface.py", + "", + f"Will warmup world interface for {default_init_duration}s ({default_init_tsteps} physics steps)", + LogType.STAT) + + robot_names = [args.robot_name] + robot_urdf_paths = [args.urdf_path] + robot_srdf_paths = [args.srdf_path] + control_clust_dts = [float(args.cluster_dt)] + use_remote_stepping = [args.remote_stepping] + n_contacts = [args.n_contacts] + 
jnt_imp_config_paths = [args.jnt_imp_config_path] + num_envs = args.num_envs + control_clust_dt = args.cluster_dt # [s]. Dt at which RHC controllers run + headless = args.headless + + # simulation parameters + remote_env_params = {} + remote_env_params["physics_dt"] = args.physics_dt # physics_dt? + remote_env_params["n_envs"] = num_envs + remote_env_params["use_gpu"] = args.use_gpu + remote_env_params["substepping_dt"] = control_clust_dts[0] + remote_env_params["headless"] = headless + remote_env_params["debug_enabled"] = args.enable_debug + remote_env_params["seed"] = args.seed + remote_env_params.update(custom_opt) + # sim info to be broadcasted on shared memory + # adding some data to dict for debugging + + shared_sim_infos = [] + for i in range(len(robot_names)): + shared_sim_infos.append(SharedEnvInfo( + namespace=robot_names[i], + is_server=True, + env_params_dict=remote_env_params, + verbose=True, + vlevel=VLevel.V2, + force_reconnection=True)) + shared_sim_infos[i].run() + + world_module=importlib.import_module(args.world_iface_fname) + classes_in_module = [name for name, obj in inspect.getmembers(world_module, inspect.isclass) + if obj.__module__ == world_module.__name__] + if len(classes_in_module) == 1: + cluster_classname=classes_in_module[0] + WorldInterface = getattr(world_module, cluster_classname) + else: + class_list_str = ", ".join(classes_in_module) + Journal.log("launch_world_interface.py", + "", + f"Found more than one class in world file {args.world_iface_fname}. 
Found: {class_list_str}", + LogType.EXCEP, + throw_when_excep = False) + exit() + + world_interface = WorldInterface(robot_names=robot_names, + robot_urdf_paths=robot_urdf_paths, + robot_srdf_paths=robot_srdf_paths, + cluster_dt=control_clust_dts, + jnt_imp_config_paths=jnt_imp_config_paths, + n_contacts=n_contacts, + use_remote_stepping=use_remote_stepping, + name=classes_in_module[0], + num_envs=num_envs, + debug=args.enable_debug, + verbose=args.verbose, + vlevel=VLevel.V2, + n_init_step=init_tsteps, + timeout_ms=args.timeout_ms, + env_opts=remote_env_params, + use_gpu=args.use_gpu, + override_low_lev_controller=args.use_custom_jnt_imp) # create environment + # reset_ok=world_interface.reset(reset_sim=True) + # if not reset_ok: + # world_interface.close() + # exit() + + rt_factor = RtFactor(dt_nom=world_interface.physics_dt(), + window_size=100) + + while True: + + if rt_factor.reset_due(): + rt_factor.reset() + + step_ok=world_interface.step() + + if not step_ok: + break + + rt_factor.update() + + for i in range(len(robot_names)): + robot_name=robot_names[i] + n_steps = world_interface.cluster_sim_step_counters[robot_name] + sol_counter = world_interface.cluster_servers[robot_name].solution_counter() + trigger_counter = world_interface.cluster_servers[robot_name].trigger_counter() + shared_sim_infos[i].write(dyn_info_name=["sim_rt_factor", + "total_rt_factor", + "env_stepping_dt", + "world_stepping_dt", + "time_to_get_states_from_env", + "cluster_state_update_dt", + "cluster_sol_time", + "n_sim_steps", + "n_cluster_trigger_steps", + "n_cluster_sol_steps", + "sim_time", + "cluster_time"], + val=[rt_factor.get(), + rt_factor.get() * num_envs, + rt_factor.get_avrg_step_time(), + world_interface.debug_data["time_to_step_world"], + world_interface.debug_data["time_to_get_states_from_env"], + world_interface.debug_data["cluster_state_update_dt"][robot_name], + world_interface.debug_data["cluster_sol_time"][robot_name], + n_steps, + trigger_counter, + sol_counter, + 
world_interface.debug_data["sim_time"][robot_name], + sol_counter*world_interface.cluster_servers[robot_name].cluster_dt() + ]) + + world_interface.close() diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/__init__.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/augmpc_cluster_client.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/augmpc_cluster_client.py new file mode 100644 index 0000000000000000000000000000000000000000..4d88c7b914b4bb37297e0371dfd1acbd7eafee04 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/augmpc_cluster_client.py @@ -0,0 +1,123 @@ +from mpc_hive.cluster_client.control_cluster_client import ControlClusterClient +from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf +from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds +from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds + +from EigenIPC.PyEigenIPC import Journal, LogType + +from typing import List, Dict + +import os + +from abc import abstractmethod + +class AugMpcClusterClient(ControlClusterClient): + + def _import_aux_libs(self): + super()._import_aux_libs() + + def __init__(self, + namespace: str, + urdf_xacro_path: str, + srdf_xacro_path: str, + cluster_size: int, + set_affinity: bool = False, + use_mp_fork: bool = False, + isolated_cores_only: bool = False, + core_ids_override_list: List[int] = None, + verbose: bool = False, + debug: bool = False, + codegen_base_dirname: str = "CodeGen", + base_dump_dir: str = "/tmp", + codegen_override: str = None, + custom_opts: Dict = {}): + + 
self._base_dump_dir = base_dump_dir + + self._temp_path = base_dump_dir + "/" + f"{self.__class__.__name__}" + f"_{namespace}" + + self._codegen_base_dirname = codegen_base_dirname + self._codegen_basedir = self._temp_path + "/" + self._codegen_base_dirname + + self._codegen_override = codegen_override # can be used to manually override + # the default codegen dir + + if not os.path.exists(self._temp_path): + os.makedirs(self._temp_path) + if not os.path.exists(self._codegen_basedir): + os.makedirs(self._codegen_basedir) + + self._urdf_xacro_path = urdf_xacro_path + self._srdf_xacro_path = srdf_xacro_path + self._urdf_path="" + self._srdf_path="" + + super().__init__(namespace = namespace, + cluster_size=cluster_size, + isolated_cores_only = isolated_cores_only, + set_affinity = set_affinity, + use_mp_fork = use_mp_fork, + core_ids_override_list = core_ids_override_list, + verbose = verbose, + debug = debug, + custom_opts=custom_opts) + self._generate_srdf(namespace=namespace) + + self._generate_urdf(namespace=namespace) + + + def codegen_dir(self): + + return self._codegen_basedir + + def codegen_dir_override(self): + + return self._codegen_override + + def _generate_srdf(self,namespace:str): + + custom_xacro_args=extract_custom_xacro_args(self._custom_opts) + cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(), + new_cmds=custom_xacro_args) + + # call _xrdf_cmds_override in case some cmds need to be overridden + override_cmds=self._xrdf_cmds_override() + cmds=merge_xacro_cmds(prev_cmds=cmds, + new_cmds=override_cmds) + + self._srdf_path=generate_srdf(robot_name=namespace, + xacro_path=self._srdf_xacro_path, + dump_path=self._temp_path, + xrdf_cmds=cmds) + + def _generate_urdf(self,namespace:str): + + custom_xacro_args=extract_custom_xacro_args(self._custom_opts) + cmds=merge_xacro_cmds(prev_cmds=self._xrdf_cmds(), + new_cmds=custom_xacro_args) + + # call _xrdf_cmds_override in case some cmds need to be overridden + override_cmds=self._xrdf_cmds_override() + 
cmds=merge_xacro_cmds(prev_cmds=cmds, + new_cmds=override_cmds) + + self._urdf_path=generate_urdf(robot_name=namespace, + xacro_path=self._urdf_xacro_path, + dump_path=self._temp_path, + xrdf_cmds=cmds) + + @abstractmethod + def _xrdf_cmds(self): + + # to be implemented by parent class ( + # for an example have a look at utils/centauro_xrdf_gen.py) + + pass + + def _xrdf_cmds_override(self): + + # to be overridden by parent class + + to_be_overridden = ["dummy_cmd:=true"] + + return to_be_overridden diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/augmpc_cluster_server.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/augmpc_cluster_server.py new file mode 100644 index 0000000000000000000000000000000000000000..ea5cf7226db1ae834b7bb599c33b426022fcc25d --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/augmpc_cluster_server.py @@ -0,0 +1,43 @@ +from mpc_hive.cluster_server.control_cluster_server import ControlClusterServer +from typing import List +from EigenIPC.PyEigenIPC import VLevel + +class AugMpcClusterServer(ControlClusterServer): + + def __init__(self, + robot_name: str, + cluster_size: int, + cluster_dt: float, + control_dt: float, + jnt_names: List[str], + n_contacts: int, + contact_linknames: List[str] = None, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + debug: bool = False, + use_gpu: bool = True, + force_reconnection: bool = True, + timeout_ms: int = 60000, + enable_height_sensor: bool = False, + height_grid_size: int = None, + height_grid_resolution: float = None): + + self.robot_name = robot_name + + super().__init__( + namespace=self.robot_name, + cluster_size=cluster_size, + cluster_dt=cluster_dt, + control_dt=control_dt, + jnt_names=jnt_names, + n_contacts = n_contacts, + contact_linknames = contact_linknames, + verbose=verbose, + vlevel=vlevel, + debug=debug, + 
use_gpu=use_gpu, + force_reconnection=force_reconnection, + timeout_ms=timeout_ms, + enable_height_sensor=enable_height_sensor, + height_grid_size=height_grid_size, + height_grid_resolution=height_grid_resolution) diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/__init__.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..db15baee95a0dcc257d9c1ccb788a7aabdb6e456 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/config/rhc_horizon_config.yaml @@ -0,0 +1,200 @@ +# A dummy example of a rhc controller configuration built on top of horizon +# and iLQR + +solver: + type: ilqr + ilqr.tol: 0.01 + ilqr.constr_viol_tol: 0.01 + ilqr.suppress_all_output: 'yes' + ilqr.codegen_enabled: true +# ilqr.codegen_workdir: /tmp/tyhio + ilqr.enable_gn: true + ilqr.hxx_reg_base: 0.0 + ilqr.n_threads: 0 + print_time: 0 + +constraints: + - ball_1_contact + - ball_2_contact + - ball_3_contact + - ball_4_contact + - z_ball_1 + - z_ball_2 + - z_ball_3 + - z_ball_4 + +costs: + - joint_regularization + - joint_posture + - base_position + - base_orientation + +.define: + - &w_base_pos 10 + - &w_base_ori 1 + - &w_ball_z 1 + # - &w_postural 0.0001 + - &w_jnt_v_reg 0.01 + - &w_jnt_a_reg 0.001 + - &w_jnt_f_reg 0.0001 + - &wheel_radius 0.124 + +base_position: + type: Cartesian + distal_link: base_link + indices: [0, 1, 
2] + nodes: ${N} + weight: *w_base_pos + +base_orientation: + type: Cartesian + distal_link: base_link + indices: [3, 4, 5] + nodes: ${N} + weight: *w_base_ori + +# =============================== + +rolling_contact_1: + type: Rolling + frame: wheel_1 + radius: *wheel_radius + +rolling_contact_2: + type: Rolling + frame: wheel_2 + radius: *wheel_radius + +rolling_contact_3: + type: Rolling + frame: wheel_3 + radius: *wheel_radius + +rolling_contact_4: + type: Rolling + frame: wheel_4 + radius: *wheel_radius + +# contact_1: +# type: Cartesian +# distal_link: ball_1 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# contact_2: +# type: Cartesian +# distal_link: ball_2 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# contact_3: +# type: Cartesian +# distal_link: ball_3 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# contact_4: +# type: Cartesian +# distal_link: ball_4 +# indices: [0, 1, 2] +# cartesian_type: velocity + +# ================================== + +interaction_wheel_1: + type: VertexForce + frame: ball_1 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_1 + +interaction_wheel_2: + type: VertexForce + frame: ball_2 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_2 + +interaction_wheel_3: + type: VertexForce + frame: ball_3 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_3 + +interaction_wheel_4: + type: VertexForce + frame: ball_4 + fn_min: 10.0 + enable_fc: true + friction_coeff: 0.5 + vertex_frames: + - wheel_4 + +ball_1_contact: + type: Contact + subtask: [interaction_wheel_1, rolling_contact_1] + +ball_2_contact: + type: Contact + subtask: [interaction_wheel_2, rolling_contact_2] + +ball_3_contact: + type: Contact + subtask: [interaction_wheel_3, rolling_contact_3] + +ball_4_contact: + type: Contact + subtask: [interaction_wheel_4, rolling_contact_4] + +# joint_posture: +# type: Postural +# weight: *w_postural +# indices: [0, 1, 2, +# 4, 
5, 6, +# 8, 9, 10, +# 12, 13, 14] +# nodes: ${range(N-8, N)} + +# todo: wrong, as the order COUNTS. If I add the contacts after the joint regularization, they wont get considered. +joint_regularization: + type: Regularization + nodes: all # maybe not on first nodes?? + weight: + velocity: *w_jnt_v_reg + acceleration: *w_jnt_a_reg + force: *w_jnt_f_reg + +z_ball_1: + type: Cartesian + distal_link: ball_1 + indices: [2] + cartesian_type: position + weight: *w_ball_z + +z_ball_2: + type: Cartesian + distal_link: ball_2 + indices: [2] + cartesian_type: position + weight: *w_ball_z + +z_ball_3: + type: Cartesian + distal_link: ball_3 + indices: [2] + cartesian_type: position + weight: *w_ball_z + +z_ball_4: + type: Cartesian + distal_link: ball_4 + indices: [2] + cartesian_type: position + weight: *w_ball_z \ No newline at end of file diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/gait_manager.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/gait_manager.py new file mode 100644 index 0000000000000000000000000000000000000000..448a35a2251bef0c51e573d6b32d93fcb2e7d9ce --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/gait_manager.py @@ -0,0 +1,566 @@ +import numpy as np + +from aug_mpc.controllers.rhc.horizon_based.horizon_imports import * + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +from typing import Dict + +class GaitManager: + + def __init__(self, + task_interface: TaskInterface, + phase_manager: pymanager.PhaseManager, + injection_node: int = None, + keep_yaw_vert: bool = False, + yaw_vertical_weight: float = None, + vertical_landing: bool = False, + landing_vert_weight: float = None, + phase_force_reg: float = None, + flight_duration: int = 15, + post_flight_stance: int = 3, + step_height: float = 0.1, + 
dh: float = 0.0, + custom_opts: Dict = {}): + + self._custom_opts=custom_opts + + self._is_open_loop=False + if "is_open_loop" in self._custom_opts: + self._is_open_loop=self._custom_opts["is_open_loop"] + + self.task_interface = task_interface + self._n_nodes_prb=self.task_interface.prb.getNNodes() + + self._phase_manager = phase_manager + self._model=self.task_interface.model + self._q0=self._model.q0 + self._kin_dyn=self.task_interface.model.kd + + # phase weights and regs + self._keep_yaw_vert=keep_yaw_vert + self._yaw_vertical_weight=yaw_vertical_weight + self._vertical_landing=vertical_landing + self._landing_vert_weight=landing_vert_weight + self._phase_force_reg=phase_force_reg + self._total_weight = np.atleast_2d(np.array([0, 0, self._kin_dyn.mass() * 9.81])).T + + self._f_reg_ref={} + + # flight parameters + self._post_flight_stance=post_flight_stance + self._flight_info_now=None + self._flight_duration_max=self._n_nodes_prb-(injection_node+1) + self._flight_duration_min=3 + self._flight_duration_default=flight_duration + # apex bounds/defaults + self._step_height_default=step_height + self._step_height_min=0.0 + self._step_height_max=0.5 + # end height bounds/defaults + self._dh_default=dh + self._dh_min=0.0 + self._dh_max=0.5 + # landing dx, dy bounds/defaults + self._land_dx_default=0.0 + self._land_dx_min=-0.5 + self._land_dx_max=0.5 + self._land_dy_default=0.0 + self._land_dy_min=-0.5 + self._land_dy_max=0.5 + + # timeline data + self._contact_timelines = dict() + self.timeline_names=[] + + self._flight_phases = {} + self._touchdown_phases = {} + self._contact_phases = {} + self._fk_contacts = {} + self._fkd_contacts = {} + self._f_reg_ref = {} + + # reference traj + self._tg = trajectoryGenerator.TrajectoryGenerator() + self._traj_der= [None, 0, 0] + self._traj_second_der=[None, 0, 0] + self._third_traj_der=[None, None, 0] + + self._ref_trjs = {} + self._ref_vtrjs = {} + + if injection_node is None: + self._injection_node = 
round(self.task_interface.prb.getNNodes()/2.0) + else: + self._injection_node = injection_node + + self._init_contact_timelines() + + self._reset_contact_timelines() + + def _init_contact_timelines(self): + + short_stance_duration=1 + flight_phase_short_duration=1 + + self.n_contacts=len(self._model.cmap.keys()) + self._dt=float(self.task_interface.prb.getDt()) + + self._name_to_idx_map={} + + j=0 + for contact in self._model.cmap.keys(): + + self._fk_contacts[contact]=self._kin_dyn.fk(contact) + self._fkd_contacts[contact]=self._kin_dyn.frameVelocity(contact, self._model.kd_frame) + self.timeline_names.append(contact) + self._contact_timelines[contact]=self._phase_manager.createTimeline(f'{contact}_timeline') + # stances + self._contact_phases[contact] = self._contact_timelines[contact].createPhase(short_stance_duration, + f'stance_{contact}_short') + + if self.task_interface.getTask(f'{contact}') is not None: + self._contact_phases[contact].addItem(self.task_interface.getTask(f'{contact}')) + else: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"contact task {contact} not found", + LogType.EXCEP, + throw_when_excep=True) + i=0 + self._f_reg_ref[contact]=[] + for force in self._model.cmap[contact]: + f_ref=self.task_interface.prb.createParameter(name=f"{contact}_force_reg_f{i}_ref", + dim=3) + force_reg=self.task_interface.prb.createResidual(f'{contact}_force_reg_f{i}', self._phase_force_reg * (force - f_ref), + nodes=[]) + self._f_reg_ref[contact].append(f_ref) + self.set_f_reg(contact_name=contact, scale=4) + self._contact_phases[contact].addCost(force_reg, nodes=list(range(0, short_stance_duration))) + i+=1 + + # flights + self._flight_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration, + f'flight_{contact}_short') + + # sanity checks (z trajectory) + self._zpos_task_found=True + self._zvel_task_found=True + self._xypos_task_found=True + self._xyvel_task_found=True + if 
self.task_interface.getTask(f'z_{contact}') is None: + self._zpos_task_found=False + if self.task_interface.getTask(f'vz_{contact}') is None: + self._zvel_task_found=False + if self.task_interface.getTask(f'xy_{contact}') is None: + self._xypos_task_found=False + if self.task_interface.getTask(f'vxy_{contact}') is None: + self._xyvel_task_found=False + if (not self._zpos_task_found) and (not self._zvel_task_found): + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"neither pos or vel task for contacts were found! Aborting.", + LogType.EXCEP, + throw_when_excep=True) + if (not self._zpos_task_found) and self._is_open_loop: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"Running in open loop, but no contact pos task found. Aborting.", + LogType.EXCEP, + throw_when_excep=True) + if self._zpos_task_found and self._xyvel_task_found: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.", + LogType.EXCEP, + throw_when_excep=True) + if self._zvel_task_found and self._xypos_task_found: + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"Both pos and vel task for contact {contact} found! This is not allowed, aborting.", + LogType.EXCEP, + throw_when_excep=True) + if (not self._xypos_task_found) and (not self._xyvel_task_found): + Journal.log(self.__class__.__name__, + "_init_contact_timelines", + f"neither pos or vel task for contact {contact} xy were found! Will proceed without xy landing constraints.", + LogType.WARN) + # if (not self._zvel_task_found) and (not self._is_open_loop): + # Journal.log(self.__class__.__name__, + # "_init_contact_timelines", + # f"Running in closed loop, but contact vel task not found. 
Aborting", + # LogType.EXCEP, + # throw_when_excep=True) + + self._ref_trjs[contact]=None + self._ref_vtrjs[contact]=None + self._touchdown_phases[contact]=None + + if self._zpos_task_found: # we use pos trajectory + self._ref_trjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()]) + init_z_foot = self._fk_contacts[contact](q=self._q0)['ee_pos'].elements()[2] + if self._is_open_loop: + self._ref_trjs[contact][2, :] = np.atleast_2d(init_z_foot) + else: + self._ref_trjs[contact][2, :] = 0.0 # place foot at ground level initially () + + # z + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'z_{contact}'), + self._ref_trjs[contact][2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + + if self._xypos_task_found: # xy, we add a landing phase of unit duration to enforce landing pos costs + + self._touchdown_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration, + f'touchdown_{contact}_short') + + self._touchdown_phases[contact].addItemReference(self.task_interface.getTask(f'xy_{contact}'), + self._ref_trjs[contact][0:2, 0:1], + nodes=list(range(0, short_stance_duration))) + + else: # foot traj in velocity + # ref vel traj + self._ref_vtrjs[contact]=np.zeros(shape=[7, self.task_interface.prb.getNNodes()]) # allocate traj + # of max length eual to number of nodes + self._ref_vtrjs[contact][2, :] = np.atleast_2d(0) + + # z + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vz_{contact}'), + self._ref_vtrjs[contact][2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + + if self._xyvel_task_found: # xy (when in vel the xy vel is set on the whole flight phase) + self._flight_phases[contact].addItemReference(self.task_interface.getTask(f'vxy_{contact}'), + self._ref_vtrjs[contact][0:2, 0:1], + nodes=list(range(0, flight_phase_short_duration))) + + if self._vertical_landing: # add touchdown phase for vertical landing + 
self._touchdown_phases[contact]=self._contact_timelines[contact].createPhase(flight_phase_short_duration, + f'touchdown_{contact}_short') + + if self._vertical_landing and self._touchdown_phases[contact] is not None: + v_xy=self._fkd_contacts[contact](q=self._model.q, qdot=self._model.v)['ee_vel_linear'][0:2] + vertical_landing=self.task_interface.prb.createResidual(f'{contact}_only_vert_v', + self._landing_vert_weight * v_xy, + nodes=[]) + self._touchdown_phases[contact].addCost(vertical_landing, nodes=list(range(0, short_stance_duration))) + + if self._keep_yaw_vert: + # keep ankle vertical over the whole horizon (can be useful with wheeled robots) + c_ori = self._model.kd.fk(contact)(q=self._model.q)['ee_rot'][2, :] + cost_ori = self.task_interface.prb.createResidual(f'{contact}_ori', self._yaw_vertical_weight * (c_ori.T - np.array([0, 0, 1]))) + # flight_phase.addCost(cost_ori, nodes=list(range(0, flight_duration+post_landing_stance))) + + self._name_to_idx_map[contact]=j + + j+=1 + + # current pos [c0, c1, ....], current length, nominal length, nom. apex, no. 
landing height, landing dx, landing dy (local world aligned) + self._flight_info_now=np.zeros(shape=(7*self.n_contacts)) + + self.update() + + def _reset_contact_timelines(self): + + for contact in self._model.cmap.keys(): + + idx=self._name_to_idx_map[contact] + # we follow same order as in shm for more efficient writing + self._flight_info_now[idx]= -1.0 # pos [nodes] + self._flight_info_now[idx+1*self.n_contacts]= -1.0 # duration (remaining) [nodes] + self._flight_info_now[idx+2*self.n_contacts]=self._flight_duration_default # [nodes] + self._flight_info_now[idx+3*self.n_contacts]=self._step_height_default + self._flight_info_now[idx+4*self.n_contacts]=self._dh_default + self._flight_info_now[idx+5*self.n_contacts]=self._land_dx_default + self._flight_info_now[idx+6*self.n_contacts]=self._land_dy_default + # fill timeline with stances + contact_timeline=self._contact_timelines[contact] + contact_timeline.clear() # remove phases + short_stance_phase = contact_timeline.getRegisteredPhase(f'stance_{contact}_short') + while contact_timeline.getEmptyNodes() > 0: + contact_timeline.addPhase(short_stance_phase) + + self.update() + + def reset(self): + # self.phase_manager.clear() + self.task_interface.reset() + self._reset_contact_timelines() + + def set_f_reg(self, + contact_name, + scale: float = 4.0): + f_refs=self._f_reg_ref[contact_name] + for force in f_refs: + ref=self._total_weight/(scale*len(f_refs)) + force.assign(ref) + + def set_flight_duration(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts]=val + + def get_flight_duration(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+2*self.n_contacts] + + def set_step_apexdh(self, contact_name, val: float): + self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts]=val + + def get_step_apexdh(self, contact_name): + return self._flight_info_now[self._name_to_idx_map[contact_name]+3*self.n_contacts] 
+
+    def set_step_enddh(self, contact_name, val: float):
+        self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts]=val
+
+    def get_step_enddh(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+4*self.n_contacts]
+
+    def get_step_landing_dx(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts]
+
+    def set_step_landing_dx(self, contact_name, val: float):
+        self._flight_info_now[self._name_to_idx_map[contact_name]+5*self.n_contacts]=val
+
+    def get_step_landing_dy(self, contact_name):
+        return self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts]
+
+    def set_step_landing_dy(self, contact_name, val: float):
+        self._flight_info_now[self._name_to_idx_map[contact_name]+6*self.n_contacts]=val
+
+    def add_stand(self, contact_name):
+        # always add stand at the end of the horizon
+        timeline = self._contact_timelines[contact_name]
+        if timeline.getEmptyNodes() > 0:
+            timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short'))
+
+    def add_flight(self, contact_name,
+            robot_q: np.ndarray):
+
+        timeline = self._contact_timelines[contact_name]
+
+        flights_on_horizon=self._contact_timelines[contact_name].getPhaseIdx(self._flight_phases[contact_name])
+
+        last_flight_idx=self._injection_node-1 # default to make things work
+        if not len(flights_on_horizon)==0: # some flight phases are there
+            last_flight_idx=flights_on_horizon[-1]+self._post_flight_stance
+
+        # NOTE(review): the following span was garbled in the source text (everything
+        # between a "<" and the next ">" was stripped, deleting the comparison
+        # operators, the *_req request-fetch assignments and the min-side clamps).
+        # Reconstructed from context -- confirm against the upstream AugMPC repo.
+        if last_flight_idx < self._injection_node:
+            last_flight_idx = self._injection_node - 1
+
+        # fetch requested flight parameters (set externally via the setters above)
+        flight_duration_req = round(self.get_flight_duration(contact_name))
+        flight_apex_req = self.get_step_apexdh(contact_name)
+        flight_enddh_req = self.get_step_enddh(contact_name)
+        flight_land_dx_req = self.get_step_landing_dx(contact_name)
+        flight_land_dy_req = self.get_step_landing_dy(contact_name)
+
+        if not flight_duration_req > 1:
+            Journal.log(self.__class__.__name__,
+                "add_flight",
+                f"Got flight duration {flight_duration_req} < 1!",
+                LogType.WARN,
+                throw_when_excep=True)
+
+        # process requests to ensure flight params are valid
+        # duration
+        if flight_duration_req < self._flight_duration_min:
+            flight_duration_req = self._flight_duration_min
+        if flight_duration_req > self._flight_duration_max:
+            flight_duration_req=self._flight_duration_max
+        # apex height
+        if flight_apex_req < self._step_height_min:
+            flight_apex_req = self._step_height_min
+        if flight_apex_req > self._step_height_max:
+            flight_apex_req=self._step_height_max
+        # landing height
+        if flight_enddh_req < self._dh_min:
+            flight_enddh_req = self._dh_min
+        if flight_enddh_req > self._dh_max:
+            flight_enddh_req=self._dh_max
+        # landing dx
+        if flight_land_dx_req < self._land_dx_min:
+            flight_land_dx_req = self._land_dx_min
+        if flight_land_dx_req > self._land_dx_max:
+            flight_land_dx_req=self._land_dx_max
+        # landing dy
+        if flight_land_dy_req < self._land_dy_min:
+            flight_land_dy_req = self._land_dy_min
+        if flight_land_dy_req > self._land_dy_max:
+            flight_land_dy_req=self._land_dy_max
+
+        land_dx_w = flight_land_dx_req
+        land_dy_w = flight_land_dy_req
+        if self._xypos_task_found or self._xyvel_task_found:
+            # landing dx/dy are specified in horizontal frame; rotate into world aligned frame
+            q_base = robot_q[3:7]
+            if q_base.ndim == 1:
+                q_base = q_base.reshape(-1, 1)
+            q_w = q_base[3, 0]
+            q_x = q_base[0, 0]
+            q_y = q_base[1, 0]
+            q_z = q_base[2, 0]
+            r11 = 1 - 2 * (q_y * q_y + q_z * q_z)
+            r21 = 2 * (q_x * q_y + q_z * q_w)
+            norm = np.hypot(r11, r21)
+            if norm > 0.0:
+                cos_yaw = r11 / norm
+                sin_yaw = r21 / norm
+            else:
+                cos_yaw = 1.0
+                sin_yaw = 0.
+
+            land_dx_w = flight_land_dx_req * cos_yaw - flight_land_dy_req * sin_yaw
+            land_dy_w = flight_land_dx_req * sin_yaw + flight_land_dy_req * cos_yaw
+
+        if self._ref_vtrjs[contact_name] is not None and \
+            self._ref_trjs[contact_name] is not None: # only allow one mode (pos/velocity traj)
+            Journal.log(self.__class__.__name__,
+                "add_flight",
+                f"Both pos and vel traj for contact {contact_name} are not None! This is not allowed, aborting.",
+                LogType.EXCEP,
+                throw_when_excep=True)
+
+        # inject pos traj if pos mode
+        if self._ref_trjs[contact_name] is not None:
+            # recompute trajectory online (just needed if using pos traj)
+            foot_pos=self._fk_contacts[contact_name](q=robot_q)['ee_pos'].elements()
+            starting_pos=foot_pos[2] # compute foot traj (local world aligned)
+            starting_x_pos=foot_pos[0]
+            starting_y_pos=foot_pos[1]
+            # starting_pos=0.0
+            self._ref_trjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.from_derivatives(
+                flight_duration_req,
+                p_start=starting_pos,
+                p_goal=starting_pos+flight_enddh_req,
+                clearance=flight_apex_req,
+                derivatives=self._traj_der,
+                second_der=self._traj_second_der,
+                third_der=self._third_traj_der)
+                )
+            if self._xypos_task_found: # we use _ref_trjs to write xy pos references
+                self._ref_trjs[contact_name][0, -1]=starting_x_pos+land_dx_w
+                self._ref_trjs[contact_name][1, -1]=starting_y_pos+land_dy_w
+
+            for i in range(flight_duration_req):
+                res, phase_token_flight=timeline.addPhase(self._flight_phases[contact_name],
+                    pos=self._injection_node+i,
+                    absolute_position=True)
+                phase_token_flight.setItemReference(f'z_{contact_name}',
+                    self._ref_trjs[contact_name][:, i])
+
+            if self._touchdown_phases[contact_name] is not None:
+                # add touchdown phase after flight
+                res, phase_token_touchdown=timeline.addPhase(self._touchdown_phases[contact_name],
+                    pos=self._injection_node+flight_duration_req,
+                    absolute_position=True)
+                if self._xypos_task_found:
+                    phase_token_touchdown.setItemReference(f'xy_{contact_name}',
+                        self._ref_trjs[contact_name][:, -1])
+
+        # inject vel traj if vel mode
+        if self._ref_vtrjs[contact_name] is not None:
+            self._ref_vtrjs[contact_name][2, 0:flight_duration_req]=np.atleast_2d(self._tg.derivative_of_trajectory(
+                flight_duration_req,
+                p_start=0.0,
+                p_goal=flight_enddh_req,
+                clearance=flight_apex_req,
+                derivatives=self._traj_der,
+                second_der=self._traj_second_der,
third_der=self._third_traj_der)) + if self._xyvel_task_found: # compute vel reference using problem dt and flight length + flight_duration_sec=float(flight_duration_req)*self._dt + self._ref_vtrjs[contact_name][0, 0:flight_duration_req]=land_dx_w/flight_duration_sec + self._ref_vtrjs[contact_name][1, 0:flight_duration_req]=land_dy_w/flight_duration_sec + + for i in range(flight_duration_req): + res, phase_token=timeline.addPhase(self._flight_phases[contact_name], + pos=self._injection_node+i, + absolute_position=True) + phase_token.setItemReference(f'vz_{contact_name}', + self._ref_vtrjs[contact_name][2:3, i:i+1]) + if self._touchdown_phases[contact_name] is not None: + # add touchdown phase for forcing vertical landing + res, phase_token=timeline.addPhase(self._touchdown_phases[contact_name], + pos=self._injection_node+flight_duration_req, + absolute_position=True) + + if timeline.getEmptyNodes() > 0: # fill empty nodes at the end of the horizon, if any, with stance + timeline.addPhase(timeline.getRegisteredPhase(f'stance_{contact_name}_short')) + + def update(self): + self._phase_manager.update() + + def update_flight_info(self, timeline_name): + + # get current position and remaining duration of flight phases over the horizon for a single contact + + # phase indexes over timeline + phase_idxs=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name]) + + if not len(phase_idxs)==0: # at least one flight phase on horizon -> read info from timeline + + # all active phases on timeline + active_phases=self._contact_timelines[timeline_name].getActivePhases() + + phase_idx_start=phase_idxs[0] + # active_nodes_start=active_phases[phase_idx_start].getActiveNodes() + pos_start=active_phases[phase_idx_start].getPosition() + # n_nodes_start=active_phases[phase_idx_start].getNNodes() + + phase_idx_last=phase_idxs[-1] # just get info for last phase on the horizon + # active_nodes_last=active_phases[phase_idx_last].getActiveNodes() + 
pos_last=active_phases[phase_idx_last].getPosition() + # n_nodes_last=active_phases[phase_idx_last].getNNodes() + + # write to info + self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts]=pos_last + self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts]=pos_last - pos_start + + return True + + return False + + def get_flight_info(self, timeline_name): + return (self._flight_info_now[self._name_to_idx_map[timeline_name]+0*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+1*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+2*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+3*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+4*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+5*self.n_contacts], + self._flight_info_now[self._name_to_idx_map[timeline_name]+6*self.n_contacts]) + + def get_flight_info_all(self): + return self._flight_info_now + + def set_ref_pos(self, + timeline_name:str, + ref_height: np.array = None, + threshold: float = 0.05): + + if ref_height is not None: + self._ref_trjs[timeline_name][2, :]=ref_height + if ref_height>threshold: + self.add_flight(timeline_name=timeline_name) + this_flight_token_idx=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name])[-1] + active_phases=self._contact_timelines[timeline_name].getActivePhases() + active_phases[this_flight_token_idx].setItemReference(f'z_{timeline_name}', + self._ref_trjs[timeline_name]) + else: + self.add_stand(timeline_name=timeline_name) + + def set_force_feedback(self, + timeline_name: str, + force_norm: float): + + flight_tokens=self._contact_timelines[timeline_name].getPhaseIdx(self._flight_phases[timeline_name]) + contact_tokens=self._contact_phases[timeline_name].getPhaseIdx(self._contact_phases[timeline_name]) + if not len(flight_tokens)==0: + 
first_flight=flight_tokens[0] + first_flight + + def check_horizon_full(self, + timeline_name): + timeline = self._contact_timelines[timeline_name] + if timeline.getEmptyNodes() > 0: + error = f"Empty nodes detected over the horizon for timeline {timeline}! Make sure to fill the whole horizon with valid phases!!" + Journal.log(self.__class__.__name__, + "check_horizon_full", + error, + LogType.EXCEP, + throw_when_excep = True) diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/horizon_imports.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/horizon_imports.py new file mode 100644 index 0000000000000000000000000000000000000000..991636dd18679c11050f97d3c47dd7df2d97eb5e --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/horizon_imports.py @@ -0,0 +1,18 @@ +# robot modeling and automatic differentiation +import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn +import casadi as cs + +# horizon stuff +import horizon.utils.kin_dyn as kd +from horizon.problem import Problem +from horizon.rhc.model_description import FullModelInverseDynamics +from horizon.rhc.taskInterface import TaskInterface +from horizon.rhc.tasks.interactionTask import VertexContact +from horizon.rhc.tasks.contactTask import ContactTask +from horizon.utils import trajectoryGenerator, utils + +# phase managing +import phase_manager.pymanager as pymanager +import phase_manager.pyphase as pyphase +import phase_manager.pytimeline as pytimeline + diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py new file mode 100644 index 
0000000000000000000000000000000000000000..679e7d460ab545b0bd2f741904b5ac8af893f897 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/horizon_imports_glob.py @@ -0,0 +1,28 @@ +""" +Dynamically import all necessary Horizon and related dependencies. +This function is intended to be used within the import_child_lib method +of a class, enabling the parent process to load all required libraries. +""" +def import_horizon_global(): + # Global imports to make modules accessible in child processes + global casadi_kin_dyn, cs, kd, Problem, FullModelInverseDynamics + global TaskInterface, VertexContact, ContactTask, trajectoryGenerator, utils + global pymanager, pyphase, pytimeline + + # robot modeling and automatic differentiation + import casadi_kin_dyn.py3casadi_kin_dyn as casadi_kin_dyn + import casadi as cs + + # horizon stuff + import horizon.utils.kin_dyn as kd + from horizon.problem import Problem + from horizon.rhc.model_description import FullModelInverseDynamics + from horizon.rhc.taskInterface import TaskInterface + from horizon.rhc.tasks.interactionTask import VertexContact + from horizon.rhc.tasks.contactTask import ContactTask + from horizon.utils import trajectoryGenerator, utils + + # phase managing + import phase_manager.pymanager as pymanager + import phase_manager.pyphase as pyphase + import phase_manager.pytimeline as pytimeline diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..5cec4e610e97e3100c1468ee4e828fe2e64a28c4 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc.py @@ -0,0 +1,1324 @@ +from 
mpc_hive.controllers.rhc import RHController + +from aug_mpc.controllers.rhc.horizon_based.horizon_imports import * + +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc_refs import HybridQuadRhcRefs +from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType + +import numpy as np + +import os + +import time + +from typing import Dict, List + +class HybridQuadRhc(RHController): + + def __init__(self, + srdf_path: str, + urdf_path: str, + config_path: str, + robot_name: str, # used for shared memory namespaces + codegen_dir: str, + n_nodes:float = 25, + injection_node:int = 10, + dt: float = 0.02, + max_solver_iter = 1, # defaults to rt-iteration + open_loop: bool = True, + close_loop_all: bool = False, + dtype = np.float32, + verbose = False, + debug = False, + refs_in_hor_frame = True, + timeout_ms: int = 60000, + custom_opts: Dict = {}): + + self._refs_in_hor_frame = refs_in_hor_frame + + self._injection_node = injection_node + + self._open_loop = open_loop + self._close_loop_all = close_loop_all + + self._codegen_dir = codegen_dir + if not os.path.exists(self._codegen_dir): + os.makedirs(self._codegen_dir) + # else: + # # Directory already exists, delete it and recreate + # shutil.rmtree(self._codegen_dir) + # os.makedirs(self._codegen_dir) + + self.step_counter = 0 + self.sol_counter = 0 + + self.max_solver_iter = max_solver_iter + + self._timer_start = time.perf_counter() + self._prb_update_time = time.perf_counter() + self._phase_shift_time = time.perf_counter() + self._task_ref_update_time = time.perf_counter() + self._rti_time = time.perf_counter() + + self.robot_name = robot_name + + self.config_path = config_path + + self.urdf_path = urdf_path + # read urdf and srdf files + with open(self.urdf_path, 'r') as file: + self.urdf = file.read() + self._base_init = np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]) + + self._c_timelines = dict() + 
self._f_reg_timelines = dict() + + self._custom_opts={"replace_continuous_joints": False, + "use_force_feedback": False, + "lin_a_feedback": False, + "is_open_loop": self._open_loop, # fully open (just for db) + "fully_closed": False, # closed loop with full feedback (just for db) + "closed_partial": False, # closed loop with partial feedback + "adaptive_is": True, # closed loop with adaptation + "estimate_v_root": False, # when adaptive_is or closed_partial, estimate vbase + "alpha_from_outside": False, # alpha set ext. from shared memory + "alpha_half": 1.0, + "only_vel_wheels": True, # whether wheels (if present) are just vel controlled + "use_jnt_v_feedback": False + } + + self._custom_opts.update(custom_opts) + + self._alpha_half=self._custom_opts["alpha_half"] + + if self._open_loop: + self._custom_opts["fully_closed"]=False + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + else: + self._custom_opts["is_open_loop"]=False + if self._custom_opts["fully_closed"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["closed_partial"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["closed_partial"]: + self._custom_opts["adaptive_is"]=False + self._custom_opts["fully_closed"]=False + self._custom_opts["lin_a_feedback"]=False + if self._custom_opts["adaptive_is"]: + self._custom_opts["closed_partial"]=False + self._custom_opts["fully_closed"]=False + + super().__init__(srdf_path=srdf_path, + n_nodes=n_nodes, + dt=dt, + namespace=self.robot_name, + dtype=dtype, + verbose=verbose, + debug=debug, + timeout_ms=timeout_ms) + + self._rhc_fpaths.append(self.config_path) + + def _config_override(self): + pass + + def _post_problem_init(self): + + self.rhc_costs={} + self.rhc_constr={} + + self._fail_idx_scale=0.0 + self._expl_idx_window_size=int(1*self._n_nodes) + self._explosion_idx_buffer=np.zeros((1,self._expl_idx_window_size)) + self._expl_idx_counter=0 + self._expl_idx_buffer_counter=0 + + 
self._pred_node_idx=self._n_nodes-1 + + self._nq=self.nq() + self._nq_jnts=self._nq-7# assuming floating base + self._nv=self.nv() + self._nv_jnts=self._nv-6 + + self._alphas_q_root=np.zeros((7, 1), dtype=self._dtype) + self._alphas_q_jnts=np.zeros((self._nq_jnts, 1), dtype=self._dtype) + self._alphas_twist_root=np.zeros((6, 1), dtype=self._dtype) + self._alphas_v_jnts=np.zeros((self._nv_jnts, 1), dtype=self._dtype) + self._alphas_a=np.zeros((self._nv, 1), dtype=self._dtype) + self._alphas_q_root[:, :]=1.0 # default to all open + self._alphas_q_jnts[:, :]=1.0 + self._alphas_twist_root[:, :]=1.0 + self._alphas_v_jnts[:, :]=1.0 + self._alphas_a[:, :]=1.0 + + def _init_problem(self, + fixed_jnt_patterns: List[str] = None, + wheels_patterns: List[str] = None, + foot_linkname: str = None, + flight_duration: int = 10, + post_flight_stance: int = 3, + step_height: float = 0.12, + keep_yaw_vert: bool = False, + yaw_vertical_weight: float = 2.0, + vertical_landing: bool = False, + vertical_land_weight: float = 1.0, + phase_force_reg: float = 1e-2, + vel_bounds_weight: float = 1.0): + + self._fixed_jnt_patterns=fixed_jnt_patterns + + self._config_override() + + Journal.log(self.__class__.__name__, + "_init_problem", + f" Will use horizon config file at {self.config_path}", + LogType.INFO, + throw_when_excep=True) + + self._vel_bounds_weight=vel_bounds_weight + self._phase_force_reg=phase_force_reg + self._yaw_vertical_weight=yaw_vertical_weight + self._vertical_land_weight=vertical_land_weight + # overrides parent + self._prb = Problem(self._n_intervals, + receding=True, + casadi_type=cs.SX) + self._prb.setDt(self._dt) + + if "replace_continuous_joints" in self._custom_opts: + # continous joints are parametrized in So2 + if self._custom_opts["replace_continuous_joints"]: + self.urdf = self.urdf.replace('continuous', 'revolute') + else: + self.urdf = self.urdf.replace('continuous', 'revolute') + + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf) # used for getting joint 
names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + + self._init_robot_homer() + + # handle fixed joints + fixed_joint_map={} + if self._fixed_jnt_patterns is not None: + for jnt_name in self._get_robot_jnt_names(): + for fixed_jnt_pattern in self._fixed_jnt_patterns: + if fixed_jnt_pattern in jnt_name: + fixed_joint_map.update({f"{jnt_name}": + self._homer.get_homing_val(jnt_name=jnt_name)}) + break # do not search for other pattern matches + + if not len(fixed_joint_map)==0: # we need to recreate kin dyn and homers + Journal.log(self.__class__.__name__, + "_init_problem", + f"Will fix following joints: \n{str(fixed_joint_map)}", + LogType.INFO, + throw_when_excep=True) + # with the fixed joint map + self._kin_dyn = casadi_kin_dyn.CasadiKinDyn(self.urdf,fixed_joints=fixed_joint_map) + # assign again controlled joints names + self._assign_controller_side_jnt_names(jnt_names=self._get_robot_jnt_names()) + # updated robot homer for controlled joints + self._init_robot_homer() + + # handle continuous joints (need to change homing and retrieve + # cont jnts indexes) and homing + self._continuous_joints=self._get_continuous_jnt_names() + # reduced + self._continuous_joints_idxs=[] + self._continuous_joints_idxs_cos=[] + self._continuous_joints_idxs_sin=[] + self._continuous_joints_idxs_red=[] + self._rev_joints_idxs=[] + self._rev_joints_idxs_red=[] + # qfull + self._continuous_joints_idxs_qfull=[] + self._continuous_joints_idxs_cos_qfull=[] + self._continuous_joints_idxs_sin_qfull=[] + self._continuous_joints_idxs_red_qfull=[] + self._rev_joints_idxs_qfull=[] + self._rev_joints_idxs_red_qfull=[] + jnt_homing=[""]*(len(self._homer.get_homing())+len(self._continuous_joints)) + jnt_names=self._get_robot_jnt_names() + for i in range(len(jnt_names)): + jnt=jnt_names[i] + index=self._get_jnt_id(jnt)# accounting for floating joint + homing_idx=index-7 # homing is only for actuated joints + homing_value=self._homer.get_homing_val(jnt) + if 
jnt in self._continuous_joints: + jnt_homing[homing_idx]=np.cos(homing_value).item() + jnt_homing[homing_idx+1]=np.sin(homing_value).item() + # just actuated joints + self._continuous_joints_idxs.append(homing_idx) # cos + self._continuous_joints_idxs.append(homing_idx+1) # sin + self._continuous_joints_idxs_cos.append(homing_idx) + self._continuous_joints_idxs_sin.append(homing_idx+1) + self._continuous_joints_idxs_red.append(i) + # q full + self._continuous_joints_idxs_qfull.append(index) # cos + self._continuous_joints_idxs_qfull.append(index+1) # sin + self._continuous_joints_idxs_cos_qfull.append(index) + self._continuous_joints_idxs_sin_qfull.append(index+1) + self._continuous_joints_idxs_red_qfull.append(i+7) + else: + jnt_homing[homing_idx]=homing_value + # just actuated joints + self._rev_joints_idxs.append(homing_idx) + self._rev_joints_idxs_red.append(i) + # q full + self._rev_joints_idxs_qfull.append(index) + self._rev_joints_idxs_red_qfull.append(i+7) + + self._jnts_q_reduced=None + if not len(self._continuous_joints)==0: + cont_joints=", ".join(self._continuous_joints) + + Journal.log(self.__class__.__name__, + "_init_problem", + f"The following continuous joints were found: \n{cont_joints}", + LogType.INFO, + throw_when_excep=True) + # preallocating data + self._jnts_q_reduced=np.zeros((1,self.nv()-6),dtype=self._dtype) + self._jnts_q_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + self._full_q_reduced=np.zeros((7+len(jnt_names), self._n_nodes),dtype=self._dtype) + self._jnts_q_delta_expanded=np.zeros((self.nq()-7,1),dtype=self._dtype) + else: + self._custom_opts["replace_continuous_joints"]=True + Journal.log(self.__class__.__name__, + "_init_problem", + f"No continuous joints were found.", + LogType.INFO, + throw_when_excep=True) + + # retrieve wheels indexes (not considering continuous joints, + # ok just for v, eff, etc..) 
+ self._wheel_patterns=wheels_patterns + self._wheels_idxs_v=self._get_wheels_jnt_v_idxs(wheel_patterns=self._wheel_patterns) + self._f0 = [0, 0, self._kin_dyn.mass()/4*9.81] + + # we can create an init for the base + init = self._base_init.tolist() + jnt_homing + + if foot_linkname is not None: + FK = self._kin_dyn.fk(foot_linkname) # just to get robot reference height + ground_level = FK(q=init)['ee_pos'] + self._base_init[2] = -ground_level[2] # override init + + self._model = FullModelInverseDynamics(problem=self._prb, + kd=self._kin_dyn, + q_init=self._homer.get_homing_map(), + base_init=self._base_init) + + self._ti = TaskInterface(prb=self._prb, + model=self._model, + max_solver_iter=self.max_solver_iter, + debug = self._debug, + verbose = self._verbose, + codegen_workdir = self._codegen_dir) + self._ti.setTaskFromYaml(self.config_path) + + # setting initial base pos ref if exists + base_pos = self._ti.getTask('base_height') + if base_pos is not None: + base_pos.setRef(np.atleast_2d(self._base_init).T) + + self._pm = pymanager.PhaseManager(self._n_nodes, debug=False) # intervals or nodes????? 
+ + self._gm = GaitManager(self._ti, + self._pm, + self._injection_node, + keep_yaw_vert=keep_yaw_vert, + yaw_vertical_weight=self._yaw_vertical_weight, + vertical_landing=vertical_landing, + landing_vert_weight=self._vertical_land_weight, + phase_force_reg=self._phase_force_reg, + custom_opts=self._custom_opts, + flight_duration=flight_duration, + post_flight_stance=post_flight_stance, + step_height=step_height, + dh=0.0) + + self._ti.model.q.setBounds(self._ti.model.q0, self._ti.model.q0, nodes=0) + self._ti.model.v.setBounds(self._ti.model.v0, self._ti.model.v0, nodes=0) + self._ti.model.q.setInitialGuess(self._ti.model.q0) + self._ti.model.v.setInitialGuess(self._ti.model.v0) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f=len(cforces) + for c in cforces: + c.setInitialGuess(np.array(self._f0)/n_contact_f) + + vel_lims = self._model.kd.velocityLimits() + import horizon.utils as utils + self._prb.createResidual('vel_lb_barrier', self._vel_bounds_weight*utils.utils.barrier(vel_lims[7:] - self._model.v[7:])) + self._prb.createResidual('vel_ub_barrier', self._vel_bounds_weight*utils.utils.barrier1(-1 * vel_lims[7:] - self._model.v[7:])) + + self._meas_lin_a_par=None + # if self._custom_opts["lin_a_feedback"]: + # # acceleration feedback on first node + # self._meas_lin_a_par=self._prb.createParameter(name="lin_a_feedback", + # dim=3, nodes=0) + # base_lin_a_prb=self._prb.getInput().getVars()[0:3] + # self._prb.createConstraint('lin_acceleration_feedback', base_lin_a_prb - self._meas_lin_a_par, + # nodes=[0]) + + # if not self._open_loop: + # # we create a residual cost to be used as an attractor to the measured state on the first node + # # hard constraints injecting meas. states are pure EVIL! 
+ # prb_state=self._prb.getState() + # full_state=prb_state.getVars() + # state_dim=prb_state.getBounds()[0].shape[0] + # meas_state=self._prb.createParameter(name="measured_state", + # dim=state_dim, nodes=0) + # self._prb.createResidual('meas_state_attractor', meas_state_attractor_weight * (full_state - meas_state), + # nodes=[0]) + + self._ti.finalize() + self._ti.bootstrap() + + self._ti.init_inv_dyn_for_res() # we initialize some objects for sol. postprocessing purposes + self._ti.load_initial_guess() + + self.n_dofs = self._get_ndofs() # after loading the URDF and creating the controller we + # know n_dofs -> we assign it (by default = None) + + self.n_contacts = len(self._model.cmap.keys()) + + # remove variables bounds (before they were necessary to have a good + # quality bootstrap solution) + self._q_inf=np.zeros((self.nq(), 1)) + self._q_inf[:, :]=np.inf + self._v_inf=np.zeros((self.nv(), 1)) + self._v_inf[:, :]=np.inf + self._ti.model.q.setBounds(-self._q_inf, self._q_inf, nodes=0) + self._ti.model.v.setBounds(-self._v_inf, self._v_inf, nodes=0) + + # self.horizon_anal = analyzer.ProblemAnalyzer(self._prb) + + def get_file_paths(self): + # can be overriden by child + paths = super().get_file_paths() + return paths + + def _get_quat_remap(self): + # overrides parent + return [1, 2, 3, 0] # mapping from robot quat. 
to Horizon's quaternion convention + + def _zmp(self, model): + + num = cs.SX([0, 0]) + den = cs.SX([0]) + pos_contact = dict() + force_val = dict() + + q = cs.SX.sym('q', model.nq) + v = cs.SX.sym('v', model.nv) + a = cs.SX.sym('a', model.nv) + + com = model.kd.centerOfMass()(q=q, v=v, a=a)['com'] + + n = cs.SX([0, 0, 1]) + for c in model.fmap.keys(): + pos_contact[c] = model.kd.fk(c)(q=q)['ee_pos'] + force_val[c] = cs.SX.sym('force_val', 3) + num += (pos_contact[c][0:2] - com[0:2]) * cs.dot(force_val[c], n) + den += cs.dot(force_val[c], n) + + zmp = com[0:2] + (num / den) + input_list = [] + input_list.append(q) + input_list.append(v) + input_list.append(a) + + for elem in force_val.values(): + input_list.append(elem) + + f = cs.Function('zmp', input_list, [zmp]) + return f + + def _add_zmp(self): + + input_zmp = [] + + input_zmp.append(self._model.q) + input_zmp.append(self._model.v) + input_zmp.append(self._model.a) + + for f_var in self._model.fmap.values(): + input_zmp.append(f_var) + + c_mean = cs.SX([0, 0, 0]) + for c_name, f_var in self._model.fmap.items(): + fk_c_pos = self._kin_dyn.fk(c_name)(q=self._model.q)['ee_pos'] + c_mean += fk_c_pos + + c_mean /= len(self._model.cmap.keys()) + + zmp_nominal_weight = 10. 
+ zmp_fun = self._zmp(self._model)(*input_zmp) + + if 'wheel_joint_1' in self._model.kd.joint_names(): + zmp_residual = self._prb.createIntermediateResidual('zmp', zmp_nominal_weight * (zmp_fun[0:2] - c_mean[0:2])) + + def _quaternion_multiply(self, + q1, q2): + x1, y1, z1, w1 = q1 + x2, y2, z2, w2 = q2 + w = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 + x = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 + y = w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 + z = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 + return np.array([x, y, z, w]) + + def _get_continuous_jnt_names(self): + import xml.etree.ElementTree as ET + root = ET.fromstring(self.urdf) + continuous_joints = [] + for joint in root.findall('joint'): + joint_type = joint.get('type') + if joint_type == 'continuous': + joint_name = joint.get('name') + continuous_joints.append(joint_name) + return continuous_joints + + def _get_wheels_jnt_v_idxs(self, wheel_patterns: List[str]): + jnt_names=self._get_robot_jnt_names() + wheels_idxs=[] + for i in range(len(jnt_names)): + jnt_name=jnt_names[i] + for wheel_pattern in wheel_patterns: + if wheel_pattern in jnt_name: + wheels_idxs.append(i) + break + return wheels_idxs + + def _get_jnt_id(self, jnt_name): + return self._kin_dyn.joint_iq(jnt_name) + + def _init_rhc_task_cmds(self): + + rhc_refs = HybridQuadRhcRefs(gait_manager=self._gm, + robot_index_shm=self.controller_index, + robot_index_view=0, # when using optimize_mem the view if always of shape 1x... 
+ namespace=self.namespace, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + use_force_feedback=self._custom_opts["use_force_feedback"], + optimize_mem=True) + + rhc_refs.run() + + rhc_refs.rob_refs.set_jnts_remapping(jnts_remapping=self._to_controller) + rhc_refs.rob_refs.set_q_remapping(q_remapping=self._get_quat_remap()) + + rhc_refs.set_default_refs(p_ref=np.atleast_2d(self._base_init)[:, 0:3], + q_ref=np.atleast_2d(self._base_init)[:, 3:7]) + + return rhc_refs + + def get_vertex_fnames_from_ti(self): + tasks=self._ti.task_list + contact_f_names=[] + for task in tasks: + if isinstance(task, ContactTask): + interaction_task=task.dynamics_tasks[0] + contact_f_names.append(interaction_task.vertex_frames[0]) + return contact_f_names + + def _get_contact_names(self): + # should get contact names from vertex frames + # list(self._ti.model.cmap.keys()) + return self.get_vertex_fnames_from_ti() + + def _get_robot_jnt_names(self): + + joints_names = self._kin_dyn.joint_names() + to_be_removed = ["universe", + "reference", + "world", + "floating", + "floating_base"] + for name in to_be_removed: + if name in joints_names: + joints_names.remove(name) + + return joints_names + + def _get_ndofs(self): + return len(self._model.joint_names) + + def nq(self): + return self._kin_dyn.nq() + + def nv(self): + return self._kin_dyn.nv() + + def _get_robot_mass(self): + + return self._kin_dyn.mass() + + def _get_root_full_q_from_sol(self, node_idx=1): + + root_q_full=self._ti.solution['q'][0:7, node_idx].reshape(1, 7).astype(self._dtype) + + np.nan_to_num(root_q_full, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) + np.clip(a=root_q_full, a_min=-1e3, a_max=1e3, out=root_q_full) + + return root_q_full + + def _get_full_q_from_sol(self, node_idx=1): + + return self._ti.solution['q'][:, node_idx].reshape(1, -1).astype(self._dtype) + + def _get_root_twist_from_sol(self, node_idx=1): + # provided in world frame + twist_base_local=self._get_v_from_sol()[0:6, node_idx].reshape(1, 
6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # twist_base_local[:, 0:3] = r_base_rhc @ twist_base_local[0, 0:3] + # twist_base_local[:, 3:6] = r_base_rhc @ twist_base_local[0, 3:6] + return twist_base_local + + def _get_root_a_from_sol(self, node_idx=0): + # provided in world frame + a_base_local=self._get_a_from_sol()[0:6, node_idx].reshape(1, 6) + # if world_aligned: + # q_root_rhc = self._get_root_full_q_from_sol(node_idx=node_idx)[:, 0:4] + # r_base_rhc=Rotation.from_quat(q_root_rhc.flatten()).as_matrix() + # a_base_local[:, 0:3] = r_base_rhc @ a_base_local[0, 0:3] + # a_base_local[:, 3:6] = r_base_rhc @ v[0, 3:6] + return a_base_local + + def _get_jnt_q_from_sol(self, node_idx=0, + reduce: bool = True, + clamp: bool = True): + + full_jnts_q=self._ti.solution['q'][7:, node_idx:node_idx+1].reshape(1,-1).astype(self._dtype) + + np.nan_to_num(full_jnts_q, nan=1e3, posinf=1e3, neginf=-1e3, copy=False) # in place + np.clip(a=full_jnts_q, a_max=1e3, a_min=-1e3, out=full_jnts_q) # in place + + if self._custom_opts["replace_continuous_joints"] or (not reduce): + if clamp: + return np.fmod(full_jnts_q, 2*np.pi) + else: + return full_jnts_q + else: + cos_sin=full_jnts_q[:,self._continuous_joints_idxs].reshape(-1,2) + # copy rev joint vals + self._jnts_q_reduced[:, self._rev_joints_idxs_red]=np.fmod(full_jnts_q[:, self._rev_joints_idxs], 2*np.pi).reshape(1, -1) + # and continuous + self._jnts_q_reduced[:, self._continuous_joints_idxs_red]=np.arctan2(cos_sin[:, 1], cos_sin[:, 0]).reshape(1,-1) + return self._jnts_q_reduced + + def _get_jnt_v_from_sol(self, node_idx=1): + + jnt_v_sol=self._get_v_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + np.nan_to_num(jnt_v_sol, nan=1e5, posinf=1e5, neginf=-1e5, copy=False) # in place + # np.clip(a=jnt_v_sol, a_max=1e5, a_min=-1e5, out=jnt_v_sol) # in place + + return jnt_v_sol + + def 
_get_jnt_a_from_sol(self, node_idx=1): + + return self._get_a_from_sol()[6:, node_idx].reshape(1, + self._nv_jnts) + + def _get_jnt_eff_from_sol(self, node_idx=0): + + efforts_on_node = self._ti.eval_efforts_on_node(node_idx=node_idx) + + # if self._custom_opts["only_vel_wheels"]: + + jnt_efforts=efforts_on_node[6:, 0] + + if self._custom_opts["only_vel_wheels"] and self._wheels_idxs_v: + jnt_efforts[self._wheels_idxs_v]=0.0 + + return jnt_efforts.reshape(1, + self._nv_jnts).astype(self._dtype) + + def _get_rhc_cost(self): + + return self._ti.solution["opt_cost"] + + def _get_rhc_constr_viol(self): + + return self._ti.solution["residual_norm"] + + def _get_rhc_nodes_cost(self): + + cost = self._ti.solver_rti.getCostValOnNodes() + return cost.reshape((1, -1)) + + def _get_rhc_nodes_constr_viol(self): + + constr_viol = self._ti.solver_rti.getConstrValOnNodes() + return constr_viol.reshape((1, -1)) + + def _get_rhc_niter_to_sol(self): + + return self._ti.solution["n_iter2sol"] + + def _set_ig_bootstrap(self, + q_state: np.ndarray = None, + v_state: np.ndarray = None): + + xig = self._ti.solution['x_opt'].copy() + uig = self._ti.solution['u_opt'].copy() + + # Normalize and keep quaternion in the same hemisphere as the previous + # solution to avoid artificial 180-deg jumps in the bootstrap warm start. 
+ q_state_boot = q_state.copy() + q_prev = xig[3:7, 0] + q_now = q_state_boot[3:7, 0] + + q_now_norm = np.linalg.norm(q_now) + if q_now_norm > 1e-9: + q_state_boot[3:7, :] /= q_now_norm + else: + q_state_boot[3:7, :] = np.array([[0.0], [0.0], [0.0], [1.0]], dtype=self._dtype) + + q_prev_norm = np.linalg.norm(q_prev) + if q_prev_norm > 1e-9: + q_prev = q_prev / q_prev_norm + + q_now = q_state_boot[3:7, 0] + if np.dot(q_prev, q_now) < 0.0: + q_state_boot[3:7, :] *= -1.0 + + xig[0:self._nq, :] = q_state_boot + xig[self._nq:self._nq + self._nv, :] = 0.0 # 0 velocity on first nodes + uig[0:self._nv, :]=0.0 # 0 acceleration + + # assigning ig + self._prb.getState().setInitialGuess(xig) + self._prb.getInput().setInitialGuess(uig) + # self._prb.getVariables("a").setInitialGuess(np.zeros((self._nv, 1), dtype=self._dtype)) + for _, cforces in self._ti.model.cmap.items(): + n_contact_f = len(cforces) + if n_contact_f == 0: + continue + f_guess = np.array(self._f0, dtype=self._dtype) / n_contact_f + for c in cforces: + c.setInitialGuess(f_guess) + + # print("initial guesses") + # print(self._nq) + # print(self._nv) + # print("q") + # qig=self._ti.model.q.getInitialGuess() + # print(qig.shape) + # print(qig) + # print("v") + # print(self._ti.model.v.getInitialGuess()) + # print("a") + # print(self._ti.model.a.getInitialGuess()) + # for _, cforces in self._ti.model.cmap.items(): + # for c in cforces: + # print("force") + # print(c.getInitialGuess()) + + return xig, uig + + def _set_ig(self): + + shift_num = -1 # shift data by one node + + x_opt = self._ti.solution['x_opt'] + u_opt = self._ti.solution['u_opt'] + + # building ig for state + xig = np.roll(x_opt, + shift_num, axis=1) # rolling state sol. + for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + xig[:, -1 - i] = x_opt[:, -1] + # building ig for inputs + uig = np.roll(u_opt, + shift_num, axis=1) # rolling state sol. 
+ for i in range(abs(shift_num)): + # state on last node is copied to the elements + # which are "lost" during the shift operation + uig[:, -1 - i] = u_opt[:, -1] + + # assigning ig + self._prb.getState().setInitialGuess(xig) + self._prb.getInput().setInitialGuess(uig) + + return xig, uig + + def _update_open_loop(self, + bootstrap: bool = False): + + q_state, v_state, a_state=self._set_is_open() + + if not bootstrap: + self._set_ig() + else: + self._set_ig_bootstrap(q_state=q_state, v_state=v_state) + + # robot_state=xig[:, 0] + # # open loop update: + # self._prb.setInitialState(x0=robot_state) # (xig has been shifted, so node 0 + # # is node 1 in the last opt solution) + + return q_state, v_state, a_state + + def _update_closed_loop(self, + bootstrap: bool = False): + + # set initial state + q_state=None + v_state=None + a_state=None + if self._custom_opts["adaptive_is"]: + # adaptive closed loop + q_state, v_state, a_state=self._set_is_adaptive() + elif self._custom_opts["fully_closed"]: + q_state, v_state, a_state=self._set_is_full() + elif self._custom_opts["closed_partial"]: + q_state, v_state, a_state=self._set_is_partial() + else: + Journal.log(self.__class__.__name__, + "_update_closed_loop", + "Neither adaptive_is, fully_closed, or closed_partial.", + LogType.EXCEP, + throw_when_excep = False) + q_state, v_state, a_state=self._set_is() + + # set initial guess for controller + if not bootstrap: + self._set_ig() + else: + self._set_ig_bootstrap(q_state=q_state, v_state=v_state) + + return q_state, v_state, a_state + + def _set_is_open(self): + + # overriding states with rhc data + q_full_root=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + q_jnts=self._get_jnt_q_from_sol(node_idx=1, reduce=False).reshape(-1, 1) + + twist_root=self._get_root_twist_from_sol(node_idx=1).reshape(-1, 1) + v_jnts=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_q_full_rhc=q[0:7] # 
root full q + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_twist_rhc=vel[0:6] # lin v. + jnts_v_rhc=vel[6:] # jnts v + + self.rhc_refs.set_alpha(alpha=1.0) # fully open + + # close state on known quantities + root_q_full_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_twist_rhc.setBounds(lb=twist_root, + ub=twist_root, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((twist_root, v_jnts), + axis=0) + + return (q_state, v_state, None) + + def _set_is_full(self): + + # measurements + q_full_root = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_full_q_rhc=q[0:7] # root p + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_full_q_rhc.setBounds(lb=q_full_root, + ub=q_full_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) # leaving lin v of the base free (estimated from constraints) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((q_full_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_partial(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", 
robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + if (not len(self._continuous_joints)==0): # we need do expand some meas. rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + q_jnts=self._jnts_q_expanded.reshape(-1,1) + + # overriding states with rhc data (-> all overridden state are open looop) + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc # position is always open loop + + if not self._custom_opts["estimate_v_root"]: + v_root[:, :]=self._get_root_twist_from_sol(node_idx=1)[:, 0:3].reshape(-1, 1) + # override v jnts with the ones from controller + if not self._custom_opts["use_jnt_v_feedback"]: + v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # v_jnts[:, :]=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + # root_twist_from_rhc=self._get_root_twist_from_sol(node_idx=1) + # root_v_from_rhc=root_twist_from_rhc[:, 0:3].reshape(-1, 1) + # root_omega_from_rhc=root_twist_from_rhc[:, 3:6].reshape(-1, 1) + # jnt_q_from_rhc=self._get_jnt_q_from_sol(node_idx=1,reduce=False,clamp=False).reshape(-1, 1) + # jnt_v_from_rhc=self._get_jnt_v_from_sol(node_idx=1).reshape(-1, 1) + + # rhc variables to be set + 
q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + self.rhc_refs.set_alpha(alpha=0.0) # non-adaptive + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root, + ub=q_root, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts, + ub=q_jnts, nodes=0) + + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) # leaving lin v of the base free (estimated from constraints) + else: # get it from controller + root_v_rhc.setBounds(lb=v_root, + ub=v_root, nodes=0) + root_omega_rhc.setBounds(lb=omega, + ub=omega, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts, + ub=v_jnts, nodes=0) + if self._custom_opts["lin_a_feedback"]: + # write base lin 13793197 from meas + lin_a_prb.setBounds(lb=a_root[0:3, :], + ub=a_root[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _set_is_adaptive(self): + + # measurements + p_root = self.robot_state.root_state.get(data_type="p", robot_idxs=self.controller_index_np).reshape(-1, 1) + q_root = self.robot_state.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root = self.robot_state.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega = self.robot_state.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + q_jnts = 
self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_jnts = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_jnts = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # rhc variables to be set + q=self._prb.getVariables("q") # .setBounds() + root_p_rhc=q[0:3] # root p + root_q_rhc=q[3:7] # root orientation + jnts_q_rhc=q[7:] # jnts q + vel=self._prb.getVariables("v") + root_v_rhc=vel[0:3] # lin v. + root_omega_rhc=vel[3:6] # omega + jnts_v_rhc=vel[6:] # jnts v + acc=self._prb.getVariables("a") + lin_a_prb=acc[0:3] # lin acc + + # getting prediction defects + root_q_delta=self.rhc_pred_delta.root_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_q_delta=self.rhc_pred_delta.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np).reshape(-1, 1) + jnt_v_delta=self.rhc_pred_delta.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + v_root_delta = self.rhc_pred_delta.root_state.get(data_type="v", robot_idxs=self.controller_index_np).reshape(-1, 1) + omega_root_delta = self.rhc_pred_delta.root_state.get(data_type="omega", robot_idxs=self.controller_index_np).reshape(-1, 1) + a_root_delta = self.rhc_pred_delta.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np).reshape(-1, 1) + + # close state on known quantities, estimate some (e.g. lin vel) and + # open loop if thing start to explode + alpha_now=1.0 + delta=0.0 + if self._custom_opts["alpha_from_outside"]: + alpha_now=self.rhc_refs.get_alpha() + else: # "autotuned" alpha + if self._custom_opts["estimate_v_root"]: # we copmute delta based on jnt v (since we use meas.) 
+ delta=np.max(np.abs(jnt_v_delta)) + else: + delta=np.max(np.abs(omega_root_delta)) + # fail_idx=self._get_failure_index() + # fail_idx=self._get_explosion_idx()/self._fail_idx_thresh + alpha_now=(np.tanh(2*self._alpha_half*(delta-self._alpha_half))+1)/2.0 + + bound_relaxation=-np.log(1-alpha_now+1e-16) # [~0, ~inf] if alpha_now [0, 1] + self.rhc_refs.set_alpha(alpha=alpha_now) # also writes on shared mem for db + self.rhc_refs.set_bound_relax(bound_relax=bound_relaxation) # also writes on shared mem for db + + self._alphas_q_root[:]=alpha_now # for now single alpha for everything + self._alphas_q_jnts[:]=alpha_now + self._alphas_twist_root[:]=alpha_now + self._alphas_v_jnts[:]=alpha_now + self._alphas_a[:]=alpha_now + if not self._custom_opts["estimate_v_root"]: + self._alphas_twist_root[0:3]=1.0 # open + self._alphas_v_jnts[:]=1.0 # open + + # position is always open loop + root_q_full_from_rhc=self._get_root_full_q_from_sol(node_idx=1).reshape(-1, 1) + root_p_from_rhc=root_q_full_from_rhc[0:3, :] + p_root[:, :]=root_p_from_rhc + + # expaning meas q if continuous joints + if (not len(self._continuous_joints)==0): # we need do expand some meas. 
rev jnts to So2 + self._jnts_q_expanded[self._rev_joints_idxs, :]=q_jnts[self._rev_joints_idxs_red ,:] + self._jnts_q_expanded[self._continuous_joints_idxs_cos, :]=np.cos(q_jnts[self._continuous_joints_idxs_red, :]) # cos + self._jnts_q_expanded[self._continuous_joints_idxs_sin, :]=np.sin(q_jnts[self._continuous_joints_idxs_red, :]) # sin + + # continous joints position is always open loop, but we need a delta vector of matching dimension + q_jnts_from_rhc=self._get_jnt_q_from_sol(node_idx=1).reshape(-1, 1) + + self._jnts_q_delta_expanded[self._rev_joints_idxs, :]=jnt_q_delta[self._rev_joints_idxs_red ,:] + + self._jnts_q_delta_expanded[self._continuous_joints_idxs_cos, :]=\ + np.cos(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.cos(q_jnts[self._continuous_joints_idxs_red, :]) + self._jnts_q_delta_expanded[self._continuous_joints_idxs_sin, :]=\ + np.sin(q_jnts_from_rhc[self._continuous_joints_idxs_red, :]) - \ + np.sin(q_jnts[self._continuous_joints_idxs_red, :]) + + q_jnts=self._jnts_q_expanded.reshape(-1,1) # overriting with expanded jnts + jnt_q_delta=self._jnts_q_delta_expanded.reshape(-1, 1) # overriting with expanded jnts + + self._alphas_q_jnts[self._continuous_joints_idxs_cos, :]=1.0 # open loop + self._alphas_q_jnts[self._continuous_joints_idxs_sin, :]=1.0 # open loop + + # self._alphas_v_jnts[self._continuous_joints_idxs_red, :]=0.0 # open loop + + root_p_rhc.setBounds(lb=p_root, + ub=p_root, nodes=0) + root_q_rhc.setBounds(lb=q_root+self._alphas_q_root[3:7]*root_q_delta, + ub=q_root+self._alphas_q_root[3:7]*root_q_delta, nodes=0) + jnts_q_rhc.setBounds(lb=q_jnts+self._alphas_q_jnts*jnt_q_delta, + ub=q_jnts+self._alphas_q_jnts*jnt_q_delta, nodes=0) + if self._custom_opts["estimate_v_root"]: + root_v_rhc.setBounds(lb=-self._v_inf[0:3], + ub=self._v_inf[0:3], nodes=0) + else: + root_v_rhc.setBounds(lb=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, + ub=v_root+self._alphas_twist_root[0:3, :]*v_root_delta, nodes=0) + 
root_omega_rhc.setBounds(lb=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, + ub=omega+self._alphas_twist_root[3:6, :]*omega_root_delta, nodes=0) + jnts_v_rhc.setBounds(lb=v_jnts+self._alphas_v_jnts*jnt_v_delta, + ub=v_jnts+self._alphas_v_jnts*jnt_v_delta, nodes=0) + if self._custom_opts["lin_a_feedback"]: + lin_a_prb.setBounds(lb=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + ub=a_root[0:3, :]+self._alphas_a[0:3]*a_root_delta[0:3, :], + nodes=0) + + # return state used for feedback + q_state=np.concatenate((p_root, q_root, q_jnts), + axis=0) + v_state=np.concatenate((v_root, omega, v_jnts), + axis=0) + a_state=np.concatenate((a_root, a_jnts), + axis=0) + + return (q_state, v_state, a_state) + + def _solve(self): + + if self._debug: + return self._db_solve(bootstrap=False) + else: + return self._min_solve(bootstrap=False) + + def _bootstrap(self): + + if self._debug: + return self._db_solve(bootstrap=True) + else: + return self._min_solve(bootstrap=True) + + def _min_solve(self, bootstrap: bool = False): + # minimal solve version -> no debug + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using robot measurements + + self._pm.shift() # shifts phases of one dt + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + try: + if not bootstrap: + converged = self._ti.rti() # RTI step + else: + converged = self._ti.bootstrap() # full solve (to convergence) + self.sol_counter = self.sol_counter + 1 + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + return False + + def _db_solve(self, bootstrap: bool = False): + + self._timer_start = time.perf_counter() + + robot_qstate=None + robot_vstate=None + robot_astate=None + if self._open_loop: + robot_qstate, robot_vstate, robot_astate = self._update_open_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using data from the solution itself + else: + robot_qstate, robot_vstate, robot_astate = self._update_closed_loop(bootstrap=bootstrap) # updates the TO ig and + # initial conditions using robot measurements + + self._prb_update_time = time.perf_counter() + self._pm.shift() # shifts phases of one dt + self._phase_shift_time = time.perf_counter() + + if self._refs_in_hor_frame: + # q_base=self.robot_state.root_state.get(data_type="q", + # robot_idxs=self.controller_index).reshape(-1, 1) + # q_full=self._get_full_q_from_sol(node_idx=1).reshape(-1, 1) + # using internal base pose from rhc. 
in case of closed loop, it will be the meas state + force_norm=None + if self._custom_opts["use_force_feedback"]: + contact_forces=self.robot_state.contact_wrenches.get(data_type="f", + robot_idxs=self.controller_index_np, + contact_name=None).reshape(self.n_contacts,3) + force_norm=np.linalg.norm(contact_forces, axis=1) + self.rhc_refs.step(qstate=robot_qstate, vstate=robot_vstate, + force_norm=force_norm) + else: + self.rhc_refs.step() + + self._task_ref_update_time = time.perf_counter() + + try: + if not bootstrap: + converged = self._ti.rti() # RTI step + else: + converged = self._ti.bootstrap() # full solve bootstrap + self._rti_time = time.perf_counter() + self.sol_counter = self.sol_counter + 1 + self._update_db_data() + return not self._check_rhc_failure() + except Exception as e: # fail in case of exceptions + if self._verbose: + solve_mode = "RTI" if not bootstrap else "Bootstrap" + exception = f"{solve_mode}() for controller {self.controller_index} failed" + \ + f" with exception {type(e).__name__}" + Journal.log(self.__class__.__name__, + "solve", + exception, + LogType.EXCEP, + throw_when_excep = False) + self._update_db_data() + return False + + def _get_fail_idx(self): + + self._explosion_idx_buffer[:, self._expl_idx_buffer_counter]=self._get_explosion_idx() + self._expl_idx_buffer_counter+=1 + self._expl_idx_counter+=1 + if self._expl_idx_counter%self._expl_idx_window_size==0: + self._expl_idx_buffer_counter=0 # restart from 0 + + running_avrg=np.mean(self._explosion_idx_buffer).item() + + return running_avrg + + def _get_explosion_idx(self): + explosion_index = self._get_rhc_constr_viol() + self._get_rhc_cost()*self._fail_idx_scale + return explosion_index + + def _update_db_data(self): + + self._profiling_data_dict["problem_update_dt"] = self._prb_update_time - self._timer_start + self._profiling_data_dict["phases_shift_dt"] = self._phase_shift_time - self._prb_update_time + self._profiling_data_dict["task_ref_update"] = self._task_ref_update_time 
- self._phase_shift_time + self._profiling_data_dict["rti_solve_dt"] = self._rti_time - self._task_ref_update_time + self.rhc_costs.update(self._ti.solver_rti.getCostsValues()) + self.rhc_constr.update(self._ti.solver_rti.getConstraintsValues()) + + def _reset(self): + + # reset task interface (ig, solvers, etc..) + + # phase manager and sets bootstap as solution + self._gm.reset() + self._explosion_idx_buffer[:, :]=self._get_explosion_idx() # reset with data from reset solution + self._expl_idx_counter=0.0 + self._expl_idx_buffer_counter=0 + + def _get_cost_info(self): + + cost_dict = self._ti.solver_rti.getCostsValues() + cost_names = list(cost_dict.keys()) + cost_dims = [1] * len(cost_names) # costs are always scalar + return cost_names, cost_dims + + def _get_constr_info(self): + + constr_dict = self._ti.solver_rti.getConstraintsValues() + constr_names = list(constr_dict.keys()) + constr_dims = [-1] * len(constr_names) + i = 0 + for constr in constr_dict: + constr_val = constr_dict[constr] + constr_shape = constr_val.shape + constr_dims[i] = constr_shape[0] + i+=1 + return constr_names, constr_dims + + def _get_q_from_sol(self): + full_q=self._ti.solution['q'].astype(self._dtype) + if self._custom_opts["replace_continuous_joints"]: + return full_q + else: + cont_jnts=full_q[self._continuous_joints_idxs_qfull, :] + cos=cont_jnts[::2, :] + sin=cont_jnts[1::2, :] + # copy root + self._full_q_reduced[0:7, :]=full_q[0:7, :] + # copy rev joint vals + self._full_q_reduced[self._rev_joints_idxs_red_qfull, :]=full_q[self._rev_joints_idxs_qfull, :] + # and continuous + angle=np.arctan2(sin, cos) + self._full_q_reduced[self._continuous_joints_idxs_red_qfull, :]=angle + return self._full_q_reduced + + def _get_v_from_sol(self): + return self._ti.solution['v'].astype(self._dtype) + + def _get_a_from_sol(self): + return self._ti.solution['a'].astype(self._dtype) + + def _get_a_dot_from_sol(self): + return None + + def _get_f_from_sol(self): + # to be overridden by child 
class + contact_names =self._get_contacts() # we use controller-side names + try: + data=[] + for key in contact_names: + contact_f=self._ti.solution["f_" + key].astype(self._dtype) + np.nan_to_num(contact_f, nan=1e6, posinf=1e6, neginf=-1e6, copy=False) + np.clip(a=contact_f, a_max=1e6, a_min=-1e6, out=contact_f) + data.append(contact_f) + return np.concatenate(data, axis=0) + except: + return None + + def _get_f_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_eff_from_sol(self): + # to be overridden by child class + return None + + def _get_cost_from_sol(self, + cost_name: str): + return self.rhc_costs[cost_name] + + def _get_constr_from_sol(self, + constr_name: str): + return self.rhc_constr[constr_name] diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py new file mode 100644 index 0000000000000000000000000000000000000000..1c751aa8a6e8d32c028f94d4224123863866a933 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/hybrid_quad_rhc_refs.py @@ -0,0 +1,381 @@ +from aug_mpc.controllers.rhc.horizon_based.gait_manager import GaitManager +from aug_mpc.controllers.rhc.horizon_based.utils.math_utils import hor2w_frame + +from mpc_hive.utilities.shared_data.rhc_data import RhcRefs + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal + +from typing import Union + +import numpy as np + +class HybridQuadRhcRefs(RhcRefs): + + def __init__(self, + gait_manager: GaitManager, + robot_index_shm: int, + robot_index_view: int, + namespace: str, # namespace used for shared mem + verbose: bool = True, + vlevel: bool = VLevel.V2, + safe: bool = True, + use_force_feedback: bool = 
False, + optimize_mem: bool = False): + + self.robot_index = robot_index_shm + self.robot_index_view = robot_index_view + self.robot_index_np_view = np.array(self.robot_index_view) + + self._step_idx = 0 + self._print_frequency = 100 + + self._verbose = verbose + + self._use_force_feedback=use_force_feedback + + if optimize_mem: + super().__init__( + is_server=False, + with_gpu_mirror=False, + namespace=namespace, + safe=safe, + verbose=verbose, + vlevel=vlevel, + optimize_mem=optimize_mem, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + else: + super().__init__( + is_server=False, + with_gpu_mirror=False, + namespace=namespace, + safe=safe, + verbose=verbose, + vlevel=vlevel) + + if not isinstance(gait_manager, GaitManager): + exception = f"Provided gait_manager argument should be of GaitManager type!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + + self.gait_manager = gait_manager + + self.timeline_names = self.gait_manager.timeline_names + + # task interfaces from horizon for setting commands to rhc + self._get_tasks() + + self._p_ref_default=np.zeros((1, 3)) + self._q_ref_default=np.zeros((1, 4)) + self._q_ref_default[0, 0]=1 + + def _get_tasks(self): + # can be overridden by child + # cartesian tasks are in LOCAL_WORLD_ALIGNED (frame centered at distal link, oriented as WORLD) + self.base_lin_velxy = self.gait_manager.task_interface.getTask('base_lin_velxy') + self.base_lin_velz = self.gait_manager.task_interface.getTask('base_lin_velz') + self.base_omega = self.gait_manager.task_interface.getTask('base_omega') + self.base_height = self.gait_manager.task_interface.getTask('base_height') + + def run(self): + + super().run() + if not (self.robot_index < self.rob_refs.n_robots()): + exception = f"Provided \(0-based\) robot index {self.robot_index} exceeds number of " + \ + " available robots 
{self.rob_refs.n_robots()}." + Journal.log(self.__class__.__name__, + "run", + exception, + LogType.EXCEP, + throw_when_excep = True) + contact_names = list(self.gait_manager.task_interface.model.cmap.keys()) + if not (self.n_contacts() == len(contact_names)): + exception = f"N of contacts within problem {len(contact_names)} does not match n of contacts {self.n_contacts()}" + Journal.log(self.__class__.__name__, + "run", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # set some defaults from gait manager + for i in range(self.n_contacts()): + self.flight_settings_req.set(data=self.gait_manager.get_flight_duration(contact_name=contact_names[i]), + data_type="len_remain", + robot_idxs=self.robot_index_np_view, + contact_idx=i) + self.flight_settings_req.set(data=self.gait_manager.get_step_apexdh(contact_name=contact_names[i]), + data_type="apex_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i) + self.flight_settings_req.set(data=self.gait_manager.get_step_enddh(contact_name=contact_names[i]), + data_type="end_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i) + + self.flight_settings_req.synch_retry(row_index=self.robot_index, + col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, + n_cols=self.flight_settings_req.n_cols, + read=False) + + def step(self, qstate: np.ndarray = None, + vstate: np.ndarray = None, + force_norm: np.ndarray = None): + + if self.is_running(): + + # updates robot refs from shared mem + self.rob_refs.synch_from_shared_mem(robot_idx=self.robot_index, robot_idx_view=self.robot_index_view) + self.phase_id.synch_all(read=True, retry=True, + row_index=self.robot_index, + row_index_view=self.robot_index_view) + self.contact_flags.synch_all(read=True, retry=True, + row_index=self.robot_index, + row_index_view=self.robot_index_view) + self.flight_settings_req.synch_all(read=True, retry=True, + row_index=self.robot_index, + row_index_view=self.robot_index_view) + self._set_contact_phases(q_full=qstate) 
+ + # updated internal references with latest available ones + q_base=qstate[3:7,0:1] # quaternion + self._apply_refs_to_tasks(q_base=q_base) + + # if self._use_force_feedback: + # self._set_force_feedback(force_norm=force_norm) + + self._step_idx +=1 + + else: + exception = f"{self.__class__.__name__} is not running" + Journal.log(self.__class__.__name__, + "step", + exception, + LogType.EXCEP, + throw_when_excep = True) + + def _set_contact_phases(self, + q_full: np.ndarray): + + # phase_id = self.phase_id.read_retry(row_index=self.robot_index, + # col_index=0)[0] + + contact_flags_refs = self.contact_flags.get_numpy_mirror()[self.robot_index_np_view, :] + target_n_limbs_in_contact=np.sum(contact_flags_refs).item() + if target_n_limbs_in_contact==0: + target_n_limbs_in_contact=4 + + is_contact = contact_flags_refs.flatten().tolist() + n_contacts=len(is_contact) + + for i in range(n_contacts): # loop through contact timelines + timeline_name = self.timeline_names[i] + + self.gait_manager.set_f_reg(contact_name=timeline_name, + scale=target_n_limbs_in_contact) + + if is_contact[i]==False: # release contact + + # flight parameters requests are set only when inserting a flight phase + len_req_now=int(self.flight_settings_req.get(data_type="len_remain", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item()) + apex_req_now=self.flight_settings_req.get(data_type="apex_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + end_req_now=self.flight_settings_req.get(data_type="end_dpos", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + landing_dx_req_now=self.flight_settings_req.get(data_type="land_dx", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + landing_dy_req_now=self.flight_settings_req.get(data_type="land_dy", + robot_idxs=self.robot_index_np_view, + contact_idx=i).item() + + # set flight phase properties depending on last value on shared memory + 
self.gait_manager.set_flight_duration(contact_name=timeline_name, + val=len_req_now) + self.gait_manager.set_step_apexdh(contact_name=timeline_name, + val=apex_req_now) + self.gait_manager.set_step_enddh(contact_name=timeline_name, + val=end_req_now) + self.gait_manager.set_step_landing_dx(contact_name=timeline_name, + val=landing_dx_req_now) + self.gait_manager.set_step_landing_dy(contact_name=timeline_name, + val=landing_dy_req_now) + # insert flight phase over the horizon + self.gait_manager.add_flight(contact_name=timeline_name, + robot_q=q_full) + + else: # contact phase + self.gait_manager.add_stand(contact_name=timeline_name) + + at_least_one_flight=self.gait_manager.update_flight_info(timeline_name) + # flight_info=self.gait_manager.get_flight_info(timeline_name) + + self.gait_manager.check_horizon_full(timeline_name=timeline_name) + + # write flight info to shared mem for all contacts in one shot (we follow same order as in flight_info shm) + all_flight_info=self.gait_manager.get_flight_info_all() + flight_info_shared=self.flight_info.get_numpy_mirror() + flight_info_shared[self.robot_index_np_view, :]=all_flight_info + self.flight_info.synch_retry(row_index=self.robot_index, + col_index=0, + row_index_view=self.robot_index_np_view, + n_rows=1, n_cols=self.flight_info.n_cols, + read=False) + + self.gait_manager.update() + + def _apply_refs_to_tasks(self, q_base = None): + # overrides parent + if q_base is not None: # rhc refs are assumed to be specified in the so called "horizontal" + # frame, i.e. 
a vertical frame, with the x axis aligned with the projection of the base x axis + # onto the plane + root_pose = self.rob_refs.root_state.get(data_type = "q_full", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) # this should also be + # rotated into the horizontal frame (however, for now only the z componet is used, so it's ok) + + root_twist_ref = self.rob_refs.root_state.get(data_type="twist", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) + + root_twist_ref_h = root_twist_ref.copy() + + hor2w_frame(root_twist_ref, q_base, root_twist_ref_h) # horizon works in local world aligned frame + + if self.base_lin_velxy is not None: + self.base_lin_velxy.setRef(root_twist_ref_h[0:2, :]) + if self.base_omega is not None: + self.base_omega.setRef(root_twist_ref_h[3:, :]) + if self.base_lin_velz is not None: + self.base_lin_velz.setRef(root_twist_ref_h[2:3, :]) + if self.base_height is not None: + self.base_height.setRef(root_pose) + else: + root_pose = self.rob_refs.root_state.get(data_type = "q_full", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) + root_twist_ref = self.rob_refs.root_state.get(data_type="twist", + robot_idxs=self.robot_index_np_view).reshape(-1, 1) + + if self.base_lin_velxy is not None: + self.base_lin_velxy.setRef(root_twist_ref[0:2, :]) + if self.base_omega is not None: + self.base_omega.setRef(root_twist_ref[3:, :]) + if self.base_lin_velz is not None: + self.base_lin_velz.setRef(root_twist_ref[2:3, :]) + if self.base_height is not None: + self.base_height.setRef(root_pose) + + # def _set_force_feedback(self, + # force_norm: np.ndarray = None): + + # is_contact=force_norm>1.0 + + # for i in range(len(is_contact)): + # timeline_name = self._timeline_names[i] + # self.gait_manager.set_force_feedback(timeline_name=timeline_name, + # force_norm=force_norm[i]) + + # if not is_contact[i]: + + + def set_default_refs(self, + p_ref: np.ndarray, + q_ref: np.ndarray): + + self._p_ref_default[:, :]=p_ref + self._q_ref_default[:, :]=q_ref + + 
def set_alpha(self, alpha:float): + # set provided value + alpha_shared=self.alpha.get_numpy_mirror() + alpha_shared[self.robot_index_np_view, :] = alpha + self.alpha.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.alpha.n_cols, + read=False) + + def get_alpha(self): + self.alpha.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.alpha.n_cols, + read=True) + alpha=self.alpha.get_numpy_mirror()[self.robot_index_np_view, :].item() + return alpha + + def set_bound_relax(self, bound_relax:float): + # set provided value + bound_rel_shared=self.bound_rel.get_numpy_mirror() + bound_rel_shared[self.robot_index_np_view, :] = bound_relax + self.bound_rel.synch_retry(row_index=self.robot_index, col_index=0, n_rows=1, + row_index_view=self.robot_index_view, + n_cols=self.alpha.n_cols, + read=False) + + def reset(self): + + if self.is_running(): + + # resets shared mem + contact_flags_current = self.contact_flags.get_numpy_mirror() + phase_id_current = self.phase_id.get_numpy_mirror() + contact_flags_current[self.robot_index_np_view, :] = np.full((1, self.n_contacts()), dtype=np.bool_, fill_value=True) + phase_id_current[self.robot_index_np_view, :] = -1 # defaults to custom phase id + + contact_pos_current=self.rob_refs.contact_pos.get_numpy_mirror() + contact_pos_current[self.robot_index_np_view, :] = 0.0 + + flight_info_current=self.flight_info.get_numpy_mirror() + flight_info_current[self.robot_index_np_view, :] = 0.0 + + alpha=self.alpha.get_numpy_mirror() + alpha[self.robot_index_np_view, :] = 0.0 + + self.rob_refs.root_state.set(data_type="p", data=self._p_ref_default, robot_idxs=self.robot_index_np_view) + self.rob_refs.root_state.set(data_type="q", data=self._q_ref_default, robot_idxs=self.robot_index_np_view) + self.rob_refs.root_state.set(data_type="twist", data=np.zeros((1, 6)), robot_idxs=self.robot_index_np_view) + + 
self.contact_flags.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.contact_flags.n_cols, + read=False) + self.phase_id.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.phase_id.n_cols, + read=False) + self.rob_refs.root_state.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.rob_refs.root_state.n_cols, + read=False) + + self.rob_refs.contact_pos.synch_retry(row_index=self.robot_index, col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.rob_refs.contact_pos.n_cols, + read=False) + + self.flight_info.synch_retry(row_index=self.robot_index, + col_index=0, + row_index_view=self.robot_index_view, + n_rows=1, n_cols=self.flight_info.n_cols, + read=False) + + # should also empty the timeline for stepping phases + self._step_idx = 0 + + else: + exception = f"Cannot call reset() since run() was not called!" 
+ Journal.log(self.__class__.__name__, + "reset", + exception, + LogType.EXCEP, + throw_when_excep = True) + diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/utils/__init__.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..8d4173d5b51ee8c644a54008ca0a40f1b34d1841 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/horizon_based/utils/math_utils.py @@ -0,0 +1,165 @@ + +import numpy as np + +def w2hor_frame(t_w: np.ndarray, + q_b: np.ndarray, + t_out: np.ndarray): + """ + Transforms a twist vector expressed in WORLD frame to + an "horizontal" frame (z aligned as world, x aligned as the projection + of the x-axis of the base frame described by q_b). This is useful for specifying locomotion + references in a "game"-like fashion. 
+ t_out will hold the result + """ + # q_b = q_b / q_b.norm(dim=1, keepdim=True) + q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :] + + R_11 = 1 - 2 * (q_j ** 2 + q_k ** 2) + R_21 = 2 * (q_i * q_j + q_k * q_w) + + norm = np.sqrt(R_11 ** 2 + R_21 ** 2) + x_proj_x = R_11 / norm + x_proj_y = R_21 / norm + + y_proj_x = -x_proj_y + y_proj_y = x_proj_x + + t_out[0, :] = t_w[0, :] * x_proj_x + t_w[1, :] * x_proj_y + t_out[1, :] = t_w[0, :] * y_proj_x + t_w[1, :] * y_proj_y + t_out[2, :] = t_w[2, :] # z-component remains the same + t_out[3, :] = t_w[3, :] * x_proj_x + t_w[4, :] * x_proj_y + t_out[4, :] = t_w[3, :] * y_proj_x + t_w[4, :] * y_proj_y + t_out[5, :] = t_w[5, :] # z-component remains the same + +def hor2w_frame(t_h: np.ndarray, + q_b: np.ndarray, + t_out: np.ndarray): + """ + Transforms a velocity vector expressed in "horizontal" frame to WORLD + v_out will hold the result + """ + + # Extract quaternion components + q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :] + + # Compute rotation matrix elements + R_11 = 1 - 2 * (q_j ** 2 + q_k ** 2) + R_21 = 2 * (q_i * q_j + q_k * q_w) + + # Normalize to get projection components + norm = np.sqrt(R_11 ** 2 + R_21 ** 2) + x_proj_x = R_11 / norm + x_proj_y = R_21 / norm + + # Orthogonal vector components + y_proj_x = -x_proj_y + y_proj_y = x_proj_x + + # Transform velocity vector components from horizontal to world frame + t_out[0, :] = t_h[0, :] * x_proj_x + t_h[1, :] * y_proj_x + t_out[1, :] = t_h[0, :] * x_proj_y + t_h[1, :] * y_proj_y + t_out[2, :] = t_h[2, :] # z-component remains the same + t_out[3, :] = t_h[3, :] * x_proj_x + t_h[4, :] * y_proj_x + t_out[4, :] = t_h[3, :] * x_proj_y + t_h[4, :] * y_proj_y + t_out[5, :] = t_h[5, :] # z-component remains the same + +def base2world_frame(t_b: np.ndarray, + q_b: np.ndarray, + t_out: np.ndarray): + """ + Transforms a velocity vector expressed in the base frame to + the WORLD frame using the given quaternion that describes the orientation + 
of the base with respect to the world frame. The result is written in v_out. + """ + # q_b = q_b / q_b.norm(dim=1, keepdim=True) + q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :] + + R_00 = 1 - 2 * (q_j ** 2 + q_k ** 2) + R_01 = 2 * (q_i * q_j - q_k * q_w) + R_02 = 2 * (q_i * q_k + q_j * q_w) + + R_10 = 2 * (q_i * q_j + q_k * q_w) + R_11 = 1 - 2 * (q_i ** 2 + q_k ** 2) + R_12 = 2 * (q_j * q_k - q_i * q_w) + + R_20 = 2 * (q_i * q_k - q_j * q_w) + R_21 = 2 * (q_j * q_k + q_i * q_w) + R_22 = 1 - 2 * (q_i ** 2 + q_j ** 2) + + # Transform the velocity to the world frame + t_out[0, :] = t_b[0, :] * R_00 + t_b[1, :] * R_01 + t_b[2, :] * R_02 + t_out[1, :] = t_b[0, :] * R_10 + t_b[1, :] * R_11 + t_b[2, :] * R_12 + t_out[2, :] = t_b[0, :] * R_20 + t_b[1, :] * R_21 + t_b[2, :] * R_22 + t_out[3, :] = t_b[3, :] * R_00 + t_b[4, :] * R_01 + t_b[5, :] * R_02 + t_out[4, :] = t_b[3, :] * R_10 + t_b[4, :] * R_11 + t_b[5, :] * R_12 + t_out[5, :] = t_b[3, :] * R_20 + t_b[4, :] * R_21 + t_b[5, :] * R_22 + +def world2base_frame(t_w: np.ndarray, + q_b: np.ndarray, + t_out: np.ndarray): + """ + Transforms a velocity vector expressed in the WORLD frame to + the base frame using the given quaternion that describes the orientation + of the base with respect to the world frame. The result is written in v_out. 
+ """ + # q_b = q_b / q_b.norm(dim=1, keepdim=True) + q_w, q_i, q_j, q_k = q_b[3, :], q_b[0, :], q_b[1, :], q_b[2, :] + + R_00 = 1 - 2 * (q_j ** 2 + q_k ** 2) + R_01 = 2 * (q_i * q_j - q_k * q_w) + R_02 = 2 * (q_i * q_k + q_j * q_w) + + R_10 = 2 * (q_i * q_j + q_k * q_w) + R_11 = 1 - 2 * (q_i ** 2 + q_k ** 2) + R_12 = 2 * (q_j * q_k - q_i * q_w) + + R_20 = 2 * (q_i * q_k - q_j * q_w) + R_21 = 2 * (q_j * q_k + q_i * q_w) + R_22 = 1 - 2 * (q_i ** 2 + q_j ** 2) + + # Transform the velocity to the base frame using the transpose of the rotation matrix + t_out[0, :] = t_w[0, :] * R_00 + t_w[1, :] * R_10 + t_w[2, :] * R_20 + t_out[1, :] = t_w[0, :] * R_01 + t_w[1, :] * R_11 + t_w[2, :] * R_21 + t_out[2, :] = t_w[0, :] * R_02 + t_w[1, :] * R_12 + t_w[2, :] * R_22 + t_out[3, :] = t_w[3, :] * R_00 + t_w[4, :] * R_10 + t_w[5, :] * R_20 + t_out[4, :] = t_w[3, :] * R_01 + t_w[4, :] * R_11 + t_w[5, :] * R_21 + t_out[5, :] = t_w[3, :] * R_02 + t_w[4, :] * R_12 + t_w[5, :] * R_22 + +if __name__ == "__main__": + + n_envs = 5000 + t_b = np.random.rand(6, n_envs) + + q_b = np.random.rand(4, n_envs) + q_b_norm = q_b / np.linalg.norm(q_b, axis=0, keepdims=True) + + t_w = np.zeros_like(t_b) # To hold horizontal frame velocities + t_b_recovered = np.zeros_like(t_b) # To hold recovered world frame velocities + base2world_frame(t_b, q_b_norm, t_w) + world2base_frame(t_w, q_b_norm, t_b_recovered) + assert np.allclose(t_b, t_b_recovered, atol=1e-6), "Test failed: t_w_recovered does not match t_b" + print("Forward test passed: t_b_recovered matches t_b") + + t_b2 = np.zeros_like(t_b) # To hold horizontal frame velocities + t_w_recovered = np.zeros_like(t_b) + world2base_frame(t_b, q_b_norm, t_b2) + base2world_frame(t_b2, q_b_norm, t_w_recovered) + assert np.allclose(t_b, t_w_recovered, atol=1e-6), "Test failed: t_w_recovered does not match t_b" + print("Backward test passed: t_w_recovered matches t_w") + + # test transf. 
world-horizontal frame + v_h = np.zeros_like(t_b) # To hold horizontal frame velocities + v_recovered = np.zeros_like(t_b) + w2hor_frame(t_b, q_b_norm, v_h) + hor2w_frame(v_h, q_b_norm, v_recovered) + assert np.allclose(t_b, v_recovered, atol=1e-6), "Test failed: v_recovered does not match t_b" + print("horizontal forward frame test passed: matches ") + + t_w = np.zeros_like(t_b) # To hold horizontal frame velocities + v_h_recovered = np.zeros_like(t_b) + hor2w_frame(t_b, q_b_norm, t_w) + w2hor_frame(t_w, q_b_norm, v_h_recovered) + assert np.allclose(t_b, v_h_recovered, atol=1e-6), "Test failed: v_h_recovered does not match t_b" + print("horizontal backward frame test passed: matches ") + + \ No newline at end of file diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/hybrid_quad_client.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/hybrid_quad_client.py new file mode 100644 index 0000000000000000000000000000000000000000..757dc3aba72a500bbbf6c8fa21145f4ea63a2337 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/other/hybrid_quad_client.py @@ -0,0 +1,95 @@ +from aug_mpc.controllers.rhc.augmpc_cluster_client import AugMpcClusterClient + +from aug_mpc.controllers.rhc.horizon_based.hybrid_quad_rhc import HybridQuadRhc +from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds_horizon +from aug_mpc.utils.sys_utils import PathsGetter + +from typing import List, Dict + +class HybridQuadrupedClusterClient(AugMpcClusterClient): + + def _import_aux_libs(self): + super()._import_aux_libs() + # Import Horizon and related dependencies as global libs + from aug_mpc.controllers.rhc.horizon_based.horizon_imports_glob import import_horizon_global + import_horizon_global() + + def __init__(self, + namespace: str, + urdf_xacro_path: str, + srdf_xacro_path: str, + cluster_size: int, + set_affinity: bool = False, + 
use_mp_fork: bool = False, + isolated_cores_only: bool = False, + core_ids_override_list: List[int] = None, + verbose: bool = False, + debug: bool = False, + open_loop: bool = True, + base_dump_dir: str = "/tmp", + timeout_ms: int = 60000, + codegen_override: str = None, + custom_opts: Dict = {}): + + self._open_loop = open_loop + + self._paths = PathsGetter() + + self._codegen_dir_name = namespace + + self._timeout_ms = timeout_ms + + super().__init__(namespace = namespace, + urdf_xacro_path = urdf_xacro_path, + srdf_xacro_path = srdf_xacro_path, + cluster_size=cluster_size, + set_affinity = set_affinity, + use_mp_fork = use_mp_fork, + isolated_cores_only = isolated_cores_only, + core_ids_override_list = core_ids_override_list, + verbose = verbose, + debug = debug, + base_dump_dir=base_dump_dir, + codegen_override=codegen_override, + custom_opts=custom_opts) + + self._n_nodes = 31 if not ("n_nodes" in self._custom_opts) else self._custom_opts["n_nodes"] + self._dt = 0.05 if not ("cluster_dt" in self._custom_opts) else self._custom_opts["cluster_dt"] + + def _xrdf_cmds(self): + parts = self._urdf_path.split('/') + urdf_descr_root_path = '/'.join(parts[:-2]) + cmds = get_xrdf_cmds_horizon(urdf_descr_root_path=urdf_descr_root_path) + return cmds + + def _process_codegen_dir(self,idx:int): + + codegen_dir = self.codegen_dir() + f"/{self._codegen_dir_name}Rhc{idx}" + codegen_dir_ovveride = self.codegen_dir_override() + if not (codegen_dir_ovveride=="" or \ + codegen_dir_ovveride=="none" or \ + codegen_dir_ovveride=="None" or \ + (codegen_dir_ovveride is None)): # if overrde was provided + codegen_dir = f"{codegen_dir_ovveride}{idx}"# override + + return codegen_dir + + def _generate_controller(self, + idx: int): + + codegen_dir=self._process_codegen_dir(idx=idx) + + controller = HybridQuadRhc( + urdf_path=self._urdf_path, + srdf_path=self._srdf_path, + config_path = self._paths.CONFIGPATH, + robot_name=self._namespace, + codegen_dir=codegen_dir, + 
n_nodes=self._n_nodes, + dt=self._dt, + max_solver_iter = 1, # rti + open_loop = self._open_loop, + verbose = self._verbose, + debug = self._debug) + + return controller \ No newline at end of file diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rhc.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rhc.py new file mode 100644 index 0000000000000000000000000000000000000000..c9410261d4dca35164fbe8abe5ac12b166b3483d --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/rhc.py @@ -0,0 +1,1262 @@ +# Copyright (C) 2023 Andrea Patrizi (AndrePatri, andreapatrizi1b6e6@gmail.com) +# +# This file is part of MPCHive and distributed under the General Public License version 2 license. +# +# MPCHive is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 2 of the License, or +# (at your option) any later version. +# +# MPCHive is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with MPCHive. If not, see . 
+# +from abc import ABC, abstractmethod +# from perf_sleep.pyperfsleep import PerfSleep +# from mpc_hive.utilities.cpu_utils.core_utils import get_memory_usage + +import time + +import numpy as np + +from mpc_hive.utilities.shared_data.rhc_data import RobotState +from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred, RhcPredDelta +from mpc_hive.utilities.shared_data.rhc_data import RhcStatus +from mpc_hive.utilities.shared_data.rhc_data import RhcInternal +from mpc_hive.utilities.shared_data.cluster_profiling import RhcProfiling +from mpc_hive.utilities.remote_triggering import RemoteTriggererClnt + +from mpc_hive.utilities.homing import RobotHomer + +from mpc_hive.utilities.math_utils import world2base_frame + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import Journal, LogType +from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper +from EigenIPC.PyEigenIPC import dtype + +from typing import List +# from typing import TypeVar, Union + +import signal +import os +import inspect + +class RHController(ABC): + + def __init__(self, + srdf_path: str, + n_nodes: int, + dt: float, + namespace: str, # shared mem namespace + dtype = np.float32, + verbose = False, + debug = False, + timeout_ms: int = 60000, + allow_less_jnts: bool = True): + + signal.signal(signal.SIGINT, self._handle_sigint) + + self._allow_less_jnts = allow_less_jnts # whether to allow less joints in rhc controller than the ones on the robot + # (e.g. some joints might not be desirable for control purposes) + + self.namespace = namespace + self._dtype = dtype + self._verbose = verbose + self._debug = debug + + # if not self._debug: + np.seterr(over='ignore') # ignoring overflows + + self._n_nodes = n_nodes + self._dt = dt + self._n_intervals = self._n_nodes - 1 + self._t_horizon = self._n_intervals * dt + self._set_rhc_pred_idx() # prection is by default written on last node + self._set_rhc_cmds_idx() # default to idx 2 (i.e. 
cmds to get to third node) + self.controller_index = None # will be assigned upon registration to a cluster + self.controller_index_np = None + self._robot_mass=1.0 + + self.srdf_path = srdf_path # using for parsing robot homing + + self._registered = False + self._closed = False + + self._allow_triggering_when_failed = True + + self._profiling_data_dict = {} + self._profiling_data_dict["full_solve_dt"] = np.nan + self._profiling_data_dict["rti_solve_dt"] = np.nan + self._profiling_data_dict["problem_update_dt"] = np.nan + self._profiling_data_dict["phases_shift_dt"] = np.nan + self._profiling_data_dict["task_ref_update"] = np.nan + + self.n_dofs = None + self.n_contacts = None + + # shared mem + self.robot_state = None + self.rhc_status = None + self.rhc_internal = None + self.cluster_stats = None + self.robot_cmds = None + self.robot_pred = None + self.rhc_pred_delta = None + self.rhc_refs = None + self._remote_triggerer = None + self._remote_triggerer_timeout = timeout_ms # [ms] + + # remote termination + self._remote_term = None + self._term_req_received = False + + # jnt names + self._env_side_jnt_names = [] + self._controller_side_jnt_names = [] + self._got_jnt_names_from_controllers = False + + # data maps + self._to_controller = [] + self._quat_remap = [0, 1, 2, 3] # defaults to no remap (to be overridden) + + self._got_contact_names = False + + self._received_trigger = False # used for proper termination + + self._n_resets = 0 + self._n_fails = 0 + self._fail_idx_thresh = 5e3 + self._contact_f_thresh = 1e5 + + self._failed = False + + self._start_time = time.perf_counter() # used for profiling when in debug mode + + self._homer = None # robot homing manager + self._homer_env = None # used for setting homing to jnts not contained in rhc prb + + self._class_name_base = f"{self.__class__.__name__}" + self._class_name = self._class_name_base # will append controller index upon registration + + self._contact_force_base_loc_aux=np.zeros((1,3),dtype=self._dtype) 
+ self._norm_grav_vector_w=np.zeros((1,3),dtype=self._dtype) + self._norm_grav_vector_w[:, 2]=-1.0 + self._norm_grav_vector_base_loc=np.zeros((1,3),dtype=self._dtype) + + self._init() # initialize controller + + if not hasattr(self, '_rhc_fpaths'): + self._rhc_fpaths = [] + self._rhc_fpaths.append(os.path.abspath(__file__)) + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + # Get the file path of the class being initialized and append it to the list + if not hasattr(cls, '_rhc_fpaths'): + cls._rhc_fpaths = [] + child_class_file_path = os.path.abspath(inspect.getfile(cls)) + cls._rhc_fpaths.append(child_class_file_path) + + def this_paths(self): + return self._rhc_fpaths + + def __del__(self): + self._close() + + def _handle_sigint(self, signum, frame): + if self._verbose: + Journal.log(self._class_name, + "_handle_sigint", + "SIGINT received", + LogType.WARN) + self._term_req_received = True + + def _set_rhc_pred_idx(self): + # default to last node + self._pred_node_idx=self._n_nodes-1 + + def _set_rhc_cmds_idx(self): + # use index 2 by default to compensate for + # the inevitable action delay + # (get_state, trigger sol -> +dt -> apply sol ) + # if we apply action from second node (idenx 1) + # that action will already be one dt old. 
We assume we were already + # applying the right action to get to the state of node idx 1 and get the + # cmds for reaching the state at idx 1 + self._rhc_cmds_node_idx=2 + + def _close(self): + if not self._closed: + self._unregister_from_cluster() + if self.robot_cmds is not None: + self.robot_cmds.close() + if self.robot_pred is not None: + self.robot_pred.close() + if self.rhc_pred_delta is not None: + self.rhc_pred_delta.close() + if self.robot_state is not None: + self.robot_state.close() + if self.rhc_status is not None: + self.rhc_status.close() + if self.rhc_internal is not None: + self.rhc_internal.close() + if self.cluster_stats is not None: + self.cluster_stats.close() + if self._remote_triggerer is not None: + self._remote_triggerer.close() + if self._remote_term is not None: + self._remote_term.close() + self._closed = True + + def close(self): + self._close() + + def get_file_paths(self): + # can be overriden by child + base_paths = [] + base_paths.append(self._this_path) + return base_paths + + def init_rhc_task_cmds(self): + + self.rhc_refs = self._init_rhc_task_cmds() + self.rhc_refs.reset() + + def _init_states(self): + + quat_remap = self._get_quat_remap() + self.robot_state = RobotState(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + self.robot_state.run() + self.robot_cmds = RhcCmds(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # 
got from server + n_contacts=None # got from server + ) + self.robot_cmds.run() + self.robot_pred = RhcPred(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + self.robot_pred.run() + self.rhc_pred_delta = RhcPredDelta(namespace=self.namespace, + is_server=False, + q_remapping=quat_remap, # remapping from environment to controller + with_gpu_mirror=False, + with_torch_view=False, + safe=False, + verbose=self._verbose, + vlevel=VLevel.V2, + optimize_mem=True, + n_robots=1, # we just need the row corresponding to this controller + n_jnts=None, # got from server + n_contacts=None # got from server + ) + self.rhc_pred_delta.run() + + def _rhc(self, rti: bool = True): + if self._debug: + self._rhc_db(rti=rti) + else: + self._rhc_min(rti=rti) + + def _rhc_db(self, rti: bool = True): + # rhc with debug data + self._start_time = time.perf_counter() + + self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with + # latest data on shared mem + + self._compute_pred_delta() + + if not self.failed(): + # we can solve only if not in failure state + if rti: + self._failed = not self._solve() # solve actual TO with RTI + else: + self._failed = not self._bootstrap() # full bootstrap solve + if (self._failed): + # perform failure procedure + self._on_failure() + else: + if not self._allow_triggering_when_failed: + Journal.log(self._class_name, + "solve", + f"Received solution req, but controller is in failure state. 
" + \ + " You should have reset() the controller!", + LogType.EXCEP, + throw_when_excep = True) + else: + if self._verbose: + Journal.log(self._class_name, + "solve", + f"Received solution req, but controller is in failure state. No solution will be performed. " + \ + " Use the reset() method to continue solving!", + LogType.WARN) + + self._write_cmds_from_sol() # we update update the views of the cmds + # from the latest solution + + # in debug, rhc internal state is streamed over + # shared mem. + self._update_rhc_internal() + self._profiling_data_dict["full_solve_dt"] = time.perf_counter() - self._start_time + self._update_profiling_data() # updates all profiling data + if self._verbose: + Journal.log(self._class_name, + "solve", + f"RHC full solve loop execution time -> " + str(self._profiling_data_dict["full_solve_dt"]), + LogType.INFO, + throw_when_excep = True) + + def _rhc_min(self, rti: bool = True): + + self.robot_state.synch_from_shared_mem(robot_idx=self.controller_index, robot_idx_view=self.controller_index_np) # updates robot state with + # latest data on shared mem + + self._compute_pred_delta() + + if not self.failed(): + # we can solve only if not in failure state + if rti: + self._failed = not self._solve() # solve actual TO with RTI + else: + self._failed = not self._bootstrap() # full bootstrap solve + if (self._failed): + # perform failure procedure + self._on_failure() + else: + if not self._allow_triggering_when_failed: + Journal.log(self._class_name, + "solve", + f"Received solution req, but controller is in failure state. " + \ + " You should have reset() the controller!", + LogType.EXCEP, + throw_when_excep = True) + else: + if self._verbose: + Journal.log(self._class_name, + "solve", + f"Received solution req, but controller is in failure state. No solution will be performed. 
" + \ + " Use the reset() method to continue solving!", + LogType.WARN) + + self._write_cmds_from_sol() # we update the views of the cmds + # from the latest solution even if failed + + def solve_once(self): + # run a single iteration of the solve loop (used for pooling) + if self._term_req_received: + return False + + if not self._remote_triggerer.wait(self._remote_triggerer_timeout): + Journal.log(self._class_name, + f"solve", + "Didn't receive any remote trigger req within timeout!", + LogType.EXCEP, + throw_when_excep = False) + return False + + self._received_trigger = True + + if self.rhc_status.resets.read_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0)[0]: + self.reset() # rhc is reset + + if self.rhc_status.trigger.read_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0)[0]: + rti_solve = self.rhc_status.rti_solve.read_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0)[0] + self._rhc(rti=rti_solve) # run solution with requested solve mode + self.rhc_status.trigger.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) # allow next solution trigger + + self._remote_triggerer.ack() # send ack signal to server + self._received_trigger = False + + self._term_req_received = self._term_req_received or self._remote_term.read_retry(row_index=0, + col_index=0, + row_index_view=0)[0] + + if self._term_req_received: + self.close() + return False + + return True + + def solve(self): + + # run the solution loop and wait for trigger signals + # using cond. 
variables (efficient) + while True: + if not self.solve_once(): + break + + self.close() # is not stricly necessary + + def reset(self): + + if not self._closed: + self.reset_rhc_data() + self._failed = False # allow triggering + self._n_resets += 1 + self.rhc_status.fails.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.resets.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + def _create_jnt_maps(self): + + # retrieve env-side joint names from shared mem + self._env_side_jnt_names = self.robot_state.jnt_names() + self._check_jnt_names_compatibility() # will raise exception if not self._allow_less_jnts + if not self._got_jnt_names_from_controllers: + exception = f"Cannot run the solve(). assign_env_side_jnt_names() was not called!" + Journal.log(self._class_name, + "_create_jnt_maps", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # robot homer guarantees that _controller_side_jnt_names is at least contained in self._env_side_jnt_names (or equal) + self._to_controller = [self._env_side_jnt_names.index(element) for element in self._controller_side_jnt_names] + # set joint remappings for shared data from env data to controller + # all shared data is by convention specified according to the env (jnts are ordered that way) + # the remapping is used so that when data is returned, its a remapped view from env to controller, + # so that data can be assigned direclty from the rhc controller without any issues + self.robot_state.set_jnts_remapping(jnts_remapping=self._to_controller) + self.robot_cmds.set_jnts_remapping(jnts_remapping=self._to_controller) + self.robot_pred.set_jnts_remapping(jnts_remapping=self._to_controller) + self.rhc_pred_delta.set_jnts_remapping(jnts_remapping=self._to_controller) + + return True + + def reset_rhc_data(self): + + self._reset() # custom reset (e.g. 
it should set the current solution to some + # default solution, like a bootstrap) + + self.rhc_refs.reset() # reset rhc refs to default (has to be called after _reset) + + self._write_cmds_from_sol() # use latest solution (e.g. from bootstrap if called before running + # the first solve) as default state + + def failed(self): + return self._failed + + def robot_mass(self): + return self._robot_mass + + def _assign_cntrl_index(self, reg_state: np.ndarray): + state = reg_state.flatten() # ensure 1D tensor + free_spots = np.nonzero(~state.flatten())[0] + return free_spots[0].item() # just return the first free spot + + def _register_to_cluster(self): + + self.rhc_status = RhcStatus(is_server=False, + namespace=self.namespace, + verbose=self._verbose, + vlevel=VLevel.V2, + with_torch_view=False, + with_gpu_mirror=False, + optimize_mem=True, + cluster_size=1, # we just need the row corresponding to this controller + n_contacts=None, # we get this from server + n_nodes=None # we get this from server + ) + self.rhc_status.run() # rhc status (reg. flags, failure, tot cost, tot cnstrl viol, etc...) + + # acquire semaphores since we have to perform non-atomic operations + # on the whole memory views + self.rhc_status.registration.data_sem_acquire() + self.rhc_status.controllers_counter.data_sem_acquire() + self.rhc_status.controllers_counter.synch_all(retry = True, + read = True) + + available_spots = self.rhc_status.cluster_size + # from here on all pre registration ops can be done + + # incrementing cluster controllers counter + controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror() + if controllers_counter[0, 0] + 1 > available_spots: # no space left -> return + self.rhc_status.controllers_counter.data_sem_release() + self.rhc_status.registration.data_sem_release() + exception = "Cannot register to cluster. 
No space left " + \ + f"({controllers_counter[0, 0]} controllers already registered)" + Journal.log(self._class_name, + "_register_to_cluster", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # now we can register + + # increment controllers counter + controllers_counter += 1 + self.controller_index = controllers_counter.item() -1 + + # actually register to cluster + self.rhc_status.controllers_counter.synch_all(retry = True, + read = False) # writes to shared mem (just one for all controllers (i.e n_rows = 1)) + + # read current registration state + self.rhc_status.registration.synch_all(retry = True, + read = True, + row_index=self.controller_index, + row_index_view=0) + registrations = self.rhc_status.registration.get_numpy_mirror() + # self.controller_index = self._assign_cntrl_index(registrations) + + + self._class_name_base = self._class_name_base+str(self.controller_index) + # self.controller_index_np = np.array(self.controller_index) + self.controller_index_np = np.array(0) # given that we use optimize_mem, the shared mem copy has shape 1 x n_cols (we can write and read at [0, :]) + + registrations[self.controller_index_np, 0] = True + self.rhc_status.registration.synch_all(retry = True, + read = False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + # now all heavy stuff that would otherwise make the registration slow + self._remote_term = SharedTWrapper(namespace=self.namespace, + basename="RemoteTermination", + is_server=False, + verbose = self._verbose, + vlevel = VLevel.V2, + with_gpu_mirror=False, + with_torch_view=False, + dtype=dtype.Bool) + self._remote_term.run() + + # other initializations + + self.cluster_stats = RhcProfiling(is_server=False, + name=self.namespace, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=True, + optimize_mem=True, + cluster_size=1 # we just need the row corresponding to this controller + ) # profiling data + self.cluster_stats.run() + self.cluster_stats.synch_info() + + 
self._create_jnt_maps() + self.init_rhc_task_cmds() # initializes rhc interface to external commands (defined by child class) + self._consinstency_checks() # sanity checks + + if self._homer is None: + self._init_robot_homer() # call this in case it wasn't called by child + + self._robot_mass = self._get_robot_mass() # uses child class implemented method + self._contact_f_scale = self._get_robot_mass() * 9.81 + + # writing some static info about this controller + # self.rhc_status.rhc_static_info.synch_all(retry = True, + # read = True, + # row_index=self.controller_index, + # col_index=0) # first read current static info from other controllers (not necessary if optimize_mem==True) + self.rhc_status.rhc_static_info.set(data=np.array(self._dt), + data_type="dts", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self._t_horizon), + data_type="horizons", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self._n_nodes), + data_type="nnodes", + rhc_idxs=self.controller_index_np, + gpu=False) + # writing some static rhc info which is only available after problem init + self.rhc_status.rhc_static_info.set(data=np.array(len(self._get_contacts())), + data_type="ncontacts", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self.robot_mass()), + data_type="robot_mass", + rhc_idxs=self.controller_index_np, + gpu=False) + self.rhc_status.rhc_static_info.set(data=np.array(self._pred_node_idx), + data_type="pred_node_idx", + rhc_idxs=self.controller_index_np, + gpu=False) + + self.rhc_status.rhc_static_info.synch_retry(row_index=self.controller_index, + col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.rhc_status.rhc_static_info.n_cols, + read=False) + + # we set homings also for joints which are not in the rhc homing map + # since this is usually required on server side + + homing_full = 
self._homer_env.get_homing().reshape(1, + self.robot_cmds.n_jnts()) + + null_action = np.zeros((1, self.robot_cmds.n_jnts()), + dtype=self._dtype) + + self.robot_cmds.jnts_state.set(data=homing_full, data_type="q", + robot_idxs=self.controller_index_np, + no_remap=True) + self.robot_cmds.jnts_state.set(data=null_action, data_type="v", + robot_idxs=self.controller_index_np, + no_remap=True) + self.robot_cmds.jnts_state.set(data=null_action, data_type="eff", + robot_idxs=self.controller_index_np, + no_remap=True) + + # write all joints (including homing for env-only ones) + self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) # only write data corresponding to this controller + + self.reset() # reset controller + self._n_resets=0 + + # for last we create the trigger client + self._remote_triggerer = RemoteTriggererClnt(namespace=self.namespace, + verbose=self._verbose, + vlevel=VLevel.V2) # remote triggering + self._remote_triggerer.run() + + if self._debug: + # internal solution is published on shared mem + # we assume the user has made available the cost + # and constraint data at this point (e.g. 
through + # the solution of a bootstrap) + cost_data = self._get_cost_info() + constr_data = self._get_constr_info() + config = RhcInternal.Config(is_server=True, + enable_q= True, + enable_v=True, + enable_a=True, + enable_a_dot=False, + enable_f=True, + enable_f_dot=False, + enable_eff=False, + cost_names=cost_data[0], + cost_dims=cost_data[1], + constr_names=constr_data[0], + constr_dims=constr_data[1], + ) + self.rhc_internal = RhcInternal(config=config, + namespace=self.namespace, + rhc_index = self.controller_index, + n_contacts=self.n_contacts, + n_jnts=self.n_dofs, + jnt_names=self._controller_side_jnt_names, + n_nodes=self._n_nodes, + verbose = self._verbose, + vlevel=VLevel.V2, + force_reconnection=True, + safe=True) + self.rhc_internal.run() + + self._class_name = self._class_name + f"-{self.controller_index}" + + Journal.log(self._class_name, + "_register_to_cluster", + f"controller registered", + LogType.STAT, + throw_when_excep = True) + + # we can now release everything so that other controllers can register + self.rhc_status.controllers_counter.data_sem_release() + self.rhc_status.registration.data_sem_release() + + self._registered = True + + def _unregister_from_cluster(self): + + if self._received_trigger: + # received interrupt during solution --> + # send ack signal to server anyway + self._remote_triggerer.ack() + if self._registered: + # acquire semaphores since we have to perform operations + # on the whole memory views + self.rhc_status.registration.data_sem_acquire() + self.rhc_status.controllers_counter.data_sem_acquire() + self.rhc_status.registration.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self._deactivate() + # decrementing controllers counter + self.rhc_status.controllers_counter.synch_all(retry = True, + read = True) + controllers_counter = self.rhc_status.controllers_counter.get_numpy_mirror() + controllers_counter -= 1 + self.rhc_status.controllers_counter.synch_all(retry = True, + 
read = False) + Journal.log(self._class_name, + "_unregister_from_cluster", + "Done", + LogType.STAT, + throw_when_excep = True) + # we can now release everything + self.rhc_status.registration.data_sem_release() + self.rhc_status.controllers_counter.data_sem_release() + self._registered = False + + def _get_quat_remap(self): + # to be overridden by child class if necessary + return [0, 1, 2, 3] + + def _consinstency_checks(self): + + # check controller dt + server_side_cluster_dt = self.cluster_stats.get_info(info_name="cluster_dt") + if not (abs(server_side_cluster_dt - self._dt) < 1e-4): + exception = f"Trying to initialize a controller with control dt {self._dt}, which" + \ + f"does not match the cluster control dt {server_side_cluster_dt}" + Journal.log(self._class_name, + "_consinstency_checks", + exception, + LogType.EXCEP, + throw_when_excep = True) + # check contact names + + server_side_contact_names = set(self.robot_state.contact_names()) + control_side_contact_names = set(self._get_contacts()) + + if (not server_side_contact_names == control_side_contact_names) and self._verbose: + warn = f"Controller-side contact names do not match server-side names!" + \ + f"\nServer: {self.robot_state.contact_names()}\n Controller: {self._get_contacts()}" + Journal.log(self._class_name, + "_consinstency_checks", + warn, + LogType.WARN, + throw_when_excep = True) + if not len(self.robot_state.contact_names()) == len(self._get_contacts()): + # at least, we need the n of contacts to match! + exception = f"Controller-side n contacts {self._get_contacts()} do not match " + \ + f"server-side n contacts {len(self.robot_state.contact_names())}!" 
+ Journal.log(self._class_name, + "_consinstency_checks", + exception, + LogType.EXCEP, + throw_when_excep = True) + + def _init(self): + + stat = f"Trying to initialize RHC controller " + \ + f"with dt: {self._dt} s, t_horizon: {self._t_horizon} s, n_intervals: {self._n_intervals}" + Journal.log(self._class_name, + "_init", + stat, + LogType.STAT, + throw_when_excep = True) + + self._init_states() # initializes shared mem. states + + self._init_problem() # we call the child's initialization method for the actual problem + self._post_problem_init() + + self._register_to_cluster() # registers the controller to the cluster + + Journal.log(self._class_name, + "_init", + f"RHC controller initialized with cluster index {self.controller_index} on process {os.getpid()}", + LogType.STAT, + throw_when_excep = True) + + def _deactivate(self): + # signal controller deactivation over shared mem + self.rhc_status.activation_state.write_retry(False, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + # also set cmds to homing for safety + # self.reset_rhc_data() + + def _on_failure(self): + + self.rhc_status.fails.write_retry(True, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self._deactivate() + self._n_fails += 1 + self.rhc_status.controllers_fail_counter.write_retry(self._n_fails, + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + def _init_robot_homer(self): + self._homer = RobotHomer(srdf_path=self.srdf_path, + jnt_names=self._controller_side_jnt_names, + verbose=self._verbose) + + self._homer_env = RobotHomer(srdf_path=self.srdf_path, + jnt_names=self.robot_state.jnt_names(), + verbose=self._verbose) + + def _update_profiling_data(self): + + # updated debug data on shared memory + # with the latest info available + self.cluster_stats.solve_loop_dt.write_retry(self._profiling_data_dict["full_solve_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + 
self.cluster_stats.rti_sol_time.write_retry(self._profiling_data_dict["rti_solve_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.cluster_stats.prb_update_dt.write_retry(self._profiling_data_dict["problem_update_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.cluster_stats.phase_shift_dt.write_retry(self._profiling_data_dict["phases_shift_dt"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.cluster_stats.task_ref_update_dt.write_retry(self._profiling_data_dict["task_ref_update"], + row_index=self.controller_index, + col_index=0, + row_index_view=0) + + def _write_cmds_from_sol(self): + + # gets data from the solution and updates the view on the shared data + + # cmds for robot + # jnts + self.robot_cmds.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q", robot_idxs=self.controller_index_np) + self.robot_cmds.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="v", robot_idxs=self.controller_index_np) + self.robot_cmds.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a", robot_idxs=self.controller_index_np) + self.robot_cmds.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np) + # root + self.robot_cmds.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="q_full", robot_idxs=self.controller_index_np) + self.robot_cmds.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._rhc_cmds_node_idx), data_type="twist", robot_idxs=self.controller_index_np) + self.robot_cmds.root_state.set(data=self._get_root_a_from_sol(node_idx=self._rhc_cmds_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np) + self.robot_cmds.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._rhc_cmds_node_idx-1), 
data_type="gn", robot_idxs=self.controller_index_np) + f_contact = self._get_f_from_sol() + if f_contact is not None: + contact_names = self.robot_state.contact_names() + node_idx_f_estimate=self._rhc_cmds_node_idx-1 # we always write the force to reach the desired state (prev node) + rhc_q_estimate=self._get_root_full_q_from_sol(node_idx=node_idx_f_estimate)[:, 3:7] + for i in range(len(contact_names)): + contact = contact_names[i] + contact_idx = i*3 + contact_force_rhc_world=f_contact[contact_idx:(contact_idx+3), node_idx_f_estimate:node_idx_f_estimate+1].T + world2base_frame(v_w=contact_force_rhc_world, + q_b=rhc_q_estimate, + v_out=self._contact_force_base_loc_aux, + is_q_wijk=False # horizon q is ijkw + ) + self.robot_cmds.contact_wrenches.set(data=self._contact_force_base_loc_aux, + data_type="f", + robot_idxs=self.controller_index_np, + contact_name=contact) + + # prediction data from MPC horizon + self.robot_pred.jnts_state.set(data=self._get_jnt_q_from_sol(node_idx=self._pred_node_idx), data_type="q", robot_idxs=self.controller_index_np) + self.robot_pred.jnts_state.set(data=self._get_jnt_v_from_sol(node_idx=self._pred_node_idx), data_type="v", robot_idxs=self.controller_index_np) + self.robot_pred.jnts_state.set(data=self._get_jnt_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a", robot_idxs=self.controller_index_np) + self.robot_pred.jnts_state.set(data=self._get_jnt_eff_from_sol(node_idx=self._pred_node_idx-1), data_type="eff", robot_idxs=self.controller_index_np) + self.robot_pred.root_state.set(data=self._get_root_full_q_from_sol(node_idx=self._pred_node_idx), data_type="q_full", robot_idxs=self.controller_index_np) + self.robot_pred.root_state.set(data=self._get_root_twist_from_sol(node_idx=self._pred_node_idx), data_type="twist", robot_idxs=self.controller_index_np) + self.robot_pred.root_state.set(data=self._get_root_a_from_sol(node_idx=self._pred_node_idx-1), data_type="a_full", robot_idxs=self.controller_index_np) + 
self.robot_pred.root_state.set(data=self._get_norm_grav_vector_from_sol(node_idx=self._pred_node_idx-1), data_type="gn", robot_idxs=self.controller_index_np) + + # write robot commands + self.robot_cmds.jnts_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) # jnt state + self.robot_cmds.root_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.root_state.n_cols, + read=False) # root state, in case it was written + self.robot_cmds.contact_wrenches.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.contact_wrenches.n_cols, + read=False) # contact state + + # write robot pred + self.robot_pred.jnts_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) + self.robot_pred.root_state.synch_retry(row_index=self.controller_index, col_index=0, + row_index_view=0, + n_rows=1, n_cols=self.robot_cmds.root_state.n_cols, + read=False) + + # we also fill other data (cost, constr. violation, etc..) 
+ self.rhc_status.rhc_cost.write_retry(self._get_rhc_cost(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_constr_viol.write_retry(self._get_rhc_constr_viol(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_n_iter.write_retry(self._get_rhc_niter_to_sol(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_nodes_cost.write_retry(data=self._get_rhc_nodes_cost(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_nodes_constr_viol.write_retry(data=self._get_rhc_nodes_constr_viol(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) + self.rhc_status.rhc_fail_idx.write_retry(self._get_failure_index(), + row_index=self.controller_index, + col_index=0, + row_index_view=0) # write idx on shared mem + + def _compute_pred_delta(self): + + # measurements + q_full_root_meas = self.robot_state.root_state.get(data_type="q_full", robot_idxs=self.controller_index_np) + twist_root_meas = self.robot_state.root_state.get(data_type="twist", robot_idxs=self.controller_index_np) + a_root_meas = self.robot_state.root_state.get(data_type="a_full", robot_idxs=self.controller_index_np) + g_vec_root_meas = self.robot_state.root_state.get(data_type="gn", robot_idxs=self.controller_index_np) + + q_jnts_meas = self.robot_state.jnts_state.get(data_type="q", robot_idxs=self.controller_index_np) + v_jnts_meas = self.robot_state.jnts_state.get(data_type="v", robot_idxs=self.controller_index_np) + a_jnts_meas = self.robot_state.jnts_state.get(data_type="a", robot_idxs=self.controller_index_np) + eff_jnts_meas = self.robot_state.jnts_state.get(data_type="eff", robot_idxs=self.controller_index_np) + + # prediction from rhc + delta_root_q_full=self._get_root_full_q_from_sol(node_idx=1)-q_full_root_meas + delta_root_twist=self._get_root_twist_from_sol(node_idx=1)-twist_root_meas + 
delta_root_a=self._get_root_a_from_sol(node_idx=0)-a_root_meas + delta_g_vec=self._get_norm_grav_vector_from_sol(node_idx=0)-g_vec_root_meas + + delta_jnts_q=self._get_jnt_q_from_sol(node_idx=1)-q_jnts_meas + delta_jnts_v=self._get_jnt_v_from_sol(node_idx=1)-v_jnts_meas + delta_jnts_a=self._get_jnt_a_from_sol(node_idx=0)-a_jnts_meas + delta_jnts_eff=self._get_jnt_eff_from_sol(node_idx=0)-eff_jnts_meas + + # writing pred. errors + self.rhc_pred_delta.root_state.set(data=delta_root_q_full, data_type="q_full", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.root_state.set(data=delta_root_twist,data_type="twist", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.root_state.set(data=delta_root_a,data_type="a_full", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.root_state.set(data=delta_g_vec, data_type="gn", robot_idxs=self.controller_index_np) + + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_q,data_type="q", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_v,data_type="v", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_a,data_type="a", robot_idxs=self.controller_index_np) + self.rhc_pred_delta.jnts_state.set(data=delta_jnts_eff, data_type="eff", robot_idxs=self.controller_index_np) + + # write on shared memory + self.rhc_pred_delta.jnts_state.synch_retry(row_index=self.controller_index, + col_index=0, + n_rows=1, + row_index_view=0, + n_cols=self.robot_cmds.jnts_state.n_cols, + read=False) # jnt state + self.rhc_pred_delta.root_state.synch_retry(row_index=self.controller_index, + col_index=0, + n_rows=1, + row_index_view=0, + n_cols=self.robot_cmds.root_state.n_cols, + read=False) # root state + + def _assign_controller_side_jnt_names(self, + jnt_names: List[str]): + + self._controller_side_jnt_names = jnt_names + self._got_jnt_names_from_controllers = True + + def _check_jnt_names_compatibility(self): + + set_rhc = 
set(self._controller_side_jnt_names) + set_env = set(self._env_side_jnt_names) + + if not set_rhc == set_env: + rhc_is_missing=set_env-set_rhc + env_is_missing=set_rhc-set_env + + msg_type=LogType.WARN + message="" + if not len(rhc_is_missing)==0: # allowed + message = "\nSome env-side joint names are missing on rhc client-side!\n" + \ + "RHC-SERVER-SIDE-> \n" + \ + " ".join(self._env_side_jnt_names) + "\n" +\ + "RHC-CLIENT-SIDE -> \n" + \ + " ".join(self._controller_side_jnt_names) + "\n" \ + "\MISSING -> \n" + \ + " ".join(list(rhc_is_missing)) + "\n" + if not self._allow_less_jnts: # raise exception + msg_type=LogType.EXCEP + + if not len(env_is_missing)==0: # not allowed + message = "\nSome rhc-side joint names are missing on rhc server-side!\n" + \ + "RHC-SERVER-SIDE-> \n" + \ + " ".join(self._env_side_jnt_names) + \ + "RHC-CLIENT-SIDE -> \n" + \ + " ".join(self._controller_side_jnt_names) + "\n" \ + "\nmissing -> \n" + \ + " ".join(list(env_is_missing)) + msg_type=LogType.EXCEP + + if msg_type==LogType.WARN and not self._verbose: + return + + Journal.log(self._class_name, + "_check_jnt_names_compatibility", + message, + msg_type, + throw_when_excep = True) + + def _get_cost_info(self): + # to be overridden by child class + return None, None + + def _get_constr_info(self): + # to be overridden by child class + return None, None + + def _get_fail_idx(self): + # to be overriden by parent + return 0.0 + + def _get_failure_index(self): + fail_idx=self._get_fail_idx()/self._fail_idx_thresh + if fail_idx>1.0: + fail_idx=1.0 + return fail_idx + + def _check_rhc_failure(self): + # we use fail idx viol to detect failures + idx = self._get_failure_index() + return idx>=1.0 + + def _update_rhc_internal(self): + # data which is not enabled in the config is not actually + # written so overhead is minimal for non-enabled data + self.rhc_internal.write_q(data= self._get_q_from_sol(), + retry=True) + self.rhc_internal.write_v(data= self._get_v_from_sol(), + retry=True) + 
self.rhc_internal.write_a(data= self._get_a_from_sol(), + retry=True) + self.rhc_internal.write_a_dot(data= self._get_a_dot_from_sol(), + retry=True) + self.rhc_internal.write_f(data= self._get_f_from_sol(), + retry=True) + self.rhc_internal.write_f_dot(data= self._get_f_dot_from_sol(), + retry=True) + self.rhc_internal.write_eff(data= self._get_eff_from_sol(), + retry=True) + for cost_idx in range(self.rhc_internal.config.n_costs): + # iterate over all costs and update all values + cost_name = self.rhc_internal.config.cost_names[cost_idx] + self.rhc_internal.write_cost(data= self._get_cost_from_sol(cost_name = cost_name), + cost_name = cost_name, + retry=True) + for constr_idx in range(self.rhc_internal.config.n_constr): + # iterate over all constraints and update all values + constr_name = self.rhc_internal.config.constr_names[constr_idx] + self.rhc_internal.write_constr(data= self._get_constr_from_sol(constr_name=constr_name), + constr_name = constr_name, + retry=True) + + def _get_contacts(self): + contact_names = self._get_contact_names() + self._got_contact_names = True + return contact_names + + def _get_q_from_sol(self): + # to be overridden by child class + return None + + def _get_v_from_sol(self): + # to be overridden by child class + return None + + def _get_a_from_sol(self): + # to be overridden by child class + return None + + def _get_a_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_f_from_sol(self): + # to be overridden by child class + return None + + def _get_f_dot_from_sol(self): + # to be overridden by child class + return None + + def _get_eff_from_sol(self): + # to be overridden by child class + return None + + def _get_cost_from_sol(self, + cost_name: str): + # to be overridden by child class + return None + + def _get_constr_from_sol(self, + constr_name: str): + # to be overridden by child class + return None + + @abstractmethod + def _reset(self): + pass + + @abstractmethod + def 
_init_rhc_task_cmds(self): + pass + + @abstractmethod + def _get_robot_jnt_names(self): + pass + + @abstractmethod + def _get_contact_names(self): + pass + + @abstractmethod + def _get_jnt_q_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_jnt_v_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_jnt_a_from_sol(self, node_idx=0) -> np.ndarray: + pass + + @abstractmethod + def _get_jnt_eff_from_sol(self, node_idx=0) -> np.ndarray: + pass + + @abstractmethod + def _get_root_full_q_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_full_q_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_root_twist_from_sol(self, node_idx=1) -> np.ndarray: + pass + + @abstractmethod + def _get_root_a_from_sol(self, node_idx=0) -> np.ndarray: + pass + + def _get_norm_grav_vector_from_sol(self, node_idx=1) -> np.ndarray: + rhc_q=self._get_root_full_q_from_sol(node_idx=node_idx)[:, 3:7] + world2base_frame(v_w=self._norm_grav_vector_w,q_b=rhc_q,v_out=self._norm_grav_vector_base_loc, + is_q_wijk=False) + return self._norm_grav_vector_base_loc + + def _get_rhc_cost(self): + # to be overridden + return np.nan + + def _get_rhc_constr_viol(self): + # to be overridden + return np.nan + + def _get_rhc_nodes_cost(self): + # to be overridden + return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype) + + def _get_rhc_nodes_constr_viol(self): + # to be overridden + return np.zeros((1,self.rhc_status.n_nodes), dtype=self._dtype) + + def _get_rhc_niter_to_sol(self) -> np.ndarray: + # to be overridden + return np.nan + + @abstractmethod + def _update_open_loop(self): + # updates rhc controller + # using the internal state + pass + + @abstractmethod + def _update_closed_loop(self): + # uses meas. 
from robot + pass + + @abstractmethod + def _solve(self) -> bool: + pass + + @abstractmethod + def _bootstrap(self) -> bool: + pass + + @abstractmethod + def _get_ndofs(self): + pass + + abstractmethod + def _get_robot_mass(self): + pass + + @abstractmethod + def _init_problem(self): + # initialized horizon's TO problem + pass + + @abstractmethod + def _post_problem_init(self): + pass diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/sac.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/sac.py new file mode 100644 index 0000000000000000000000000000000000000000..fd40a13287ccf6ccda694c218d114ede8d4b9d4b --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/sac.py @@ -0,0 +1,680 @@ +import torch +import torch.nn as nn +from torch.distributions.normal import Normal +import math + +from aug_mpc.utils.nn.normalization_utils import RunningNormalizer +from aug_mpc.utils.nn.layer_utils import llayer_init + +from typing import List + +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal +from EigenIPC.PyEigenIPC import VLevel + +class SACAgent(nn.Module): + def __init__(self, + obs_dim: int, + actions_dim: int, + obs_ub: List[float] = None, + obs_lb: List[float] = None, + actions_ub: List[float] = None, + actions_lb: List[float] = None, + rescale_obs: bool = False, + norm_obs: bool = True, + use_action_rescale_for_critic: bool = True, + device:str="cuda", + dtype=torch.float32, + is_eval:bool=False, + load_qf:bool=False, + epsilon:float=1e-8, + debug:bool=False, + compression_ratio:float=-1.0, # > 0; if [0, 1] compression, >1 "expansion" + layer_width_actor:int=256, + n_hidden_layers_actor:int=2, + layer_width_critic:int=512, + n_hidden_layers_critic:int=4, + torch_compile: bool = False, + add_weight_norm: bool = False, + add_layer_norm: bool = False, + add_batch_norm: bool = False): + + 
super().__init__() + + self._use_torch_compile=torch_compile + + self._layer_width_actor=layer_width_actor + self._layer_width_critic=layer_width_critic + self._n_hidden_layers_actor=n_hidden_layers_actor + self._n_hidden_layers_critic=n_hidden_layers_critic + + self._obs_dim=obs_dim + self._actions_dim=actions_dim + self._actions_ub=actions_ub + self._actions_lb=actions_lb + + self._add_weight_norm=add_weight_norm + self._add_layer_norm=add_layer_norm + self._add_batch_norm=add_batch_norm + + self._is_eval=is_eval + self._load_qf=load_qf + + self._epsilon=epsilon + + if compression_ratio > 0.0: + self._layer_width_actor=int(compression_ratio*obs_dim) + self._layer_width_critic=int(compression_ratio*(obs_dim+actions_dim)) + + self._normalize_obs = norm_obs + self._rescale_obs=rescale_obs + if self._rescale_obs and self._normalize_obs: + Journal.log(self.__class__.__name__, + "__init__", + f"Both running normalization and obs rescaling is enabled. Was this intentional?", + LogType.WARN, + throw_when_excep = True) + + self._use_action_rescale_for_critic=use_action_rescale_for_critic + + self._rescaling_epsi=1e-9 + + self._debug = debug + + self._torch_device = device + self._torch_dtype = dtype + + # obs scale and bias + if obs_ub is None: + obs_ub = [1] * obs_dim + if obs_lb is None: + obs_lb = [-1] * obs_dim + if (len(obs_ub) != obs_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Observations ub list length should be equal to {obs_dim}, but got {len(obs_ub)}", + LogType.EXCEP, + throw_when_excep = True) + if (len(obs_lb) != obs_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Observations lb list length should be equal to {obs_dim}, but got {len(obs_lb)}", + LogType.EXCEP, + throw_when_excep = True) + + self._obs_ub = torch.tensor(obs_ub, dtype=self._torch_dtype, + device=self._torch_device) + self._obs_lb = torch.tensor(obs_lb, dtype=self._torch_dtype, + device=self._torch_device) + obs_scale = torch.full((obs_dim, ), + fill_value=0.0, 
+ dtype=self._torch_dtype, + device=self._torch_device) + obs_scale[:] = (self._obs_ub-self._obs_lb)/2.0 + self.register_buffer( + "obs_scale", obs_scale + ) + obs_bias = torch.full((obs_dim, ), + fill_value=0.0, + dtype=self._torch_dtype, + device=self._torch_device) + obs_bias[:] = (self._obs_ub+self._obs_lb)/2.0 + self.register_buffer( + "obs_bias", obs_bias) + + self._build_nets() + + self._init_obs_norm() + + msg=f"Created SAC agent with actor [{self._layer_width_actor}, {self._n_hidden_layers_actor}]\ + and critic [{self._layer_width_critic}, {self._n_hidden_layers_critic}] sizes.\ + \n Runningobs normalizer: {type(self.obs_running_norm)} \ + \n Batch normalization: {self._add_batch_norm} \ + \n Layer normalization: {self._add_layer_norm} \ + \n Weight normalization: {self._add_weight_norm} \ + \n Critic input actions are descaled: {self._use_action_rescale_for_critic}" + Journal.log(self.__class__.__name__, + "__init__", + msg, + LogType.INFO) + + def _init_obs_norm(self): + + self.obs_running_norm=None + if self._normalize_obs: + self.obs_running_norm = RunningNormalizer((self._obs_dim,), + epsilon=self._epsilon, + device=self._torch_device, dtype=self._torch_dtype, + freeze_stats=True, # always start with freezed stats + debug=self._debug) + self.obs_running_norm.type(self._torch_dtype) # ensuring correct dtype for whole module + + def _build_nets(self): + + if self._add_weight_norm: + Journal.log(self.__class__.__name__, + "__init__", + f"Will use weight normalization reparametrization\n", + LogType.INFO) + + self.actor=None + self.qf1=None + self.qf2=None + self.qf1_target=None + self.qf2_target=None + + self.actor = Actor(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + actions_ub=self._actions_ub, + actions_lb=self._actions_lb, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_actor, + n_hidden_layers=self._n_hidden_layers_actor, + add_weight_norm=self._add_weight_norm, + 
add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm, + ) + + if (not self._is_eval) or self._load_qf: # just needed for training or during eval + # for debug, if enabled + self.qf1 = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + self.qf1_target = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + + self.qf2 = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + self.qf2_target = CriticQ(obs_dim=self._obs_dim, + actions_dim=self._actions_dim, + device=self._torch_device, + dtype=self._torch_dtype, + layer_width=self._layer_width_critic, + n_hidden_layers=self._n_hidden_layers_critic, + add_weight_norm=self._add_weight_norm, + add_layer_norm=self._add_layer_norm, + add_batch_norm=self._add_batch_norm) + + self.qf1_target.load_state_dict(self.qf1.state_dict()) + self.qf2_target.load_state_dict(self.qf2.state_dict()) + + if self._use_torch_compile: + self.obs_running_norm=torch.compile(self.obs_running_norm) + self.actor = torch.compile(self.actor) + if (not self._is_eval) or self._load_qf: + self.qf1 = torch.compile(self.qf1) + self.qf2 = torch.compile(self.qf2) + self.qf1_target = torch.compile(self.qf1_target) + 
self.qf2_target = torch.compile(self.qf2_target) + + def reset(self, reset_stats: bool = False): + # we should just reinitialize the parameters, but for easiness + # we recreate the networks + + # force deallocation of objects + import gc + del self.actor + del self.qf1 + del self.qf2 + del self.qf1_target + del self.qf2_target + gc.collect() + + self._build_nets() + + if reset_stats: # we also reinitialize obs norm + self._init_obs_norm() + + # self.obs_running_norm.reset() + + def layer_width_actor(self): + return self._layer_width_actor + + def n_hidden_layers_actor(self): + return self._n_hidden_layers_actor + + def layer_width_critic(self): + return self._layer_width_critic + + def n_hidden_layers_critic(self): + return self._n_hidden_layers_critic + + def get_impl_path(self): + import os + return os.path.abspath(__file__) + + def update_obs_bnorm(self, x): + self.obs_running_norm.unfreeze() + self.obs_running_norm.manual_stat_update(x) + self.obs_running_norm.freeze() + + def _obs_scaling_layer(self, x): + x=(x-self.obs_bias) + x=x/(self.obs_scale+self._rescaling_epsi) + return x + + def _preprocess_obs(self, x): + if self._rescale_obs: + x = self._obs_scaling_layer(x) + if self.obs_running_norm is not None: + x = self.obs_running_norm(x) + return x + + def _preprocess_actions(self, a): + if self._use_action_rescale_for_critic: + a=self.actor.remove_scaling(a=a) # rescale to be in range [-1, 1] + return a + + def get_action(self, x): + x = self._preprocess_obs(x) + return self.actor.get_action(x) + + def get_qf1_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return self.qf1(x, a) + + def get_qf2_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return self.qf2(x, a) + + def get_qf1t_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + return self.qf1_target(x, a) + + def get_qf2t_val(self, x, a): + x = self._preprocess_obs(x) + a = self._preprocess_actions(a) + 
return self.qf2_target(x, a) + + def load_state_dict(self, param_dict): + + missing, unexpected = super().load_state_dict(param_dict, + strict=False) + if not len(missing)==0: + Journal.log(self.__class__.__name__, + "load_state_dict", + f"These parameters are missing from the provided state dictionary: {str(missing)}\n", + LogType.EXCEP, + throw_when_excep = True) + if not len(unexpected)==0: + Journal.log(self.__class__.__name__, + "load_state_dict", + f"These parameters present in the provided state dictionary are not needed: {str(unexpected)}\n", + LogType.WARN) + + # sanity check on running normalizer + import re + running_norm_pattern = r"running_norm" + error=f"Found some keys in model state dictionary associated with a running normalizer. Are you running the agent with norm_obs=True?\n" + if any(re.match(running_norm_pattern, key) for key in unexpected): + Journal.log(self.__class__.__name__, + "load_state_dict", + error, + LogType.EXCEP, + throw_when_excep=True) + +class CriticQ(nn.Module): + def __init__(self, + obs_dim: int, + actions_dim: int, + device: str = "cuda", + dtype = torch.float32, + layer_width: int = 512, + n_hidden_layers: int = 4, + add_weight_norm: bool = False, + add_layer_norm: bool = False, + add_batch_norm: bool = False): + + super().__init__() + + self._lrelu_slope=0.01 + + self._torch_device = device + self._torch_dtype = dtype + + self._obs_dim = obs_dim + self._actions_dim = actions_dim + self._q_net_dim = self._obs_dim + self._actions_dim + + self._first_hidden_layer_width=self._q_net_dim # fist layer fully connected and of same dim + # as input + + init_type="kaiming_uniform" # maintains the variance of activations throughout the network + nonlinearity="leaky_relu" # suited for kaiming + + # Input layer + layers=llayer_init( + layer=nn.Linear(self._q_net_dim, self._first_hidden_layer_width), + init_type=init_type, + nonlinearity=nonlinearity, + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + 
dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Hidden layers + layers.extend( + llayer_init( + layer=nn.Linear(self._first_hidden_layer_width, layer_width), + init_type=init_type, + nonlinearity=nonlinearity, + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + for _ in range(n_hidden_layers - 1): + layers.extend( + llayer_init( + layer=nn.Linear(layer_width, layer_width), + init_type=init_type, + nonlinearity=nonlinearity, + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Output layer + layers.extend( + llayer_init( + layer=nn.Linear(layer_width, 1), + init_type="uniform", + uniform_biases=False, # contact biases + bias_const=-0.1, # negative to prevent overestimation + scale_weight=1e-2, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=False, + add_layer_norm=False, + add_batch_norm=False + ) + ) + + # Creating the full sequential network + self._q_net = nn.Sequential(*layers) + self._q_net.to(self._torch_device).type(self._torch_dtype) + + print("Critic architecture") + print(self._q_net) + + def get_n_params(self): + return sum(p.numel() for p in self.parameters()) + + def forward(self, x, a): + x = torch.cat([x, a], dim=1) + return self._q_net(x) + +class Actor(nn.Module): 
+ def __init__(self, + obs_dim: int, + actions_dim: int, + actions_ub: List[float] = None, + actions_lb: List[float] = None, + device: str = "cuda", + dtype = torch.float32, + layer_width: int = 256, + n_hidden_layers: int = 2, + add_weight_norm: bool = False, + add_layer_norm: bool = False, + add_batch_norm: bool = False): + + super().__init__() + + self._lrelu_slope=0.01 + + self._torch_device = device + self._torch_dtype = dtype + + self._obs_dim = obs_dim + self._actions_dim = actions_dim + + self._first_hidden_layer_width=self._obs_dim # fist layer fully connected and of same dim + + # Action scale and bias + if actions_ub is None: + actions_ub = [1] * actions_dim + if actions_lb is None: + actions_lb = [-1] * actions_dim + if (len(actions_ub) != actions_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Actions ub list length should be equal to {actions_dim}, but got {len(actions_ub)}", + LogType.EXCEP, + throw_when_excep = True) + if (len(actions_lb) != actions_dim): + Journal.log(self.__class__.__name__, + "__init__", + f"Actions lb list length should be equal to {actions_dim}, but got {len(actions_lb)}", + LogType.EXCEP, + throw_when_excep = True) + + self._actions_ub = torch.tensor(actions_ub, dtype=self._torch_dtype, + device=self._torch_device) + self._actions_lb = torch.tensor(actions_lb, dtype=self._torch_dtype, + device=self._torch_device) + action_scale = torch.full((actions_dim, ), + fill_value=0.0, + dtype=self._torch_dtype, + device=self._torch_device) + action_scale[:] = (self._actions_ub-self._actions_lb)/2.0 + self.register_buffer( + "action_scale", action_scale + ) + actions_bias = torch.full((actions_dim, ), + fill_value=0.0, + dtype=self._torch_dtype, + device=self._torch_device) + actions_bias[:] = (self._actions_ub+self._actions_lb)/2.0 + self.register_buffer( + "action_bias", actions_bias) + + # Network configuration + self.LOG_STD_MAX = 2 + self.LOG_STD_MIN = -5 + + # Input layer followed by hidden layers + 
layers=llayer_init(nn.Linear(self._obs_dim, self._first_hidden_layer_width), + init_type="kaiming_uniform", + nonlinearity="leaky_relu", + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0 + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Hidden layers + layers.extend( + llayer_init(nn.Linear(self._first_hidden_layer_width, layer_width), + init_type="kaiming_uniform", + nonlinearity="leaky_relu", + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + for _ in range(n_hidden_layers - 1): + layers.extend( + llayer_init(nn.Linear(layer_width, layer_width), + init_type="kaiming_uniform", + nonlinearity="leaky_relu", + a_leaky_relu=self._lrelu_slope, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm, + uniform_biases=False, # constant bias init + bias_const=0.0) + ) + layers.extend([nn.LeakyReLU(negative_slope=self._lrelu_slope)]) + + # Sequential layers for the feature extractor + self._fc12 = nn.Sequential(*layers) + + # Mean and log_std layers + out_fc_mean=llayer_init(nn.Linear(layer_width, self._actions_dim), + init_type="uniform", + uniform_biases=False, # constant bias init + bias_const=0.0, + scale_weight=1e-3, # scaling (output layer) + scale_bias=1.0, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=False, + add_layer_norm=False, + add_batch_norm=False + ) + self.fc_mean = nn.Sequential(*out_fc_mean) + out_fc_logstd= 
llayer_init(nn.Linear(layer_width, self._actions_dim), + init_type="uniform", + uniform_biases=False, + bias_const=math.log(0.5), + scale_weight=1e-3, # scaling (output layer) + scale_bias=1.0, + device=self._torch_device, + dtype=self._torch_dtype, + add_weight_norm=False, + add_layer_norm=False, + add_batch_norm=False, + ) + self.fc_logstd = nn.Sequential(*out_fc_logstd) + + # Move all components to the specified device and dtype + self._fc12.to(device=self._torch_device, dtype=self._torch_dtype) + self.fc_mean.to(device=self._torch_device, dtype=self._torch_dtype) + self.fc_logstd.to(device=self._torch_device, dtype=self._torch_dtype) + + print("Actor architecture") + print(self._fc12) + print(self.fc_mean) + print(self.fc_logstd) + + def get_n_params(self): + return sum(p.numel() for p in self.parameters()) + + def forward(self, x): + x = self._fc12(x) + mean = self.fc_mean(x) + log_std = self.fc_logstd(x) + log_std = torch.tanh(log_std) + log_std = self.LOG_STD_MIN + 0.5 * (self.LOG_STD_MAX - self.LOG_STD_MIN) * (log_std + 1) # From SpinUp / Denis Yarats + return mean, log_std + + def get_action(self, x): + mean, log_std = self(x) + std = log_std.exp() + normal = torch.distributions.Normal(mean, std) + x_t = normal.rsample() # Reparameterization trick (for SAC we neex action + # to be differentible since we use Q nets. Using sample() would break the + # comp. 
graph and not allow gradients to flow) + y_t = torch.tanh(x_t) + action = y_t * self.action_scale + self.action_bias + log_prob_vec = normal.log_prob(x_t) # per-dimension log prob before tanh + log_prob_vec = log_prob_vec - torch.log(self.action_scale * (1 - y_t.pow(2)) + 1e-6) # tanh Jacobian + scaling + log_prob_sum = log_prob_vec.sum(1, keepdim=True) + mean = torch.tanh(mean) * self.action_scale + self.action_bias + return action, (log_prob_sum, log_prob_vec), mean + + def remove_scaling(self, a): + return (a - self.action_bias)/self.action_scale + +if __name__ == "__main__": + device = "cpu" # or "cpu" + import time + obs_dim = 273 + agent = SACAgent( + obs_dim=obs_dim, + actions_dim=10, + actions_lb=None, + actions_ub=None, + obs_lb=None, + obs_ub=None, + rescale_obs=False, + norm_obs=True, + use_action_rescale_for_critic=True, + is_eval=True, + compression_ratio=0.6, + layer_width_actor=128, + layer_width_critic=256, + n_hidden_layers_actor=3, + n_hidden_layers_critic=3, + device=device, + dtype=torch.float32, + add_weight_norm=True, + add_layer_norm=False, + add_batch_norm=False + ) + + n_samples = 10000 + random_obs = torch.rand((1, obs_dim), dtype=torch.float32, device=device) + + if device == "cuda": + torch.cuda.synchronize() + start = time.time() + + for i in range(n_samples): + actions, _, mean = agent.get_action(x=random_obs) + actions = actions.detach() + actions[:, :] = mean.detach() + + if device == "cuda": + torch.cuda.synchronize() + end = time.time() + + avrg_eval_time = (end - start) / n_samples + print(f"Average policy evaluation time on {device}: {avrg_eval_time:.6f} s") diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/sactor_critic_algo.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/sactor_critic_algo.py new file mode 100644 index 0000000000000000000000000000000000000000..e7106d55df943adc2679fde3692336130f187398 --- /dev/null +++ 
b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/sactor_critic_algo.py @@ -0,0 +1,2391 @@ +from aug_mpc.agents.sactor_critic.sac import SACAgent +from aug_mpc.agents.dummies.dummy import DummyAgent + +from aug_mpc.utils.shared_data.algo_infos import SharedRLAlgorithmInfo, QfVal, QfTrgt +from aug_mpc.utils.shared_data.training_env import SubReturns, TotReturns +from aug_mpc.utils.nn.rnd import RNDFull + +import torch +import torch.optim as optim +import torch.nn as nn + +import random +import math +from typing import Dict + +import os +import shutil + +import time + +import wandb +import h5py +import numpy as np + +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal +from EigenIPC.PyEigenIPC import VLevel + +from abc import ABC, abstractmethod + +class SActorCriticAlgoBase(ABC): + + # base class for actor-critic RL algorithms + + def __init__(self, + env, + debug = False, + remote_db = False, + seed: int = 1): + + self._env = env + self._seed = seed + + self._eval = False + self._det_eval = True + + self._full_env_db=False + + self._agent = None + + self._debug = debug + self._remote_db = remote_db + + self._writer = None + + self._run_name = None + self._drop_dir = None + self._dbinfo_drop_fname = None + self._model_path = None + + self._policy_update_db_data_dict = {} + self._custom_env_data_db_dict = {} + self._rnd_db_data_dict = {} + self._hyperparameters = {} + self._wandb_d={} + + # get params from env + self._get_params_from_env() + + self._torch_device = torch.device("cpu") # defaults to cpu + + self._setup_done = False + + self._verbose = False + + self._is_done = False + + self._shared_algo_data = None + + self._this_child_path = None + self._this_basepath = os.path.abspath(__file__) + + def __del__(self): + + self.done() + + def _get_params_from_env(self): + + self._env_name = self._env.name() + self._episodic_reward_metrics = self._env.ep_rewards_metrics() + self._use_gpu = 
self._env.using_gpu() + self._dtype = self._env.dtype() + self._env_opts=self._env.env_opts() + self._num_envs = self._env.n_envs() + self._obs_dim = self._env.obs_dim() + self._actions_dim = self._env.actions_dim() + self._episode_timeout_lb, self._episode_timeout_ub = self._env.episode_timeout_bounds() + self._task_rand_timeout_lb, self._task_rand_timeout_ub = self._env.task_rand_timeout_bounds() + self._env_n_action_reps = self._env.n_action_reps() + self._is_continuous_actions_bool=self._env.is_action_continuous() + self._is_continuous_actions=torch.where(self._is_continuous_actions_bool)[0] + self._is_discrete_actions_bool=self._env.is_action_discrete() + self._is_discrete_actions=torch.where(self._is_discrete_actions_bool)[0] + + # default to all debug envs + self._db_env_selector=torch.tensor(list(range(0, self._num_envs)), dtype=torch.int) + self._db_env_selector_bool=torch.full((self._num_envs, ), + dtype=torch.bool, device="cpu", + fill_value=True) + # default to no expl envs + self._expl_env_selector=None + self._expl_env_selector_bool=torch.full((self._num_envs, ), dtype=torch.bool, device="cpu", + fill_value=False) + self._pert_counter=0.0 + # demo envs + self._demo_stop_thresh=None # performance metrics above which demo envs are deactivated + # (can be overridden thorugh the provided options) + self._demo_env_selector=self._env.demo_env_idxs() + self._demo_env_selector_bool=self._env.demo_env_idxs(get_bool=True) + + def learn(self): + + if not self._setup_done: + self._should_have_called_setup() + + self._start_time = time.perf_counter() + + # experience collection + with torch.no_grad(): # don't need grad computation here + for i in range(self._collection_freq): + if not self._collect_transition(): + return False + self._vec_transition_counter+=1 + + self._collection_t = time.perf_counter() + + if self._vec_transition_counter % self._bnorm_vecfreq == 0: + with torch.no_grad(): # don't need grad computation here + 
self._update_batch_norm(bsize=self._bnorm_bsize) + + # policy update + self._policy_update_t_start = time.perf_counter() + for i in range(self._update_freq): + self._update_policy() + self._update_counter+=1 + + self._policy_update_t = time.perf_counter() + + with torch.no_grad(): + if self._validate and (self._vec_transition_counter % self._validation_db_vecstep_freq == 0): + # validation + self._update_validation_losses() + self._validation_t = time.perf_counter() + self._post_step() + + if self._use_period_resets: + # periodic policy resets + if not self._adaptive_resets: + self._periodic_resets_on=(self._vec_transition_counter >= self._reset_vecstep_start) and \ + (self._vec_transition_counter < self._reset_vecstep_end) + + if self._periodic_resets_on and \ + (self._vec_transition_counter-self._reset_vecstep_start) % self._periodic_resets_vecfreq == 0: + self._reset_agent() + else: # trigger reset based on overfit metric + if self._overfit_idx > self._overfit_idx_thresh: + self._reset_agent() + + return not self.is_done() + + def eval(self): + + if not self._setup_done: + self._should_have_called_setup() + + self._start_time = time.perf_counter() + + if not self._collect_eval_transition(): + return False + self._vec_transition_counter+=1 + + self._collection_t = time.perf_counter() + + self._post_step() + + return not self.is_done() + + @abstractmethod + def _collect_transition(self)->bool: + pass + + @abstractmethod + def _collect_eval_transition(self)->bool: + pass + + @abstractmethod + def _update_policy(self): + pass + + @abstractmethod + def _update_validation_losses(self): + pass + + def _update_overfit_idx(self, loss, val_loss): + overfit_now=(val_loss-loss)/loss + self._overfit_idx=self._overfit_idx_alpha*overfit_now+\ + (1-self._overfit_idx_alpha)*self._overfit_idx + + def setup(self, + run_name: str, + ns: str, + custom_args: Dict = {}, + verbose: bool = False, + drop_dir_name: str = None, + eval: bool = False, + resume: bool = False, + model_path: 
str = None, + n_eval_timesteps: int = None, + comment: str = "", + dump_checkpoints: bool = False, + norm_obs: bool = True, + rescale_obs: bool = False): + + tot_tsteps=int(100e6) + if "tot_tsteps" in custom_args: + tot_tsteps=custom_args["tot_tsteps"] + + self._verbose = verbose + + self._ns=ns # only used for shared mem stuff + + self._dump_checkpoints = dump_checkpoints + + self._init_algo_shared_data(static_params=self._hyperparameters) # can only handle dicts with + # numeric values + + if "full_env_db" in custom_args: + self._full_env_db=custom_args["full_env_db"] + + self._eval = eval + self._resume=resume + if self._eval and self._resume: + Journal.log(self.__class__.__name__, + "setup", + f"Cannot set both eval and resume to true. Exiting.", + LogType.EXCEP, + throw_when_excep = True) + + self._load_qf=False + if self._eval: + if "load_qf" in custom_args: + self._load_qf=custom_args["load_qf"] + if self._resume: + self._load_qf=True # must load qf when resuming + self._eval=False + try: + self._det_eval=custom_args["det_eval"] + except: + pass + + self._override_agent_actions=False + if "override_agent_actions" in custom_args: + self._override_agent_actions=custom_args["override_agent_actions"] + + if self._override_agent_actions: # force evaluation mode + Journal.log(self.__class__.__name__, + "setup", + "will force evaluation mode since override_agent_actions was set to true", + LogType.INFO, + throw_when_excep = True) + self._eval=True + self._validate=False + self._load_qf=False + self._det_eval=False + self._resume=False + + self._run_name = run_name + from datetime import datetime + self._time_id = datetime.now().strftime('d%Y_%m_%d_h%H_m%M_s%S') + self._unique_id = self._time_id + "-" + self._run_name + + self._hyperparameters["unique_run_id"]=self._unique_id + self._hyperparameters.update(custom_args) + + self._torch_device = torch.device("cuda" if torch.cuda.is_available() and self._use_gpu else "cpu") + + try: + 
layer_width_actor=self._hyperparameters["actor_lwidth"] + layer_width_critic=self._hyperparameters["critic_lwidth"] + n_hidden_layers_actor=self._hyperparameters["actor_n_hlayers"] + n_hidden_layers_critic=self._hyperparameters["critic_n_hlayers"] + except: + layer_width_actor=256 + layer_width_critic=512 + n_hidden_layers_actor=2 + n_hidden_layers_critic=4 + pass + + use_torch_compile=False + add_weight_norm=False + add_layer_norm=False + add_batch_norm=False + compression_ratio=-1.0 + if "use_torch_compile" in self._hyperparameters and \ + self._hyperparameters["use_torch_compile"]: + use_torch_compile=True + if "add_weight_norm" in self._hyperparameters and \ + self._hyperparameters["add_weight_norm"]: + add_weight_norm=True + if "add_layer_norm" in self._hyperparameters and \ + self._hyperparameters["add_layer_norm"]: + add_layer_norm=True + if "add_batch_norm" in self._hyperparameters and \ + self._hyperparameters["add_batch_norm"]: + add_batch_norm=True + if "compression_ratio" in self._hyperparameters: + compression_ratio=self._hyperparameters["compression_ratio"] + + act_rescale_critic=False + if "act_rescale_critic" in self._hyperparameters: + act_rescale_critic=self._hyperparameters["act_rescale_critic"] + if not self._override_agent_actions: + self._agent = SACAgent(obs_dim=self._env.obs_dim(), + obs_ub=self._env.get_obs_ub().flatten().tolist(), + obs_lb=self._env.get_obs_lb().flatten().tolist(), + actions_dim=self._env.actions_dim(), + actions_ub=None, # agent will assume actions are properly normalized in [-1, 1] by the env + actions_lb=None, + rescale_obs=rescale_obs, + norm_obs=norm_obs, + use_action_rescale_for_critic=act_rescale_critic, + compression_ratio=compression_ratio, + device=self._torch_device, + dtype=self._dtype, + is_eval=self._eval, + load_qf=self._load_qf, + debug=self._debug, + layer_width_actor=layer_width_actor, + layer_width_critic=layer_width_critic, + n_hidden_layers_actor=n_hidden_layers_actor, + 
n_hidden_layers_critic=n_hidden_layers_critic, + torch_compile=use_torch_compile, + add_weight_norm=add_weight_norm, + add_layer_norm=add_layer_norm, + add_batch_norm=add_batch_norm) + else: # we use a fake agent + self._agent = DummyAgent(obs_dim=self._env.obs_dim(), + actions_dim=self._env.actions_dim(), + actions_ub=None, + actions_lb=None, + device=self._torch_device, + dtype=self._dtype, + debug=self._debug) + + # loging actual widths and layers in case they were override inside agent init + self._hyperparameters["actor_lwidth_actual"]=self._agent.layer_width_actor() + self._hyperparameters["actor_n_hlayers_actual"]=self._agent.n_hidden_layers_actor() + self._hyperparameters["critic_lwidth_actual"]=self._agent.layer_width_critic() + self._hyperparameters["critic_n_hlayers_actual"]=self._agent.n_hidden_layers_critic() + + # load model if necessary + if self._eval and (not self._override_agent_actions): # load pretrained model + if model_path is None: + msg = f"No model path provided in eval mode! Was this intentional? \ + No jnt remapping will be available and a randomly init agent will be used." + Journal.log(self.__class__.__name__, + "setup", + msg, + LogType.WARN, + throw_when_excep = True) + if n_eval_timesteps is None: + Journal.log(self.__class__.__name__, + "setup", + f"When eval is True, n_eval_timesteps should be provided!!", + LogType.EXCEP, + throw_when_excep = True) + # everything is ok + self._model_path = model_path + if self._model_path is not None: + self._load_model(self._model_path) + + # overwrite init params + self._init_params(tot_tsteps=n_eval_timesteps, + custom_args=custom_args) + else: + if self._resume: + if model_path is None: + msg = f"No model path provided in resume mode! Please provide a valid checkpoint path." 
+ Journal.log(self.__class__.__name__, + "setup", + msg, + LogType.EXCEP, + throw_when_excep = True) + self._model_path = model_path + if self._model_path is not None: + self._load_model(self._model_path) # load model from checkpoint (including q functions and running normalizers) + self._init_params(tot_tsteps=tot_tsteps, + custom_args=custom_args) + + # adding additional db info + self._hyperparameters["obs_names"]=self._env.obs_names() + self._hyperparameters["action_names"]=self._env.action_names() + self._hyperparameters["sub_reward_names"]=self._env.sub_rew_names() + self._hyperparameters["sub_trunc_names"]=self._env.sub_trunc_names() + self._hyperparameters["sub_term_names"]=self._env.sub_term_names() + + self._allow_expl_during_eval=False + if "allow_expl_during_eval" in self._hyperparameters: + self._allow_expl_during_eval=self._hyperparameters["allow_expl_during_eval"] + + # reset environment + self._env.reset() + if self._eval: + self._env.switch_random_reset(on=False) + + if self._debug and (not self._override_agent_actions): + with torch.no_grad(): + init_obs = self._env.get_obs(clone=True) + _, init_log_pi, _ = self._agent.get_action(init_obs) + init_policy_entropy = (-init_log_pi[0]).mean().item() + init_policy_entropy_per_action = init_policy_entropy / float(self._actions_dim) + Journal.log(self.__class__.__name__, + "setup", + f"Initial policy entropy per action: {init_policy_entropy_per_action:.4f})", + LogType.INFO, + throw_when_excep = True) + + # create dump directory + copy important files for debug + self._init_drop_dir(drop_dir_name) + self._hyperparameters["drop_dir"]=self._drop_dir + + # add env options to hyperparameters + self._hyperparameters.update(self._env_opts) + + if not self._eval: + self._init_agent_optimizers() + + self._init_replay_buffers() # only needed when training + if self._validate: + self._init_validation_buffers() + + if self._autotune: + self._init_alpha_autotuning() + + if self._use_rnd: + self._rnd_net = 
RNDFull(input_dim=self._rnd_indim, output_dim=self._rnd_outdim, + layer_width=self._rnd_lwidth, n_hidden_layers=self._rnd_hlayers, + device=self._torch_device, + dtype=self._dtype, + normalize=norm_obs # normalize if also used for SAC agent + ) + self._rnd_optimizer = torch.optim.Adam(self._rnd_net.rnd_predictor_net.parameters(), + lr=self._rnd_lr) + + self._rnd_input = torch.full(size=(self._batch_size, self._rnd_net.input_dim()), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._rnd_bnorm_input = torch.full(size=(self._bnorm_bsize, self._rnd_net.input_dim()), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + + self._proc_exp_bonus_all = torch.full(size=(self._batch_size, 1), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._raw_exp_bonus_all = torch.full(size=(self._batch_size, 1), + fill_value=0.0, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + # if self._autotune_rnd_scale: + # self._reward_normalizer=RunningNormalizer((1,), epsilon=1e-8, + # device=self._torch_device, dtype=self._dtype, + # freeze_stats=False, + # debug=self._debug) + + self._init_dbdata() + + if (self._debug): + if self._remote_db: + job_type = "evaluation" if self._eval else "training" + project="IBRIDO-ablations" + wandb.init( + project=project, + group=self._run_name, + name=self._unique_id, + id=self._unique_id, + job_type=job_type, + # tags=None, + notes=comment, + resume="never", # do not allow runs with the same unique id + mode="online", # "online", "offline" or "disabled" + entity=None, + sync_tensorboard=True, + config=self._hyperparameters, + monitor_gym=True, + save_code=True, + dir=self._drop_dir + ) + wandb.watch((self._agent), log="all", log_freq=1000, log_graph=False) + + if "demo_stop_thresh" in self._hyperparameters: + self._demo_stop_thresh=self._hyperparameters["demo_stop_thresh"] + + actions = 
self._env.get_actions() + self._random_uniform = torch.full_like(actions, fill_value=0.0) # used for sampling random actions (preallocated + # for efficiency) + self._random_normal = torch.full_like(self._random_uniform,fill_value=0.0) + # for efficiency) + + self._actions_override=None + if self._override_agent_actions: + from aug_mpc.utils.shared_data.training_env import Actions + self._actions_override = Actions(namespace=ns+"_override", + n_envs=self._num_envs, + action_dim=actions.shape[1], + action_names=self._env.action_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + self._actions_override.run() + + self._start_time_tot = time.perf_counter() + + self._start_time = time.perf_counter() + + self._replay_bf_full = False + self._validation_bf_full = False + self._bpos=0 + self._bpos_val=0 + + self._is_done = False + self._setup_done = True + + def is_done(self): + + return self._is_done + + def model_path(self): + + return self._model_path + + def _init_params(self, + tot_tsteps: int, + custom_args: Dict = {}): + + self._collection_freq=1 + self._update_freq=4 + + self._replay_buffer_size_vec=10*self._task_rand_timeout_ub # cover at least a number of eps + self._replay_buffer_size = self._replay_buffer_size_vec*self._num_envs + if self._replay_buffer_size_vec < 0: # in case env did not properly define _task_rand_timeout_ub + self._replay_buffer_size = int(1e6) + self._replay_buffer_size_vec = self._replay_buffer_size//self._num_envs + self._replay_buffer_size=self._replay_buffer_size_vec*self._num_envs + + self._batch_size = 16394 + + new_transitions_per_batch=self._collection_freq*self._num_envs/self._replay_buffer_size # assumes uniform sampling + self._utd_ratio=self._update_freq/(new_transitions_per_batch*self._batch_size) + + self._lr_policy = 1e-3 + self._lr_q = 5e-4 + + self._discount_factor = 0.99 + if "discount_factor" in 
custom_args: + self._discount_factor=custom_args["discount_factor"] + + self._smoothing_coeff = 0.01 + + self._policy_freq = 2 + self._trgt_net_freq = 1 + self._rnd_freq = 1 + + # exploration + + # entropy regularization (separate "discrete" and "continuous" actions) + self._entropy_metric_high = 0.5 + self._entropy_metric_low = 0.0 + + # self._entropy_disc_start = -0.05 + # self._entropy_disc_end = -0.5 + + # self._entropy_cont_start = -0.05 + # self._entropy_cont_end = -2.0 + + self._entropy_disc_start = -0.2 + self._entropy_disc_end = -0.2 + + self._entropy_cont_start = -0.8 + self._entropy_cont_end = -0.8 + + # enable/disable entropy annealing (default: enabled) + self._anneal_entropy = False + + self._trgt_avrg_entropy_per_action_disc = self._entropy_disc_start + self._trgt_avrg_entropy_per_action_cont = self._entropy_cont_start + + self._disc_idxs = self._is_discrete_actions.clone().to(torch.long) + self._cont_idxs = self._is_continuous_actions.clone().to(torch.long) + + self._target_entropy_disc = float(self._disc_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_disc) + self._target_entropy_cont = float(self._cont_idxs.numel()) * float(self._trgt_avrg_entropy_per_action_cont) + self._target_entropy = self._target_entropy_disc + self._target_entropy_cont + self._trgt_avrg_entropy_per_action = self._target_entropy / float(max(self._actions_dim, 1)) + self._hyperparameters["anneal_entropy"] = self._anneal_entropy + + self._autotune = True + self._alpha_disc = 0.2 # initial values + self._alpha_cont = 0.2 + self._alpha = 0.5*(self._alpha_disc + self._alpha_cont) + self._log_alpha_disc = math.log(self._alpha_disc) + self._log_alpha_cont = math.log(self._alpha_cont) + self._a_optimizer_disc = None + self._a_optimizer_cont = None + + # random expl ens + self._expl_envs_perc=0.0 # [0, 1] + if "expl_envs_perc" in custom_args: + self._expl_envs_perc=custom_args["expl_envs_perc"] + self._n_expl_envs = int(self._num_envs*self._expl_envs_perc) # n of random envs 
on which noisy actions will be applied + self._noise_freq_vec = 100 # substeps + self._noise_duration_vec = 5 # should be less than _noise_freq + # correct with env substepping + self._noise_freq_vec=self._noise_freq_vec//self._env_n_action_reps + self._noise_duration_vec=self._noise_duration_vec//self._env_n_action_reps + + self._continuous_act_expl_noise_std=0.3 # wrt actions scale + self._discrete_act_expl_noise_std=1.2 # setting it a bit > 1 helps in ensuring discr. actions range is explored + + # rnd + self._use_rnd=True + self._rnd_net=None + self._rnd_optimizer = None + self._rnd_lr = 1e-3 + if "use_rnd" in custom_args and (not self._eval): + self._use_rnd=custom_args["use_rnd"] + self._rnd_weight=1.0 + self._alpha_rnd=0.0 + self._novelty_scaler=None + if self._use_rnd: + from adarl.utils.NoveltyScaler import NoveltyScaler + + self._novelty_scaler=NoveltyScaler(th_device=self._torch_device, + bonus_weight=self._rnd_weight, + avg_alpha=self._alpha_rnd) + + self._rnd_lwidth=512 + self._rnd_hlayers=3 + self._rnd_outdim=16 + self._rnd_indim=self._obs_dim+self._actions_dim + + # batch normalization + self._bnorm_bsize = 4096 + self._bnorm_vecfreq_nom = 5 # wrt vec steps + # make sure _bnorm_vecfreq is a multiple of _collection_freq + self._bnorm_vecfreq = (self._bnorm_vecfreq_nom//self._collection_freq)*self._collection_freq + if self._bnorm_vecfreq == 0: + self._bnorm_vecfreq=self._collection_freq + self._reward_normalizer=None + + self._total_timesteps = int(tot_tsteps) + # self._total_timesteps = self._total_timesteps//self._env_n_action_reps # correct with n of action reps + self._total_timesteps_vec = self._total_timesteps // self._num_envs + self._total_steps = self._total_timesteps_vec//self._collection_freq + self._total_timesteps_vec = self._total_steps*self._collection_freq # correct to be a multiple of self._total_steps + self._total_timesteps = self._total_timesteps_vec*self._num_envs # actual n transitions + + # self._warmstart_timesteps = int(5e3) + 
warmstart_length_single_env=min(self._episode_timeout_lb, self._episode_timeout_ub, + self._task_rand_timeout_lb, self._task_rand_timeout_ub) + self._warmstart_timesteps=warmstart_length_single_env*self._num_envs + if self._warmstart_timesteps < self._batch_size: # ensure we collect sufficient experience before + # starting training + self._warmstart_timesteps=4*self._batch_size + self._warmstart_vectimesteps = self._warmstart_timesteps//self._num_envs + # ensuring multiple of collection_freq + self._warmstart_timesteps = self._num_envs*self._warmstart_vectimesteps # actual + + # period nets resets (for tackling the primacy bias) + self._use_period_resets=False + if "use_period_resets" in custom_args: + self._use_period_resets=custom_args["use_period_resets"] + self._adaptive_resets=True # trigger reset based on overfit metric + self._just_one_reset=False + self._periodic_resets_freq=int(4e6) + self._periodic_resets_start=int(1.5e6) + self._periodic_resets_end=int(0.8*self._total_timesteps) + + self._periodic_resets_vecfreq=self._periodic_resets_freq//self._num_envs + self._periodic_resets_vecfreq = (self._periodic_resets_vecfreq//self._collection_freq)*self._collection_freq + self._reset_vecstep_start=self._periodic_resets_start//self._num_envs + self._reset_vecstep_end=self._periodic_resets_end//self._num_envs + + if self._just_one_reset: + # we set the end as the fist reset + a fraction of the reset frequency (this way only one reset will happen) + self._reset_vecstep_end=int(self._reset_vecstep_start+0.8*self._periodic_resets_vecfreq) + + self._periodic_resets_on=False + + # debug + self._m_checkpoint_freq_nom = 1e6 # n totoal timesteps after which a checkpoint model is dumped + self._m_checkpoint_freq= self._m_checkpoint_freq_nom//self._num_envs + + # expl envs + if self._n_expl_envs>0 and ((self._num_envs-self._n_expl_envs)>0): # log data only from envs which are not altered (e.g. 
by exploration noise) + # computing expl env selector + self._expl_env_selector = torch.randperm(self._num_envs, device="cpu")[:self._n_expl_envs] + self._expl_env_selector_bool[self._expl_env_selector]=True + + # demo envs + if self._demo_env_selector_bool is None: + self._db_env_selector_bool[:]=~self._expl_env_selector_bool + else: # we log db data separately for env which are neither for demo nor for random exploration + self._demo_env_selector_bool=self._demo_env_selector_bool.cpu() + self._demo_env_selector=self._demo_env_selector.cpu() + self._db_env_selector_bool[:]=torch.logical_and(~self._expl_env_selector_bool, ~self._demo_env_selector_bool) + + self._n_expl_envs = self._expl_env_selector_bool.count_nonzero() + self._num_db_envs = self._db_env_selector_bool.count_nonzero() + + if not self._num_db_envs>0: + Journal.log(self.__class__.__name__, + "_init_params", + "No indipendent db env can be computed (check your demo and expl settings)! Will use all envs.", + LogType.EXCEP, + throw_when_excep = False) + self._num_db_envs=self._num_envs + self._db_env_selector_bool[:]=True + self._db_env_selector=torch.nonzero(self._db_env_selector_bool).flatten() + + self._transition_noise_freq=float(self._noise_duration_vec)/float(self._noise_freq_vec) + self._env_noise_freq=float(self._n_expl_envs)/float(self._num_envs) + self._noise_buff_freq=self._transition_noise_freq*self._env_noise_freq + + self._db_vecstep_frequency = 32 # log db data every n (vectorized) SUB timesteps + self._db_vecstep_frequency=round(self._db_vecstep_frequency/self._env_n_action_reps) # correcting with actions reps + # correct db vecstep frequency to ensure it's a multiple of self._collection_freq + self._db_vecstep_frequency=(self._db_vecstep_frequency//self._collection_freq)*self._collection_freq + if self._db_vecstep_frequency == 0: + self._db_vecstep_frequency=self._collection_freq + + self._env_db_checkpoints_vecfreq=150*self._db_vecstep_frequency # detailed db data from envs + + 
self._validate=True + self._validation_collection_vecfreq=50 # add vec transitions to val buffer with some vec freq + self._validation_ratio=1.0/self._validation_collection_vecfreq # [0, 1], 0.1 10% size of training buffer + self._validation_buffer_size_vec = int(self._replay_buffer_size*self._validation_ratio)//self._num_envs + self._validation_buffer_size = self._validation_buffer_size_vec*self._num_envs + self._validation_batch_size = int(self._batch_size*self._validation_ratio) + self._validation_db_vecstep_freq=self._db_vecstep_frequency + if self._eval: # no need for validation transitions during evaluation + self._validate=False + self._overfit_idx=0.0 + self._overfit_idx_alpha=0.03 # exponential MA + self._overfit_idx_thresh=2.0 + + self._n_policy_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq #TD3 delayed update + self._n_qf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq # qf updated at each vec timesteps + self._n_tqf_updates_to_be_done=(self._total_steps-self._warmstart_vectimesteps)*self._update_freq//self._trgt_net_freq + + self._exp_to_policy_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_policy_updates_to_be_done) + self._exp_to_qf_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_qf_updates_to_be_done) + self._exp_to_qft_grad_ratio=float(self._total_timesteps-self._warmstart_timesteps)/float(self._n_tqf_updates_to_be_done) + + self._db_data_size = round(self._total_timesteps_vec/self._db_vecstep_frequency)+self._db_vecstep_frequency + + # write them to hyperparam dictionary for debugging + self._hyperparameters["n_envs"] = self._num_envs + self._hyperparameters["obs_dim"] = self._obs_dim + self._hyperparameters["actions_dim"] = self._actions_dim + + self._hyperparameters["seed"] = self._seed + self._hyperparameters["using_gpu"] = self._use_gpu + self._hyperparameters["total_timesteps_vec"] = 
self._total_timesteps_vec + + self._hyperparameters["collection_freq"]=self._collection_freq + self._hyperparameters["update_freq"]=self._update_freq + self._hyperparameters["total_steps"]=self._total_steps + + self._hyperparameters["utd_ratio"] = self._utd_ratio + + self._hyperparameters["n_policy_updates_when_done"] = self._n_policy_updates_to_be_done + self._hyperparameters["n_qf_updates_when_done"] = self._n_qf_updates_to_be_done + self._hyperparameters["n_tqf_updates_when_done"] = self._n_tqf_updates_to_be_done + self._hyperparameters["experience_to_policy_grad_steps_ratio"] = self._exp_to_policy_grad_ratio + self._hyperparameters["experience_to_quality_fun_grad_steps_ratio"] = self._exp_to_qf_grad_ratio + self._hyperparameters["experience_to_trgt_quality_fun_grad_steps_ratio"] = self._exp_to_qft_grad_ratio + + self._hyperparameters["episodes timeout lb"] = self._episode_timeout_lb + self._hyperparameters["episodes timeout ub"] = self._episode_timeout_ub + self._hyperparameters["task rand timeout lb"] = self._task_rand_timeout_lb + self._hyperparameters["task rand timeout ub"] = self._task_rand_timeout_ub + + self._hyperparameters["warmstart_timesteps"] = self._warmstart_timesteps + self._hyperparameters["warmstart_vectimesteps"] = self._warmstart_vectimesteps + self._hyperparameters["replay_buffer_size"] = self._replay_buffer_size + self._hyperparameters["batch_size"] = self._batch_size + self._hyperparameters["total_timesteps"] = self._total_timesteps + self._hyperparameters["lr_policy"] = self._lr_policy + self._hyperparameters["lr_q"] = self._lr_q + self._hyperparameters["discount_factor"] = self._discount_factor + self._hyperparameters["smoothing_coeff"] = self._smoothing_coeff + self._hyperparameters["policy_freq"] = self._policy_freq + self._hyperparameters["trgt_net_freq"] = self._trgt_net_freq + self._hyperparameters["autotune"] = self._autotune + self._hyperparameters["target_entropy"] = self._target_entropy + 
self._hyperparameters["target_entropy_disc"] = self._target_entropy_disc + self._hyperparameters["target_entropy_cont"] = self._target_entropy_cont + self._hyperparameters["disc_entropy_idxs"] = self._disc_idxs.tolist() + self._hyperparameters["cont_entropy_idxs"] = self._cont_idxs.tolist() + self._hyperparameters["log_alpha_disc"] = None if self._log_alpha_disc is None else self._log_alpha_disc + self._hyperparameters["log_alpha_cont"] = None if self._log_alpha_cont is None else self._log_alpha_cont + self._hyperparameters["alpha"] = self._alpha + self._hyperparameters["alpha_disc"] = self._alpha_disc + self._hyperparameters["alpha_cont"] = self._alpha_cont + self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq + self._hyperparameters["db_vecstep_frequency"] = self._db_vecstep_frequency + self._hyperparameters["m_checkpoint_freq"] = self._m_checkpoint_freq + + self._hyperparameters["use_period_resets"]= self._use_period_resets + self._hyperparameters["just_one_reset"]= self._just_one_reset + self._hyperparameters["period_resets_vecfreq"]= self._periodic_resets_vecfreq + self._hyperparameters["period_resets_vecstart"]= self._reset_vecstep_start + self._hyperparameters["period_resets_vecend"]= self._reset_vecstep_end + self._hyperparameters["period_resets_freq"]= self._periodic_resets_freq + self._hyperparameters["period_resets_start"]= self._periodic_resets_start + self._hyperparameters["period_resets_end"]= self._periodic_resets_end + + self._hyperparameters["use_rnd"] = self._use_rnd + self._hyperparameters["rnd_lwidth"] = self._rnd_lwidth + self._hyperparameters["rnd_hlayers"] = self._rnd_hlayers + self._hyperparameters["rnd_outdim"] = self._rnd_outdim + self._hyperparameters["rnd_indim"] = self._rnd_indim + + self._hyperparameters["n_db_envs"] = self._num_db_envs + self._hyperparameters["n_expl_envs"] = self._n_expl_envs + self._hyperparameters["noise_freq"] = self._noise_freq_vec + self._hyperparameters["noise_duration_vec"] = 
self._noise_duration_vec + self._hyperparameters["noise_buff_freq"] = self._noise_buff_freq + self._hyperparameters["continuous_act_expl_noise_std"] = self._continuous_act_expl_noise_std + self._hyperparameters["discrete_act_expl_noise_std"] = self._discrete_act_expl_noise_std + + self._hyperparameters["n_demo_envs"] = self._env.n_demo_envs() + + self._hyperparameters["bnorm_bsize"] = self._bnorm_bsize + self._hyperparameters["bnorm_vecfreq"] = self._bnorm_vecfreq + + self._hyperparameters["validate"] = self._validate + self._hyperparameters["validation_ratio"] = self._validation_ratio + self._hyperparameters["validation_buffer_size_vec"] = self._validation_buffer_size_vec + self._hyperparameters["validation_buffer_size"] = self._validation_buffer_size + self._hyperparameters["validation_batch_size"] = self._validation_batch_size + self._hyperparameters["validation_collection_vecfreq"] = self._validation_collection_vecfreq + + # small debug log + info = f"\nUsing \n" + \ + f"total (vectorized) timesteps to be simulated {self._total_timesteps_vec}\n" + \ + f"total timesteps to be simulated {self._total_timesteps}\n" + \ + f"warmstart timesteps {self._warmstart_timesteps}\n" + \ + f"training replay buffer size {self._replay_buffer_size}\n" + \ + f"training replay buffer vec size {self._replay_buffer_size_vec}\n" + \ + f"training batch size {self._batch_size}\n" + \ + f"validation enabled {self._validate}\n" + \ + f"validation buffer size {self._validation_buffer_size}\n" + \ + f"validation buffer vec size {self._validation_buffer_size_vec}\n" + \ + f"validation collection freq {self._validation_collection_vecfreq}\n" + \ + f"validation update freq {self._validation_db_vecstep_freq}\n" + \ + f"validation batch size {self._validation_batch_size}\n" + \ + f"policy update freq {self._policy_freq}\n" + \ + f"target networks freq {self._trgt_net_freq}\n" + \ + f"episode timeout max steps {self._episode_timeout_ub}\n" + \ + f"episode timeout min steps 
{self._episode_timeout_lb}\n" + \ + f"task rand. max n steps {self._task_rand_timeout_ub}\n" + \ + f"task rand. min n steps {self._task_rand_timeout_lb}\n" + \ + f"number of action reps {self._env_n_action_reps}\n" + \ + f"total policy updates to be performed: {self._n_policy_updates_to_be_done}\n" + \ + f"total q fun updates to be performed: {self._n_qf_updates_to_be_done}\n" + \ + f"total trgt q fun updates to be performed: {self._n_tqf_updates_to_be_done}\n" + \ + f"experience to policy grad ratio: {self._exp_to_policy_grad_ratio}\n" + \ + f"experience to q fun grad ratio: {self._exp_to_qf_grad_ratio}\n" + \ + f"experience to trgt q fun grad ratio: {self._exp_to_qft_grad_ratio}\n" + \ + f"amount of noisy transitions in replay buffer: {self._noise_buff_freq*100}% \n" + db_env_idxs=", ".join(map(str, self._db_env_selector.tolist())) + n_db_envs_str=f"db envs {self._num_db_envs}/{self._num_envs} \n" + info=info + n_db_envs_str + "Debug env. indexes are [" + db_env_idxs+"]\n" + if self._env.demo_env_idxs() is not None: + demo_idxs_str=", ".join(map(str, self._env.demo_env_idxs().tolist())) + n_demo_envs_str=f"demo envs {self._env.n_demo_envs()}/{self._num_envs} \n" + info=info + n_demo_envs_str + "Demo env. indexes are [" + demo_idxs_str+"]\n" + if self._expl_env_selector is not None: + random_expl_idxs=", ".join(map(str, self._expl_env_selector.tolist())) + n_expl_envs_str=f"expl envs {self._n_expl_envs}/{self._num_envs} \n" + info=info + n_expl_envs_str + "Random exploration env. 
indexes are [" + random_expl_idxs+"]\n" + + Journal.log(self.__class__.__name__, + "_init_params", + info, + LogType.INFO, + throw_when_excep = True) + + # init counters + self._step_counter = 0 + self._vec_transition_counter = 0 + self._update_counter = 0 + self._log_it_counter = 0 + + def _init_dbdata(self): + + # initalize some debug data + self._collection_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._collection_t = -1.0 + + self._env_step_fps = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._env_step_rt_factor = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._batch_norm_update_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._policy_update_t_start = -1.0 + self._policy_update_t = -1.0 + self._policy_update_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._policy_update_fps = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._validation_t = -1.0 + self._validation_dt = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._n_of_played_episodes = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_timesteps_done = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_policy_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_qfun_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._n_tqfun_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._elapsed_min = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=0.0, device="cpu") + + self._ep_tsteps_env_distribution = 
torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.int32, fill_value=-1, device="cpu") + + self._reward_names = self._episodic_reward_metrics.reward_names() + self._reward_names_str = "[" + ', '.join(self._reward_names) + "]" + self._n_rewards = self._episodic_reward_metrics.n_rewards() + + # db environments + self._tot_rew_max = torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_min = torch.full((self._db_data_size, self._num_db_envs, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_max_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_min_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_avrg_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._tot_rew_std_over_envs = torch.full((self._db_data_size, 1, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._sub_rew_max = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min = torch.full((self._db_data_size, self._num_db_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_max_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") 
+ self._sub_rew_avrg_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_std_over_envs = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # custom data from env # (log data just from db envs for simplicity) + self._custom_env_data = {} + db_data_names = list(self._env.custom_db_data.keys()) + for dbdatan in db_data_names: # loop thorugh custom data + + self._custom_env_data[dbdatan] = {} + + max = self._env.custom_db_data[dbdatan].get_max(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1) + avrg = self._env.custom_db_data[dbdatan].get_avrg(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1) + min = self._env.custom_db_data[dbdatan].get_min(env_selector=self._db_env_selector).reshape(self._num_db_envs, -1) + max_over_envs = self._env.custom_db_data[dbdatan].get_max_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + min_over_envs = self._env.custom_db_data[dbdatan].get_min_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + avrg_over_envs = self._env.custom_db_data[dbdatan].get_avrg_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + std_over_envs = self._env.custom_db_data[dbdatan].get_std_over_envs(env_selector=self._db_env_selector).reshape(1, -1) + + self._custom_env_data[dbdatan]["max"] =torch.full((self._db_data_size, + max.shape[0], + max.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["avrg"] =torch.full((self._db_data_size, + avrg.shape[0], + avrg.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["min"] =torch.full((self._db_data_size, + min.shape[0], + min.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["max_over_envs"] =torch.full((self._db_data_size, + max_over_envs.shape[0], + 
max_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["min_over_envs"] =torch.full((self._db_data_size, + min_over_envs.shape[0], + min_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["avrg_over_envs"] =torch.full((self._db_data_size, + avrg_over_envs.shape[0], + avrg_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._custom_env_data[dbdatan]["std_over_envs"] =torch.full((self._db_data_size, + std_over_envs.shape[0], + std_over_envs.shape[1]), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # exploration envs + if self._n_expl_envs > 0: + self._ep_tsteps_expl_env_distribution = torch.full((self._db_data_size, self._n_expl_envs, 1), + dtype=torch.int32, fill_value=-1, device="cpu") + + # also log sub rewards metrics for exploration envs + self._sub_rew_max_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_expl = torch.full((self._db_data_size, self._n_expl_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_max_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_std_over_envs_expl = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # demo 
environments + self._demo_envs_active = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._demo_perf_metric = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + if self._env.demo_env_idxs() is not None: + n_demo_envs=self._env.demo_env_idxs().shape[0] + + self._ep_tsteps_demo_env_distribution = torch.full((self._db_data_size, n_demo_envs, 1), + dtype=torch.int32, fill_value=-1, device="cpu") + + # also log sub rewards metrics for exploration envs + self._sub_rew_max_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_demo = torch.full((self._db_data_size, n_demo_envs, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_max_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_min_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_avrg_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._sub_rew_std_over_envs_demo = torch.full((self._db_data_size, 1, self._n_rewards), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + # algorithm-specific db info + self._qf1_vals_mean = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_mean = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._min_qft_vals_mean = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + 
self._qf1_vals_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._min_qft_vals_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf1_vals_max = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf1_vals_min = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_max = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_vals_min = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._qf1_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._actor_loss= torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_disc = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_cont = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + if self._validate: # add db data for validation losses + self._overfit_index = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf1_loss_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._qf2_loss_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._actor_loss_validation= 
torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_disc_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alpha_loss_cont_validation = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._alphas = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alphas_disc = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._alphas_cont = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._policy_entropy_mean=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_std=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_max=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_min=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_mean=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_std=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_max=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_disc_min=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_cont_mean=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + 
self._policy_entropy_cont_std=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_cont_max=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._policy_entropy_cont_min=torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._running_mean_obs=None + self._running_std_obs=None + if self._agent.obs_running_norm is not None and not self._eval: + # some db data for the agent + self._running_mean_obs = torch.full((self._db_data_size, self._env.obs_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._running_std_obs = torch.full((self._db_data_size, self._env.obs_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + + # RND + self._rnd_loss=None + if self._use_rnd: + self._rnd_loss = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._expl_bonus_raw_avrg = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._expl_bonus_raw_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_raw_min = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_raw_max = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + + self._expl_bonus_proc_avrg = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + self._expl_bonus_proc_std = torch.full((self._db_data_size, 1), + dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_proc_min = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + # self._expl_bonus_proc_max = torch.full((self._db_data_size, 1), + # dtype=torch.float32, fill_value=torch.nan, device="cpu") + + 
self._n_rnd_updates = torch.full((self._db_data_size, 1), + dtype=torch.int32, fill_value=0, device="cpu") + self._running_mean_rnd_input = None + self._running_std_rnd_input = None + if self._rnd_net.obs_running_norm is not None: + self._running_mean_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + self._running_std_rnd_input = torch.full((self._db_data_size, self._rnd_net.input_dim()), + dtype=torch.float32, fill_value=0.0, device="cpu") + + def _init_agent_optimizers(self): + self._qf_optimizer = optim.Adam(list(self._agent.qf1.parameters()) + list(self._agent.qf2.parameters()), + lr=self._lr_q) + self._actor_optimizer = optim.Adam(list(self._agent.actor.parameters()), + lr=self._lr_policy) + + def _init_alpha_autotuning(self): + self._log_alpha_disc = torch.full((1,), fill_value=math.log(self._alpha_disc), requires_grad=True, device=self._torch_device) + self._log_alpha_cont = torch.full((1,), fill_value=math.log(self._alpha_cont), requires_grad=True, device=self._torch_device) + self._alpha_disc = self._log_alpha_disc.exp().item() + self._alpha_cont = self._log_alpha_cont.exp().item() + self._alpha = 0.5*(self._alpha_disc + self._alpha_cont) + self._a_optimizer_disc = optim.Adam([self._log_alpha_disc], lr=self._lr_q) + self._a_optimizer_cont = optim.Adam([self._log_alpha_cont], lr=self._lr_q) + + def _init_replay_buffers(self): + + self._bpos = 0 + + self._obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._actions = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._actions_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._rewards = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1), + fill_value=torch.nan, + dtype=self._dtype, + 
device=self._torch_device, + requires_grad=False) + self._next_obs = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._next_terminal = torch.full(size=(self._replay_buffer_size_vec, self._num_envs, 1), + fill_value=False, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + + def _init_validation_buffers(self): + + self._bpos_val = 0 + + self._obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._actions_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._actions_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._rewards_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._next_obs_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, self._obs_dim), + fill_value=torch.nan, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + self._next_terminal_val = torch.full(size=(self._validation_buffer_size_vec, self._num_envs, 1), + fill_value=False, + dtype=self._dtype, + device=self._torch_device, + requires_grad=False) + + def _save_model(self, + is_checkpoint: bool = False): + + path = self._model_path + if is_checkpoint: # use iteration as id + path = path + "_checkpoint" + str(self._log_it_counter) + info = f"Saving model to {path}" + Journal.log(self.__class__.__name__, + "_save_model", + info, + LogType.INFO, + throw_when_excep = True) + agent_state_dict=self._agent.state_dict() + if not self._eval: # training + # we log the joints which were observed during training + observed_joints=self._env.get_observed_joints() + if 
observed_joints is not None: + agent_state_dict["observed_jnts"]=self._env.get_observed_joints() + + torch.save(agent_state_dict, path) # saves whole agent state + # torch.save(self._agent.parameters(), path) # only save agent parameters + info = f"Done." + Journal.log(self.__class__.__name__, + "_save_model", + info, + LogType.INFO, + throw_when_excep = True) + + def _dump_env_checkpoints(self): + + path = self._env_db_checkpoints_fname+str(self._log_it_counter) + + if path is not None: + info = f"Saving env db checkpoint data to {path}" + Journal.log(self.__class__.__name__, + "_dump_env_checkpoints", + info, + LogType.INFO, + throw_when_excep = True) + + with h5py.File(path+".hdf5", 'w') as hf: + + for key, value in self._hyperparameters.items(): + if value is None: + value = "None" + hf.attrs[key] = value + + # full training envs + sub_rew_full=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._db_env_selector) + tot_rew_full=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._db_env_selector) + + if self._n_expl_envs > 0: + sub_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._expl_env_selector) + tot_rew_full_expl=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._expl_env_selector) + if self._env.n_demo_envs() > 0: + sub_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_subrew(env_selector=self._demo_env_selector) + tot_rew_full_demo=self._episodic_reward_metrics.get_full_episodic_totrew(env_selector=self._demo_env_selector) + + ep_vec_freq=self._episodic_reward_metrics.ep_vec_freq() # assuming all db data was collected with the same ep_vec_freq + + hf.attrs['sub_reward_names'] = self._reward_names + hf.attrs['log_iteration'] = self._log_it_counter + hf.attrs['n_timesteps_done'] = self._n_timesteps_done[self._log_it_counter] + hf.attrs['n_policy_updates'] = self._n_policy_updates[self._log_it_counter] + hf.attrs['elapsed_min'] = 
self._elapsed_min[self._log_it_counter] + hf.attrs['ep_vec_freq'] = ep_vec_freq + hf.attrs['n_expl_envs'] = self._n_expl_envs + hf.attrs['n_demo_envs'] = self._env.n_demo_envs() + + # first dump custom db data names + db_data_names = list(self._env.custom_db_data.keys()) + for db_dname in db_data_names: + episodic_data_names = self._env.custom_db_data[db_dname].data_names() + var_name = db_dname + hf.attrs[var_name+"_data_names"] = episodic_data_names + + for ep_idx in range(ep_vec_freq): # create separate datasets for each episode + ep_prefix=f'ep_{ep_idx}_' + + # rewards + hf.create_dataset(ep_prefix+'sub_rew', + data=sub_rew_full[ep_idx, :, :, :]) + hf.create_dataset(ep_prefix+'tot_rew', + data=tot_rew_full[ep_idx, :, :, :]) + if self._n_expl_envs > 0: + hf.create_dataset(ep_prefix+'sub_rew_expl', + data=sub_rew_full_expl[ep_idx, :, :, :]) + hf.create_dataset(ep_prefix+'tot_rew_expl', + data=tot_rew_full_expl[ep_idx, :, :, :]) + hf.create_dataset('expl_env_selector', data=self._expl_env_selector.cpu().numpy()) + if self._env.n_demo_envs() > 0: + hf.create_dataset(ep_prefix+'sub_rew_demo', + data=sub_rew_full_demo) + hf.create_dataset(ep_prefix+'tot_rew_demo', + data=tot_rew_full_demo[ep_idx, :, :, :]) + hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().cpu().numpy()) + + # dump all custom env data + db_data_names = list(self._env.custom_db_data.keys()) + for db_dname in db_data_names: + episodic_data=self._env.custom_db_data[db_dname] + var_name = db_dname + hf.create_dataset(ep_prefix+var_name, + data=episodic_data.get_full_episodic_data(env_selector=self._db_env_selector)[ep_idx, :, :, :]) + if self._n_expl_envs > 0: + hf.create_dataset(ep_prefix+var_name+"_expl", + data=episodic_data.get_full_episodic_data(env_selector=self._expl_env_selector)[ep_idx, :, :, :]) + if self._env.n_demo_envs() > 0: + hf.create_dataset(ep_prefix+var_name+"_demo", + data=episodic_data.get_full_episodic_data(env_selector=self._demo_env_selector)[ep_idx, :, :, :]) + + 
Journal.log(self.__class__.__name__, + "_dump_env_checkpoints", + "done.", + LogType.INFO, + throw_when_excep = True) + + def done(self): + + if not self._is_done: + + if not self._eval: + self._save_model() + + self._dump_dbinfo_to_file() + + if self._full_env_db: + self._dump_env_checkpoints() + + if self._shared_algo_data is not None: + self._shared_algo_data.write(dyn_info_name=["is_done"], + val=[1.0]) + self._shared_algo_data.close() # close shared memory + + self._env.close() + + self._is_done = True + + def _dump_dbinfo_to_file(self): + + info = f"Dumping debug info at {self._dbinfo_drop_fname}" + Journal.log(self.__class__.__name__, + "_dump_dbinfo_to_file", + info, + LogType.INFO, + throw_when_excep = True) + + with h5py.File(self._dbinfo_drop_fname+".hdf5", 'w') as hf: + n_valid = int(max(0, min(self._log_it_counter, self._db_data_size))) + + def _slice_valid(arr): + if isinstance(arr, torch.Tensor): + return arr[:n_valid] + if isinstance(arr, np.ndarray): + return arr[:n_valid] + return arr + + def _ds(name, arr): + data = _slice_valid(arr) + hf.create_dataset(name, data=data.numpy() if isinstance(data, torch.Tensor) else data) + + # hf.create_dataset('numpy_data', data=numpy_data) + # Write dictionaries to HDF5 as attributes + for key, value in self._hyperparameters.items(): + if value is None: + value = "None" + hf.attrs[key] = value + + # rewards + hf.create_dataset('sub_reward_names', data=self._reward_names, + dtype='S40') + _ds('sub_rew_max', self._sub_rew_max) + _ds('sub_rew_avrg', self._sub_rew_avrg) + _ds('sub_rew_min', self._sub_rew_min) + _ds('sub_rew_max_over_envs', self._sub_rew_max_over_envs) + _ds('sub_rew_min_over_envs', self._sub_rew_min_over_envs) + _ds('sub_rew_avrg_over_envs', self._sub_rew_avrg_over_envs) + _ds('sub_rew_std_over_envs', self._sub_rew_std_over_envs) + + _ds('tot_rew_max', self._tot_rew_max) + _ds('tot_rew_avrg', self._tot_rew_avrg) + _ds('tot_rew_min', self._tot_rew_min) + _ds('tot_rew_max_over_envs', 
self._tot_rew_max_over_envs) + _ds('tot_rew_min_over_envs', self._tot_rew_min_over_envs) + _ds('tot_rew_avrg_over_envs', self._tot_rew_avrg_over_envs) + _ds('tot_rew_std_over_envs', self._tot_rew_std_over_envs) + + _ds('ep_tsteps_env_distr', self._ep_tsteps_env_distribution) + + if self._n_expl_envs > 0: + # expl envs + _ds('sub_rew_max_expl', self._sub_rew_max_expl) + _ds('sub_rew_avrg_expl', self._sub_rew_avrg_expl) + _ds('sub_rew_min_expl', self._sub_rew_min_expl) + _ds('sub_rew_max_over_envs_expl', self._sub_rew_max_over_envs_expl) + _ds('sub_rew_min_over_envs_expl', self._sub_rew_min_over_envs_expl) + _ds('sub_rew_avrg_over_envs_expl', self._sub_rew_avrg_over_envs_expl) + _ds('sub_rew_std_over_envs_expl', self._sub_rew_std_over_envs_expl) + + _ds('ep_timesteps_expl_env_distr', self._ep_tsteps_expl_env_distribution) + + hf.create_dataset('expl_env_selector', data=self._expl_env_selector.numpy()) + + _ds('demo_envs_active', self._demo_envs_active) + _ds('demo_perf_metric', self._demo_perf_metric) + + if self._env.n_demo_envs() > 0: + # demo envs + _ds('sub_rew_max_demo', self._sub_rew_max_demo) + _ds('sub_rew_avrg_demo', self._sub_rew_avrg_demo) + _ds('sub_rew_min_demo', self._sub_rew_min_demo) + _ds('sub_rew_max_over_envs_demo', self._sub_rew_max_over_envs_demo) + _ds('sub_rew_min_over_envs_demo', self._sub_rew_min_over_envs_demo) + _ds('sub_rew_avrg_over_envs_demo', self._sub_rew_avrg_over_envs_demo) + _ds('sub_rew_std_over_envs_demo', self._sub_rew_std_over_envs_demo) + + _ds('ep_timesteps_demo_env_distr', self._ep_tsteps_demo_env_distribution) + + hf.create_dataset('demo_env_idxs', data=self._env.demo_env_idxs().numpy()) + + # profiling data + _ds('env_step_fps', self._env_step_fps) + _ds('env_step_rt_factor', self._env_step_rt_factor) + _ds('collection_dt', self._collection_dt) + _ds('batch_norm_update_dt', self._batch_norm_update_dt) + _ds('policy_update_dt', self._policy_update_dt) + _ds('policy_update_fps', self._policy_update_fps) + _ds('validation_dt', 
self._validation_dt) + + _ds('n_of_played_episodes', self._n_of_played_episodes) + _ds('n_timesteps_done', self._n_timesteps_done) + _ds('n_policy_updates', self._n_policy_updates) + _ds('n_qfun_updates', self._n_qfun_updates) + _ds('n_tqfun_updates', self._n_tqfun_updates) + + _ds('elapsed_min', self._elapsed_min) + + # algo data + _ds('qf1_vals_mean', self._qf1_vals_mean) + _ds('qf2_vals_mean', self._qf2_vals_mean) + _ds('qf1_vals_std', self._qf1_vals_std) + _ds('qf2_vals_std', self._qf2_vals_std) + _ds('qf1_vals_max', self._qf1_vals_max) + _ds('qf1_vals_min', self._qf1_vals_min) + _ds('qf2_vals_max', self._qf2_vals_max) + _ds('qf2_vals_min', self._qf1_vals_min) + + _ds('min_qft_vals_mean', self._min_qft_vals_mean) + _ds('min_qft_vals_std', self._min_qft_vals_std) + + _ds('qf1_loss', self._qf1_loss) + _ds('qf2_loss', self._qf2_loss) + _ds('actor_loss', self._actor_loss) + _ds('alpha_loss', self._alpha_loss) + _ds('alpha_loss_disc', self._alpha_loss_disc) + _ds('alpha_loss_cont', self._alpha_loss_cont) + if self._validate: + _ds('qf1_loss_validation', self._qf1_loss_validation) + _ds('qf2_loss_validation', self._qf2_loss_validation) + _ds('actor_loss_validation', self._actor_loss_validation) + _ds('alpha_loss_validation', self._alpha_loss_validation) + _ds('alpha_loss_disc_validation', self._alpha_loss_disc_validation) + _ds('alpha_loss_cont_validation', self._alpha_loss_cont_validation) + _ds('overfit_index', self._overfit_index) + + _ds('alphas', self._alphas) + _ds('alphas_disc', self._alphas_disc) + _ds('alphas_cont', self._alphas_cont) + + _ds('policy_entropy_mean', self._policy_entropy_mean) + _ds('policy_entropy_std', self._policy_entropy_std) + _ds('policy_entropy_max', self._policy_entropy_max) + _ds('policy_entropy_min', self._policy_entropy_min) + _ds('policy_entropy_disc_mean', self._policy_entropy_disc_mean) + _ds('policy_entropy_disc_std', self._policy_entropy_disc_std) + _ds('policy_entropy_disc_max', self._policy_entropy_disc_max) + 
_ds('policy_entropy_disc_min', self._policy_entropy_disc_min) + _ds('policy_entropy_cont_mean', self._policy_entropy_cont_mean) + _ds('policy_entropy_cont_std', self._policy_entropy_cont_std) + _ds('policy_entropy_cont_max', self._policy_entropy_cont_max) + _ds('policy_entropy_cont_min', self._policy_entropy_cont_min) + hf.create_dataset('target_entropy', data=self._target_entropy) + hf.create_dataset('target_entropy_disc', data=self._target_entropy_disc) + hf.create_dataset('target_entropy_cont', data=self._target_entropy_cont) + + if self._use_rnd: + _ds('n_rnd_updates', self._n_rnd_updates) + _ds('expl_bonus_raw_avrg', self._expl_bonus_raw_avrg) + _ds('expl_bonus_raw_std', self._expl_bonus_raw_std) + _ds('expl_bonus_proc_avrg', self._expl_bonus_proc_avrg) + _ds('expl_bonus_proc_std', self._expl_bonus_proc_std) + + if self._rnd_net.obs_running_norm is not None: + if self._running_mean_rnd_input is not None: + _ds('running_mean_rnd_input', self._running_mean_rnd_input) + if self._running_std_rnd_input is not None: + _ds('running_std_rnd_input', self._running_std_rnd_input) + + # dump all custom env data + db_data_names = list(self._env.custom_db_data.keys()) + for db_dname in db_data_names: + data=self._custom_env_data[db_dname] + subnames = list(data.keys()) + for subname in subnames: + var_name = db_dname + "_" + subname + _ds(var_name, data[subname]) + + # other data + if self._agent.obs_running_norm is not None: + if self._running_mean_obs is not None: + _ds('running_mean_obs', self._running_mean_obs) + if self._running_std_obs is not None: + _ds('running_std_obs', self._running_std_obs) + + info = f"done." 
+ Journal.log(self.__class__.__name__, + "_dump_dbinfo_to_file", + info, + LogType.INFO, + throw_when_excep = True) + + def _load_model(self, + model_path: str): + + info = f"Loading model at {model_path}" + + Journal.log(self.__class__.__name__, + "_load_model", + info, + LogType.INFO, + throw_when_excep = True) + model_dict=torch.load(model_path, + map_location=self._torch_device, + weights_only=False) + + observed_joints=self._env.get_observed_joints() + if not ("observed_jnts" in model_dict): + Journal.log(self.__class__.__name__, + "_load_model", + "No observed joints key found in loaded model dictionary! Let's hope joints are ordered in the same way.", + LogType.WARN) + else: + required_joints=model_dict["observed_jnts"] + self._check_observed_joints(observed_joints,required_joints) + + self._agent.load_state_dict(model_dict) + + if self._eval: + self._switch_training_mode(False) + + def _check_observed_joints(self, + observed_joints, + required_joints): + + observed=set(observed_joints) + required=set(required_joints) + + all_required_joints_avail = required.issubset(observed) + if not all_required_joints_avail: + missing=[item for item in required if item not in observed] + missing_str=', '.join(missing) + Journal.log(self.__class__.__name__, + "_check_observed_joints", + f"not all required joints are available. 
Missing {missing_str}", + LogType.EXCEP, + throw_when_excep = True) + exceeding=observed-required + if not len(exceeding)==0: + # do not support having more joints than the required + exc_jnts=" ".join(list(exceeding)) + Journal.log(self.__class__.__name__, + "_check_observed_joints", + f"more than the required joints found in the observed joint: {exc_jnts}", + LogType.EXCEP, + throw_when_excep = True) + + # here we are sure that required and observed sets match + self._to_agent_jnt_remap=None + if not required_joints==observed_joints: + Journal.log(self.__class__.__name__, + "_check_observed_joints", + f"required jnt obs from agent have different ordering from observed ones. Will compute a remapping.", + LogType.WARN, + throw_when_excep = True) + self._to_agent_jnt_remap = [observed_joints.index(element) for element in required_joints] + + self._env.set_jnts_remapping(remapping= self._to_agent_jnt_remap) + + def drop_dir(self): + return self._drop_dir + + def _init_drop_dir(self, + drop_dir_name: str = None): + + # main drop directory + if drop_dir_name is None: + # drop to current directory + self._drop_dir = "./" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id + else: + self._drop_dir = drop_dir_name + "/" + f"{self.__class__.__name__}/" + self._run_name + "/" + self._unique_id + os.makedirs(self._drop_dir) + + self._env_db_checkpoints_dropdir=None + self._env_db_checkpoints_fname=None + if self._full_env_db>0: + self._env_db_checkpoints_dropdir=self._drop_dir+"/env_db_checkpoints" + self._env_db_checkpoints_fname = self._env_db_checkpoints_dropdir + \ + "/" + self._unique_id + "_env_db_checkpoint" + os.makedirs(self._env_db_checkpoints_dropdir) + # model + if not self._eval or (self._model_path is None): + self._model_path = self._drop_dir + "/" + self._unique_id + "_model" + else: # we copy the model under evaluation to the drop dir + shutil.copy(self._model_path, self._drop_dir) + + # debug info + self._dbinfo_drop_fname = 
self._drop_dir + "/" + self._unique_id + "db_info" # extension added later + + # other auxiliary db files + aux_drop_dir = self._drop_dir + "/other" + os.makedirs(aux_drop_dir) + filepaths = self._env.get_file_paths() # envs implementation + filepaths.append(self._this_basepath) # algorithm implementation + filepaths.append(self._this_child_path) + filepaths.append(self._agent.get_impl_path()) # agent implementation + for file in filepaths: + shutil.copy(file, self._drop_dir) + aux_dirs = self._env.get_aux_dir() + for aux_dir in aux_dirs: + shutil.copytree(aux_dir, aux_drop_dir, dirs_exist_ok=True) + + def _get_performance_metric(self): + # to be overridden + return 0.0 + + def _post_step(self): + + self._collection_dt[self._log_it_counter] += \ + (self._collection_t-self._start_time) + self._batch_norm_update_dt[self._log_it_counter] += \ + (self._policy_update_t_start-self._collection_t) + self._policy_update_dt[self._log_it_counter] += \ + (self._policy_update_t - self._policy_update_t_start) + if self._validate: + self._validation_dt[self._log_it_counter] += \ + (self._validation_t - self._policy_update_t) + + self._step_counter+=1 # counts algo steps + + self._demo_envs_active[self._log_it_counter]=self._env.demo_active() + self._demo_perf_metric[self._log_it_counter]=self._get_performance_metric() + if self._env.n_demo_envs() > 0 and (self._demo_stop_thresh is not None): + # check if deactivation condition applies + self._env.switch_demo(active=self._demo_perf_metric[self._log_it_counter] 0: + self._ep_tsteps_expl_env_distribution[self._log_it_counter, :]=\ + self._episodic_reward_metrics.step_counters(env_selector=self._expl_env_selector)*self._env_n_action_reps + + self._sub_rew_max_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._expl_env_selector) + self._sub_rew_avrg_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._expl_env_selector) + 
self._sub_rew_min_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._expl_env_selector) + self._sub_rew_max_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._expl_env_selector) + self._sub_rew_min_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._expl_env_selector) + self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._expl_env_selector) + self._sub_rew_std_over_envs_expl[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._expl_env_selector) + + # demo envs + if self._env.n_demo_envs() > 0 and self._env.demo_active(): + # only log if demo envs are active (db data will remaing to nan if that case) + self._ep_tsteps_demo_env_distribution[self._log_it_counter, :]=\ + self._episodic_reward_metrics.step_counters(env_selector=self._demo_env_selector)*self._env_n_action_reps + + self._sub_rew_max_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max(env_selector=self._demo_env_selector) + self._sub_rew_avrg_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_avrg(env_selector=self._demo_env_selector) + self._sub_rew_min_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min(env_selector=self._demo_env_selector) + self._sub_rew_max_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_max_over_envs(env_selector=self._demo_env_selector) + self._sub_rew_min_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_min_over_envs(env_selector=self._demo_env_selector) + self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, :] = 
self._episodic_reward_metrics.get_sub_rew_avrg_over_envs(env_selector=self._demo_env_selector) + self._sub_rew_std_over_envs_demo[self._log_it_counter, :, :] = self._episodic_reward_metrics.get_sub_rew_std_over_envs(env_selector=self._demo_env_selector) + + # other data + if self._agent.obs_running_norm is not None: + if self._running_mean_obs is not None: + self._running_mean_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_mean() + if self._running_std_obs is not None: + self._running_std_obs[self._log_it_counter, :] = self._agent.obs_running_norm.get_current_std() + + if self._use_rnd: + if self._running_mean_rnd_input is not None: + self._running_mean_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_mean() + if self._running_std_rnd_input is not None: + self._running_std_rnd_input[self._log_it_counter, :] = self._rnd_net.obs_running_norm.get_current_std() + + # write some episodic db info on shared mem + sub_returns=self._sub_returns.get_torch_mirror(gpu=False) + sub_returns[:, :]=self._episodic_reward_metrics.get_sub_rew_avrg() + tot_returns=self._tot_returns.get_torch_mirror(gpu=False) + tot_returns[:, :]=self._episodic_reward_metrics.get_tot_rew_avrg() + self._sub_returns.synch_all(read=False) + self._tot_returns.synch_all(read=False) + + self._log_info() + + self._log_it_counter+=1 + + if self._dump_checkpoints and \ + (self._vec_transition_counter % self._m_checkpoint_freq == 0): + self._save_model(is_checkpoint=True) + + if self._full_env_db and \ + (self._vec_transition_counter % self._env_db_checkpoints_vecfreq == 0): + self._dump_env_checkpoints() + + if self._vec_transition_counter == self._total_timesteps_vec: + self.done() + + def _should_have_called_setup(self): + + exception = f"setup() was not called!" 
+ + Journal.log(self.__class__.__name__, + "_should_have_called_setup", + exception, + LogType.EXCEP, + throw_when_excep = True) + + def _log_info(self): + + if self._debug or self._verbose: + elapsed_h = self._elapsed_min[self._log_it_counter].item()/60.0 + est_remaining_time_h = elapsed_h * 1/(self._vec_transition_counter) * (self._total_timesteps_vec-self._vec_transition_counter) + is_done=self._vec_transition_counter==self._total_timesteps_vec + + actual_tsteps_with_updates=-1 + experience_to_policy_grad_ratio=-1 + experience_to_qfun_grad_ratio=-1 + experience_to_tqfun_grad_ratio=-1 + if not self._eval: + actual_tsteps_with_updates=(self._n_timesteps_done[self._log_it_counter].item()-self._warmstart_timesteps) + epsi=1e-6 # to avoid div by 0 + experience_to_policy_grad_ratio=actual_tsteps_with_updates/(self._n_policy_updates[self._log_it_counter].item()-epsi) + experience_to_qfun_grad_ratio=actual_tsteps_with_updates/(self._n_qfun_updates[self._log_it_counter].item()-epsi) + experience_to_tqfun_grad_ratio=actual_tsteps_with_updates/(self._n_tqfun_updates[self._log_it_counter].item()-epsi) + + if self._debug: + + if self._remote_db: + # write general algo debug info to shared memory + info_names=self._shared_algo_data.dynamic_info.get() + info_data = [ + self._n_timesteps_done[self._log_it_counter].item(), + self._n_policy_updates[self._log_it_counter].item(), + experience_to_policy_grad_ratio, + elapsed_h, + est_remaining_time_h, + self._env_step_fps[self._log_it_counter].item(), + self._env_step_rt_factor[self._log_it_counter].item(), + self._collection_dt[self._log_it_counter].item(), + self._policy_update_fps[self._log_it_counter].item(), + self._policy_update_dt[self._log_it_counter].item(), + is_done, + self._n_of_played_episodes[self._log_it_counter].item(), + self._batch_norm_update_dt[self._log_it_counter].item(), + ] + self._shared_algo_data.write(dyn_info_name=info_names, + val=info_data) + + # write debug info to remote wandb server + db_data_names = 
list(self._env.custom_db_data.keys()) + for dbdatan in db_data_names: + data = self._custom_env_data[dbdatan] + data_names = self._env.custom_db_data[dbdatan].data_names() + + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_max": + wandb.Histogram(data["max"][self._log_it_counter, :, :].numpy())}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_avrg": + wandb.Histogram(data["avrg"][self._log_it_counter, :, :].numpy())}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}" + "_min": + wandb.Histogram(data["min"][self._log_it_counter, :, :].numpy())}) + + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_max_over_envs": + data["max_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_min_over_envs": + data["min_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_avrg_over_envs": + data["avrg_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))}) + self._custom_env_data_db_dict.update({f"env_dbdata/{dbdatan}-{data_names[i]}" + "_std_over_envs": + data["std_over_envs"][self._log_it_counter, :, i:i+1] for i in range(len(data_names))}) + + self._wandb_d.update({'log_iteration' : self._log_it_counter}) + self._wandb_d.update(dict(zip(info_names, info_data))) + + # debug environments + self._wandb_d.update({'correlation_db/ep_timesteps_env_distr': + wandb.Histogram(self._ep_tsteps_env_distribution[self._log_it_counter, :, :].numpy())}) + + self._wandb_d.update({'tot_reward/tot_rew_max': wandb.Histogram(self._tot_rew_max[self._log_it_counter, :, :].numpy()), + 'tot_reward/tot_rew_avrg': wandb.Histogram(self._tot_rew_avrg[self._log_it_counter, :, :].numpy()), + 'tot_reward/tot_rew_min': wandb.Histogram(self._tot_rew_min[self._log_it_counter, :, :].numpy()), 
+ 'tot_reward/tot_rew_max_over_envs': self._tot_rew_max_over_envs[self._log_it_counter, :, :].item(), + 'tot_reward/tot_rew_min_over_envs': self._tot_rew_min_over_envs[self._log_it_counter, :, :].item(), + 'tot_reward/tot_rew_avrg_over_envs': self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item(), + 'tot_reward/tot_rew_std_over_envs': self._tot_rew_std_over_envs[self._log_it_counter, :, :].item(), + }) + # sub rewards from db envs + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max": + wandb.Histogram(self._sub_rew_max.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min": + wandb.Histogram(self._sub_rew_min.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg": + wandb.Histogram(self._sub_rew_avrg.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + + + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_max_over_envs": + self._sub_rew_max_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_min_over_envs": + self._sub_rew_min_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_avrg_over_envs": + self._sub_rew_avrg_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward/{self._reward_names[i]}_sub_rew_std_over_envs": + self._sub_rew_std_over_envs[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + + # exploration envs + if self._n_expl_envs > 0: + self._wandb_d.update({'correlation_db/ep_timesteps_expl_env_distr': + wandb.Histogram(self._ep_tsteps_expl_env_distribution[self._log_it_counter, :, 
:].numpy())}) + + # sub reward from expl envs + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_expl": + wandb.Histogram(self._sub_rew_max_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_expl": + wandb.Histogram(self._sub_rew_avrg_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_expl": + wandb.Histogram(self._sub_rew_min_expl.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_max_over_envs_expl": + self._sub_rew_max_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_min_over_envs_expl": + self._sub_rew_min_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_avrg_over_envs_expl": + self._sub_rew_avrg_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_expl/{self._reward_names[i]}_sub_rew_std_over_envs_expl": + self._sub_rew_std_over_envs_expl[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + + # demo envs (only log if active, otherwise we log nans which wandb doesn't like) + if self._env.n_demo_envs() > 0: + if self._env.demo_active(): + # log hystograms only if there are no nan in the data + self._wandb_d.update({'correlation_db/ep_timesteps_demo_env_distr': + wandb.Histogram(self._ep_tsteps_demo_env_distribution[self._log_it_counter, :, :].numpy())}) + + # sub reward from expl envs + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_demo": + 
wandb.Histogram(self._sub_rew_max_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_demo": + wandb.Histogram(self._sub_rew_avrg_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_demo": + wandb.Histogram(self._sub_rew_min_demo.numpy()[self._log_it_counter, :, i:i+1]) for i in range(len(self._reward_names))}) + + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_max_over_envs_demo": + self._sub_rew_max_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_min_over_envs_demo": + self._sub_rew_min_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_avrg_over_envs_demo": + self._sub_rew_avrg_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + self._wandb_d.update({f"sub_reward_demo/{self._reward_names[i]}_sub_rew_std_over_envs_demo": + self._sub_rew_std_over_envs_demo[self._log_it_counter, :, i:i+1] for i in range(len(self._reward_names))}) + + if self._vec_transition_counter > (self._warmstart_vectimesteps-1): + # algo info + self._policy_update_db_data_dict.update({ + "sac_q_info/qf1_vals_mean": self._qf1_vals_mean[self._log_it_counter, 0], + "sac_q_info/qf2_vals_mean": self._qf2_vals_mean[self._log_it_counter, 0], + "sac_q_info/min_qft_vals_mean": self._min_qft_vals_mean[self._log_it_counter, 0], + "sac_q_info/qf1_vals_std": self._qf1_vals_std[self._log_it_counter, 0], + "sac_q_info/qf2_vals_std": self._qf2_vals_std[self._log_it_counter, 0], + "sac_q_info/min_qft_vals_std": self._min_qft_vals_std[self._log_it_counter, 0], + "sac_q_info/qf1_vals_max": 
self._qf1_vals_max[self._log_it_counter, 0], + "sac_q_info/qf2_vals_max": self._qf2_vals_max[self._log_it_counter, 0], + "sac_q_info/qf1_vals_min": self._qf1_vals_min[self._log_it_counter, 0], + "sac_q_info/qf2_vals_min": self._qf2_vals_min[self._log_it_counter, 0], + + "sac_actor_info/policy_entropy_mean": self._policy_entropy_mean[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_std": self._policy_entropy_std[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_max": self._policy_entropy_max[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_min": self._policy_entropy_min[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_disc_mean": self._policy_entropy_disc_mean[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_disc_std": self._policy_entropy_disc_std[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_disc_max": self._policy_entropy_disc_max[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_disc_min": self._policy_entropy_disc_min[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_cont_mean": self._policy_entropy_cont_mean[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_cont_std": self._policy_entropy_cont_std[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_cont_max": self._policy_entropy_cont_max[self._log_it_counter, 0], + "sac_actor_info/policy_entropy_cont_min": self._policy_entropy_cont_min[self._log_it_counter, 0], + + "sac_q_info/qf1_loss": self._qf1_loss[self._log_it_counter, 0], + "sac_q_info/qf2_loss": self._qf2_loss[self._log_it_counter, 0], + "sac_actor_info/actor_loss": self._actor_loss[self._log_it_counter, 0]}) + alpha_logs = { + "sac_alpha_info/alpha": self._alphas[self._log_it_counter, 0], + "sac_alpha_info/alpha_disc": self._alphas_disc[self._log_it_counter, 0], + "sac_alpha_info/alpha_cont": self._alphas_cont[self._log_it_counter, 0], + "sac_alpha_info/target_entropy": self._target_entropy, + "sac_alpha_info/target_entropy_disc": self._target_entropy_disc, 
+ "sac_alpha_info/target_entropy_cont": self._target_entropy_cont + } + if self._autotune: + alpha_logs.update({ + "sac_alpha_info/alpha_loss": self._alpha_loss[self._log_it_counter, 0], + "sac_alpha_info/alpha_loss_disc": self._alpha_loss_disc[self._log_it_counter, 0], + "sac_alpha_info/alpha_loss_cont": self._alpha_loss_cont[self._log_it_counter, 0], + }) + self._policy_update_db_data_dict.update(alpha_logs) + + if self._validate: + self._policy_update_db_data_dict.update({ + "sac_q_info/qf1_loss_validation": self._qf1_loss_validation[self._log_it_counter, 0], + "sac_q_info/qf2_loss_validation": self._qf2_loss_validation[self._log_it_counter, 0], + "sac_q_info/overfit_index": self._overfit_index[self._log_it_counter, 0], + "sac_actor_info/actor_loss_validation": self._actor_loss_validation[self._log_it_counter, 0]}) + if self._autotune: + self._policy_update_db_data_dict.update({ + "sac_alpha_info/alpha_loss_validation": self._alpha_loss_validation[self._log_it_counter, 0], + "sac_alpha_info/alpha_loss_disc_validation": self._alpha_loss_disc_validation[self._log_it_counter, 0], + "sac_alpha_info/alpha_loss_cont_validation": self._alpha_loss_cont_validation[self._log_it_counter, 0]}) + + self._wandb_d.update(self._policy_update_db_data_dict) + + if self._use_rnd: + self._rnd_db_data_dict.update({ + "rnd_info/expl_bonus_raw_avrg": self._expl_bonus_raw_avrg[self._log_it_counter, 0], + "rnd_info/expl_bonus_raw_std": self._expl_bonus_raw_std[self._log_it_counter, 0], + "rnd_info/expl_bonus_proc_avrg": self._expl_bonus_proc_avrg[self._log_it_counter, 0], + "rnd_info/expl_bonus_proc_std": self._expl_bonus_proc_std[self._log_it_counter, 0], + "rnd_info/rnd_loss": self._rnd_loss[self._log_it_counter, 0], + }) + self._wandb_d.update(self._rnd_db_data_dict) + + if self._rnd_net.obs_running_norm is not None: + # adding info on running normalizer if used + if self._running_mean_rnd_input is not None: + self._wandb_d.update({f"rnd_info/running_mean_rhc_input": 
self._running_mean_rnd_input[self._log_it_counter, :]}) + if self._running_std_rnd_input is not None: + self._wandb_d.update({f"rnd_info/running_std_rhc_input": self._running_std_rnd_input[self._log_it_counter, :]}) + + if self._agent.obs_running_norm is not None: + # adding info on running normalizer if used + if self._running_mean_obs is not None: + self._wandb_d.update({f"running_norm/mean": self._running_mean_obs[self._log_it_counter, :]}) + if self._running_std_obs is not None: + self._wandb_d.update({f"running_norm/std": self._running_std_obs[self._log_it_counter, :]}) + + self._wandb_d.update(self._custom_env_data_db_dict) + + wandb.log(self._wandb_d) + + if self._verbose: + + info =f"\nTotal n. timesteps simulated: {self._n_timesteps_done[self._log_it_counter].item()}/{self._total_timesteps}\n" + \ + f"N. policy updates performed: {self._n_policy_updates[self._log_it_counter].item()}/{self._n_policy_updates_to_be_done}\n" + \ + f"N. q fun updates performed: {self._n_qfun_updates[self._log_it_counter].item()}/{self._n_qf_updates_to_be_done}\n" + \ + f"N. 
trgt q fun updates performed: {self._n_tqfun_updates[self._log_it_counter].item()}/{self._n_tqf_updates_to_be_done}\n" + \ + f"experience to policy grad ratio: {experience_to_policy_grad_ratio}\n" + \ + f"experience to q fun grad ratio: {experience_to_qfun_grad_ratio}\n" + \ + f"experience to trgt q fun grad ratio: {experience_to_tqfun_grad_ratio}\n"+ \ + f"Warmstart completed: {self._vec_transition_counter > self._warmstart_vectimesteps or self._eval} ; ({self._vec_transition_counter}/{self._warmstart_vectimesteps})\n" +\ + f"Replay buffer full: {self._replay_bf_full}; current position {self._bpos}/{self._replay_buffer_size_vec}\n" +\ + f"Validation buffer full: {self._validation_bf_full}; current position {self._bpos_val}/{self._validation_buffer_size_vec}\n" +\ + f"Elapsed time: {self._elapsed_min[self._log_it_counter].item()/60.0} h\n" + \ + f"Estimated remaining training time: " + \ + f"{est_remaining_time_h} h\n" + \ + f"Total reward episodic data --> \n" + \ + f"max: {self._tot_rew_max_over_envs[self._log_it_counter, :, :].item()}\n" + \ + f"avg: {self._tot_rew_avrg_over_envs[self._log_it_counter, :, :].item()}\n" + \ + f"min: {self._tot_rew_min_over_envs[self._log_it_counter, :, :].item()}\n" + \ + f"Episodic sub-rewards episodic data --> \nsub rewards names: {self._reward_names_str}\n" + \ + f"max: {self._sub_rew_max_over_envs[self._log_it_counter, :]}\n" + \ + f"min: {self._sub_rew_min_over_envs[self._log_it_counter, :]}\n" + \ + f"avg: {self._sub_rew_avrg_over_envs[self._log_it_counter, :]}\n" + \ + f"std: {self._sub_rew_std_over_envs[self._log_it_counter, :]}\n" + \ + f"N. of episodes on which episodic rew stats are computed: {self._n_of_played_episodes[self._log_it_counter].item()}\n" + \ + f"Current env. 
step sps: {self._env_step_fps[self._log_it_counter].item()}, time for experience collection {self._collection_dt[self._log_it_counter].item()} s\n" + \ + f"Current env (sub-stepping) rt factor: {self._env_step_rt_factor[self._log_it_counter].item()}\n" + \ + f"Current policy update fps: {self._policy_update_fps[self._log_it_counter].item()}, time for policy updates {self._policy_update_dt[self._log_it_counter].item()} s\n" + \ + f"Time spent updating batch normalizations {self._batch_norm_update_dt[self._log_it_counter].item()} s\n" + \ + f"Time spent for computing validation {self._validation_dt[self._log_it_counter].item()} s\n" + \ + f"Demo envs are active: {self._demo_envs_active[self._log_it_counter].item()}. N demo envs if active {self._env.n_demo_envs()}\n" + \ + f"Performance metric now: {self._demo_perf_metric[self._log_it_counter].item()}\n" + \ + f"Entropy (disc): current {float(self._policy_entropy_disc_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_disc:.4f}\n" + \ + f"Entropy (cont): current {float(self._policy_entropy_cont_mean[self._log_it_counter, 0]):.4f}/{self._target_entropy_cont:.4f}\n" + if self._use_rnd: + info = info + f"N. 
rnd updates performed: {self._n_rnd_updates[self._log_it_counter].item()}\n" + + Journal.log(self.__class__.__name__, + "_post_step", + info, + LogType.INFO, + throw_when_excep = True) + + def _add_experience(self, + obs: torch.Tensor, actions: torch.Tensor, rewards: torch.Tensor, + next_obs: torch.Tensor, + next_terminal: torch.Tensor) -> None: + + if self._validate and \ + (self._vec_transition_counter % self._validation_collection_vecfreq == 0): + # fill validation buffer + + self._obs_val[self._bpos_val] = obs + self._next_obs_val[self._bpos_val] = next_obs + self._actions_val[self._bpos_val] = actions + self._rewards_val[self._bpos_val] = rewards + self._next_terminal_val[self._bpos_val] = next_terminal + + self._bpos_val += 1 + if self._bpos_val == self._validation_buffer_size_vec: + self._validation_bf_full = True + self._bpos_val = 0 + + else: # fill normal replay buffer + self._obs[self._bpos] = obs + self._next_obs[self._bpos] = next_obs + self._actions[self._bpos] = actions + self._rewards[self._bpos] = rewards + self._next_terminal[self._bpos] = next_terminal + + self._bpos += 1 + if self._bpos == self._replay_buffer_size_vec: + self._replay_bf_full = True + self._bpos = 0 + + def _sample(self, size: int = None): + + if size is None: + size=self._batch_size + + batched_obs = self._obs.view((-1, self._env.obs_dim())) + batched_next_obs = self._next_obs.view((-1, self._env.obs_dim())) + batched_actions = self._actions.view((-1, self._env.actions_dim())) + batched_rewards = self._rewards.view(-1) + batched_terminal = self._next_terminal.view(-1) + + # sampling from the batched buffer + up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs + shuffled_buffer_idxs = torch.randint(0, up_to, + (size,)) + + sampled_obs = batched_obs[shuffled_buffer_idxs] + sampled_next_obs = batched_next_obs[shuffled_buffer_idxs] + sampled_actions = batched_actions[shuffled_buffer_idxs] + sampled_rewards = batched_rewards[shuffled_buffer_idxs] + 
sampled_terminal = batched_terminal[shuffled_buffer_idxs] + + return sampled_obs, sampled_actions,\ + sampled_next_obs,\ + sampled_rewards, \ + sampled_terminal + + def _sample_validation(self, size: int = None): + + if size is None: + size=self._validation_batch_size + + batched_obs = self._obs_val.view((-1, self._env.obs_dim())) + batched_next_obs = self._next_obs_val.view((-1, self._env.obs_dim())) + batched_actions = self._actions_val.view((-1, self._env.actions_dim())) + batched_rewards = self._rewards_val.view(-1) + batched_terminal = self._next_terminal_val.view(-1) + + # sampling from the batched buffer + up_to = self._validation_buffer_size if self._validation_bf_full else self._bpos_val*self._num_envs + shuffled_buffer_idxs = torch.randint(0, up_to, + (size,)) + + sampled_obs = batched_obs[shuffled_buffer_idxs] + sampled_next_obs = batched_next_obs[shuffled_buffer_idxs] + sampled_actions = batched_actions[shuffled_buffer_idxs] + sampled_rewards = batched_rewards[shuffled_buffer_idxs] + sampled_terminal = batched_terminal[shuffled_buffer_idxs] + + return sampled_obs, sampled_actions,\ + sampled_next_obs,\ + sampled_rewards, \ + sampled_terminal + + def _sample_random_actions(self): + + self._random_uniform.uniform_(-1,1) + random_actions = self._random_uniform + + return random_actions + + def _perturb_some_actions(self, + actions: torch.Tensor): + + if self._is_continuous_actions_bool.any(): # if there are any continuous actions + self._perturb_actions(actions, + action_idxs=self._is_continuous_actions, + env_idxs=self._expl_env_selector.to(actions.device), + normal=True, # use normal for continuous + scaling=self._continuous_act_expl_noise_std) + if self._is_discrete_actions_bool.any(): # actions to be treated as discrete + self._perturb_actions(actions, + action_idxs=self._is_discrete_actions, + env_idxs=self._expl_env_selector.to(actions.device), + normal=False, # use uniform distr for discrete + scaling=self._discrete_act_expl_noise_std) + 
self._pert_counter+=1 + if self._pert_counter >= self._noise_duration_vec: + self._pert_counter=0 + + def _perturb_actions(self, + actions: torch.Tensor, + action_idxs: torch.Tensor, + env_idxs: torch.Tensor, + normal: bool = True, + scaling: float = 1.0): + if normal: # gaussian + # not super efficient (in theory random_normal can be made smaller in size) + self._random_normal.normal_(mean=0, std=1) + noise=self._random_normal + else: # uniform + self._random_uniform.uniform_(-1,1) + noise=self._random_uniform + + env_indices = env_idxs.reshape(-1,1) # Get indices of True environments + action_indices = action_idxs.reshape(1,-1) # Get indices of True actions + action_indices_flat=action_indices.flatten() + + perturbation=noise[env_indices, action_indices]*scaling + perturbed_actions=actions[env_indices, action_indices]+perturbation + perturbed_actions.clamp_(-1.0, 1.0) # enforce normalized bounds + + actions[env_indices, action_indices]=\ + perturbed_actions + + def _update_batch_norm(self, bsize: int = None): + + if bsize is None: + bsize=self._batch_size # same used for training + + up_to = self._replay_buffer_size if self._replay_bf_full else self._bpos*self._num_envs + shuffled_buffer_idxs = torch.randint(0, up_to, + (bsize,)) + + # update obs normalization + # (we should sample also next obs, but if most of the transitions are not terminal, + # this is not an issue and is more efficient) + if (self._agent.obs_running_norm is not None) and \ + (not self._eval): + batched_obs = self._obs.view((-1, self._obs_dim)) + sampled_obs = batched_obs[shuffled_buffer_idxs] + self._agent.update_obs_bnorm(x=sampled_obs) + + if self._use_rnd: # update running norm for RND + batched_obs = self._obs.view((-1, self._obs_dim)) + batched_actions = self._actions.view((-1, self._actions_dim)) + sampled_obs = batched_obs[shuffled_buffer_idxs] + sampled_actions = batched_actions[shuffled_buffer_idxs] + torch.cat(tensors=(sampled_obs, sampled_actions), dim=1, + 
out=self._rnd_bnorm_input) + self._rnd_net.update_input_bnorm(x=self._rnd_bnorm_input) + + # update running norm on rewards also + # if self._reward_normalizer is not None: + # batched_rew = self._rewards.view(-1) + # sampled_rew = batched_rew[shuffled_buffer_idxs] + # self._reward_normalizer.manual_stat_update(x=sampled_rew) + + def _reset_agent(self): + # not super efficient, but effective -> + # brand new agent, brand new optimizers + self._agent.reset() + # forcing deallocation of previous optimizers + import gc + del self._qf_optimizer + del self._actor_optimizer + if self._autotune: + del self._a_optimizer_disc + del self._a_optimizer_cont + del self._log_alpha_disc + del self._log_alpha_cont + gc.collect() + self._init_agent_optimizers() + if self._autotune: # also reinitialize alpha optimization + self._init_alpha_autotuning() + + self._overfit_idx=0.0 + + def _switch_training_mode(self, + train: bool = True): + + self._agent.train(train) + + def _init_algo_shared_data(self, + static_params: Dict): + + self._shared_algo_data = SharedRLAlgorithmInfo(namespace=self._ns, + is_server=True, + static_params=static_params, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + + self._shared_algo_data.run() + + # write some initializations + self._shared_algo_data.write(dyn_info_name=["is_done"], + val=[0.0]) + + # only written to if flags where enabled + self._qf_vals=QfVal(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._qf_vals.run() + self._qf_trgt=QfTrgt(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._qf_trgt.run() + + # episodic returns + reward_names=self._episodic_reward_metrics.data_names() + self._sub_returns=SubReturns(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + 
n_rewards=len(reward_names), + reward_names=reward_names, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._sub_returns.run() + + self._tot_returns=TotReturns(namespace=self._ns, + is_server=True, + n_envs=self._num_envs, + verbose=self._verbose, + vlevel=VLevel.V2, + safe=False, + force_reconnection=True) + self._tot_returns.run() diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/training_env_base.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/training_env_base.py new file mode 100644 index 0000000000000000000000000000000000000000..a1d82cd55b606a8fe783803efcb26726301c1693 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/training_env_base.py @@ -0,0 +1,2002 @@ +import torch +import math +from aug_mpc.utils.math_utils import quaternion_to_angular_velocity, quaternion_difference + +from mpc_hive.utilities.shared_data.rhc_data import RobotState +from mpc_hive.utilities.shared_data.rhc_data import RhcCmds, RhcPred +from mpc_hive.utilities.shared_data.rhc_data import RhcRefs +from mpc_hive.utilities.shared_data.rhc_data import RhcStatus +from mpc_hive.utilities.shared_data.sim_data import SharedEnvInfo + +from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperSrvr +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetSrvr +from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest + +from aug_mpc.utils.shared_data.agent_refs import AgentRefs +from aug_mpc.utils.shared_data.training_env import SharedTrainingEnvInfo + +from aug_mpc.utils.shared_data.training_env import Observations, NextObservations +from aug_mpc.utils.shared_data.training_env import TotRewards +from aug_mpc.utils.shared_data.training_env import SubRewards +from aug_mpc.utils.shared_data.training_env import Actions +from aug_mpc.utils.shared_data.training_env import 
Terminations, SubTerminations +from aug_mpc.utils.shared_data.training_env import Truncations, SubTruncations +from aug_mpc.utils.shared_data.training_env import EpisodesCounter,TaskRandCounter,SafetyRandResetsCounter,RandomTruncCounter,SubStepAbsCounter + +from aug_mpc.utils.episodic_rewards import EpisodicRewards +from aug_mpc.utils.episodic_data import EpisodicData +from aug_mpc.utils.episodic_data import MemBuffer +from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother +from aug_mpc.utils.math_utils import check_capsize + +from mpc_hive.utilities.math_utils_torch import world2base_frame + +from EigenIPC.PyEigenIPC import VLevel +from EigenIPC.PyEigenIPC import LogType +from EigenIPC.PyEigenIPC import Journal +from EigenIPC.PyEigenIPC import StringTensorClient + +from perf_sleep.pyperfsleep import PerfSleep + +from abc import abstractmethod, ABC + +import os +from typing import List, Dict + +class AugMPCTrainingEnvBase(ABC): + + """Base class for a remote training environment tailored to Learning-based Receding Horizon Control""" + + def __init__(self, + namespace: str, + obs_dim: int, + actions_dim: int, + env_name: str = "", + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + debug: bool = True, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + self._this_path = os.path.abspath(__file__) + + self.custom_db_data = None + + self._random_reset_active=False + + self._action_smoother_continuous=None + self._action_smoother_discrete=None + + self._closed = False + self._ready=False + + self._namespace = namespace + self._with_gpu_mirror = True + self._safe_shared_mem = False + + self._obs_dim = obs_dim + self._actions_dim = actions_dim + + self._use_gpu = use_gpu + if self._use_gpu: + self._device="cuda" + else: + self._device="cpu" + + self._dtype = dtype + + self._verbose = verbose + self._vlevel = vlevel + + self._is_debug = debug + + 
self._env_name = env_name + + self._override_agent_refs = override_agent_refs + + self._substep_dt=1.0 # dt [s] between each substep + + self._env_opts={} + self._env_opts.update(env_opts) + self._process_env_opts() + + self._robot_state = None + self._rhc_cmds = None + self._rhc_pred = None + self._rhc_refs = None + self._rhc_status = None + + self._remote_stepper = None + self._remote_resetter = None + self._remote_reset_req = None + + self._agent_refs = None + + self._n_envs = 0 + + self._ep_timeout_counter = None + self._task_rand_counter = None + self._rand_safety_reset_counter = None + self._rand_trunc_counter = None + + self._actions_map={} # to be used to hold info like action idxs + self._obs_map={} + + self._obs = None + self._obs_ub = None + self._obs_lb = None + self._next_obs = None + self._actions = None + self._actual_actions = None + self._actions_ub = None + self._actions_lb = None + self._tot_rewards = None + self._sub_rewards = None + self._sub_terminations = None + self._sub_truncations = None + self._terminations = None + self._truncations = None + self._act_mem_buffer = None + + self._episodic_rewards_metrics = None + + self._timeout = timeout_ms + + self._height_grid_size = None + self._height_flat_dim = 0 + + self._attach_to_shared_mem() + + self._init_obs(obs_dim) + self._init_actions(actions_dim) + self._init_rewards() + self._init_terminations() + self._init_truncations() + self._init_custom_db_data() + + self._demo_setup() # setup for demo envs + + # to ensure maps are properly initialized + _ = self._get_action_names() + _ = self._get_obs_names() + _ = self._get_sub_trunc_names() + _ = self._get_sub_term_names() + + self._set_substep_rew() + self._set_substep_obs() + + self._custom_post_init() + + # update actions scale and offset in case it was modified in _custom_post_init + self._actions_scale = (self._actions_ub - self._actions_lb)/2.0 + self._actions_offset = (self._actions_ub + self._actions_lb)/2.0 + + if 
self._env_opts["use_action_smoothing"]: + self._init_action_smoothing() + + self._ready=self._init_step(reset_on_init=self._env_opts["reset_on_init"]) + + def _add_env_opt(self, + opts: Dict, + name: str, + default): + + if not name in opts: + opts[name]=default + + def _process_env_opts(self, ): + + self._check_for_env_opts("episode_timeout_lb", int) + self._check_for_env_opts("episode_timeout_ub", int) + self._check_for_env_opts("n_steps_task_rand_lb", int) + self._check_for_env_opts("n_steps_task_rand_ub", int) + self._check_for_env_opts("use_random_trunc", bool) + self._check_for_env_opts("random_trunc_freq", int) + self._check_for_env_opts("random_trunc_freq_delta", int) + self._check_for_env_opts("use_random_safety_reset", bool) + self._check_for_env_opts("random_reset_freq", int) + + self._check_for_env_opts("action_repeat", int) + + self._check_for_env_opts("n_preinit_steps", int) + + self._check_for_env_opts("demo_envs_perc", float) + + self._check_for_env_opts("vec_ep_freq_metrics_db", int) + + self._check_for_env_opts("srew_drescaling", bool) + + self._check_for_env_opts("use_action_history", bool) + self._check_for_env_opts("actions_history_size", int) + + self._check_for_env_opts("use_action_smoothing", bool) + self._check_for_env_opts("smoothing_horizon_c", float) + self._check_for_env_opts("smoothing_horizon_d", float) + + self._check_for_env_opts("add_heightmap_obs", bool) + + self._check_for_env_opts("reset_on_init", bool) + + # parse action repeat opt + get some sim information + if self._env_opts["action_repeat"] <=0: + self._env_opts["action_repeat"] = 1 + self._action_repeat=self._env_opts["action_repeat"] + # parse remote sim info + sim_info = {} + sim_info_shared = SharedEnvInfo(namespace=self._namespace, + is_server=False, + safe=False, + verbose=self._verbose, + vlevel=self._vlevel) + sim_info_shared.run() + sim_info_keys = sim_info_shared.param_keys + sim_info_data = sim_info_shared.get().flatten() + for i in range(len(sim_info_keys)): + 
sim_info[sim_info_keys[i]] = sim_info_data[i] + if "substepping_dt" in sim_info_keys: + self._substep_dt=sim_info["substepping_dt"] + self._env_opts.update(sim_info) + + self._env_opts["substep_dt"]=self._substep_dt + + self._env_opts["override_agent_refs"]=self._override_agent_refs + + self._env_opts["episode_timeout_lb"] = round(self._env_opts["episode_timeout_lb"]/self._action_repeat) + self._env_opts["episode_timeout_ub"] = round(self._env_opts["episode_timeout_ub"]/self._action_repeat) + + self._env_opts["n_steps_task_rand_lb"] = round(self._env_opts["n_steps_task_rand_lb"]/self._action_repeat) + self._env_opts["n_steps_task_rand_ub"] = round(self._env_opts["n_steps_task_rand_ub"]/self._action_repeat) + + if self._env_opts["random_reset_freq"] <=0: + self._env_opts["use_random_safety_reset"]=False + self._env_opts["random_reset_freq"]=-1 + self._random_reset_active=self._env_opts["use_random_safety_reset"] + + self._env_opts["random_trunc_freq"] = round(self._env_opts["random_trunc_freq"]/self._action_repeat) + self._env_opts["random_trunc_freq_delta"] = round(self._env_opts["random_trunc_freq_delta"]/self._action_repeat) + + if self._env_opts["random_trunc_freq"] <=0: + self._env_opts["use_random_trunc"]=False + self._env_opts["random_trunc_freq"]=-1 + + self._full_db=False + if "full_env_db" in self._env_opts: + self._full_db=self._env_opts["full_env_db"] + + def _check_for_env_opts(self, + name: str, + expected_type): + if not (name in self._env_opts): + Journal.log(self.__class__.__name__, + "_check_for_env_opts", + f"Required option {name} missing for env opts!", + LogType.EXCEP, + throw_when_excep=True) + if not isinstance(self._env_opts[name], expected_type): + Journal.log(self.__class__.__name__, + "_check_for_env_opts", + f"Option {name} in env opts is not of type {expected_type} (got {type(self._env_opts[name])})!", + LogType.EXCEP, + throw_when_excep=True) + + def __del__(self): + + self.close() + + def _demo_setup(self): + + 
self._demo_envs_idxs=None + self._demo_envs_idxs_bool=None + self._n_demo_envs=round(self._env_opts["demo_envs_perc"]*self._n_envs) + self._add_demos=False + if not self._n_demo_envs >0: + Journal.log(self.__class__.__name__, + "__init__", + "will not use demo environments", + LogType.INFO, + throw_when_excep=False) + else: + Journal.log(self.__class__.__name__, + "__init__", + f"Will run with {self._n_demo_envs} demonstration envs.", + LogType.INFO) + self._demo_envs_idxs = torch.randperm(self._n_envs, device=self._device)[:self._n_demo_envs] + self._demo_envs_idxs_bool=torch.full((self._n_envs, ), dtype=torch.bool, device=self._device, + fill_value=False) + self._demo_envs_idxs_bool[self._demo_envs_idxs]=True + + self._init_demo_envs() # custom logic + + demo_idxs_str=", ".join(map(str, self._demo_envs_idxs.tolist())) + Journal.log(self.__class__.__name__, + "__init__", + f"Demo env. indexes are [{demo_idxs_str}]", + LogType.INFO) + + def env_opts(self): + return self._env_opts + + def demo_env_idxs(self, get_bool: bool=False): + if get_bool: + return self._demo_envs_idxs_bool + else: + return self._demo_envs_idxs + + def _init_demo_envs(self): + pass + + def n_demo_envs(self): + return self._n_demo_envs + + def demo_active(self): + return self._add_demos + + def switch_demo(self, active: bool = False): + if self._demo_envs_idxs is not None: + self._add_demos=active + else: + Journal.log(self.__class__.__name__, + "switch_demo", + f"Cannot switch demostrations on. 
No demo envs available!", + LogType.EXCEP, + throw_when_excep=True) + + def _get_this_file_path(self): + return self._this_path + + def episode_timeout_bounds(self): + return self._env_opts["episode_timeout_lb"], self._env_opts["episode_timeout_ub"] + + def task_rand_timeout_bounds(self): + return self._env_opts["n_steps_task_rand_lb"], self._env_opts["n_steps_task_rand_ub"] + + def n_action_reps(self): + return self._action_repeat + + def get_file_paths(self): + from aug_mpc.utils.sys_utils import PathsGetter + path_getter = PathsGetter() + base_paths = [] + base_paths.append(self._get_this_file_path()) + base_paths.append(path_getter.REMOTENVPATH) + for script_path in path_getter.SCRIPTSPATHS: + base_paths.append(script_path) + + # rhc files + from EigenIPC.PyEigenIPC import StringTensorClient + from perf_sleep.pyperfsleep import PerfSleep + shared_rhc_shared_files = StringTensorClient( + basename="SharedRhcFilesDropDir", + name_space=self._namespace, + verbose=self._verbose, + vlevel=VLevel.V2) + shared_rhc_shared_files.run() + shared_rhc_files_vals=[""]*shared_rhc_shared_files.length() + while not shared_rhc_shared_files.read_vec(shared_rhc_files_vals, 0): + nsecs = 1000000000 # 1 sec + PerfSleep.thread_sleep(nsecs) # we just keep it alive + rhc_list=[] + for rhc_files in shared_rhc_files_vals: + file_list = rhc_files.split(", ") + rhc_list.extend(file_list) + rhc_list = list(set(rhc_list)) # removing duplicates + base_paths.extend(rhc_list) + + # world interface files + get_world_interface_paths = self.get_world_interface_paths() + base_paths.extend(get_world_interface_paths) + return base_paths + + def get_world_interface_paths(self): + paths = [] + shared_world_iface_files = StringTensorClient( + basename="SharedWorldInterfaceFilesDropDir", + name_space=self._namespace, + verbose=self._verbose, + vlevel=VLevel.V2) + shared_world_iface_files.run() + world_iface_vals=[""]*shared_world_iface_files.length() + while not 
shared_world_iface_files.read_vec(world_iface_vals, 0): + nsecs = 1000000000 # 1 sec + PerfSleep.thread_sleep(nsecs) # keep alive while waiting + shared_world_iface_files.close() + for files in world_iface_vals: + if files == "": + continue + file_list = files.split(", ") + for f in file_list: + if f not in paths: + paths.append(f) + return paths + + def get_aux_dir(self): + empty_list = [] + return empty_list + + def _init_step(self, reset_on_init: bool = True): + + self._check_controllers_registered(retry=True) + self._activate_rhc_controllers() + + # just an auxiliary tensor + initial_reset_aux = self._terminations.get_torch_mirror(gpu=self._use_gpu).clone() + initial_reset_aux[:, :] = reset_on_init # we reset all sim envs first + init_step_ok=True + init_step_ok=self._remote_sim_step() and init_step_ok + if not init_step_ok: + return False + init_step_ok=self._remote_reset(reset_mask=initial_reset_aux) and init_step_ok + if not init_step_ok: + return False + + for i in range(self._env_opts["n_preinit_steps"]): # perform some + # dummy remote env stepping to make sure to have meaningful + # initializations (doesn't increment step counter) + init_step_ok=self._remote_sim_step() and init_step_ok # 1 remote sim. 
step + if not init_step_ok: + return False + init_step_ok=self._send_remote_reset_req() and init_step_ok # fake reset request + if not init_step_ok: + return False + + self.reset() + + return init_step_ok + + def _debug(self): + + if self._use_gpu: + # using non_blocking which is not safe when GPU->CPU + self._obs.synch_mirror(from_gpu=True,non_blocking=True) # copy data from gpu to cpu view + self._next_obs.synch_mirror(from_gpu=True,non_blocking=True) + self._actions.synch_mirror(from_gpu=True,non_blocking=True) + self._truncations.synch_mirror(from_gpu=True,non_blocking=True) + self._sub_truncations.synch_mirror(from_gpu=True,non_blocking=True) + self._terminations.synch_mirror(from_gpu=True,non_blocking=True) + self._sub_terminations.synch_mirror(from_gpu=True,non_blocking=True) + self._tot_rewards.synch_mirror(from_gpu=True,non_blocking=True) + self._sub_rewards.synch_mirror(from_gpu=True,non_blocking=True) + # if we want reliable db data then we should synchronize data streams + torch.cuda.synchronize() + + # copy CPU view on shared memory + self._obs.synch_all(read=False, retry=True) + self._next_obs.synch_all(read=False, retry=True) + self._actions.synch_all(read=False, retry=True) + self._tot_rewards.synch_all(read=False, retry=True) + self._sub_rewards.synch_all(read=False, retry=True) + self._truncations.synch_all(read=False, retry = True) + self._sub_truncations.synch_all(read=False, retry = True) + self._terminations.synch_all(read=False, retry = True) + self._sub_terminations.synch_all(read=False, retry = True) + + self._debug_agent_refs() + + def _debug_agent_refs(self): + if self._use_gpu: + if not self._override_agent_refs: + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) + if not self._override_agent_refs: + self._agent_refs.rob_refs.root_state.synch_all(read=False, retry = True) + + def _remote_sim_step(self): + + self._remote_stepper.trigger() # triggers simulation + RHC + if not 
self._remote_stepper.wait_ack_from(1, self._timeout): + Journal.log(self.__class__.__name__, + "_remote_sim_step", + "Remote sim. env step ack not received within timeout", + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def _remote_reset(self, + reset_mask: torch.Tensor = None): + + reset_reqs = self._remote_reset_req.get_torch_mirror() + if reset_mask is None: # just send the signal to allow stepping, but do not reset any of + # the remote envs + reset_reqs[:, :] = False + else: + reset_reqs[:, :] = reset_mask # remotely reset envs corresponding to + # the mask (True--> to be reset) + self._remote_reset_req.synch_all(read=False, retry=True) # write on shared buffer + remote_reset_ok = self._send_remote_reset_req() # process remote request + + if reset_mask is not None: + self._synch_state(gpu=self._use_gpu) # if some env was reset, we use _obs + # to hold the states, including resets, while _next_obs will always hold the + # state right after stepping the sim env + # (could be a bit more efficient, since in theory we only need to read the envs + # corresponding to the reset_mask) + + + return remote_reset_ok + + def _send_remote_reset_req(self): + + self._remote_resetter.trigger() + if not self._remote_resetter.wait_ack_from(1, self._timeout): # remote reset completed + Journal.log(self.__class__.__name__, + "_post_step", + "Remote reset did not complete within the prescribed timeout!", + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def step(self, + action): + + actions_norm = action.detach() # IMPORTANT: assumes actions are already normalized in [-1, 1] + + actions = self._actions.get_torch_mirror(gpu=self._use_gpu) # will hold agent actions (real range) + + # scale normalized actions to physical space before interfacing with controllers + actions[:, :] = actions_norm*self._actions_scale + self._actions_offset + + self._override_actions_with_demo() # if necessary override some actions with expert 
demonstrations + # (getting actions with get_actions will return the modified actions tensor) + + actions.clamp_(self._actions_lb, self._actions_ub) # just to be safe + + if self._act_mem_buffer is not None: # store norm actions in memory buffer + self._act_mem_buffer.update(new_data=actions_norm) + + if self._env_opts["use_action_smoothing"]: + self._apply_actions_smoothing() # smooth actions if enabled (the tensor returned by + # get_actions does not contain smoothing and can be safely employed for experience collection) + + self._apply_actions_to_rhc() # apply last agent actions to rhc controller + + stepping_ok = True + tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu) + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu) + tot_rewards.zero_() + sub_rewards.zero_() + self._substep_rewards.zero_() + next_obs.zero_() # necessary for substep obs + + for i in range(0, self._action_repeat): + + self._pre_substep() # custom logic @ substep freq + + stepping_ok = stepping_ok and self._check_controllers_registered(retry=False) # does not make sense to run training + # if we lost some controllers + stepping_ok = stepping_ok and self._remote_sim_step() # blocking, + + # no sim substepping is allowed to fail + self._synch_state(gpu=self._use_gpu) # read state from shared mem (done in substeps also, + # since substeps rewards will need updated substep obs) + + self._custom_post_substp_pre_rew() # custom substepping logic + self._compute_substep_rewards() + self._assemble_substep_rewards() # includes rewards clipping + self._custom_post_substp_post_rew() # custom substepping logic + + # fill substep obs + self._fill_substep_obs(self._substep_obs) + self._assemble_substep_obs() + if not i==(self._action_repeat-1): + # sends reset signal to complete remote step sequence, + # but does not reset any remote env + stepping_ok = stepping_ok and self._remote_reset(reset_mask=None) + 
else: # last substep + + self._fill_step_obs(next_obs) # update next obs + self._clamp_obs(next_obs) # good practice + obs = self._obs.get_torch_mirror(gpu=self._use_gpu) + obs[:, :] = next_obs # start from next observation, unless reset (handled in post_step()) + + self._compute_step_rewards() # implemented by child + + tot_rewards = self._tot_rewards.get_torch_mirror(gpu=self._use_gpu) + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + self._clamp_rewards(sub_rewards) # clamp all sub rewards + + tot_rewards[:, :] = torch.sum(sub_rewards, dim=1, keepdim=True) + + scale=1 # scale tot rew by the number of action repeats + if self._env_opts["srew_drescaling"]: # scale rewards depending on the n of subrewards + scale*=sub_rewards.shape[1] # n. dims rescaling + tot_rewards.mul_(1/scale) + + self._substep_abs_counter.increment() # @ substep freq + + if not stepping_ok: + return False + + stepping_ok = stepping_ok and self._post_step() # post sub-stepping operations + # (if action_repeat > 1, then just the db data at the last substep is logged) + # also, if a reset of an env occurs, obs will hold the reset state + + return stepping_ok + + def _post_step(self): + + # first increment counters + self._ep_timeout_counter.increment() # episode timeout + self._task_rand_counter.increment() # task randomization + if self._rand_trunc_counter is not None: # random truncations (for removing temp. 
correlations) + self._rand_trunc_counter.increment() + + # check truncation and termination conditions + self._check_truncations() # defined in child env + self._check_terminations() + terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu) + truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu) + ignore_ep_end=None + if self._rand_trunc_counter is not None: + ignore_ep_end=self._rand_trunc_counter.time_limits_reached() + if self._use_gpu: + ignore_ep_end=ignore_ep_end.cuda() + + truncated = torch.logical_or(truncated, + ignore_ep_end) # add truncation (sub truncations defined in child env + # remain untouched) + + episode_finished = torch.logical_or(terminated, + truncated) + episode_finished_cpu = episode_finished.cpu() + + if self._rand_safety_reset_counter is not None and self._random_reset_active: + self._rand_safety_reset_counter.increment(to_be_incremented=episode_finished_cpu.flatten()) + # truncated[:,:] = torch.logical_or(truncated, + # self._rand_safety_reset_counter.time_limits_reached().cuda()) + + if self._act_mem_buffer is not None: + self._act_mem_buffer.reset(to_be_reset=episode_finished.flatten(), + init_data=self._normalize_actions(self.default_action)) + + if self._action_smoother_continuous is not None: + self._action_smoother_continuous.reset(to_be_reset=episode_finished.flatten(), + reset_val=self.default_action[:, self._is_continuous_actions]) + if self._action_smoother_discrete is not None: + self._action_smoother_discrete.reset(to_be_reset=episode_finished.flatten(), + reset_val=self.default_action[:, ~self._is_continuous_actions]) + + # debug step if required (IMPORTANT: must be before remote reset so that we always db + # actual data from the step and not after reset) + if self._is_debug: + self._debug() # copies db data on shared memory + ignore_ep_end_cpu=ignore_ep_end if not self._use_gpu else ignore_ep_end.cpu() + self._update_custom_db_data(episode_finished=episode_finished_cpu, + 
ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc + ) + self._episodic_rewards_metrics.update(rewards = self._sub_rewards.get_torch_mirror(gpu=False), + ep_finished=episode_finished_cpu, + ignore_ep_end=ignore_ep_end_cpu # ignore data if random trunc + ) + + # remotely reset envs + to_be_reset=self._to_be_reset() + to_be_reset_custom=self._custom_reset() + if to_be_reset_custom is not None: + to_be_reset[:, :] = torch.logical_or(to_be_reset,to_be_reset_custom) + rm_reset_ok = self._remote_reset(reset_mask=to_be_reset) + + self._custom_post_step(episode_finished=episode_finished) # any additional logic from child env + # here, before actual reset taskes place (at this point the state is the reset one) + + # updating also prev pos and orientation in case some env was reset + self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + obs = self._obs.get_torch_mirror(gpu=self._use_gpu) + self._fill_step_obs(obs) + self._clamp_obs(obs) + + # updating prev step quantities + self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + # synchronize and reset counters for finished episodes + self._ep_timeout_counter.reset(to_be_reset=episode_finished) + self._task_rand_counter.reset(to_be_reset=episode_finished) + self._substep_abs_counter.reset(to_be_reset=torch.logical_or(terminated,to_be_reset), + randomize_offsets=True # otherwise timers across envs would be strongly correlated + ) # reset only if resetting environment or if terminal + + if self._rand_trunc_counter is not None: + # only reset when safety truncation was is triggered + self._rand_trunc_counter.reset(to_be_reset=self._rand_trunc_counter.time_limits_reached(), + randomize_limits=True, # we need to randomize otherwise the 
other counters will synchronize + # with the episode counters + randomize_offsets=False # always restart at 0 + ) + # safety reset counter is only when it reches its reset interval (just to keep + # the counter bounded) + if self._rand_safety_reset_counter is not None and self._random_reset_active: + self._rand_safety_reset_counter.reset(to_be_reset=self._rand_safety_reset_counter.time_limits_reached()) + + return rm_reset_ok + + def _to_be_reset(self): + # always reset if a termination occurred or if there's a random safety reset + # request + terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu) + to_be_reset=terminated.clone() + if (self._rand_safety_reset_counter is not None) and self._random_reset_active: + to_be_reset=torch.logical_or(to_be_reset, + self._rand_safety_reset_counter.time_limits_reached()) + + return to_be_reset + + def _custom_reset(self): + # can be overridden by child + return None + + def _apply_actions_smoothing(self): + + actions = self._actions.get_torch_mirror(gpu=self._use_gpu) + actual_actions=self.get_actual_actions() # will write smoothed actions here + if self._action_smoother_continuous is not None: + self._action_smoother_continuous.update(new_signal= + actions[:, self._is_continuous_actions]) + actual_actions[:, self._is_continuous_actions]=self._action_smoother_continuous.get() + if self._action_smoother_discrete is not None: + self._action_smoother_discrete.update(new_signal= + actions[:, ~self._is_continuous_actions]) + actual_actions[:, ~self._is_continuous_actions]=self._action_smoother_discrete.get() + + def _update_custom_db_data(self, + episode_finished, + ignore_ep_end): + + # update defaults + self.custom_db_data["RhcRefsFlag"].update(new_data=self._rhc_refs.contact_flags.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) # before potentially resetting the flags, get data + self.custom_db_data["Actions"].update(new_data=self._actions.get_torch_mirror(gpu=False), + 
ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["Obs"].update(new_data=self._obs.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + self.custom_db_data["SubTerminations"].update(new_data=self._sub_terminations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["SubTruncations"].update(new_data=self._sub_truncations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + self.custom_db_data["Terminations"].update(new_data=self._terminations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + self.custom_db_data["Truncations"].update(new_data=self._truncations.get_torch_mirror(gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + + + self._get_custom_db_data(episode_finished=episode_finished, ignore_ep_end=ignore_ep_end) + + def reset_custom_db_data(self, keep_track: bool = False): + # to be called periodically to reset custom db data stat. 
collection + for custom_db_data in self.custom_db_data.values(): + custom_db_data.reset(keep_track=keep_track) + + def _assemble_substep_rewards(self): + # by default assemble substep rewards by averaging + sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu) + + # average over substeps depending on scale + # sub_rewards[:, self._is_substep_rew] = sub_rewards[:, self._is_substep_rew] + \ + # self._substep_rewards[:, self._is_substep_rew]/self._action_repeat + sub_rewards[:, self._is_substep_rew] += self._substep_rewards[:, self._is_substep_rew]/self._action_repeat + + def _assemble_substep_obs(self): + next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu) + next_obs[:, self._is_substep_obs] += self._substep_obs[:, self._is_substep_obs]/self._action_repeat + + def randomize_task_refs(self, + env_indxs: torch.Tensor = None): + + if self._override_agent_refs: + self._override_refs(env_indxs=env_indxs) + else: + self._randomize_task_refs(env_indxs=env_indxs) + + def reset(self): + + self.randomize_task_refs(env_indxs=None) # randomize all refs across envs + + self._obs.reset() + self._actions.reset() + self._next_obs.reset() + self._sub_rewards.reset() + self._tot_rewards.reset() + self._terminations.reset() + self._sub_terminations.reset() + self._truncations.reset() + self._sub_truncations.reset() + + self._ep_timeout_counter.reset(randomize_offsets=True) + self._task_rand_counter.reset() + self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter) + if self._rand_safety_reset_counter is not None: + self._rand_safety_reset_counter.reset() + self._substep_abs_counter.reset() + + if self._act_mem_buffer is not None: + self._act_mem_buffer.reset_all(init_data=self._normalize_actions(self.default_action)) + + if self._action_smoother_continuous is not None: + self._action_smoother_continuous.reset(reset_val=self.default_action[:, self._is_continuous_actions]) + if self._action_smoother_discrete is not None: + 
self._action_smoother_discrete.reset(reset_val=self.default_action[:, ~self._is_continuous_actions]) + + self._synch_state(gpu=self._use_gpu) # read obs from shared mem + + # just calling custom post step to ensure tak refs are updated + terminated = self._terminations.get_torch_mirror(gpu=self._use_gpu) + truncated = self._truncations.get_torch_mirror(gpu=self._use_gpu) + episode_finished = torch.logical_or(terminated, + truncated) + self._custom_post_step(episode_finished=episode_finished) + + obs = self._obs.get_torch_mirror(gpu=self._use_gpu) + next_obs = self._next_obs.get_torch_mirror(gpu=self._use_gpu) + self._fill_step_obs(obs) # initialize observations + self._clamp_obs(obs) # to avoid bad things + self._fill_step_obs(next_obs) # and next obs + self._clamp_obs(next_obs) + + self.reset_custom_db_data(keep_track=False) + self._episodic_rewards_metrics.reset(keep_track=False) + + self._prev_root_p_step[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_step[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + self._prev_root_p_substep[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + self._prev_root_q_substep[:, :]=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + def is_ready(self): + return self._ready + + def close(self): + + if not self._closed: + + # close all shared mem. 
clients + self._robot_state.close() + self._rhc_cmds.close() + self._rhc_pred.close() + self._rhc_refs.close() + self._rhc_status.close() + + self._remote_stepper.close() + + self._ep_timeout_counter.close() + self._task_rand_counter.close() + if self._rand_safety_reset_counter is not None: + self._rand_safety_reset_counter.close() + + # closing env.-specific shared data + self._obs.close() + self._next_obs.close() + self._actions.close() + if self._actual_actions is not None: + self._actual_actions.close() + self._sub_rewards.close() + self._tot_rewards.close() + + self._terminations.close() + self._sub_terminations.close() + self._truncations.close() + self._sub_truncations.close() + + self._closed = True + + def get_obs(self, clone:bool=False): + if clone: + return self._obs.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._obs.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_next_obs(self, clone:bool=False): + if clone: + return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._next_obs.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_actions(self, clone:bool=False, normalized: bool = False): + actions = self._actions.get_torch_mirror(gpu=self._use_gpu).detach() + if normalized: + normalized_actions = self._normalize_actions(actions) + return normalized_actions.clone() if clone else normalized_actions + return actions.clone() if clone else actions + + def get_actual_actions(self, clone:bool=False, normalized: bool = False): + if self._env_opts["use_action_smoothing"]: + actions = self._actual_actions.get_torch_mirror(gpu=self._use_gpu).detach() + else: # actual action coincides with the one from the agent + possible modif. 
+ actions = self.get_actions(clone=False, normalized=False) + if normalized: + normalized_actions = self._normalize_actions(actions) + return normalized_actions.clone() if clone else normalized_actions + return actions.clone() if clone else actions + + def _normalize_actions(self, actions: torch.Tensor): + scale = torch.where(self._actions_scale == 0.0, + torch.ones_like(self._actions_scale), + self._actions_scale) + normalized = (actions - self._actions_offset)/scale + zero_scale_mask = torch.eq(self._actions_scale, 0.0).squeeze(0) + if torch.any(zero_scale_mask): + normalized[:, zero_scale_mask] = 0.0 + return normalized + + def get_rewards(self, clone:bool=False): + if clone: + return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._tot_rewards.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_terminations(self, clone:bool=False): + if clone: + return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._terminations.get_torch_mirror(gpu=self._use_gpu).detach() + + def get_truncations(self, clone:bool=False): + if clone: + return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach().clone() + else: + return self._truncations.get_torch_mirror(gpu=self._use_gpu).detach() + + def obs_dim(self): + + return self._obs_dim + + def actions_dim(self): + + return self._actions_dim + + def ep_rewards_metrics(self): + + return self._episodic_rewards_metrics + + def using_gpu(self): + + return self._use_gpu + + def name(self): + + return self._env_name + + def n_envs(self): + + return self._n_envs + + def dtype(self): + + return self._dtype + + def obs_names(self): + return self._get_obs_names() + + def action_names(self): + return self._get_action_names() + + def sub_rew_names(self): + return self._get_rewards_names() + + def sub_term_names(self): + return self._get_sub_term_names() + + def sub_trunc_names(self): + return self._get_sub_trunc_names() + + def 
_get_obs_names(self): + # to be overridden by child class + return None + + def get_robot_jnt_names(self): + return self._robot_state.jnt_names() + + def _get_action_names(self): + # to be overridden by child class + return None + + def _get_rewards_names(self): + # to be overridden by child class + return None + + def _get_sub_term_names(self): + # to be overridden by child class + sub_term_names = [] + sub_term_names.append("rhc_failure") + sub_term_names.append("robot_capsize") + sub_term_names.append("rhc_capsize") + + return sub_term_names + + def _get_sub_trunc_names(self): + # to be overridden by child class + sub_trunc_names = [] + sub_trunc_names.append("ep_timeout") + + return sub_trunc_names + + def _get_custom_db_data(self, episode_finished): + # to be overridden by child class + pass + + def set_observed_joints(self): + # ny default observe all joints available + return self._robot_state.jnt_names() + + def _set_jnts_blacklist_pattern(self): + self._jnt_q_blacklist_patterns=[] + + def get_observed_joints(self): + return self._observed_jnt_names + + def _init_obs(self, obs_dim: int): + + device = "cuda" if self._use_gpu else "cpu" + + obs_threshold_default = 1e3 + self._obs_threshold_lb = -obs_threshold_default # used for clipping observations + self._obs_threshold_ub = obs_threshold_default + + self._obs_ub = torch.full((1, obs_dim), dtype=self._dtype, device=device, + fill_value=1.0) + self._obs_lb = torch.full((1, obs_dim), dtype=self._dtype, device=device, + fill_value=-1.0) + self._obs_scale = (self._obs_ub - self._obs_lb)/2.0 + self._obs_offset = (self._obs_ub + self._obs_lb)/2.0 + + if not self._obs_dim==len(self._get_obs_names()): + error=f"obs dim {self._obs_dim} does not match obs names length {len(self._get_obs_names())}!!" 
+ Journal.log(self.__class__.__name__, + "_init_obs", + error, + LogType.EXCEP, + throw_when_excep = True) + + self._obs = Observations(namespace=self._namespace, + n_envs=self._n_envs, + obs_dim=self._obs_dim, + obs_names=self._get_obs_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._next_obs = NextObservations(namespace=self._namespace, + n_envs=self._n_envs, + obs_dim=self._obs_dim, + obs_names=self._get_obs_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._obs.run() + self._next_obs.run() + + self._is_substep_obs = torch.zeros((self.obs_dim(),), dtype=torch.bool, device=device) + self._is_substep_obs.fill_(False) # default to all step obs + + # not super memory efficient + self._substep_obs=torch.full_like(self._obs.get_torch_mirror(gpu=self._use_gpu), fill_value=0.0) + + def _init_actions(self, actions_dim: int): + + device = "cuda" if self._use_gpu else "cpu" + # action scalings to be applied to agent's output + self._actions_ub = torch.full((1, actions_dim), dtype=self._dtype, device=device, + fill_value=1.0) + self._actions_lb = torch.full((1, actions_dim), dtype=self._dtype, device=device, + fill_value=-1.0) + self._actions_scale = (self._actions_ub - self._actions_lb)/2.0 + self._actions_offset = (self._actions_ub + self._actions_lb)/2.0 + + if not self._actions_dim==len(self._get_action_names()): + error=f"action dim {self._actions_dim} does not match action names length {len(self._get_action_names())}!!" 
+ Journal.log(self.__class__.__name__, + "_init_actions", + error, + LogType.EXCEP, + throw_when_excep = True) + self._actions = Actions(namespace=self._namespace, + n_envs=self._n_envs, + action_dim=self._actions_dim, + action_names=self._get_action_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._actions.run() + + self.default_action = torch.full_like(input=self.get_actions(),fill_value=0.0) + self.safe_action = torch.full_like(input=self.get_actions(),fill_value=0.0) + + if self._env_opts["use_action_history"]: + self._act_mem_buffer=MemBuffer(name="ActionMemBuf", + data_tensor=self._actions.get_torch_mirror(), + data_names=self._get_action_names(), + debug=self._debug, + horizon=self._env_opts["actions_history_size"], + dtype=self._dtype, + use_gpu=self._use_gpu) + + # default to all continuous actions (changes the way noise is added) + self._is_continuous_actions=torch.full((actions_dim, ), + dtype=torch.bool, device=device, + fill_value=True) + + def _init_action_smoothing(self): + + continuous_actions=self.get_actions()[:, self._is_continuous_actions] + discrete_actions=self.get_actions()[:, ~self._is_continuous_actions] + self._action_smoother_continuous=ExponentialSignalSmoother(signal=continuous_actions, + update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent + smoothing_horizon=self._env_opts["smoothing_horizon_c"], + target_smoothing=0.5, + debug=self._debug, + dtype=self._dtype, + use_gpu=self._use_gpu, + name="ActionSmootherContinuous") + self._action_smoother_discrete=ExponentialSignalSmoother(signal=discrete_actions, + update_dt=self._substep_dt*self._action_repeat, # rate at which actions are decided by agent + smoothing_horizon=self._env_opts["smoothing_horizon_d"], + target_smoothing=0.5, + debug=self._debug, + dtype=self._dtype, + use_gpu=self._use_gpu, + 
name="ActionSmootherDiscrete") + + # we also need somewhere to keep the actual actions after smoothing + self._actual_actions = Actions(namespace=self._namespace+"_actual", + n_envs=self._n_envs, + action_dim=self._actions_dim, + action_names=self._get_action_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + self._actual_actions.run() + + def _init_rewards(self): + + reward_thresh_default = 1.0 + n_sub_rewards = len(self._get_rewards_names()) + device = "cuda" if self._use_gpu else "cpu" + self._reward_thresh_lb = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=-reward_thresh_default, device=device) # used for clipping rewards + self._reward_thresh_ub = torch.full((1, n_sub_rewards), dtype=self._dtype, fill_value=reward_thresh_default, device=device) + + self._sub_rewards = SubRewards(namespace=self._namespace, + n_envs=self._n_envs, + n_rewards=n_sub_rewards, + reward_names=self._get_rewards_names(), + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._tot_rewards = TotRewards(namespace=self._namespace, + n_envs=self._n_envs, + reward_names=["total_reward"], + env_names=None, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=0.0) + + self._sub_rewards.run() + self._tot_rewards.run() + + self._substep_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu).detach().clone() + # used to hold substep rewards (not super mem. 
efficient) + self._is_substep_rew = torch.zeros((self._substep_rewards.shape[1],),dtype=torch.bool,device=device) + self._is_substep_rew.fill_(True) # default to all substep rewards + + self._episodic_rewards_metrics = EpisodicRewards(reward_tensor=self._sub_rewards.get_torch_mirror(), + reward_names=self._get_rewards_names(), + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._episodic_rewards_metrics.set_constant_data_scaling(scaling=self._get_reward_scaling()) + + def _get_reward_scaling(self): + # to be overridden by child (default to no scaling) + return 1 + + def _max_ep_length(self): + #.should be overriden by child + return self._env_opts["episode_timeout_ub"] + + def _init_custom_db_data(self): + + self.custom_db_data = {} + # by default always log this contact data + rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror() + contact_names = self._rhc_refs.rob_refs.contact_names() + stepping_data = EpisodicData("RhcRefsFlag", rhc_latest_contact_ref, contact_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._add_custom_db_data(db_data=stepping_data) + + # log also action data + actions = self._actions.get_torch_mirror() + action_names = self._get_action_names() + action_data = EpisodicData("Actions", actions, action_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._add_custom_db_data(db_data=action_data) + + # and observations + observations = self._obs.get_torch_mirror() + observations_names = self._get_obs_names() + obs_data = EpisodicData("Obs", observations, observations_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + self._add_custom_db_data(db_data=obs_data) + + # log 
sub-term and sub-truncations data + t_scaling=1 # 1 so that we log an interpretable data in terms of why the episode finished + data_scaling = torch.full((self._n_envs, 1), + fill_value=t_scaling, + dtype=torch.int32,device="cpu") + sub_term = self._sub_terminations.get_torch_mirror() + term = self._terminations.get_torch_mirror() + sub_termination_names = self.sub_term_names() + + sub_term_data = EpisodicData("SubTerminations", sub_term, sub_termination_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + sub_term_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=sub_term_data) + term_data = EpisodicData("Terminations", term, ["terminations"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + term_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=term_data) + + sub_trunc = self._sub_truncations.get_torch_mirror() + trunc = self._truncations.get_torch_mirror() + sub_truncations_names = self.sub_trunc_names() + sub_trunc_data = EpisodicData("SubTruncations", sub_trunc, sub_truncations_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + sub_trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=sub_trunc_data) + trunc_data = EpisodicData("Truncations", trunc, ["truncations"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + trunc_data.set_constant_data_scaling(enable=True,scaling=data_scaling) + self._add_custom_db_data(db_data=trunc_data) + + def _add_custom_db_data(self, db_data: EpisodicData): + self.custom_db_data[db_data.name()] = db_data + + def _init_terminations(self): + 
+ # Boolean array indicating whether each environment episode has terminated after + # the current step. An episode termination could occur based on predefined conditions + # in the environment, such as reaching a goal or exceeding a time limit. + + self._terminations = Terminations(namespace=self._namespace, + n_envs=self._n_envs, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + self._terminations.run() + + sub_t_names = self.sub_term_names() + self._sub_terminations = SubTerminations(namespace=self._namespace, + n_envs=self._n_envs, + n_term=len(sub_t_names), + term_names=sub_t_names, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + self._sub_terminations.run() + + device = "cuda" if self._use_gpu else "cpu" + self._is_capsized=torch.zeros((self._n_envs,1), + dtype=torch.bool, device=device) + self._is_rhc_capsized=torch.zeros((self._n_envs,1), + dtype=torch.bool, device=device) + self._max_pitch_angle=60.0*math.pi/180.0 + + def _init_truncations(self): + + self._truncations = Truncations(namespace=self._namespace, + n_envs=self._n_envs, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + + self._truncations.run() + + sub_trc_names = self.sub_trunc_names() + self._sub_truncations = SubTruncations(namespace=self._namespace, + n_envs=self._n_envs, + n_trunc=len(sub_trc_names), + truc_names=sub_trc_names, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + fill_value=False) + self._sub_truncations.run() + + def _update_jnt_blacklist(self): + device = "cuda" if self._use_gpu else "cpu" + all_available_jnts=self.get_observed_joints() + blacklist=[] + for 
i in range(len(all_available_jnts)): + for pattern in self._jnt_q_blacklist_patterns: + if pattern in all_available_jnts[i]: + # stop at first pattern match + blacklist.append(i) + break + if not len(blacklist)==0: + self._jnt_q_blacklist_idxs=torch.tensor(blacklist, dtype=torch.int, device=device) + + def _attach_to_shared_mem(self): + + # runs shared mem clients for getting observation and setting RHC commands + + # remote stepping data + self._remote_stepper = RemoteStepperSrvr(namespace=self._namespace, + verbose=self._verbose, + vlevel=self._vlevel, + force_reconnection=True) + self._remote_stepper.run() + self._remote_resetter = RemoteResetSrvr(namespace=self._namespace, + verbose=self._verbose, + vlevel=self._vlevel, + force_reconnection=True) + self._remote_resetter.run() + self._remote_reset_req = RemoteResetRequest(namespace=self._namespace, + is_server=False, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True) + self._remote_reset_req.run() + + self._jnts_remapping=None + self._jnt_q_blacklist_idxs=None + + self._robot_state = RobotState(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True, + enable_height_sensor=self._env_opts["add_heightmap_obs"]) + + self._rhc_cmds = RhcCmds(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._rhc_pred = RhcPred(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._rhc_refs = RhcRefs(namespace=self._namespace, + is_server=False, + safe=self._safe_shared_mem, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._rhc_status = 
RhcStatus(namespace=self._namespace, + is_server=False, + verbose=self._verbose, + vlevel=self._vlevel, + with_gpu_mirror=self._use_gpu, + with_torch_view=True) + + self._robot_state.run() + self._n_envs = self._robot_state.n_robots() + self._n_jnts = self._robot_state.n_jnts() + self._n_contacts = self._robot_state.n_contacts() # we assume same n contacts for all rhcs for now + + self._rhc_cmds.run() + self._rhc_pred.run() + self._rhc_refs.run() + self._rhc_status.run() + # we read rhc info now and just this time, since it's assumed to be static + self._check_controllers_registered(retry=True) # blocking + # (we need controllers to be connected to read meaningful data) + + self._rhc_status.rhc_static_info.synch_all(read=True,retry=True) + if self._use_gpu: + self._rhc_status.rhc_static_info.synch_mirror(from_gpu=False,non_blocking=False) + rhc_horizons=self._rhc_status.rhc_static_info.get("horizons",gpu=self._use_gpu) + rhc_nnodes=self._rhc_status.rhc_static_info.get("nnodes",gpu=self._use_gpu) + rhc_dts=self._rhc_status.rhc_static_info.get("dts",gpu=self._use_gpu) + + # height sensor metadata (client side) + if self._env_opts["add_heightmap_obs"]: + self._height_grid_size = self._robot_state.height_sensor.grid_size + self._height_flat_dim = self._robot_state.height_sensor.n_cols + rhc_ncontacts=self._rhc_status.rhc_static_info.get("ncontacts",gpu=self._use_gpu) + robot_mass=self._rhc_status.rhc_static_info.get("robot_mass",gpu=self._use_gpu) + pred_node_idxs_rhc=self._rhc_status.rhc_static_info.get("pred_node_idx",gpu=self._use_gpu) + + self._n_nodes_rhc=torch.round(rhc_nnodes) # we assume nodes are static during an env lifetime + self._rhc_horizons=rhc_horizons + self._rhc_dts=rhc_dts + self._n_contacts_rhc=rhc_ncontacts + self._rhc_robot_masses=robot_mass + if (self._rhc_robot_masses == 0).any(): + zero_indices = torch.nonzero(self._rhc_robot_masses == 0, as_tuple=True) + print(zero_indices) # This will print the indices of zero elements + 
Journal.log(self.__class__.__name__, + "_attach_to_shared_mem", + "Found at least one robot with 0 mass from RHC static info!!", + LogType.EXCEP, + throw_when_excep=True) + + self._rhc_robot_weight=robot_mass*9.81 + self._pred_node_idxs_rhc=pred_node_idxs_rhc + self._pred_horizon_rhc=self._pred_node_idxs_rhc*self._rhc_dts + + # run server for agent commands + self._agent_refs = AgentRefs(namespace=self._namespace, + is_server=True, + n_robots=self._n_envs, + n_jnts=self._robot_state.n_jnts(), + n_contacts=self._robot_state.n_contacts(), + contact_names=self._robot_state.contact_names(), + q_remapping=None, + with_gpu_mirror=self._use_gpu, + force_reconnection=True, + safe=False, + verbose=self._verbose, + vlevel=self._vlevel, + fill_value=0) + self._agent_refs.run() + q_init_agent_refs=torch.full_like(self._robot_state.root_state.get(data_type="q", gpu=self._use_gpu),fill_value=0.0) + q_init_agent_refs[:, 0]=1.0 + self._agent_refs.rob_refs.root_state.set(data_type="q", data=q_init_agent_refs, + gpu=self._use_gpu) + if self._use_gpu: + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=True) + self._agent_refs.rob_refs.root_state.synch_all(read=False, retry=True) + # episode steps counters (for detecting episode truncations for + # time limits) + self._ep_timeout_counter = EpisodesCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["episode_timeout_lb"], + n_steps_ub=self._env_opts["episode_timeout_ub"], + randomize_offsets_at_startup=True, # this has to be randomized + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=self._debug) # handles step counter through episodes and through envs + self._ep_timeout_counter.run() + self._task_rand_counter = TaskRandCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["n_steps_task_rand_lb"], + 
n_steps_ub=self._env_opts["n_steps_task_rand_ub"], + randomize_offsets_at_startup=False, # not necessary since it will be synched with the timeout counter + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=self._debug) # handles step counter through episodes and through envs + self._task_rand_counter.run() + self._task_rand_counter.sync_counters(other_counter=self._ep_timeout_counter) + if self._env_opts["use_random_trunc"]: + self._rand_trunc_counter=RandomTruncCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["random_trunc_freq"]-self._env_opts["random_trunc_freq_delta"], + n_steps_ub=self._env_opts["random_trunc_freq"], + randomize_offsets_at_startup=True, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=False) + self._rand_trunc_counter.run() + # self._rand_trunc_counter.sync_counters(other_counter=self._ep_timeout_counter) + if self._env_opts["use_random_safety_reset"]: + self._rand_safety_reset_counter=SafetyRandResetsCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=self._env_opts["random_reset_freq"], + n_steps_ub=self._env_opts["random_reset_freq"], + randomize_offsets_at_startup=True, + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=False) + self._rand_safety_reset_counter.run() + # self._rand_safety_reset_counter.sync_counters(other_counter=self._ep_timeout_counter) + + # timer to track abs time in each env (reset logic to be implemented in child) + self._substep_abs_counter = SubStepAbsCounter(namespace=self._namespace, + n_envs=self._n_envs, + n_steps_lb=1e9, + n_steps_ub=1e9, + randomize_offsets_at_startup=True, # randomizing startup offsets + is_server=True, + verbose=self._verbose, + vlevel=self._vlevel, + 
safe=True, + force_reconnection=True, + with_gpu_mirror=self._use_gpu, + debug=self._debug) + self._substep_abs_counter.run() + + # debug data servers + traing_env_param_dict = {} + traing_env_param_dict["use_gpu"] = self._use_gpu + traing_env_param_dict["debug"] = self._is_debug + traing_env_param_dict["n_preinit_steps"] = self._env_opts["n_preinit_steps"] + traing_env_param_dict["n_preinit_steps"] = self._n_envs + + self._training_sim_info = SharedTrainingEnvInfo(namespace=self._namespace, + is_server=True, + training_env_params_dict=traing_env_param_dict, + safe=False, + force_reconnection=True, + verbose=self._verbose, + vlevel=self._vlevel) + self._training_sim_info.run() + + self._observed_jnt_names=self.set_observed_joints() + self._set_jnts_blacklist_pattern() + self._update_jnt_blacklist() + + self._prev_root_p_substep=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone() + self._prev_root_q_substep=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone() + self._prev_root_p_step=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu).clone() + self._prev_root_q_step=self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu).clone() + + def _activate_rhc_controllers(self): + self._rhc_status.activation_state.get_torch_mirror()[:, :] = True + self._rhc_status.activation_state.synch_all(read=False, retry=True) # activates all controllers + + def _synch_state(self, + gpu: bool = True): + + # read from shared memory on CPU + # robot state + self._robot_state.root_state.synch_all(read = True, retry = True) + self._robot_state.jnts_state.synch_all(read = True, retry = True) + # rhc cmds + self._rhc_cmds.root_state.synch_all(read = True, retry = True) + self._rhc_cmds.jnts_state.synch_all(read = True, retry = True) + self._rhc_cmds.contact_wrenches.synch_all(read = True, retry = True) + # rhc pred + self._rhc_pred.root_state.synch_all(read = True, retry = True) + # self._rhc_pred.jnts_state.synch_all(read = 
True, retry = True) + # self._rhc_pred.contact_wrenches.synch_all(read = True, retry = True) + # refs for root link and contacts + self._rhc_refs.rob_refs.root_state.synch_all(read = True, retry = True) + self._rhc_refs.contact_flags.synch_all(read = True, retry = True) + self._rhc_refs.flight_info.synch_all(read = True, retry = True) + self._rhc_refs.flight_settings_req.synch_all(read = True, retry = True) + self._rhc_refs.rob_refs.contact_pos.synch_all(read = True, retry = True) + # rhc cost + self._rhc_status.rhc_cost.synch_all(read = True, retry = True) + # rhc constr. violations + self._rhc_status.rhc_constr_viol.synch_all(read = True, retry = True) + # failure states + self._rhc_status.fails.synch_all(read = True, retry = True) + # tot cost and cnstr viol on nodes + step variable + self._rhc_status.rhc_nodes_cost.synch_all(read = True, retry = True) + self._rhc_status.rhc_nodes_constr_viol.synch_all(read = True, retry = True) + self._rhc_status.rhc_fcn.synch_all(read = True, retry = True) + self._rhc_status.rhc_fail_idx.synch_all(read = True, retry = True) + if self._env_opts["add_heightmap_obs"]: + self._robot_state.height_sensor.synch_all(read=True, retry=True) + if gpu: + # copies data to "mirror" on GPU --> we can do it non-blocking since + # in this direction it should be safe + self._robot_state.root_state.synch_mirror(from_gpu=False,non_blocking=True) # copies shared data on GPU + self._robot_state.jnts_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_cmds.root_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_cmds.jnts_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_cmds.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_pred.root_state.synch_mirror(from_gpu=False,non_blocking=True) + # self._rhc_pred.jnts_state.synch_mirror(from_gpu=False,non_blocking=True) + # self._rhc_pred.contact_wrenches.synch_mirror(from_gpu=False,non_blocking=True) + 
self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.contact_flags.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.flight_info.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_refs.flight_settings_req.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_cost.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_constr_viol.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.fails.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_nodes_cost.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_nodes_constr_viol.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_fcn.synch_mirror(from_gpu=False,non_blocking=True) + self._rhc_status.rhc_fail_idx.synch_mirror(from_gpu=False,non_blocking=True) + if self._env_opts["add_heightmap_obs"]: + self._robot_state.height_sensor.synch_mirror(from_gpu=False, non_blocking=True) + torch.cuda.synchronize() # ensuring that all the streams on the GPU are completed \ + # before the CPU continues execution + + def _override_refs(self, + env_indxs: torch.Tensor = None): + + # just used for setting agent refs externally (i.e. 
from shared mem on CPU) + self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem + if self._use_gpu: + # copies latest refs to GPU + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False) + + def _clamp_obs(self, + obs: torch.Tensor): + if self._is_debug: + self._check_finite(obs, "observations", False) + torch.nan_to_num(input=obs, out=obs, nan=self._obs_threshold_ub, + posinf=self._obs_threshold_ub, + neginf=self._obs_threshold_lb) # prevent nans + + obs.clamp_(self._obs_threshold_lb, self._obs_threshold_ub) + + def _clamp_rewards(self, + rewards: torch.Tensor): + if self._is_debug: + self._check_finite(rewards, "rewards", False) + torch.nan_to_num(input=rewards, out=rewards, nan=0.0, + posinf=None, + neginf=None) # prevent nans + rewards.clamp_(self._reward_thresh_lb, self._reward_thresh_ub) + + def get_actions_lb(self): + return self._actions_lb + + def get_actions_ub(self): + return self._actions_ub + + def get_actions_scale(self): + return self._actions_scale + + def get_actions_offset(self): + return self._actions_offset + + def get_obs_lb(self): + return self._obs_lb + + def get_obs_ub(self): + return self._obs_ub + + def get_obs_scale(self): + self._obs_scale = (self._obs_ub - self._obs_lb)/2.0 + return self._obs_scale + + def get_obs_offset(self): + self._obs_offset = (self._obs_ub + self._obs_lb)/2.0 + return self._obs_offset + + def switch_random_reset(self, on: bool = True): + self._random_reset_active=on + + def set_jnts_remapping(self, + remapping: List = None): + + self._jnts_remapping=remapping + if self._jnts_remapping is not None: + self._robot_state.set_jnts_remapping(jnts_remapping=self._jnts_remapping) + self._rhc_cmds.set_jnts_remapping(jnts_remapping=self._jnts_remapping) + self._rhc_pred.set_jnts_remapping(jnts_remapping=self._jnts_remapping) + # we need to also update the list of observed joints to match + available_joints=self._robot_state.jnt_names() + # the remapping 
ordering + self._observed_jnt_names=[] + for i in range(len(available_joints)): + self._observed_jnt_names.append(available_joints[self._jnts_remapping[i]]) + + self._update_jnt_blacklist() + + updated_obs_names=self._get_obs_names() # get updated obs names (should use get_observed_joints + # internally, so that jnt names are updated) + + # also update jnt obs names on shared memory + names_old=self._obs.get_obs_names() + names_old_next=self._next_obs.get_obs_names() + names_old[:]=updated_obs_names + names_old_next[:]=updated_obs_names + self._obs.update_names() + self._next_obs.update_names() + + # also update + if "Obs" in self.custom_db_data: + db_obs_names=self.custom_db_data["Obs"].data_names() + db_obs_names[:]=updated_obs_names + + def _check_finite(self, + tensor: torch.Tensor, + name: str, + throw: bool = False): + if not torch.isfinite(tensor).all().item(): + exception = f"Found nonfinite elements in {name} tensor!!" + non_finite_idxs=torch.nonzero(~torch.isfinite(tensor)) + n_nonf_elems=non_finite_idxs.shape[0] + + if name=="observations": + for i in range(n_nonf_elems): + db_msg=f"{self.obs_names()[non_finite_idxs[i,1]]} (env. {non_finite_idxs[i,0]}):" + \ + f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}" + print(db_msg) + if name=="rewards": + for i in range(n_nonf_elems): + db_msg=f"{self.sub_rew_names()[non_finite_idxs[i,1]]} (env. 
{non_finite_idxs[i,0]}):" + \ + f" {tensor[non_finite_idxs[i,0],non_finite_idxs[i,1]].item()}" + print(db_msg) + print(tensor) + Journal.log(self.__class__.__name__, + "_check_finite", + exception, + LogType.EXCEP, + throw_when_excep = throw) + + def _check_controllers_registered(self, + retry: bool = False): + + if retry: + self._rhc_status.controllers_counter.synch_all(read=True, retry=True) + n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item() + while not (n_connected_controllers == self._n_envs): + warn = f"Expected {self._n_envs} controllers to be connected during training, " + \ + f"but got {n_connected_controllers}. Will wait for all to be connected..." + Journal.log(self.__class__.__name__, + "_check_controllers_registered", + warn, + LogType.WARN, + throw_when_excep = False) + nsecs = int(2 * 1000000000) + PerfSleep.thread_sleep(nsecs) + self._rhc_status.controllers_counter.synch_all(read=True, retry=True) + n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item() + info = f"All {n_connected_controllers} controllers connected!" + Journal.log(self.__class__.__name__, + "_check_controllers_registered", + info, + LogType.INFO, + throw_when_excep = False) + return True + else: + self._rhc_status.controllers_counter.synch_all(read=True, retry=True) + n_connected_controllers = self._rhc_status.controllers_counter.get_torch_mirror()[0, 0].item() + if not (n_connected_controllers == self._n_envs): + exception = f"Expected {self._n_envs} controllers to be connected during training, " + \ + f"but got {n_connected_controllers}. Aborting..." 
+ Journal.log(self.__class__.__name__, + "_check_controllers_registered", + exception, + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def _check_truncations(self): + + self._check_sub_truncations() + sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu) + truncations = self._truncations.get_torch_mirror(gpu=self._use_gpu) + truncations[:, :] = torch.any(sub_truncations,dim=1,keepdim=True) + + def _check_terminations(self): + + self._check_sub_terminations() + sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu) + terminations = self._terminations.get_torch_mirror(gpu=self._use_gpu) + terminations[:, :] = torch.any(sub_terminations,dim=1,keepdim=True) + + def _check_sub_truncations(self): + # default behaviour-> to be overriden by child + sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu) + sub_truncations[:, 0:1]=self._ep_timeout_counter.time_limits_reached() + + def _check_sub_terminations(self): + # default behaviour-> to be overriden by child + sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu) + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + robot_q_pred = self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) + + # terminate when either the real robot or the prediction from the MPC are capsized + check_capsize(quat=robot_q_meas,max_angle=self._max_pitch_angle, + output_t=self._is_capsized) + check_capsize(quat=robot_q_pred,max_angle=self._max_pitch_angle, + output_t=self._is_rhc_capsized) + + sub_terminations[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=self._use_gpu) + sub_terminations[:, 1:2] = self._is_capsized + sub_terminations[:, 2:3] = self._is_rhc_capsized + + def is_action_continuous(self): + return self._is_continuous_actions + + def is_action_discrete(self): + return ~self._is_continuous_actions + + @abstractmethod + def _pre_substep(self): + pass + + @abstractmethod + 
def _custom_post_step(self,episode_finished): + pass + + @abstractmethod + def _custom_post_substp_post_rew(self): + pass + + @abstractmethod + def _custom_post_substp_pre_rew(self): + pass + + @abstractmethod + def _apply_actions_to_rhc(self): + pass + + def _override_actions_with_demo(self): + pass + + @abstractmethod + def _compute_substep_rewards(self): + pass + + @abstractmethod + def _set_substep_rew(self): + pass + + @abstractmethod + def _set_substep_obs(self): + pass + + @abstractmethod + def _compute_step_rewards(self): + pass + + @abstractmethod + def _fill_substep_obs(self, + obs: torch.Tensor): + pass + + @abstractmethod + def _fill_step_obs(self, + obs: torch.Tensor): + pass + + @abstractmethod + def _randomize_task_refs(self, + env_indxs: torch.Tensor = None): + pass + + def _custom_post_init(self): + pass + + def _get_avrg_substep_root_twist(self, + out: torch.Tensor, + base_loc: bool = True): + # to be called at each substep + robot_p_meas = self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + root_v_avrg_w=(robot_p_meas-self._prev_root_p_substep)/self._substep_dt + root_omega_avrg_w=quaternion_to_angular_velocity(q_diff=quaternion_difference(self._prev_root_q_substep,robot_q_meas),\ + dt=self._substep_dt) + twist_w=torch.cat((root_v_avrg_w, + root_omega_avrg_w), + dim=1) + if not base_loc: + self._prev_root_p_substep[:, :]=robot_p_meas + self._prev_root_q_substep[:, :]=robot_q_meas + out[:, :]=twist_w + # rotate using the current (end-of-substep) orientation for consistency with other signals + world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out) + self._prev_root_p_substep[:, :]=robot_p_meas + self._prev_root_q_substep[:, :]=robot_q_meas + + def _get_avrg_step_root_twist(self, + out: torch.Tensor, + base_loc: bool = True): + # to be called after substeps of actions repeats + robot_p_meas = 
self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu) + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + + dt=self._substep_dt*self._action_repeat # accounting for frame skipping + root_v_avrg_w=(robot_p_meas-self._prev_root_p_step)/(dt) + root_omega_avrg_w=quaternion_to_angular_velocity(q_diff=quaternion_difference(self._prev_root_q_step,robot_q_meas),\ + dt=dt) + twist_w=torch.cat((root_v_avrg_w, + root_omega_avrg_w), + dim=1) + if not base_loc: + out[:, :]=twist_w + # rotate using the current (end-of-step) orientation for consistency with other signals + world2base_frame(t_w=twist_w, q_b=robot_q_meas, t_out=out) + + def _get_avrg_rhc_root_twist(self, + out: torch.Tensor, + base_loc: bool = True): + + rhc_root_p =self._rhc_cmds.root_state.get(data_type="p",gpu=self._use_gpu) + rhc_root_q =self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) + rhc_root_p_pred =self._rhc_pred.root_state.get(data_type="p",gpu=self._use_gpu) + rhc_root_q_pred =self._rhc_pred.root_state.get(data_type="q",gpu=self._use_gpu) + + rhc_root_v_avrg_rhc_w=(rhc_root_p_pred-rhc_root_p)/self._pred_horizon_rhc + rhc_root_omega_avrg_rhc_w=quaternion_to_angular_velocity(q_diff=quaternion_difference(rhc_root_q,rhc_root_q_pred),\ + dt=self._pred_horizon_rhc) + + rhc_pred_avrg_twist_rhc_w = torch.cat((rhc_root_v_avrg_rhc_w, + rhc_root_omega_avrg_rhc_w), + dim=1) + if not base_loc: + out[:, :]=rhc_pred_avrg_twist_rhc_w + # to rhc base frame (using first node as reference) + world2base_frame(t_w=rhc_pred_avrg_twist_rhc_w, q_b=rhc_root_q, t_out=out) diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/twist_tracking_env.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/twist_tracking_env.py new file mode 100644 index 0000000000000000000000000000000000000000..5a8f9e4403b6124e26f78086191441feace4386a --- /dev/null +++ 
b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/twist_tracking_env.py @@ -0,0 +1,1396 @@ +from typing import Dict + +import os + +import torch + +from EigenIPC.PyEigenIPC import VLevel, LogType, Journal + +from mpc_hive.utilities.shared_data.rhc_data import RobotState, RhcStatus, RhcRefs +from mpc_hive.utilities.math_utils_torch import world2base_frame, base2world_frame, w2hor_frame + +from aug_mpc.utils.sys_utils import PathsGetter +from aug_mpc.utils.timers import PeriodicTimer +from aug_mpc.utils.episodic_data import EpisodicData +from aug_mpc.utils.signal_smoother import ExponentialSignalSmoother +from aug_mpc.utils.math_utils import check_capsize +from aug_mpc.training_envs.training_env_base import AugMPCTrainingEnvBase + +class TwistTrackingEnv(AugMPCTrainingEnvBase): + """Base AugMPC training env that tracks commanded twists by pushing velocity and contact targets into the RHC controller while handling locomotion rewards/resets.""" + + def __init__(self, + namespace: str, + actions_dim: int = 10, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + debug: bool = True, + override_agent_refs: bool = False, + timeout_ms: int = 60000, + env_opts: Dict = {}): + + env_name = "LinVelTrack" + device = "cuda" if use_gpu else "cpu" + + self._add_env_opt(env_opts, "srew_drescaling", + False) + + self._add_env_opt(env_opts, "step_thresh", 0.) 
# when step action < thresh, a step is requested + + # counters settings + self._add_env_opt(env_opts, "single_task_ref_per_episode", + True # if True, the task ref is constant over the episode (ie + # episodes are truncated when task is changed) + ) + self._add_env_opt(env_opts, "add_angvel_ref_rand", default=True) # randomize also agular vel ref (just z component) + + self._add_env_opt(env_opts, "episode_timeout_lb", + 1024) + self._add_env_opt(env_opts, "episode_timeout_ub", + 1024) + self._add_env_opt(env_opts, "n_steps_task_rand_lb", + 512) + self._add_env_opt(env_opts, "n_steps_task_rand_ub", + 512) + self._add_env_opt(env_opts, "use_random_safety_reset", + True) + self._add_env_opt(env_opts, "random_reset_freq", + 10) # a random reset once every n-episodes (per env) + self._add_env_opt(env_opts, "use_random_trunc", + True) + self._add_env_opt(env_opts, "random_trunc_freq", + env_opts["episode_timeout_ub"]*5) # to remove temporal correlations between envs + self._add_env_opt(env_opts, "random_trunc_freq_delta", + env_opts["episode_timeout_ub"]*2) # to randomize trunc frequency between envs + + if not env_opts["single_task_ref_per_episode"]: + env_opts["random_reset_freq"]=int(env_opts["random_reset_freq"]/\ + (env_opts["episode_timeout_lb"]/float(env_opts["n_steps_task_rand_lb"]))) + + self._add_env_opt(env_opts, "action_repeat", 1) # frame skipping (different agent action every action_repeat + # env substeps) + + self._add_env_opt(env_opts, "n_preinit_steps", 1) # n steps of the controllers to properly initialize everything + + self._add_env_opt(env_opts, "vec_ep_freq_metrics_db", 1) # n eps over which debug metrics are reported + self._add_env_opt(env_opts, "demo_envs_perc", 0.0) + self._add_env_opt(env_opts, "max_cmd_v", 1.5) # maximum cmd v for lin v actions (single component) + self._add_env_opt(env_opts, "max_cmd_omega", 1.0) # maximum cmd v for omega v actions (single component) + + # action smoothing + self._add_env_opt(env_opts, 
"use_action_smoothing", False) + self._add_env_opt(env_opts, "smoothing_horizon_c", 0.01) + self._add_env_opt(env_opts, "smoothing_horizon_d", 0.03) + + # whether to smooth vel error signal + self._add_env_opt(env_opts, "use_track_reward_smoother", False) + self._add_env_opt(env_opts, "smoothing_horizon_vel_err", 0.08) + self._add_env_opt(env_opts, "track_rew_smoother", None) + + # rewards + self._reward_map={} + self._reward_lb_map={} + + self._add_env_opt(env_opts, "reward_lb_default", -0.5) + self._add_env_opt(env_opts, "reward_ub_default", 1e6) + + self._add_env_opt(env_opts, "task_error_reward_lb", -0.5) + self._add_env_opt(env_opts, "CoT_reward_lb", -0.5) + self._add_env_opt(env_opts, "power_reward_lb", -0.5) + self._add_env_opt(env_opts, "action_rate_reward_lb", -0.5) + self._add_env_opt(env_opts, "jnt_vel_reward_lb", -0.5) + self._add_env_opt(env_opts, "rhc_avrg_vel_reward_lb", -0.5) + + self._add_env_opt(env_opts, "add_power_reward", False) + self._add_env_opt(env_opts, "add_CoT_reward", True) + self._add_env_opt(env_opts, "use_CoT_wrt_ref", False) + self._add_env_opt(env_opts, "add_action_rate_reward", True) + self._add_env_opt(env_opts, "add_jnt_v_reward", False) + + self._add_env_opt(env_opts, "use_rhc_avrg_vel_tracking", False) + + # task tracking + self._add_env_opt(env_opts, "use_relative_error", default=False) # use relative vel error (wrt current task norm) + self._add_env_opt(env_opts, "directional_tracking", default=True) # whether to compute tracking error based on reference direction + # if env_opts["add_angvel_ref_rand"]: + # env_opts["directional_tracking"]=False + + self._add_env_opt(env_opts, "use_L1_norm", default=True) # whether to use L1 norm for the error (otherwise L2) + self._add_env_opt(env_opts, "use_exp_track_rew", default=True) # whether to use a reward of the form A*e^(B*x), + # otherwise A*(1-B*x) + + self._add_env_opt(env_opts, "use_fail_idx_weight", default=False) + self._add_env_opt(env_opts, "task_track_offset_exp", 
default=1.0) + self._add_env_opt(env_opts, "task_track_scale_exp", default=5.0) + self._add_env_opt(env_opts, "task_track_offset", default=1.0) + self._add_env_opt(env_opts, "task_track_scale", default=1.5) + self._add_env_opt(env_opts, "task_track_front_weight", default=1.0) + self._add_env_opt(env_opts, "task_track_lat_weight", default=0.05) + self._add_env_opt(env_opts, "task_track_vert_weight", default=0.05) + self._add_env_opt(env_opts, "task_track_omega_z_weight", default=0.4) + self._add_env_opt(env_opts, "task_track_omega_x_weight", default=0.05) + self._add_env_opt(env_opts, "task_track_omega_y_weight", default=0.05) + # if env_opts["add_angvel_ref_rand"]: + # env_opts["task_track_omega_x_weight"]=0.0 + # env_opts["task_track_omega_y_weight"]=0.0 + # env_opts["task_track_omega_z_weight"]=1.0 + + # task pred tracking + self._add_env_opt(env_opts, "task_pred_track_offset", default=1.0) + self._add_env_opt(env_opts, "task_pred_track_scale", default=3.0) + + # energy penalties + self._add_env_opt(env_opts, "CoT_offset", default=0.3) + self._add_env_opt(env_opts, "CoT_scale", default=0.5) + self._add_env_opt(env_opts, "power_offset", default=0.1) + self._add_env_opt(env_opts, "power_scale", default=8e-4) + + # action rate penalty + self._add_env_opt(env_opts, "action_rate_offset", default=0.1) + self._add_env_opt(env_opts, "action_rate_scale", default=2.0) + self._add_env_opt(env_opts, "action_rate_rew_d_weight", default=0.1) + self._add_env_opt(env_opts, "action_rate_rew_c_weight", default=1.0) + + # jnt vel penalty + self._add_env_opt(env_opts, "jnt_vel_offset", default=0.1) + self._add_env_opt(env_opts, "jnt_vel_scale", default=2.0) + + # terminations + self._add_env_opt(env_opts, "add_term_mpc_capsize", default=False) # add termination based on mpc capsizing prediction + + # observations + self._add_env_opt(env_opts, "rhc_fail_idx_scale", default=1.0) + self._add_env_opt(env_opts, "use_action_history", default=True) # whether to add information on past 
actions to obs + self._add_env_opt(env_opts, "add_prev_actions_stats_to_obs", default=False) # add actions std, mean + last action over a horizon to obs (if self._use_action_history True) + self._add_env_opt(env_opts, "actions_history_size", default=3) + + self._add_env_opt(env_opts, "add_mpc_contact_f_to_obs", default=True) # add estimate vertical contact f to obs + self._add_env_opt(env_opts, "add_fail_idx_to_obs", default=True) # we need to obserse mpc failure idx to correlate it with terminations + + self._add_env_opt(env_opts, "use_linvel_from_rhc", default=True) # no lin vel meas available, we use est. from mpc + self._add_env_opt(env_opts, "add_flight_info", default=True) # add feedback info on pos, remamining duration, length, + # apex and landing height of flight phases + self._add_env_opt(env_opts, "add_flight_settings", default=False) # add feedback info on current flight requests for mpc + + self._add_env_opt(env_opts, "use_prob_based_stepping", default=False) # interpret actions as stepping prob (never worked) + + self._add_env_opt(env_opts, "add_rhc_cmds_to_obs", default=True) # add the rhc cmds which are being applied now to the robot + + if not "add_periodic_clock_to_obs" in env_opts: + # add a sin/cos clock to obs (useful if task is explicitly + # time-dependent) + self._add_env_opt(env_opts, "add_periodic_clock_to_obs", default=False) + + self._add_env_opt(env_opts, "add_heightmap_obs", default=False) + + # temporarily creating robot state client to get some data + robot_state_tmp = RobotState(namespace=namespace, + is_server=False, + safe=False, + verbose=verbose, + vlevel=vlevel, + with_gpu_mirror=False, + with_torch_view=False, + enable_height_sensor=env_opts["add_heightmap_obs"]) + robot_state_tmp.run() + rhc_status_tmp = RhcStatus(is_server=False, + namespace=namespace, + verbose=verbose, + vlevel=vlevel, + with_torch_view=False, + with_gpu_mirror=False) + rhc_status_tmp.run() + rhc_refs_tmp = RhcRefs(namespace=namespace, + is_server=False, + 
safe=False, + verbose=verbose, + vlevel=vlevel, + with_gpu_mirror=False, + with_torch_view=False) + rhc_refs_tmp.run() + n_jnts = robot_state_tmp.n_jnts() + self._contact_names = robot_state_tmp.contact_names() + self._n_contacts = len(self._contact_names) + self._flight_info_size=rhc_refs_tmp.flight_info.n_cols + self._flight_setting_size=rhc_refs_tmp.flight_settings_req.n_cols + # height sensor metadata (if present) + self._height_grid_size = None + self._height_flat_dim = 0 + if env_opts["add_heightmap_obs"]: + self._height_grid_size = robot_state_tmp.height_sensor.grid_size + self._height_flat_dim = robot_state_tmp.height_sensor.n_cols + + robot_state_tmp.close() + rhc_status_tmp.close() + rhc_refs_tmp.close() + + # defining obs dimension + obs_dim=3 # normalized gravity vector in base frame + obs_dim+=6 # meas twist in base frame + obs_dim+=2*n_jnts # joint pos + vel + if env_opts["add_mpc_contact_f_to_obs"]: + obs_dim+=3*self._n_contacts + obs_dim+=6 # twist reference in base frame frame + if env_opts["add_fail_idx_to_obs"]: + obs_dim+=1 # rhc controller failure index + if env_opts["add_term_mpc_capsize"]: + obs_dim+=3 # gravity vec from mpc + if env_opts["use_rhc_avrg_vel_tracking"]: + obs_dim+=6 # mpc avrg twist + if env_opts["add_flight_info"]: # contact pos, remaining duration, length, apex, landing height, landing dx, dy + obs_dim+=self._flight_info_size + if env_opts["add_flight_settings"]: + obs_dim+=self._flight_setting_size + if env_opts["add_rhc_cmds_to_obs"]: + obs_dim+=3*n_jnts + if env_opts["use_action_history"]: + if env_opts["add_prev_actions_stats_to_obs"]: + obs_dim+=3*actions_dim # previous agent actions statistics (mean, std + last action) + else: # full action history + obs_dim+=env_opts["actions_history_size"]*actions_dim + if env_opts["use_action_smoothing"]: + obs_dim+=actions_dim # it's better to also add the smoothed actions as obs + if env_opts["add_periodic_clock_to_obs"]: + obs_dim+=2 + if env_opts["add_heightmap_obs"]: + 
obs_dim+=self._height_flat_dim + # Agent task reference + self._add_env_opt(env_opts, "use_pof0", default=True) # with some prob, references will be null + self._add_env_opt(env_opts, "pof0_linvel", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr) + self._add_env_opt(env_opts, "pof0_omega", default=0.3) # [0, 1] prob of both linvel and omega refs being null(from bernoulli distr) + self._add_env_opt(env_opts, "max_linvel_ref", default=0.3) # m/s + self._add_env_opt(env_opts, "max_angvel_ref", default=0.0) # rad/s + if env_opts["add_angvel_ref_rand"]: + env_opts["max_angvel_ref"]=0.4 + + # ready to init base class + self._this_child_path = os.path.abspath(__file__) + AugMPCTrainingEnvBase.__init__(self, + namespace=namespace, + obs_dim=obs_dim, + actions_dim=actions_dim, + env_name=env_name, + verbose=verbose, + vlevel=vlevel, + use_gpu=use_gpu, + dtype=dtype, + debug=debug, + override_agent_refs=override_agent_refs, + timeout_ms=timeout_ms, + env_opts=env_opts) + + def _custom_post_init(self): + + device = "cuda" if self._use_gpu else "cpu" + + self._update_jnt_blacklist() # update blacklist for joints + + # constant base-frame unit vectors (reuse to avoid per-call allocations) + self._base_x_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device) + self._base_x_dir[:, 0] = 1.0 + self._base_y_dir = torch.zeros((self._n_envs, 3), dtype=self._dtype, device=device) + self._base_y_dir[:, 1] = 1.0 + + self._twist_ref_lb = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=-1.5) + self._twist_ref_ub = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=1.5) + + # task reference parameters (world frame) + # lin vel + self._twist_ref_lb[0, 0] = -self._env_opts["max_linvel_ref"] + self._twist_ref_lb[0, 1] = -self._env_opts["max_linvel_ref"] + self._twist_ref_lb[0, 2] = 0.0 + self._twist_ref_ub[0, 0] = self._env_opts["max_linvel_ref"] + self._twist_ref_ub[0, 1] = 
self._env_opts["max_linvel_ref"] + self._twist_ref_ub[0, 2] = 0.0 + # angular vel + self._twist_ref_lb[0, 3] = 0.0 + self._twist_ref_lb[0, 4] = 0.0 + self._twist_ref_lb[0, 5] = -self._env_opts["max_angvel_ref"] + self._twist_ref_ub[0, 3] = 0.0 + self._twist_ref_ub[0, 4] = 0.0 + self._twist_ref_ub[0, 5] = self._env_opts["max_angvel_ref"] + + self._twist_ref_offset = (self._twist_ref_ub + self._twist_ref_lb)/2.0 + self._twist_ref_scale = (self._twist_ref_ub - self._twist_ref_lb)/2.0 + + # adding some custom db info + agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=False) + agent_twist_ref_data = EpisodicData("AgentTwistRefs", agent_twist_ref, + ["v_x", "v_y", "v_z", "omega_x", "omega_y", "omega_z"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + rhc_fail_idx = EpisodicData("RhcFailIdx", self._rhc_fail_idx(gpu=False), ["rhc_fail_idx"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + f_names=[] + for contact in self._contact_names: + f_names.append(f"fc_{contact}_x_base_loc") + f_names.append(f"fc_{contact}_y_base_loc") + f_names.append(f"fc_{contact}_z_base_loc") + rhc_contact_f = EpisodicData("RhcContactForces", + self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False), + f_names, + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + self._pow_db_data=torch.full(size=(self._n_envs,2), + dtype=self._dtype, device="cpu", + fill_value=-1.0) + power_db = EpisodicData("Power", + self._pow_db_data, + ["CoT", "W"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + self._track_error_db=torch.full_like(agent_twist_ref, fill_value=0.0) + task_err_db = EpisodicData("TrackingError", + agent_twist_ref, + 
["e_vx", "e_vy", "e_vz", "e_omegax", "e_omegay", "e_omegaz"], + ep_vec_freq=self._env_opts["vec_ep_freq_metrics_db"], + store_transitions=self._full_db, + max_ep_duration=self._max_ep_length()) + + self._add_custom_db_data(db_data=agent_twist_ref_data) + self._add_custom_db_data(db_data=rhc_fail_idx) + self._add_custom_db_data(db_data=rhc_contact_f) + self._add_custom_db_data(db_data=power_db) + self._add_custom_db_data(db_data=task_err_db) + + # rewards + self._task_err_weights = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=0.0) + if self._env_opts["directional_tracking"]: + self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"] # frontal + self._task_err_weights[0, 1] = self._env_opts["task_track_lat_weight"] # lateral + self._task_err_weights[0, 2] = self._env_opts["task_track_vert_weight"] # vertical + self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"] + self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + else: + self._task_err_weights[0, 0] = self._env_opts["task_track_front_weight"] + self._task_err_weights[0, 1] = self._env_opts["task_track_front_weight"] + self._task_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"] + self._task_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"] + self._task_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + + self._task_pred_err_weights = torch.full((1, 6), dtype=self._dtype, device=device, + fill_value=0.0) + if self._env_opts["directional_tracking"]: + self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 1] = self._env_opts["task_track_lat_weight"] + self._task_pred_err_weights[0, 2] = self._env_opts["task_track_vert_weight"] + self._task_pred_err_weights[0, 3] = 
self._env_opts["task_track_omega_x_weight"] + self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + else: + self._task_pred_err_weights[0, 0] = self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 1] = self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 2] = 0.1*self._env_opts["task_track_front_weight"] + self._task_pred_err_weights[0, 3] = self._env_opts["task_track_omega_x_weight"] + self._task_pred_err_weights[0, 4] = self._env_opts["task_track_omega_y_weight"] + self._task_pred_err_weights[0, 5] = self._env_opts["task_track_omega_z_weight"] + + self._power_penalty_weights = torch.full((1, self._n_jnts), dtype=self._dtype, device=device, + fill_value=1.0) + self._power_penalty_weights_sum = torch.sum(self._power_penalty_weights).item() + subr_names=self._get_rewards_names() # initializes + + # reward clipping + self._reward_thresh_lb[:, :] = self._env_opts["reward_lb_default"] + self._reward_thresh_ub[:, :]= self._env_opts["reward_ub_default"] + + for reward_name, env_opt_key in self._reward_lb_map.items(): + if reward_name in self._reward_map: + self._reward_thresh_lb[:, self._reward_map[reward_name]] = self._env_opts[env_opt_key] + + # obs bounds + self._obs_threshold_lb = -1e3 # used for clipping observations + self._obs_threshold_ub = 1e3 + + # actions + if not self._env_opts["use_prob_based_stepping"]: + self._is_continuous_actions[6:10]=False + + v_cmd_max = self._env_opts["max_cmd_v"] + omega_cmd_max = self._env_opts["max_cmd_omega"] + self._actions_lb[:, 0:3] = -v_cmd_max + self._actions_ub[:, 0:3] = v_cmd_max + self._actions_lb[:, 3:6] = -omega_cmd_max # twist cmds + self._actions_ub[:, 3:6] = omega_cmd_max + if "contact_flag_start" in self._actions_map: + idx=self._actions_map["contact_flag_start"] + if self._env_opts["use_prob_based_stepping"]: + self._actions_lb[:, idx:idx+self._n_contacts] = 0.0 # 
contact flags + self._actions_ub[:, idx:idx+self._n_contacts] = 1.0 + else: + self._actions_lb[:, idx:idx+self._n_contacts] = -1.0 + self._actions_ub[:, idx:idx+self._n_contacts] = 1.0 + + self.default_action[:, :] = (self._actions_ub+self._actions_lb)/2.0 + # self.default_action[:, ~self._is_continuous_actions] = 1.0 + self.safe_action[:, :] = self.default_action + if "contact_flag_start" in self._actions_map: # safe actions for contacts is 1 (keep contact) + idx=self._actions_map["contact_flag_start"] + self.safe_action[:, idx:idx+self._n_contacts] = 1.0 + + # assign obs bounds (useful if not using automatic obs normalization) + obs_names=self._get_obs_names() + obs_patterns=["gn", + "linvel", + "omega", + "q_jnt", + "v_jnt", + "fc", + "rhc_fail", + "rhc_cmd_q", + "rhc_cmd_v", + "rhc_cmd_eff", + "flight_pos" + ] + obs_ubs=[1.0, + 5*v_cmd_max, + 5*omega_cmd_max, + 2*torch.pi, + 30.0, + 2.0, + 1.0, + 2*torch.pi, + 30.0, + 200.0, + self._n_nodes_rhc.mean().item()] + obs_lbs=[-1.0, + -5*v_cmd_max, + -5*omega_cmd_max, + -2*torch.pi, + -30.0, + -2.0, + 0.0, + -2*torch.pi, + -30.0, + -200.0, + 0.0] + obs_bounds = {name: (lb, ub) for name, lb, ub in zip(obs_patterns, obs_lbs, obs_ubs)} + + for i in range(len(obs_names)): + obs_name=obs_names[i] + for pattern in obs_patterns: + if pattern in obs_name: + lb=obs_bounds[pattern][0] + ub=obs_bounds[pattern][1] + self._obs_lb[:, i]=lb + self._obs_ub[:, i]=ub + break + + # handle action memory buffer in obs + if self._env_opts["use_action_history"]: # just history stats + if self._env_opts["add_prev_actions_stats_to_obs"]: + i=0 + prev_actions_idx = next((i for i, s in enumerate(obs_names) if "_prev_act" in s), None) + prev_actions_mean_idx=next((i for i, s in enumerate(obs_names) if "_avrg_act" in s), None) + prev_actions_std_idx=next((i for i, s in enumerate(obs_names) if "_std_act" in s), None) + + # assume actions are always normalized in [-1, 1] by agent + if prev_actions_idx is not None: + self._obs_lb[:, 
prev_actions_idx:prev_actions_idx+self.actions_dim()]=-1.0 + self._obs_ub[:, prev_actions_idx:prev_actions_idx+self.actions_dim()]=1.0 + if prev_actions_mean_idx is not None: + self._obs_lb[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=-1.0 + self._obs_ub[:, prev_actions_mean_idx:prev_actions_mean_idx+self.actions_dim()]=1.0 + if prev_actions_std_idx is not None: + self._obs_lb[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=0 + self._obs_ub[:, prev_actions_std_idx:prev_actions_std_idx+self.actions_dim()]=1.0 + + else: # full history + i=0 + first_action_mem_buffer_idx = next((i for i, s in enumerate(obs_names) if "_m1_act" in s), None) + if first_action_mem_buffer_idx is not None: + action_idx_start_idx_counter=first_action_mem_buffer_idx + for j in range(self._env_opts["actions_history_size"]): + self._obs_lb[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=-1.0 + self._obs_ub[:, action_idx_start_idx_counter:action_idx_start_idx_counter+self.actions_dim()]=1.0 + action_idx_start_idx_counter+=self.actions_dim() + + # some aux data to avoid allocations at training runtime + self._rhc_twist_cmd_rhc_world=self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu).detach().clone() + self._rhc_twist_cmd_rhc_h=self._rhc_twist_cmd_rhc_world.detach().clone() + self._agent_twist_ref_current_w=self._rhc_twist_cmd_rhc_world.detach().clone() + self._agent_twist_ref_current_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._substep_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._step_avrg_root_twist_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._root_twist_avrg_rhc_base_loc=self._rhc_twist_cmd_rhc_world.detach().clone() + self._root_twist_avrg_rhc_base_loc_next=self._rhc_twist_cmd_rhc_world.detach().clone() + + self._random_thresh_contacts=torch.rand((self._n_envs,self._n_contacts), device=device) + # aux data + self._task_err_scaling = 
torch.zeros((self._n_envs, 1),dtype=self._dtype,device=device) + + self._pof1_b_linvel= torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_linvel"]) + self._pof1_b_omega = torch.full(size=(self._n_envs,1),dtype=self._dtype,device=device,fill_value=1-self._env_opts["pof0_omega"]) + self._bernoulli_coeffs_linvel = self._pof1_b_linvel.clone() + self._bernoulli_coeffs_linvel[:, :] = 1.0 + self._bernoulli_coeffs_omega = self._pof1_b_omega.clone() + self._bernoulli_coeffs_omega[:, :] = 1.0 + + # smoothing + self._track_rew_smoother=None + if self._env_opts["use_track_reward_smoother"]: + sub_reward_proxy=self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)[:, 0:1] + smoothing_dt=self._substep_dt + if not self._is_substep_rew[self._reward_map["task_error"]]: # assuming first reward is tracking + smoothing_dt=self._substep_dt*self._action_repeat + self._track_rew_smoother=ExponentialSignalSmoother( + name=self.__class__.__name__+"VelErrorSmoother", + signal=sub_reward_proxy, # same dimension of vel error + update_dt=smoothing_dt, + smoothing_horizon=self._env_opts["smoothing_horizon_vel_err"], + target_smoothing=0.5, + debug=self._is_debug, + dtype=self._dtype, + use_gpu=self._use_gpu) + + # if we need the action rate, we also need the action history + if self._env_opts["add_action_rate_reward"]: + if not self._env_opts["use_action_history"]: + Journal.log(self.__class__.__name__, + "_custom_post_init", + "add_action_rate_reward is True, but ", + LogType.EXCEP, + throw_when_excep=True) + + history_size=self._env_opts["actions_history_size"] + if history_size < 2: + Journal.log(self.__class__.__name__, + "_custom_post_init", + f"add_action_rate_reward requires actions history ({history_size}) to be >=2!", + LogType.EXCEP, + throw_when_excep=True) + + # add periodic timer if required + self._periodic_clock=None + if self._env_opts["add_periodic_clock_to_obs"]: + self._add_env_opt(self._env_opts, "clock_period", + 
default=int(1.5*self._action_repeat*self.task_rand_timeout_bounds()[1])) # correcting with n substeps + # (we are using the _substep_abs_counter counter) + self._periodic_clock=PeriodicTimer(counter=self._substep_abs_counter, + period=self._env_opts["clock_period"], + dtype=self._dtype, + device=self._device) + + def get_file_paths(self): + paths=AugMPCTrainingEnvBase.get_file_paths(self) + paths.append(self._this_child_path) + return paths + + def get_aux_dir(self): + aux_dirs = [] + path_getter = PathsGetter() + aux_dirs.append(path_getter.RHCDIR) + return aux_dirs + + def _get_reward_scaling(self): + if self._env_opts["single_task_ref_per_episode"]: + return self._env_opts["n_steps_task_rand_ub"] + else: + return self._env_opts["episode_timeout_ub"] + + def _max_ep_length(self): + if self._env_opts["single_task_ref_per_episode"]: + return self._env_opts["n_steps_task_rand_ub"] + else: + return self._env_opts["episode_timeout_ub"] + + def _check_sub_truncations(self): + # overrides parent + sub_truncations = self._sub_truncations.get_torch_mirror(gpu=self._use_gpu) + sub_truncations[:, 0:1] = self._ep_timeout_counter.time_limits_reached() + if self._env_opts["single_task_ref_per_episode"]: + sub_truncations[:, 1:2] = self._task_rand_counter.time_limits_reached() + + def _check_sub_terminations(self): + # default behaviour-> to be overriden by child + sub_terminations = self._sub_terminations.get_torch_mirror(gpu=self._use_gpu) + + # terminate if mpc just failed + sub_terminations[:, 0:1] = self._rhc_status.fails.get_torch_mirror(gpu=self._use_gpu) + + # check if robot is capsizing + robot_q_meas = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + check_capsize(quat=robot_q_meas,max_angle=self._max_pitch_angle, + output_t=self._is_capsized) + sub_terminations[:, 1:2] = self._is_capsized + + if self._env_opts["add_term_mpc_capsize"]: + # check if robot is about to capsize accordin to MPC + robot_q_pred = 
self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) + check_capsize(quat=robot_q_pred,max_angle=self._max_pitch_angle, + output_t=self._is_rhc_capsized) + sub_terminations[:, 2:3] = self._is_rhc_capsized + + def _custom_reset(self): + return None + + def reset(self): + AugMPCTrainingEnvBase.reset(self) + + def _pre_substep(self): + pass + + def _custom_post_step(self,episode_finished): + # executed after checking truncations and terminations and remote env reset + if self._use_gpu: + time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached().cuda(),episode_finished) + self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten()) + else: + time_to_rand_or_ep_finished = torch.logical_or(self._task_rand_counter.time_limits_reached(),episode_finished) + self.randomize_task_refs(env_indxs=time_to_rand_or_ep_finished.flatten()) + # task refs are randomized in world frame -> we rotate them in base local + # (not super efficient, we should do it just for the finished envs) + self._update_loc_twist_refs() + + if self._track_rew_smoother is not None: # reset smoother + self._track_rew_smoother.reset_all(to_be_reset=episode_finished.flatten(), + value=0.0) + + def _custom_post_substp_pre_rew(self): + self._update_loc_twist_refs() + + def _custom_post_substp_post_rew(self): + pass + + def _update_loc_twist_refs(self): + # get fresh robot orientation + if not self._override_agent_refs: + robot_q = self._robot_state.root_state.get(data_type="q",gpu=self._use_gpu) + # rotate agent ref from world to robot base + world2base_frame(t_w=self._agent_twist_ref_current_w, q_b=robot_q, + t_out=self._agent_twist_ref_current_base_loc) + # write it to agent refs tensors + self._agent_refs.rob_refs.root_state.set(data_type="twist", data=self._agent_twist_ref_current_base_loc, + gpu=self._use_gpu) + + def _apply_actions_to_rhc(self): + + self._set_rhc_refs() + + self._write_rhc_refs() + + def _set_rhc_refs(self): + + action_to_be_applied 
= self.get_actual_actions() # see _get_action_names() to get + # the meaning of each component of this tensor + + rhc_latest_twist_cmd = self._rhc_refs.rob_refs.root_state.get(data_type="twist", gpu=self._use_gpu) + rhc_latest_contact_ref = self._rhc_refs.contact_flags.get_torch_mirror(gpu=self._use_gpu) + rhc_latest_pos_ref = self._rhc_refs.rob_refs.contact_pos.get(data_type="p_z", gpu=self._use_gpu) + rhc_q=self._rhc_cmds.root_state.get(data_type="q",gpu=self._use_gpu) # this is always + # avaialble + + # reference twist for MPC is assumed to always be specified in MPC's + # horizontal frame, while agent actions are interpreted as in MPC's + # base frame -> we need to rotate the actions into the horizontal frame + base2world_frame(t_b=action_to_be_applied[:, 0:6],q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_world) + w2hor_frame(t_w=self._rhc_twist_cmd_rhc_world,q_b=rhc_q,t_out=self._rhc_twist_cmd_rhc_h) + + rhc_latest_twist_cmd[:, 0:6] = self._rhc_twist_cmd_rhc_h + + # self._rhc_refs.rob_refs.root_state.set(data_type="p", data=rhc_latest_p_ref, + # gpu=self._use_gpu) + self._rhc_refs.rob_refs.root_state.set(data_type="twist", data=rhc_latest_twist_cmd, + gpu=self._use_gpu) + + # contact flags + idx=self._actions_map["contact_flag_start"] + if self._env_opts["use_prob_based_stepping"]: + # encode actions as probs + self._random_thresh_contacts.uniform_() # random values in-place between 0 and 1 + rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] >= self._random_thresh_contacts # keep contact with + # probability action_to_be_applied[:, 6:10] + else: # just use a threshold + rhc_latest_contact_ref[:, :] = action_to_be_applied[:, idx:idx+self._n_contacts] > self._env_opts["step_thresh"] + # actually apply actions to controller + + def _write_rhc_refs(self): + + if self._use_gpu: + # GPU->CPU --> we cannot use asynchronous data transfer since it's unsafe + self._rhc_refs.rob_refs.root_state.synch_mirror(from_gpu=True,non_blocking=False) # 
write from gpu to cpu mirror + self._rhc_refs.contact_flags.synch_mirror(from_gpu=True,non_blocking=False) + self._rhc_refs.rob_refs.contact_pos.synch_mirror(from_gpu=True,non_blocking=False) + + self._rhc_refs.rob_refs.root_state.synch_all(read=False, retry=True) # write mirror to shared mem + self._rhc_refs.contact_flags.synch_all(read=False, retry=True) + self._rhc_refs.rob_refs.contact_pos.synch_all(read=False, retry=True) + + def _override_refs(self, + env_indxs: torch.Tensor = None): + + # runs at every post_step + self._agent_refs.rob_refs.root_state.synch_all(read=True,retry=True) # first read from mem + if self._use_gpu: + # copies latest refs to GPU + self._agent_refs.rob_refs.root_state.synch_mirror(from_gpu=False,non_blocking=False) + + agent_linvel_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="v", + gpu=self._use_gpu) + + agent_yaw_omega_ref_current=self._agent_refs.rob_refs.root_state.get(data_type="omega", + gpu=self._use_gpu) + + # self._p_trgt_w[:, :]=self._robot_state.root_state.get(data_type="p",gpu=self._use_gpu)[:, 0:2] + \ + # agent_p_ref_current[:, 0:2] + self._agent_twist_ref_current_w[:, 0:3]=agent_linvel_ref_current # set linvel target + + self._agent_twist_ref_current_w[:, 5:6]=agent_yaw_omega_ref_current[:, 2:3] # set yaw ang. 
vel target from shared mem + + def _fill_substep_obs(self, + obs: torch.Tensor): + + # measured stuff + robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu) + robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + + if self._env_opts["use_linvel_from_rhc"]: + # twist estimate from mpc + robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu) + obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_rhc_base_loc_next[:, 0:3] + else: + obs[:, self._obs_map["linvel_meas"]:(self._obs_map["linvel_meas"]+3)] = robot_twist_meas_base_loc[:, 0:3] + obs[:, self._obs_map["omega_meas"]:(self._obs_map["omega_meas"]+3)] = robot_twist_meas_base_loc[:, 3:6] + + obs[:, self._obs_map["v_jnt"]:(self._obs_map["v_jnt"]+self._n_jnts)] = robot_jnt_v_meas + + def _fill_step_obs(self, + obs: torch.Tensor): + + # measured stuff + robot_gravity_norm_base_loc = self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu) + robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu) + robot_jnt_q_meas = self._robot_state.jnts_state.get(data_type="q",gpu=self._use_gpu) + if self._jnt_q_blacklist_idxs is not None: # we don't want to read joint pos from blacklist + robot_jnt_q_meas[:, self._jnt_q_blacklist_idxs]=0.0 + robot_jnt_v_meas = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu) + + # twist estimate from mpc + robot_twist_rhc_base_loc_next = self._rhc_cmds.root_state.get(data_type="twist",gpu=self._use_gpu) + # cmds for jnt imp to be applied next + robot_jnt_q_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="q",gpu=self._use_gpu) + robot_jnt_v_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="v",gpu=self._use_gpu) + robot_jnt_eff_rhc_applied_next=self._rhc_cmds.jnts_state.get(data_type="eff",gpu=self._use_gpu) + + flight_info_now = 
self._rhc_refs.flight_info.get(data_type="all",gpu=self._use_gpu) + flight_settings_now = self._rhc_refs.flight_settings_req.get(data_type="all",gpu=self._use_gpu) + + # refs + agent_twist_ref = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) + + obs[:, self._obs_map["gn_base"]:(self._obs_map["gn_base"]+3)] = robot_gravity_norm_base_loc # norm. gravity vector in base frame + + obs[:, self._obs_map["q_jnt"]:(self._obs_map["q_jnt"]+self._n_jnts)] = robot_jnt_q_meas # meas jnt pos + obs[:, self._obs_map["twist_ref"]:(self._obs_map["twist_ref"]+6)] = agent_twist_ref # high lev agent refs to be tracked + + if self._env_opts["add_mpc_contact_f_to_obs"]: + n_forces=3*len(self._contact_names) + obs[:, self._obs_map["contact_f_mpc"]:(self._obs_map["contact_f_mpc"]+n_forces)] = self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=self._use_gpu) + if self._env_opts["add_fail_idx_to_obs"]: + obs[:, self._obs_map["rhc_fail_idx"]:(self._obs_map["rhc_fail_idx"]+1)] = self._rhc_fail_idx(gpu=self._use_gpu) + if self._env_opts["add_term_mpc_capsize"]: + obs[:, self._obs_map["gn_base_mpc"]:(self._obs_map["gn_base_mpc"]+3)] = self._rhc_cmds.root_state.get(data_type="gn",gpu=self._use_gpu) + if self._env_opts["use_rhc_avrg_vel_tracking"]: + self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc, base_loc=True) + obs[:, self._obs_map["avrg_twist_mpc"]:(self._obs_map["avrg_twist_mpc"]+6)] = self._root_twist_avrg_rhc_base_loc + if self._env_opts["add_flight_info"]: + obs[:, self._obs_map["flight_info"]:(self._obs_map["flight_info"]+self._flight_info_size)] = flight_info_now + if self._env_opts["add_flight_settings"]: + obs[:, self._obs_map["flight_settings_req"]:(self._obs_map["flight_settings_req"]+self._flight_setting_size)] = \ + flight_settings_now + + if self._env_opts["add_rhc_cmds_to_obs"]: + obs[:, self._obs_map["rhc_cmds_q"]:(self._obs_map["rhc_cmds_q"]+self._n_jnts)] = robot_jnt_q_rhc_applied_next + obs[:, 
self._obs_map["rhc_cmds_v"]:(self._obs_map["rhc_cmds_v"]+self._n_jnts)] = robot_jnt_v_rhc_applied_next + obs[:, self._obs_map["rhc_cmds_eff"]:(self._obs_map["rhc_cmds_eff"]+self._n_jnts)] = robot_jnt_eff_rhc_applied_next + if self._env_opts["use_action_history"]: + if self._env_opts["add_prev_actions_stats_to_obs"]: # just add last, std and mean to obs + obs[:, self._obs_map["action_history_prev"]:(self._obs_map["action_history_prev"]+self.actions_dim())]=self._act_mem_buffer.get(idx=0) + obs[:, self._obs_map["action_history_avrg"]:(self._obs_map["action_history_avrg"]+self.actions_dim())]=self._act_mem_buffer.mean(clone=False) + obs[:, self._obs_map["action_history_std"]:(self._obs_map["action_history_std"]+self.actions_dim())]=self._act_mem_buffer.std(clone=False) + else: # add whole memory buffer to obs + next_idx=self._obs_map["action_history"] + for i in range(self._env_opts["actions_history_size"]): + obs[:, next_idx:(next_idx+self.actions_dim())]=self._act_mem_buffer.get(idx=i) # get all (n_envs x (obs_dim x horizon)) + next_idx+=self.actions_dim() + + if self._env_opts["use_action_smoothing"]: # adding smoothed actions + obs[:, self._obs_map["action_smoothing"]:(self._obs_map["action_smoothing"]+self.actions_dim())]=self.get_actual_actions(normalized=True) + next_idx+=self.actions_dim() + + if self._env_opts["add_periodic_clock_to_obs"]: + obs[:, next_idx:(next_idx+2)]=self._periodic_clock.get() + next_idx+=2 + if self._env_opts["add_heightmap_obs"]: + hm = self._robot_state.height_sensor.get(gpu=self._use_gpu) + obs[:, self._obs_map["heightmap"]:(self._obs_map["heightmap"]+self._height_flat_dim)] = hm + + def _get_custom_db_data(self, + episode_finished, + ignore_ep_end): + episode_finished = episode_finished.cpu() + self.custom_db_data["AgentTwistRefs"].update( + new_data=self._agent_refs.rob_refs.root_state.get(data_type="twist", gpu=False), + ep_finished=episode_finished, + ignore_ep_end=ignore_ep_end) + 
        self.custom_db_data["RhcFailIdx"].update(new_data=self._rhc_fail_idx(gpu=False),
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        self.custom_db_data["RhcContactForces"].update(
            new_data=self._rhc_cmds.contact_wrenches.get(data_type="f",gpu=False),
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        self.custom_db_data["Power"].update(
            new_data=self._pow_db_data,
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)
        self.custom_db_data["TrackingError"].update(
            new_data=self._track_error_db,
            ep_finished=episode_finished,
            ignore_ep_end=ignore_ep_end)

    # reward functions
    def _action_rate(self):
        # Squared normalized action rate (a_t - a_{t-1}), averaged separately
        # over continuous and discrete action dims and weighted by the
        # corresponding env options. Returns an (n_envs x 1) tensor.
        continuous_actions=self._is_continuous_actions
        discrete_actions=~self._is_continuous_actions
        n_c_actions=continuous_actions.sum().item()
        n_d_actions=discrete_actions.sum().item()
        # idx=0 is the latest action, idx=1 the previous one
        actions_prev=self._act_mem_buffer.get(idx=1)
        actions_now=self._act_mem_buffer.get(idx=0)
        actions_rate=(actions_now-actions_prev) # actions already normalized
        actions_rate_c=actions_rate[:, continuous_actions]
        actions_rate_d=actions_rate[:, discrete_actions]

        actions_rate_sqrd=None # assuming n_c_actions > 0 always
        actions_rate_sqrd=self._env_opts["action_rate_rew_c_weight"]*torch.sum(actions_rate_c*actions_rate_c, dim=1, keepdim=True)/n_c_actions
        if discrete_actions.any():
            actions_rate_sqrd+=self._env_opts["action_rate_rew_d_weight"]*torch.sum(actions_rate_d*actions_rate_d, dim=1, keepdim=True)/n_d_actions
        return actions_rate_sqrd

    def _mech_pow(self, jnts_vel, jnts_effort, autoscaled: bool = False, drained: bool = True):
        # Total weighted mechanical power per env (n_envs x 1).
        # drained=True clamps per-joint power to >= 0 (no regenerative credit).
        # Side effect: writes the (pre-autoscaling) total into
        # self._pow_db_data[:, 1:2] as a db metric.
        mech_pow_jnts=(jnts_effort*jnts_vel)*self._power_penalty_weights
        if drained:
            mech_pow_jnts.clamp_(0.0,torch.inf) # do not account for regenerative power
        mech_pow_tot = torch.sum(mech_pow_jnts, dim=1, keepdim=True)
        self._pow_db_data[:, 1:2]=mech_pow_tot.cpu()
        if autoscaled:
            # normalize by the sum of the per-joint penalty weights
            mech_pow_tot=mech_pow_tot/self._power_penalty_weights_sum
        return mech_pow_tot

    def _cost_of_transport(self,
            jnts_vel, jnts_effort, v_norm, mass_weight: bool = False):
        # Cost of transport: drained mechanical power over (regularized) speed,
        # optionally normalized by robot weight. Side effect: overwrites both
        # columns of self._pow_db_data (CoT and drained power) for db logging.
        drained_mech_pow=self._mech_pow(jnts_vel=jnts_vel,
            jnts_effort=jnts_effort,
            drained=True)
        CoT=drained_mech_pow/(v_norm+1e-2)
        if mass_weight:
            robot_weight=self._rhc_robot_weight
            CoT=CoT/robot_weight

        # add to db metrics
        self._pow_db_data[:, 0:1]=CoT.cpu()
        self._pow_db_data[:, 1:2]=drained_mech_pow.cpu()

        return CoT

    def _jnt_vel_penalty(self, jnts_vel):
        # Mean squared joint velocity per env (n_envs x 1).
        weighted_jnt_vel = torch.sum(jnts_vel*jnts_vel, dim=1, keepdim=True)/self._n_jnts
        return weighted_jnt_vel

    def _rhc_fail_idx(self, gpu: bool):
        # Scaled receding-horizon-controller failure index, read from shared mem.
        rhc_fail_idx = self._rhc_status.rhc_fail_idx.get_torch_mirror(gpu=gpu)
        return self._env_opts["rhc_fail_idx_scale"]*rhc_fail_idx

    # basic L1 and L2 error functions
    def _track_err_wmse(self, task_ref, task_meas, scaling, weights):
        # weighted mean-squared error computation
        # Side effect: stores |task_meas - task_ref| in self._track_error_db.
        task_error = (task_meas-task_ref)
        # add to db metrics
        self._track_error_db[:, :]=torch.abs(task_error)
        scaled_error=task_error/scaling

        task_wmse = torch.sum(scaled_error*scaled_error*weights, dim=1, keepdim=True)/torch.sum(weights).item()
        return task_wmse # weighted mean square error (along task dimension)

    def _track_err_dir_wmse(self, task_ref, task_meas, scaling, weights):
        # weighted DIRECTIONAL mean-squared error computation
        # Side effect: stores |task_meas - task_ref| in self._track_error_db.
        task_error = (task_meas-task_ref)
        # add to db metrics
        self._track_error_db[:, :]=torch.abs(task_error)
        task_error=task_error/scaling

        # projection along commanded direction and gravity, matching paper formulation
        v_ref=task_ref[:, 0:3]
        delta_v=task_error[:, 0:3]

        v_ref_norm=torch.norm(v_ref, dim=1, keepdim=True)
        cmd_dir=v_ref/(v_ref_norm+1e-8)
        # fallback to measured direction if command is (near) zero to avoid degenerate projection
        meas_dir=task_meas[:, 0:3]
        meas_dir=meas_dir/(torch.norm(meas_dir, dim=1, keepdim=True)+1e-8)
        cmd_dir=torch.where((v_ref_norm>1e-6), cmd_dir, meas_dir)

        gravity_dir = 
self._robot_state.root_state.get(data_type="gn",gpu=self._use_gpu) # normalized gravity in base frame
        gravity_dir = gravity_dir/(torch.norm(gravity_dir, dim=1, keepdim=True)+1e-8)

        # linear error decomposed along commanded direction / gravity / lateral remainder
        forward_error=torch.sum(delta_v*cmd_dir, dim=1, keepdim=True)
        vertical_error=torch.sum(delta_v*gravity_dir, dim=1, keepdim=True)
        lateral_vec=delta_v - vertical_error*gravity_dir - forward_error*cmd_dir
        lateral_error=torch.norm(lateral_vec, dim=1, keepdim=True)

        # angular directional components: use gravity as vertical, project base x onto the world xy plane for roll, and close the triad with pitch
        base_x = self._base_x_dir
        base_y = self._base_y_dir

        roll_dir = base_x - torch.sum(base_x*gravity_dir, dim=1, keepdim=True)*gravity_dir
        roll_norm = torch.norm(roll_dir, dim=1, keepdim=True)
        roll_dir_alt = base_y - torch.sum(base_y*gravity_dir, dim=1, keepdim=True)*gravity_dir # fallback if base x is almost aligned with gravity
        roll_norm_alt = torch.norm(roll_dir_alt, dim=1, keepdim=True)
        use_alt_roll = roll_norm < 1e-6
        roll_dir = torch.where(use_alt_roll, roll_dir_alt, roll_dir)
        roll_norm = torch.where(use_alt_roll, roll_norm_alt, roll_norm)
        roll_dir = roll_dir/(roll_norm+1e-8)

        # pitch closes the right-handed triad (gravity x roll)
        pitch_dir = torch.cross(gravity_dir, roll_dir, dim=1)
        pitch_dir = pitch_dir/(torch.norm(pitch_dir, dim=1, keepdim=True)+1e-8)

        delta_omega = task_error[:, 3:6]
        omega_roll_error = torch.sum(delta_omega*roll_dir, dim=1, keepdim=True)
        omega_pitch_error = torch.sum(delta_omega*pitch_dir, dim=1, keepdim=True)
        omega_vertical_error = torch.sum(delta_omega*gravity_dir, dim=1, keepdim=True)

        full_error=torch.cat((forward_error, lateral_error, vertical_error, omega_roll_error, omega_pitch_error, omega_vertical_error), dim=1)
        task_wmse_dir = torch.sum(full_error*full_error*weights, dim=1, keepdim=True)/torch.sum(weights).item()
        return task_wmse_dir # weighted mean square error (along task dimension)

    # L2 errors
    def _tracking_err_rel_wmse(self, task_ref, task_meas,
            weights, directional: bool = False):
        # WMSE scaled by the norm of the full twist reference (relative error).
        ref_norm = task_ref.norm(dim=1,keepdim=True) # norm of the full twist reference
        self._task_err_scaling[:, :] = ref_norm+1e-2
        if directional:
            task_rel_err_wmse=self._track_err_dir_wmse(task_ref=task_ref, task_meas=task_meas,
                scaling=self._task_err_scaling, weights=weights)
        else:
            task_rel_err_wmse=self._track_err_wmse(task_ref=task_ref, task_meas=task_meas,
                scaling=self._task_err_scaling, weights=weights)
        return task_rel_err_wmse

    def _tracking_err_wmse(self, task_ref, task_meas, weights, directional: bool = False):
        # Absolute (unit-scaled) WMSE, optionally directional.
        self._task_err_scaling[:, :] = 1
        if directional:
            task_err_wmse = self._track_err_dir_wmse(task_ref=task_ref,
                task_meas=task_meas, scaling=self._task_err_scaling, weights=weights)
        else:
            task_err_wmse = self._track_err_wmse(task_ref=task_ref,
                task_meas=task_meas, scaling=self._task_err_scaling, weights=weights)
        return task_err_wmse

    # L1 errors
    def _tracking_err_rel_lin(self, task_ref, task_meas, weights, directional):
        # L1-style relative error: sqrt of the relative WMSE.
        task_rel_err_wmse = self._tracking_err_rel_wmse(task_ref=task_ref,
            task_meas=task_meas, weights=weights, directional=directional)
        return task_rel_err_wmse.sqrt()

    def _tracking_err_lin(self, task_ref, task_meas, weights, directional: bool = False):
        # L1-style absolute error: sqrt of the absolute WMSE.
        self._task_err_scaling[:, :] = 1
        task_err_wmse=self._tracking_err_wmse(task_ref=task_ref,
            task_meas=task_meas, weights=weights, directional=directional)
        return task_err_wmse.sqrt()

    # reward computation over steps/substeps
    def _compute_step_rewards(self):
        # Writes the step-rate reward terms into the shared sub-rewards buffer,
        # column offsets given by self._reward_map (see _get_rewards_names).

        sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)

        # tracking reward: pick the error function from the configured options
        if self._env_opts["use_L1_norm"]: # linear errors
            task_error_fun = self._tracking_err_lin
            if self._env_opts["use_relative_error"]:
                task_error_fun = self._tracking_err_rel_lin
        else: # quadratic error
            task_error_fun = self._tracking_err_wmse
            if self._env_opts["use_relative_error"]:
                task_error_fun = self._tracking_err_rel_wmse

        agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu) # high level agent refs (hybrid twist)
        self._get_avrg_step_root_twist(out=self._step_avrg_root_twist_base_loc, base_loc=True)
        task_error = task_error_fun(task_meas=self._step_avrg_root_twist_base_loc,
            task_ref=agent_task_ref_base_loc,
            weights=self._task_err_weights,
            directional=self._env_opts["directional_tracking"])

        idx=self._reward_map["task_error"]
        if self._env_opts["use_exp_track_rew"]:
            sub_rewards[:, idx:(idx+1)] = \
                self._env_opts["task_track_offset_exp"]*torch.exp(-self._env_opts["task_track_scale_exp"]*task_error)
        else: # simple linear reward
            sub_rewards[:, idx:(idx+1)] = \
                self._env_opts["task_track_offset"]*(1.0-self._env_opts["task_track_scale"]*task_error)

        if self._env_opts["use_fail_idx_weight"]: # add weight based on fail idx
            fail_idx=self._rhc_fail_idx(gpu=self._use_gpu)
            sub_rewards[:, idx:(idx+1)]=(1-fail_idx)*sub_rewards[:, idx:(idx+1)]
        if self._track_rew_smoother is not None: # smooth reward if required
            # NOTE(review): reads column 0:1 but writes idx:(idx+1) — equivalent
            # only because "task_error" is the first entry of the reward map
            self._track_rew_smoother.update(new_signal=sub_rewards[:, 0:1])
            sub_rewards[:, idx:(idx+1)]=self._track_rew_smoother.get()

        # action rate
        if self._env_opts["add_action_rate_reward"]:
            action_rate=self._action_rate()
            idx=self._reward_map["action_rate"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["action_rate_offset"]*(1.0-self._env_opts["action_rate_scale"]*action_rate)

        # mpc vel tracking
        if self._env_opts["use_rhc_avrg_vel_tracking"]:
            self._get_avrg_rhc_root_twist(out=self._root_twist_avrg_rhc_base_loc_next,base_loc=True) # get estimated avrg vel
            # from MPC after stepping
            task_pred_error=task_error_fun(task_meas=self._root_twist_avrg_rhc_base_loc_next,
                task_ref=agent_task_ref_base_loc,
                weights=self._task_pred_err_weights,
                directional=self._env_opts["directional_tracking"])
            idx=self._reward_map["rhc_avrg_vel_error"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["task_pred_track_offset"]*torch.exp(-self._env_opts["task_pred_track_scale"]*task_pred_error)

    def _compute_substep_rewards(self):
        # Writes the substep-rate reward terms (CoT / power / joint vel) into
        # the shared sub-rewards buffer; terms are gated by env options.

        sub_rewards = self._sub_rewards.get_torch_mirror(gpu=self._use_gpu)

        if self._env_opts["add_CoT_reward"] or self._env_opts["add_power_reward"]:
            jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)
            jnts_effort = self._robot_state.jnts_state.get(data_type="eff",gpu=self._use_gpu)

        if self._env_opts["add_CoT_reward"]:
            if self._env_opts["use_CoT_wrt_ref"]: # uses v ref norm for computing cot
                agent_task_ref_base_loc = self._agent_refs.rob_refs.root_state.get(data_type="twist",gpu=self._use_gpu)
                v_norm=torch.norm(agent_task_ref_base_loc, dim=1, keepdim=True)
            else: # uses measured velocity
                robot_twist_meas_base_loc = self._robot_state.root_state.get(data_type="twist",gpu=self._use_gpu)
                v_norm=torch.norm(robot_twist_meas_base_loc[:,0:3], dim=1, keepdim=True)
            CoT=self._cost_of_transport(jnts_vel=jnts_vel,jnts_effort=jnts_effort,v_norm=v_norm,
                mass_weight=True
                )
            idx=self._reward_map["CoT"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["CoT_offset"]*(1-self._env_opts["CoT_scale"]*CoT)
        if self._env_opts["add_power_reward"]:
            weighted_mech_power=self._mech_pow(jnts_vel=jnts_vel,jnts_effort=jnts_effort, drained=True)
            idx=self._reward_map["mech_pow"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["power_offset"]*(1-self._env_opts["power_scale"]*weighted_mech_power)

        if self._env_opts["add_jnt_v_reward"]:
            jnts_vel = self._robot_state.jnts_state.get(data_type="v",gpu=self._use_gpu)
            jnt_v=self._jnt_vel_penalty(jnts_vel=jnts_vel)
            idx=self._reward_map["jnt_v"]
            sub_rewards[:, idx:(idx+1)] = self._env_opts["jnt_vel_offset"]*(1-self._env_opts["jnt_vel_scale"]*jnt_v)

    def _randomize_task_refs(self,
            env_indxs: torch.Tensor = None):
        # Resamples the agent twist reference (in world frame) for all envs or
        # for the subset selected by env_indxs.

        # we randomize the reference in world frame, since it's much more intuitive
        # (it will be rotated in base frame when provided to
        # the agent and used for rew
        # computation)

        if self._env_opts["use_pof0"]: # sample from bernoulli distribution
            torch.bernoulli(input=self._pof1_b_linvel,out=self._bernoulli_coeffs_linvel) # by default bernoulli_coeffs are 1 if not self._env_opts["use_pof0"]
            torch.bernoulli(input=self._pof1_b_omega,out=self._bernoulli_coeffs_omega)
        if env_indxs is None:
            # resample all envs: uniform in [-1, 1], then scale/offset, then
            # zero-out linvel/omega per-env via the bernoulli coefficients
            random_uniform=torch.full_like(self._agent_twist_ref_current_w, fill_value=0.0)
            torch.nn.init.uniform_(random_uniform, a=-1, b=1)
            self._agent_twist_ref_current_w[:, :] = random_uniform*self._twist_ref_scale + self._twist_ref_offset
            self._agent_twist_ref_current_w[:, 0:3] = self._agent_twist_ref_current_w[:, 0:3]*self._bernoulli_coeffs_linvel # linvel
            self._agent_twist_ref_current_w[:, 3:6] = self._agent_twist_ref_current_w[:, 3:6]*self._bernoulli_coeffs_omega # omega
        else:
            # resample only the selected envs
            random_uniform=torch.full_like(self._agent_twist_ref_current_w[env_indxs, :], fill_value=0.0)
            torch.nn.init.uniform_(random_uniform, a=-1, b=1)
            self._agent_twist_ref_current_w[env_indxs, :] = random_uniform * self._twist_ref_scale + self._twist_ref_offset
            self._agent_twist_ref_current_w[env_indxs, 0:3] = self._agent_twist_ref_current_w[env_indxs, 0:3]*self._bernoulli_coeffs_linvel[env_indxs, :]
            self._agent_twist_ref_current_w[env_indxs, 3:6] = self._agent_twist_ref_current_w[env_indxs, 3:6]*self._bernoulli_coeffs_omega[env_indxs, :] # omega

    def _get_obs_names(self):
        # Builds the ordered list of observation names AND, as a side effect,
        # populates self._obs_map with the starting column offset of each
        # observation group. The layout here defines the observation vector
        # consumed by the policy — order must not change across versions.

        obs_names = [""] * self.obs_dim()

        # proprioceptive stream of obs
        next_idx=0

        self._obs_map["gn_base"]=next_idx
        obs_names[0] = "gn_x_base_loc"
        obs_names[1] = "gn_y_base_loc"
        obs_names[2] = "gn_z_base_loc"
        next_idx+=3

        self._obs_map["linvel_meas"]=next_idx
        obs_names[next_idx] = "linvel_x_base_loc"
        obs_names[next_idx+1] = "linvel_y_base_loc"
        obs_names[next_idx+2] = "linvel_z_base_loc"
        next_idx+=3

        self._obs_map["omega_meas"]=next_idx
        obs_names[next_idx] = "omega_x_base_loc"
        obs_names[next_idx+1] = "omega_y_base_loc"
        obs_names[next_idx+2] = "omega_z_base_loc"
        next_idx+=3

        jnt_names=self.get_observed_joints()
        self._obs_map["q_jnt"]=next_idx
        for i in range(self._n_jnts): # jnt obs (pos):
            obs_names[next_idx+i] = f"q_jnt_{jnt_names[i]}"
        next_idx+=self._n_jnts

        self._obs_map["v_jnt"]=next_idx
        for i in range(self._n_jnts): # jnt obs (v):
            obs_names[next_idx+i] = f"v_jnt_{jnt_names[i]}"
        next_idx+=self._n_jnts

        # references
        self._obs_map["twist_ref"]=next_idx
        obs_names[next_idx] = "linvel_x_ref_base_loc"
        obs_names[next_idx+1] = "linvel_y_ref_base_loc"
        obs_names[next_idx+2] = "linvel_z_ref_base_loc"
        obs_names[next_idx+3] = "omega_x_ref_base_loc"
        obs_names[next_idx+4] = "omega_y_ref_base_loc"
        obs_names[next_idx+5] = "omega_z_ref_base_loc"
        next_idx+=6

        # contact forces
        if self._env_opts["add_mpc_contact_f_to_obs"]:
            i = 0
            self._obs_map["contact_f_mpc"]=next_idx
            for contact in self._contact_names:
                obs_names[next_idx+i] = f"fc_{contact}_x_base_loc"
                obs_names[next_idx+i+1] = f"fc_{contact}_y_base_loc"
                obs_names[next_idx+i+2] = f"fc_{contact}_z_base_loc"
                i+=3
            next_idx+=3*len(self._contact_names)

        # data directly from MPC
        if self._env_opts["add_fail_idx_to_obs"]:
            self._obs_map["rhc_fail_idx"]=next_idx
            obs_names[next_idx] = "rhc_fail_idx"
            next_idx+=1
        if self._env_opts["add_term_mpc_capsize"]:
            self._obs_map["gn_base_mpc"]=next_idx
            obs_names[next_idx] = "gn_x_rhc_base_loc"
            obs_names[next_idx+1] = "gn_y_rhc_base_loc"
            obs_names[next_idx+2] = "gn_z_rhc_base_loc"
            next_idx+=3
        if self._env_opts["use_rhc_avrg_vel_tracking"]:
            self._obs_map["avrg_twist_mpc"]=next_idx
            obs_names[next_idx] = "linvel_x_avrg_rhc"
            obs_names[next_idx+1] = "linvel_y_avrg_rhc"
            obs_names[next_idx+2] = "linvel_z_avrg_rhc"
            obs_names[next_idx+3] = "omega_x_avrg_rhc"
            obs_names[next_idx+4] = "omega_y_avrg_rhc"
            obs_names[next_idx+5] = "omega_z_avrg_rhc"
            next_idx+=6

        if self._env_opts["add_flight_info"]:
            # five per-contact groups: pos, remaining len, nominal len,
            # nominal apex, nominal end
            self._obs_map["flight_info"]=next_idx
            for i in range(len(self._contact_names)):
                obs_names[next_idx+i] = "flight_pos_"+ self._contact_names[i]
            next_idx+=len(self._contact_names)
            for i in range(len(self._contact_names)):
                obs_names[next_idx+i] = "flight_len_remaining_"+ self._contact_names[i]
            next_idx+=len(self._contact_names)
            for i in range(len(self._contact_names)):
                obs_names[next_idx+i] = "flight_len_nominal_"+ self._contact_names[i]
            next_idx+=len(self._contact_names)
            for i in range(len(self._contact_names)):
                obs_names[next_idx+i] = "flight_apex_nominal_"+ self._contact_names[i]
            next_idx+=len(self._contact_names)
            for i in range(len(self._contact_names)):
                obs_names[next_idx+i] = "flight_end_nominal_"+ self._contact_names[i]
            next_idx+=len(self._contact_names)

        if self._env_opts["add_flight_settings"]:
            # three per-contact groups: requested len, apex, end
            self._obs_map["flight_settings_req"]=next_idx
            for i in range(len(self._contact_names)):
                obs_names[next_idx+i] = "flight_len_req_"+ self._contact_names[i]
            next_idx+=len(self._contact_names)
            for i in range(len(self._contact_names)):
                obs_names[next_idx+i] = "flight_apex_req_"+ self._contact_names[i]
            next_idx+=len(self._contact_names)
            for i in range(len(self._contact_names)):
                obs_names[next_idx+i] = "flight_end_req_"+ self._contact_names[i]
            next_idx+=len(self._contact_names)

        if self._env_opts["add_rhc_cmds_to_obs"]:
            self._obs_map["rhc_cmds_q"]=next_idx
            for i in range(self._n_jnts): # jnt obs (pos):
                obs_names[next_idx+i] = f"rhc_cmd_q_{jnt_names[i]}"
            next_idx+=self._n_jnts
            self._obs_map["rhc_cmds_v"]=next_idx
            for i in range(self._n_jnts): # jnt obs (pos):
                obs_names[next_idx+i] = f"rhc_cmd_v_{jnt_names[i]}"
            next_idx+=self._n_jnts
            self._obs_map["rhc_cmds_eff"]=next_idx
            for i in range(self._n_jnts): # jnt obs (pos):
                obs_names[next_idx+i] = f"rhc_cmd_eff_{jnt_names[i]}"
            next_idx+=self._n_jnts

        # previous actions info
        if self._env_opts["use_action_history"]:
            self._obs_map["action_history"]=next_idx
            action_names = self._get_action_names()
            if self._env_opts["add_prev_actions_stats_to_obs"]:
                self._obs_map["action_history_prev"]=next_idx
                for act_idx in range(self.actions_dim()):
                    obs_names[next_idx+act_idx] = action_names[act_idx]+f"_prev_act"
                next_idx+=self.actions_dim()
                self._obs_map["action_history_avrg"]=next_idx
                for act_idx in range(self.actions_dim()):
                    obs_names[next_idx+act_idx] = action_names[act_idx]+f"_avrg_act"
                next_idx+=self.actions_dim()
                self._obs_map["action_history_std"]=next_idx
                for act_idx in range(self.actions_dim()):
                    obs_names[next_idx+act_idx] = action_names[act_idx]+f"_std_act"
                next_idx+=self.actions_dim()
            else:
                for i in range(self._env_opts["actions_history_size"]):
                    for act_idx in range(self.actions_dim()):
                        obs_names[next_idx+act_idx] = action_names[act_idx]+f"_m{i+1}_act"
                    next_idx+=self.actions_dim()

        if self._env_opts["use_action_smoothing"]:
            self._obs_map["action_smoothing"]=next_idx
            for smoothed_action in range(self.actions_dim()):
                obs_names[next_idx+smoothed_action] = action_names[smoothed_action]+f"_smoothed"
            next_idx+=self.actions_dim()

        if self._env_opts["add_periodic_clock_to_obs"]:
            self._obs_map["clock"]=next_idx
            obs_names[next_idx] = "clock_cos"
            obs_names[next_idx+1] = "clock_sin"
            next_idx+=2
        if self._env_opts["add_heightmap_obs"] and self._height_grid_size is not None:
            self._obs_map["heightmap"]=next_idx
            gs = self._height_grid_size
            for r in range(gs):
                for c in range(gs):
                    obs_names[next_idx] = f"height_r{r}_c{c}"
                    next_idx += 1

        return obs_names

    def _set_substep_obs(self):
        # which obs are to be averaged over substeps?

        # only the measured velocities are averaged at substep rate
        self._is_substep_obs[self._obs_map["linvel_meas"]:self._obs_map["linvel_meas"]+3]=True
        self._is_substep_obs[self._obs_map["omega_meas"]:self._obs_map["omega_meas"]+3]=True
        self._is_substep_obs[self._obs_map["v_jnt"]:self._obs_map["v_jnt"]+self._n_jnts]=True # also good for noise

        # self._is_substep_obs[self._obs_map["contact_f_mpc"]:self._obs_map["contact_f_mpc"]+3*len(self._contact_names)]=True

    def _get_action_names(self):
        # Builds the ordered action names: 6D twist commands followed by one
        # contact flag per contact. Side effect: records the contact-flag
        # starting index in self._actions_map.

        action_names = [""] * self.actions_dim()
        action_names[0] = "vx_cmd" # twist commands from agent to RHC controller
        action_names[1] = "vy_cmd"
        action_names[2] = "vz_cmd"
        action_names[3] = "roll_omega_cmd"
        action_names[4] = "pitch_omega_cmd"
        action_names[5] = "yaw_omega_cmd"

        next_idx=6

        self._actions_map["contact_flag_start"]=next_idx
        for i in range(len(self._contact_names)):
            contact=self._contact_names[i]
            action_names[next_idx] = f"contact_flag_{contact}"
            next_idx+=1

        return action_names

    def _set_substep_rew(self):

        # which rewards are to be computed at substeps frequency?
        self._is_substep_rew[self._reward_map["task_error"]]=False
        if self._env_opts["add_CoT_reward"]:
            self._is_substep_rew[self._reward_map["CoT"]]=True
        if self._env_opts["add_power_reward"]:
            self._is_substep_rew[self._reward_map["mech_pow"]]=True
        if self._env_opts["add_action_rate_reward"]:
            self._is_substep_rew[self._reward_map["action_rate"]]=False
        if self._env_opts["add_jnt_v_reward"]:
            self._is_substep_rew[self._reward_map["jnt_v"]]=True

        if self._env_opts["use_rhc_avrg_vel_tracking"]:
            self._is_substep_rew[self._reward_map["rhc_avrg_vel_error"]]=False

    def _get_rewards_names(self):
        # Builds the ordered reward-term names and, as side effects, populates
        # self._reward_map (column offsets) and self._reward_lb_map (names of
        # the env options holding each term's lower bound). CoT and power
        # rewards are mutually exclusive.

        counter=0
        reward_names = []

        # adding rewards
        reward_names.append("task_error")
        self._reward_map["task_error"]=counter
        self._reward_lb_map["task_error"]="task_error_reward_lb"
        counter+=1
        if self._env_opts["add_power_reward"] and self._env_opts["add_CoT_reward"]:
            Journal.log(self.__class__.__name__,
                "__init__",
                "Only one between CoT and power reward can be used!",
                LogType.EXCEP,
                throw_when_excep=True)
        if self._env_opts["add_CoT_reward"]:
            reward_names.append("CoT")
            self._reward_map["CoT"]=counter
            self._reward_lb_map["CoT"]="CoT_reward_lb"
            counter+=1
        if self._env_opts["add_power_reward"]:
            reward_names.append("mech_pow")
            self._reward_map["mech_pow"]=counter
            self._reward_lb_map["mech_pow"]="power_reward_lb"
            counter+=1
        if self._env_opts["add_action_rate_reward"]:
            reward_names.append("action_rate")
            self._reward_map["action_rate"]=counter
            self._reward_lb_map["action_rate"]="action_rate_reward_lb"
            counter+=1
        if self._env_opts["add_jnt_v_reward"]:
            reward_names.append("jnt_v")
            self._reward_map["jnt_v"]=counter
            self._reward_lb_map["jnt_v"]="jnt_vel_reward_lb"
            counter+=1
        if self._env_opts["use_rhc_avrg_vel_tracking"]:
            reward_names.append("rhc_avrg_vel_error")
            self._reward_map["rhc_avrg_vel_error"]=counter
            self._reward_lb_map["rhc_avrg_vel_error"]="rhc_avrg_vel_reward_lb"
            counter+=1

        return reward_names

    def _get_sub_trunc_names(self):
        # Names of the truncation conditions (episode timeout, optional
        # reference re-randomization).
        sub_trunc_names = []
        sub_trunc_names.append("ep_timeout")
        if self._env_opts["single_task_ref_per_episode"]:
            sub_trunc_names.append("task_ref_rand")
        return sub_trunc_names

    def _get_sub_term_names(self):
        # to be overridden by child class
        sub_term_names = []
        sub_term_names.append("rhc_failure")
        sub_term_names.append("robot_capsize")
        if self._env_opts["add_term_mpc_capsize"]:
            sub_term_names.append("rhc_capsize")

        return sub_term_names

    def _set_jnts_blacklist_pattern(self):
        # used to exclude pos measurement from wheels
        self._jnt_q_blacklist_patterns=["wheel"]
diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/world_interface_base.py b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/world_interface_base.py
new file mode 100644
index 0000000000000000000000000000000000000000..9dc8f41cb9b4a2c1518bd204e663bb6568902b3d
--- /dev/null
+++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/world_interface_base.py
@@ -0,0 +1,1719 @@
from aug_mpc.controllers.rhc.augmpc_cluster_server import AugMpcClusterServer
from aug_mpc.utils.shared_data.remote_stepping import RemoteStepperClnt
from aug_mpc.utils.shared_data.remote_stepping import RemoteResetClnt
from aug_mpc.utils.shared_data.remote_stepping import RemoteResetRequest
from aug_mpc.utils.jnt_imp_control_base import JntImpCntrlBase
from aug_mpc.utils.hybrid_quad_xrdf_gen import get_xrdf_cmds
from aug_mpc.utils.xrdf_gen import generate_srdf, generate_urdf
from aug_mpc.utils.math_utils import quaternion_difference
from aug_mpc.utils.custom_arg_parsing import extract_custom_xacro_args, merge_xacro_cmds

from aug_mpc.utils.filtering import FirstOrderFilter

from mpc_hive.utilities.homing import RobotHomer
from mpc_hive.utilities.shared_data.jnt_imp_control import JntImpCntrlData

from EigenIPC.PyEigenIPC
import VLevel, Journal, LogType, dtype +from EigenIPC.PyEigenIPC import StringTensorServer +from EigenIPC.PyEigenIPCExt.wrappers.shared_data_view import SharedTWrapper + +from typing import List, Dict, TypeVar + +import os +import inspect +import signal +import time + +import numpy as np +import torch + +from abc import ABC, abstractmethod + +JntImpCntrlChild = TypeVar('JntImpCntrlChild', bound='JntImpCntrlBase') + +class AugMPCWorldInterfaceBase(ABC): + + def __init__(self, + robot_names: List[str], + robot_urdf_paths: List[str], + robot_srdf_paths: List[str], + jnt_imp_config_paths: List[str], + n_contacts: List[int], + cluster_dt: List[float], + use_remote_stepping: List[bool], + name: str = "AugMPCWorldInterfaceBase", + num_envs: int = 1, + debug = False, + verbose: bool = False, + vlevel: VLevel = VLevel.V1, + n_init_step: int = 0, + timeout_ms: int = 60000, + env_opts: Dict = None, + use_gpu: bool = True, + dtype: torch.dtype = torch.float32, + dump_basepath: str = "/tmp", + override_low_lev_controller: bool = False): + + # checks on input args + # type checks + if not isinstance(robot_names, List): + exception = "robot_names must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(robot_urdf_paths, List): + exception = "robot_urdf_paths must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(robot_srdf_paths, List): + exception = "robot_srdf_paths must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(cluster_dt, List): + exception = "cluster_dt must be a list!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(use_remote_stepping, List): + exception = "use_remote_stepping must be a list!" 
+ Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(n_contacts, List): + exception = "n_contacts must be a list (of integers)!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not isinstance(jnt_imp_config_paths, List): + exception = "jnt_imp_config_paths must be a list paths!" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + + # dim checks + if not len(robot_urdf_paths) == len(robot_names): + exception = f"robot_urdf_paths has len {len(robot_urdf_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(robot_srdf_paths) == len(robot_names): + exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(cluster_dt) == len(robot_names): + exception = f"cluster_dt has len {len(cluster_dt)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(use_remote_stepping) == len(robot_names): + exception = f"use_remote_stepping has len {len(use_remote_stepping)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(robot_srdf_paths) == len(robot_names): + exception = f"robot_srdf_paths has len {len(robot_srdf_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + if not len(jnt_imp_config_paths) == len(robot_names): + exception = 
f"jnt_imp_config_paths has len {len(jnt_imp_config_paths)}" + \ + f" while robot_names {len(robot_names)}" + Journal.log(self.__class__.__name__, + "__init__", + exception, + LogType.EXCEP, + throw_when_excep = True) + + self._remote_exit_flag=None + + self._name=name + self._num_envs=num_envs + self._debug=debug + self._verbose=verbose + self._vlevel=vlevel + self._force_reconnection=True + self._timeout_ms=timeout_ms + self._use_gpu=use_gpu + self._device = "cuda" if self._use_gpu else "cpu" + self._dtype=dtype + self._robot_names=robot_names + self._env_opts={} + self._env_opts["deact_when_failure"]=True + self._env_opts["filter_jnt_vel"]=False + self._env_opts["filter_cutoff_freq"]=10.0 # [Hz] + self._env_opts["filter_sampling_rate"]=100 # rate at which state is filtered [Hz] + self._env_opts["add_remote_exit_flag"]=False # add shared data server to trigger a remote exit + self._env_opts["wheel_joint_patterns"]=["wheel"] + self._env_opts["filter_wheel_pos_ref"]=True + self._env_opts["zero_wheel_eff_ref"]=True + + self._env_opts["enable_height_sensor"]=False + self._env_opts["height_sensor_resolution"]=0.16 + self._env_opts["height_sensor_pixels"]=10 + self._env_opts["height_sensor_lateral_offset"]=0.0 + self._env_opts["height_sensor_forward_offset"]=0.0 + + self._env_opts["run_cluster_bootstrap"] = False + + self._filter_step_ssteps_freq=None + + self._env_opts.update(env_opts) + + self.step_counter = 0 # global step counter + self._n_init_steps = n_init_step # n steps to be performed before applying solutions from control clusters + self._srdf_dump_paths = robot_srdf_paths + self._homers = {} + self._homing = None + self._jnt_imp_cntrl_shared_data = {} + self._jnt_imp_controllers = {} + self._jnt_imp_config_paths = {} + + # control cluster data + self.cluster_sim_step_counters = {} + self.cluster_servers = {} + self._trigger_sol = {} + self._wait_sol = {} + self._cluster_dt = {} + self._robot_urdf_paths={} + self._robot_srdf_paths={} + self._contact_names={} + 
self._num_contacts={} + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + self._cluster_dt[robot_name]=cluster_dt[i] + self._robot_urdf_paths[robot_name]=robot_urdf_paths[i] + self._robot_srdf_paths[robot_name]=robot_srdf_paths[i] + self._contact_names[robot_name]=None + self._num_contacts[robot_name]=n_contacts[i] + self._jnt_imp_config_paths[robot_name]=jnt_imp_config_paths[i] + # db data + self.debug_data = {} + self.debug_data["time_to_step_world"] = np.nan + self.debug_data["time_to_get_states_from_env"] = np.nan + self.debug_data["cluster_sol_time"] = {} + self.debug_data["cluster_state_update_dt"] = {} + self.debug_data["sim_time"] = {} + self.debug_data["cluster_time"] = {} + + self._env_timer = time.perf_counter() + + # remote sim stepping options + self._timeout = timeout_ms # timeout for remote stepping + self._use_remote_stepping = use_remote_stepping + # should use remote stepping + self._remote_steppers = {} + self._remote_resetters = {} + self._remote_reset_requests = {} + self._is_first_trigger = {} + + self._closed = False + + self._this_child_path=os.path.abspath(inspect.getfile(self.__class__)) + self._descr_dump_path=dump_basepath+"/"+f"{self.__class__.__name__}" + self._urdf_dump_paths = {} + self._srdf_dump_paths = {} + self.xrdf_cmd_vals = [] # by default empty, needs to be overriden by + # child class + self._world_iface_files_server=None + + self._override_low_lev_controller=override_low_lev_controller + + self._root_p = {} + self._root_q = {} + self._jnts_q = {} + self._root_p_prev = {} # used for num differentiation + self._root_q_prev = {} # used for num differentiation + self._jnts_q_prev = {} # used for num differentiation + self._root_v_prev = {} # used for num differentiation + self._root_omega_prev = {} # used for num differentiation + self._root_p_default = {} + self._root_q_default = {} + self._jnts_q_default = {} + + self._gravity_normalized = {} + self._gravity_normalized_base_loc = {} + + 
self._root_v = {} + self._root_v_base_loc = {} + self._root_v_default = {} + self._root_omega = {} + self._root_omega_base_loc = {} + self._root_omega_default = {} + self._root_a = {} + self._root_a_base_loc = {} + self._root_alpha = {} + self._root_alpha_base_loc = {} + + self._jnts_v = {} + self._jnt_vel_filter = {} + self._jnts_v_default = {} + self._jnts_eff = {} + self._jnts_eff_default = {} + + self._root_pos_offsets = {} + self._root_q_offsets = {} + self._root_q_offsets_yaw = {} + self._root_q_yaw_rel_ws = {} + + self._parse_env_opts() + + self._enable_height_shared = self._env_opts["enable_height_sensor"] + self._height_sensor_resolution = self._env_opts["height_sensor_resolution"] + self._height_sensor_pixels = self._env_opts["height_sensor_pixels"] + + self._pre_setup() # child's method + + self._init_world() # after this point all info from sim or robot is + # available + + self._publish_world_interface_files() + + setup_ok=self._setup() + if not setup_ok: + self.close() + + self._exit_request=False + signal.signal(signal.SIGINT, self.signal_handler) + + def signal_handler(self, sig, frame): + Journal.log(self.__class__.__name__, + "signal_handler", + "received SIGINT -> cleaning up", + LogType.WARN) + self._exit_request=True + + def __del__(self): + self.close() + + def is_closed(self): + return self._closed + + def close(self) -> None: + if not self._closed: + for i in range(len(self._robot_names)): + if self._robot_names[i] in self.cluster_servers: + self.cluster_servers[self._robot_names[i]].close() + if self._use_remote_stepping[i]: # remote signaling + if self._robot_names[i] in self._remote_reset_requests: + self._remote_reset_requests[self._robot_names[i]].close() + self._remote_resetters[self._robot_names[i]].close() + self._remote_steppers[self._robot_names[i]].close() + if self._robot_names[i] in self._jnt_imp_cntrl_shared_data: + jnt_imp_shared_data=self._jnt_imp_cntrl_shared_data[self._robot_names[i]] + if jnt_imp_shared_data is not None: + 
jnt_imp_shared_data.close() + if self._remote_exit_flag is not None: + self._remote_exit_flag.close() + if self._world_iface_files_server is not None: + self._world_iface_files_server.close() + self._close() + self._closed=True + + def _collect_world_interface_files(self): + files = [self._this_child_path] + # prefer generated URDF/SRDF if available, fallback to provided xacros + if len(self._urdf_dump_paths) > 0: + files.extend(list(self._urdf_dump_paths.values())) + else: + files.extend(list(self._robot_urdf_paths.values())) + if len(self._srdf_dump_paths) > 0: + files.extend(list(self._srdf_dump_paths.values())) + else: + files.extend(list(self._robot_srdf_paths.values())) + files.extend(list(self._jnt_imp_config_paths.values())) + # remove duplicates while preserving order + unique_files=[] + for f in files: + if f not in unique_files: + unique_files.append(f) + return unique_files + + def _publish_world_interface_files(self): + + if not any(self._use_remote_stepping): + return + self._world_iface_files_server=StringTensorServer(length=1, + basename="SharedWorldInterfaceFilesDropDir", + name_space=self._robot_names[0], + verbose=self._verbose, + vlevel=self._vlevel, + force_reconnection=True) + self._world_iface_files_server.run() + combined_paths=", ".join(self._collect_world_interface_files()) + while not self._world_iface_files_server.write_vec([combined_paths], 0): + Journal.log(self.__class__.__name__, + "_publish_world_interface_files", + f"Failed to pub world interface files. 
Retrying...", + LogType.WARN) + time.sleep(0.1) + Journal.log(self.__class__.__name__, + "_publish_world_interface_files", + f"World interface files advertised: {combined_paths}", + LogType.STAT) + + def _setup(self) -> bool: + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + # normalized gravity vector + self._gravity_normalized[robot_name]=torch.full_like(self._root_v[robot_name], fill_value=0.0) + self._gravity_normalized[robot_name][:, 2]=-1.0 + self._gravity_normalized_base_loc[robot_name]=self._gravity_normalized[robot_name].detach().clone() + + # Pre-allocate yaw-related buffers once and reuse them in root_q_yaw_rel(). + q_ref = self._root_q[robot_name] + self._root_q_offsets_yaw[robot_name] = torch.zeros( + (self._num_envs,), dtype=q_ref.dtype, device=q_ref.device) + self._root_q_yaw_rel_ws[robot_name] = { + "yaw_abs": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device), + "yaw_rel": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device), + "yaw_sin": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device), + "yaw_cos": torch.zeros((self._num_envs,), dtype=q_ref.dtype, device=q_ref.device), + "q_abs_unit": torch.zeros_like(q_ref), + "q_yaw_abs": torch.zeros_like(q_ref), + "q_yaw_rel": torch.zeros_like(q_ref), + "q_yaw_abs_conj": torch.zeros_like(q_ref), + "q_pr": torch.zeros_like(q_ref), + "q_rel": torch.zeros_like(q_ref), + } + + self.cluster_sim_step_counters[robot_name]=0 + self._is_first_trigger[robot_name] = True + if not isinstance(self._cluster_dt[robot_name], (float)): + exception = f"cluster_dt[{i}] should be a float!" 
+ Journal.log(self.__class__.__name__, + "_setup", + exception, + LogType.EXCEP, + throw_when_excep = False) + return False + self._cluster_dt[robot_name] = self._cluster_dt[robot_name] + self._trigger_sol[robot_name] = True # allow first trigger + self._wait_sol[robot_name] = False + + # initialize a lrhc cluster server for communicating with rhc controllers + self.cluster_servers[robot_name] = AugMpcClusterServer(cluster_size=self._num_envs, + cluster_dt=self._cluster_dt[robot_name], + control_dt=self.physics_dt(), + jnt_names=self._robot_jnt_names(robot_name=robot_name), + n_contacts=self._n_contacts(robot_name=robot_name), + contact_linknames=self._contact_names[robot_name], + verbose=self._verbose, + vlevel=self._vlevel, + debug=self._debug, + robot_name=robot_name, + use_gpu=self._use_gpu, + force_reconnection=self._force_reconnection, + timeout_ms=self._timeout, + enable_height_sensor=self._enable_height_shared, + height_grid_size=self._height_sensor_pixels, + height_grid_resolution=self._height_sensor_resolution) + self.cluster_servers[robot_name].run() + self.debug_data["cluster_sol_time"][robot_name] = np.nan + self.debug_data["cluster_state_update_dt"][robot_name] = np.nan + self.debug_data["sim_time"][robot_name] = np.nan + # remote sim stepping + if self._use_remote_stepping[i]: + self._remote_steppers[robot_name] = RemoteStepperClnt(namespace=robot_name, + verbose=self._debug, + vlevel=self._vlevel) + self._remote_resetters[robot_name] = RemoteResetClnt(namespace=robot_name, + verbose=self._debug, + vlevel=self._vlevel) + self._remote_reset_requests[robot_name] = RemoteResetRequest(namespace=robot_name, + n_env=self._num_envs, + is_server=True, + verbose=self._debug, + vlevel=self._vlevel, + force_reconnection=self._force_reconnection, + safe=False) + self._remote_steppers[robot_name].run() + self._remote_resetters[robot_name].run() + self._remote_reset_requests[robot_name].run() + else: + self._remote_steppers[robot_name] = None + 
self._remote_reset_requests[robot_name] = None + self._remote_resetters[robot_name] = None + + self._homers[robot_name] = RobotHomer(srdf_path=self._srdf_dump_paths[robot_name], + jnt_names=self._robot_jnt_names(robot_name=robot_name), + filter=True, + verbose=self._verbose) + robot_homing=torch.from_numpy(self._homers[robot_name].get_homing().reshape(1,-1)) + if "cuda" in self._device: + robot_homing=robot_homing.cuda() + self._homing=robot_homing.repeat(self._num_envs, 1) + + self._jnts_q_default[robot_name] = self._homing + self._set_jnts_to_homing(robot_name=robot_name) + self._set_root_to_defconfig(robot_name=robot_name) + self._reset_sim() + + self._init_safe_cluster_actions(robot_name=robot_name) + + Journal.log(self.__class__.__name__, + "_setup", + f"Will use joint impedance config at {self._jnt_imp_config_paths[robot_name]} for {robot_name}", + LogType.STAT) + + self._jnt_imp_controllers[robot_name] = self._generate_jnt_imp_control(robot_name=robot_name) + self._jnt_imp_controllers[robot_name].set_velocity_controlled_joints( + name_patterns=self._env_opts["wheel_joint_patterns"], + filter_pos_ref=self._env_opts["filter_wheel_pos_ref"], + zero_eff_ref=self._env_opts["zero_wheel_eff_ref"]) + self._jnt_imp_cntrl_shared_data[robot_name] = JntImpCntrlData(is_server=True, + n_envs=self._num_envs, + n_jnts=len(self._robot_jnt_names(robot_name=robot_name)), + jnt_names=self._robot_jnt_names(robot_name=robot_name), + namespace=robot_name, + verbose=self._verbose, + force_reconnection=self._force_reconnection, + vlevel=self._vlevel, + use_gpu=self._use_gpu, + safe=False) + self._jnt_imp_cntrl_shared_data[robot_name].run() + + self._jnt_vel_filter[robot_name]=None + if self._env_opts["filter_jnt_vel"]: + self._jnt_vel_filter[robot_name]=FirstOrderFilter(dt=1.0/self._env_opts["filter_sampling_rate"], + filter_BW=self._env_opts["filter_cutoff_freq"], + rows=self._num_envs, + cols=len(self._robot_jnt_names(robot_name=robot_name)), + device=self._device, + 
dtype=self._dtype) + + physics_rate=1.0/self.physics_dt() + self._filter_step_ssteps_freq=int(physics_rate/self._env_opts["filter_sampling_rate"]) + if self._filter_step_ssteps_freq <=0: + Journal.log(self.__class__.__name__, + "_setup", + f"The filter_sampling_rate should be smaller that the physics rate ({physics_rate} Hz)", + LogType.EXCEP, + throw_when_excep=True) + + for n in range(self._n_init_steps): # run some initialization steps + if hasattr(self, "_alter_twist_warmup"): + self._alter_twist_warmup(robot_name=robot_name, env_indxs=None) + self._step_world() + + self._read_jnts_state_from_robot(robot_name=robot_name, + env_indxs=None) + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=None) + # allow child to perform additional warmup validations (e.g., terrain/tilt) + # retry_done = False + if hasattr(self, "_post_warmup_validation"): + failing = self._post_warmup_validation(robot_name=robot_name) + if failing is not None and failing.numel() > 0: + # retry: reset only failing envs, rerun warmup, revalidate once + failing = failing.to(self._device) + Journal.log(self.__class__.__name__, + "_setup", + f"Warmup validation failed for {robot_name}, envs indexes {failing.tolist()}", + LogType.EXCEP, + throw_when_excep=True) + else: + Journal.log(self.__class__.__name__, + "_setup", + f"Warmup validation passed for {robot_name}", + LogType.INFO) + + # write some inits for all robots + # self._update_root_offsets(robot_name) + self._synch_default_root_states(robot_name=robot_name) + epsi=0.03 # adding a bit of height to avoid initial penetration + self._root_p_default[robot_name][:, 2]=self._root_p_default[robot_name][:, 2]+epsi + + reset_ok=self._reset(env_indxs=None, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True, + acquire_offsets=True) # resets everything, updates the cluster with fresh reset states + # and acquire offsets + if not reset_ok: + return False + + # cluster setup here + 
control_cluster=self.cluster_servers[robot_name] + + control_cluster.pre_trigger() + to_be_activated=control_cluster.get_inactive_controllers() + if to_be_activated is not None: + control_cluster.activate_controllers( + idxs=to_be_activated) + + if self._env_opts["run_cluster_bootstrap"]: + cluster_setup_ok=self._setup_mpc_cluster(robot_name) + if not cluster_setup_ok: + return False + self._set_cluster_actions(robot_name=robot_name) # write last cmds + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot + + if self._use_remote_stepping[i]: + step_wait_ok = self._wait_for_remote_step_req(robot_name=robot_name) + if not step_wait_ok: + return False + + self._set_startup_jnt_imp_gains(robot_name=robot_name) # set gains to + # startup config (usually lower) + + control_cluster.pre_trigger() + control_cluster.trigger_solution(bootstrap=False) # trigger first solution (in real-time iteration) before first call to step to ensure that first solution is ready when step is called the first time + + if self._env_opts["add_remote_exit_flag"]: + self._remote_exit_flag=SharedTWrapper(namespace = self._robot_names[0],# use first robot as name + basename = "IbridoRemoteEnvExitFlag", + is_server = True, + n_rows = 1, + n_cols = 1, + verbose = True, + vlevel = self._vlevel, + safe = False, + dtype=dtype.Bool, + force_reconnection=True, + fill_value = False) + self._remote_exit_flag.run() + + self._setup_done=True + + return self._setup_done + + def _setup_mpc_cluster(self, robot_name: str): + + control_cluster = self.cluster_servers[robot_name] + + # self._set_state_to_cluster(robot_name=robot_name) + rhc_state = control_cluster.get_state() + root_twist=rhc_state.root_state.get(data_type="twist", robot_idxs = None, gpu=self._use_gpu) + jnt_v=rhc_state.jnts_state.get(data_type="v", robot_idxs = None, gpu=self._use_gpu) + root_twist[:, :]=0 # override meas state to make sure MPC bootstrap uses zero velocity + jnt_v[:, :]=0 + + 
control_cluster.write_robot_state() + + # trigger bootstrap solution (solvers will run up to convergence) + control_cluster.trigger_solution(bootstrap=True) # this will trigger the bootstrap solver with the initial state, + # which will run until convergence before returning + wait_ok=control_cluster.wait_for_solution() # blocking + if not wait_ok: + return False + failed = control_cluster.get_failed_controllers(gpu=self._use_gpu) + if failed is not None: + failed_idxs = torch.nonzero(failed).squeeze(-1) + if failed_idxs.numel() > 0: + Journal.log(self.__class__.__name__, + "_setup", + f"Bootstrap solution failed for {robot_name} | n_failed: {failed_idxs.numel()}, idxs: {failed_idxs.cpu().tolist()}", + LogType.EXCEP, + throw_when_excep=False) + return False + + return True + + def step(self) -> bool: + + success=False + + if self._remote_exit_flag is not None: + # check for exit request + self._remote_exit_flag.synch_all(read=True, retry = False) + self._exit_request=self._exit_request or \ + bool(self._remote_exit_flag.get_numpy_mirror()[0, 0].item()) + + if self._exit_request: + self.close() + + if self.is_running() and (not self.is_closed()): + if self._debug: + pre_step_ok=self._pre_step_db() + if not pre_step_ok: + return False + self._env_timer=time.perf_counter() + self._step_world() + self.debug_data["time_to_step_world"] = \ + time.perf_counter() - self._env_timer + self._post_world_step_db() + success=True + else: + pre_step_ok=self._pre_step() + if not pre_step_ok: + return False + self._step_world() + self._post_world_step() + success=True + + return success + + def render(self, mode:str="human") -> None: + self._render_sim(mode) + + def reset(self, + env_indxs: torch.Tensor = None, + reset_cluster: bool = False, + reset_cluster_counter = False, + randomize: bool = False, + reset_sim: bool = False) -> None: + + for i in range(len(self._robot_names)): + robot_name=self._robot_names[i] + reset_ok=self._reset(robot_name=robot_name, + env_indxs=env_indxs, + 
randomize=randomize, + reset_cluster=reset_cluster, + reset_cluster_counter=reset_cluster_counter) + if not reset_ok: + return False + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=env_indxs) + + if reset_sim: + self._reset_sim() + + return True + + def _reset_cluster(self, + robot_name: str, + env_indxs: torch.Tensor = None, + reset_cluster_counter: bool = False): + + control_cluster = self.cluster_servers[robot_name] + + reset_ok=control_cluster.reset_controllers(idxs=env_indxs) + if not reset_ok: + return False + + self._set_state_to_cluster(robot_name=robot_name, + env_indxs=env_indxs) + control_cluster.write_robot_state() # writes to shared memory + + if reset_cluster_counter: + self.cluster_sim_step_counters[robot_name] = 0 + + return True + + def _step_jnt_vel_filter(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + self._jnt_vel_filter[robot_name].update(refk=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs), + idxs=env_indxs) + + def _set_state_to_cluster(self, + robot_name: str, + env_indxs: torch.Tensor = None, + base_loc: bool = True): + + if self._debug: + if not isinstance(env_indxs, (torch.Tensor, type(None))): + msg = "Provided env_indxs should be a torch tensor of indexes!" 
+ raise Exception(f"[{self.__class__.__name__}]" + f"[{self.journal.exception}]: " + msg) + + control_cluster = self.cluster_servers[robot_name] + # floating base + rhc_state = control_cluster.get_state() + # configuration + rhc_state.root_state.set(data=self.root_p_rel(robot_name=robot_name, env_idxs=env_indxs), + data_type="p", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.root_state.set(data=self.root_q(robot_name=robot_name, env_idxs=env_indxs), + data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu) + # rhc_state.root_state.set(data=self.root_q_yaw_rel(robot_name=robot_name, env_idxs=env_indxs), + # data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu) + # twist + rhc_state.root_state.set(data=self.root_v(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.root_state.set(data=self.root_omega(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="omega", robot_idxs = env_indxs, gpu=self._use_gpu) + + # angular accc. 
+ rhc_state.root_state.set(data=self.root_a(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="a", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.root_state.set(data=self.root_alpha(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="alpha", robot_idxs = env_indxs, gpu=self._use_gpu) + + # gravity vec + rhc_state.root_state.set(data=self.gravity(robot_name=robot_name, env_idxs=env_indxs,base_loc=base_loc), + data_type="gn", robot_idxs = env_indxs, gpu=self._use_gpu) + + # joints + rhc_state.jnts_state.set(data=self.jnts_q(robot_name=robot_name, env_idxs=env_indxs), + data_type="q", robot_idxs = env_indxs, gpu=self._use_gpu) + + v_jnts=self.jnts_v(robot_name=robot_name, env_idxs=env_indxs) + if self._jnt_vel_filter[robot_name] is not None: # apply filtering + v_jnts=self._jnt_vel_filter[robot_name].get(idxs=env_indxs) + rhc_state.jnts_state.set(data=v_jnts, + data_type="v", robot_idxs = env_indxs, gpu=self._use_gpu) + rhc_state.jnts_state.set(data=self.jnts_eff(robot_name=robot_name, env_idxs=env_indxs), + data_type="eff", robot_idxs = env_indxs, gpu=self._use_gpu) + + # height map + if self._enable_height_shared: + hdata = self._height_imgs[robot_name] + if env_indxs is not None: + hdata = hdata[env_indxs] + flat = hdata.reshape(hdata.shape[0], -1) + rhc_state.height_sensor.set(data=flat, data_type=None, robot_idxs=env_indxs, gpu=self._use_gpu) + + # Updating contact state for selected contact links + self._update_contact_state(robot_name=robot_name, env_indxs=env_indxs) + + def _update_contact_state(self, + robot_name: str, + env_indxs: torch.Tensor = None): + for i in range(0, self.cluster_servers[robot_name].n_contact_sensors()): + contact_link = self.cluster_servers[robot_name].contact_linknames()[i] + f_contact = self._get_contact_f(robot_name=robot_name, + contact_link=contact_link, + env_indxs=env_indxs) + if f_contact is not None: + 
self.cluster_servers[robot_name].get_state().contact_wrenches.set(data=f_contact, data_type="f", + contact_name=contact_link, + robot_idxs = env_indxs, + gpu=self._use_gpu) + + def _init_safe_cluster_actions(self, + robot_name: str): + + # this does not actually write on shared memory, + # but it's enough to get safe actions for the simulator before the + # cluster starts to receive data from the controllers + control_cluster = self.cluster_servers[robot_name] + rhc_cmds = control_cluster.get_actions() + n_jnts = rhc_cmds.n_jnts() + + null_action = torch.zeros((self._num_envs, n_jnts), + dtype=self._dtype, + device=self._device) + rhc_cmds.jnts_state.set(data=self._homing, data_type="q", gpu=self._use_gpu) + rhc_cmds.jnts_state.set(data=null_action, data_type="v", gpu=self._use_gpu) + rhc_cmds.jnts_state.set(data=null_action, data_type="eff", gpu=self._use_gpu) + + def _pre_step_db(self) -> None: + + # cluster step logic here + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + + if self._override_low_lev_controller: + # if overriding low-lev jnt imp. 
this has to run at the highest + # freq possible + start=time.perf_counter() + self._read_jnts_state_from_robot(robot_name=robot_name) + self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start + + self._write_state_to_jnt_imp(robot_name=robot_name) + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) + + if self._jnt_vel_filter[robot_name] is not None and \ + (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0: + # filter joint vel at a fixed frequency wrt sim steps + if not self._override_low_lev_controller: + # we need a fresh sensor reading + self._read_jnts_state_from_robot(robot_name=robot_name) + self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None) + + control_cluster = self.cluster_servers[robot_name] + if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]): + wait_ok=control_cluster.wait_for_solution() # this is blocking + if not wait_ok: + return False + failed = control_cluster.get_failed_controllers(gpu=self._use_gpu) + self._set_cluster_actions(robot_name=robot_name) # write last cmds to low level control + if not self._override_low_lev_controller: + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot + # we can update the jnt state just at the rate at which the cluster needs it + start=time.perf_counter() + self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None) + else: + # read state necessary for cluster + start=time.perf_counter() + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=None) + self.debug_data["time_to_get_states_from_env"]= time.perf_counter()-start + start=time.perf_counter() + self._set_state_to_cluster(robot_name=robot_name, + env_indxs=None) + control_cluster.write_robot_state() + self.debug_data["cluster_state_update_dt"][robot_name] = time.perf_counter()-start + + self._update_jnt_imp_cntrl_shared_data() # only if debug_mode_jnt_imp is enabled + + if 
self._use_remote_stepping[i]: + self._remote_steppers[robot_name].ack() # signal cluster stepping is finished + if failed is not None and self._env_opts["deact_when_failure"]: # deactivate robot completely + self._deactivate(env_indxs=failed, + robot_name=robot_name) + wait_reset_ok=self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking) + wait_step_ok=self._wait_for_remote_step_req(robot_name=robot_name) + if not wait_reset_ok or not wait_step_ok: + return False + else: + if failed is not None: + reset_ok=self._reset(env_indxs=failed, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + if not reset_ok: + return False + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=failed) + + control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers()) + + control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving + # values of some rhc flags on shared memory + control_cluster.trigger_solution() # trigger only active controllers + + return True + + def _pre_step(self) -> None: + + # cluster step logic here + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + if self._override_low_lev_controller: + # if overriding low-lev jnt imp. 
this has to run at the highest + # freq possible + self._read_jnts_state_from_robot(robot_name=robot_name) + self._write_state_to_jnt_imp(robot_name=robot_name) + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) + + if self._jnt_vel_filter[robot_name] is not None and \ + (self.cluster_sim_step_counters[robot_name]+1) % self._filter_step_ssteps_freq == 0: + # filter joint vel at a fixed frequency wrt sim steps + if not self._override_low_lev_controller: + # we need a fresh sensor reading + self._read_jnts_state_from_robot(robot_name=robot_name) + self._step_jnt_vel_filter(robot_name=robot_name, env_indxs=None) + + control_cluster = self.cluster_servers[robot_name] + if control_cluster.is_cluster_instant(self.cluster_sim_step_counters[robot_name]): + wait_ok=control_cluster.wait_for_solution() # this is blocking + if not wait_ok: + return False + failed = control_cluster.get_failed_controllers(gpu=self._use_gpu) + self._set_cluster_actions(robot_name=robot_name) # set last cmds to low level control + if not self._override_low_lev_controller: + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) # apply to robot + # we can update the jnt state just at the rate at which the cluster needs it + self._read_jnts_state_from_robot(robot_name=robot_name, env_indxs=None) + # read state necessary for cluster + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=None) + # write last robot state to the cluster of controllers + self._set_state_to_cluster(robot_name=robot_name, + env_indxs=None) + control_cluster.write_robot_state() # write on shared mem + + if self._use_remote_stepping[i]: + self._remote_steppers[robot_name].ack() # signal cluster stepping is finished + if failed is not None and self._env_opts["deact_when_failure"]: + self._deactivate(env_indxs=failed, + robot_name=robot_name) + wait_reset_ok=self._process_remote_reset_req(robot_name=robot_name) # wait for remote reset request (blocking) + 
wait_step_ok=self._wait_for_remote_step_req(robot_name=robot_name) + if not wait_reset_ok or not wait_step_ok: + return False + else: + if failed is not None: + reset_ok=self._reset(env_indxs=failed, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + if not reset_ok: + return False + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=failed) + control_cluster.activate_controllers(idxs=control_cluster.get_inactive_controllers()) + + control_cluster.pre_trigger() # performs pre-trigger steps, like retrieving + # values of some rhc flags on shared memory + control_cluster.trigger_solution() # trigger only active controllers + + return True + + def _post_world_step_db(self) -> bool: + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + control_cluster = self.cluster_servers[robot_name] + self.cluster_sim_step_counters[robot_name]+=1 # this has to be update with sim freq + if self._debug: + self.debug_data["sim_time"][robot_name]=self.world_time(robot_name=robot_name) + self.debug_data["cluster_sol_time"][robot_name] = \ + control_cluster.solution_time() + + self.step_counter +=1 + + def _post_world_step(self) -> bool: + + for i in range(len(self._robot_names)): + robot_name = self._robot_names[i] + self.cluster_sim_step_counters[robot_name]+=1 + self.step_counter +=1 + + def _reset(self, + robot_name: str, + env_indxs: torch.Tensor = None, + randomize: bool = False, + reset_cluster: bool = False, + reset_cluster_counter = False, + acquire_offsets: bool = False): + + # resets the state of target robot and env to the defaults + self._reset_state(env_indxs=env_indxs, + robot_name=robot_name, + randomize=randomize) + + # and jnt imp. 
controllers + self._reset_jnt_imp_control(robot_name=robot_name, + env_indxs=env_indxs) + + # read reset state + self._read_root_state_from_robot(robot_name=robot_name, + env_indxs=env_indxs) + self._read_jnts_state_from_robot(robot_name=robot_name, + env_indxs=env_indxs) + + if self._jnt_vel_filter[robot_name] is not None: + self._jnt_vel_filter[robot_name].reset(idxs=env_indxs) + + if acquire_offsets: + self._update_root_offsets(robot_name=robot_name, + env_indxs=env_indxs) + + if reset_cluster: # reset controllers remotely + reset_ok=self._reset_cluster(env_indxs=env_indxs, + robot_name=robot_name, + reset_cluster_counter=reset_cluster_counter) + if not reset_ok: + return False + + return True + + def _randomize_yaw(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + root_q_default = self._root_q_default[robot_name] + if env_indxs is None: + env_indxs = torch.arange(root_q_default.shape[0]) + + num_indices = env_indxs.shape[0] + yaw_angles = torch.rand((num_indices,), + device=root_q_default.device) * 2 * torch.pi # uniformly distributed random angles + + # Compute cos and sin once + cos_half = torch.cos(yaw_angles / 2) + root_q_default[env_indxs, :] = torch.stack((cos_half, + torch.zeros_like(cos_half), + torch.zeros_like(cos_half), + torch.sin(yaw_angles / 2)), dim=1).reshape(num_indices, 4) + + def _deactivate(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + # deactivate jnt imp controllers for given robots and envs (makes the robot fall) + self._jnt_imp_controllers[robot_name].deactivate(robot_indxs=env_indxs) + + def _n_contacts(self, robot_name: str) -> List[int]: + return self._num_contacts[robot_name] + + def root_p(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._root_p[robot_name] + else: + return self._root_p[robot_name][env_idxs, :] + + def root_p_rel(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + rel_pos = 
torch.sub(self.root_p(robot_name=robot_name), + self._root_pos_offsets[robot_name]) + else: + rel_pos = torch.sub(self.root_p(robot_name=robot_name, + env_idxs=env_idxs), + self._root_pos_offsets[robot_name][env_idxs, :]) + return rel_pos + + def root_q(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._root_q[robot_name] + else: + return self._root_q[robot_name][env_idxs, :] + + def root_q_rel(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return quaternion_difference(self._root_q_offsets[robot_name], + self.root_q(robot_name=robot_name)) + rel_q = quaternion_difference(self._root_q_offsets[robot_name][env_idxs, :], + self.root_q(robot_name=robot_name, + env_idxs=env_idxs)) + return rel_q + + def _quat_to_yaw_wxyz(self, q: torch.Tensor, out: torch.Tensor = None): + # Quaternion convention is w, x, y, z. + w, x, y, z = q[:, 0], q[:, 1], q[:, 2], q[:, 3] + num = 2.0 * (w * z + x * y) + den = 1.0 - 2.0 * (y * y + z * z) + if out is None: + return torch.atan2(num, den) + return torch.atan2(num, den, out=out) + + def _yaw_to_quat_wxyz(self, yaw: torch.Tensor, like_q: torch.Tensor, + out: torch.Tensor = None): + q = out + if q is None: + q = torch.zeros((yaw.shape[0], 4), dtype=like_q.dtype, device=like_q.device) + else: + q.zero_() + q[:, 0] = torch.cos(yaw / 2.0) + q[:, 3] = torch.sin(yaw / 2.0) + return q + + def _quat_conjugate_wxyz(self, q: torch.Tensor, out: torch.Tensor = None): + qi = out + if qi is None: + qi = torch.empty_like(q) + qi[:, :] = q + qi[:, 1:] = -qi[:, 1:] + return qi + + def _quat_multiply_wxyz(self, q1: torch.Tensor, q2: torch.Tensor, + out: torch.Tensor = None): + q_out = out + if q_out is None: + q_out = torch.empty_like(q1) + w1, x1, y1, z1 = q1[:, 0], q1[:, 1], q1[:, 2], q1[:, 3] + w2, x2, y2, z2 = q2[:, 0], q2[:, 1], q2[:, 2], q2[:, 3] + q_out[:, 0] = w1 * w2 - x1 * x2 - y1 * y2 - z1 * z2 + q_out[:, 1] = w1 * x2 + x1 * w2 + y1 * z2 - z1 * y2 + q_out[:, 2] 
= w1 * y2 - x1 * z2 + y1 * w2 + z1 * x2 + q_out[:, 3] = w1 * z2 + x1 * y2 - y1 * x2 + z1 * w2 + return q_out + + def _normalize_quat_wxyz(self, q: torch.Tensor, out: torch.Tensor = None): + q_norm = out + if q_norm is None: + q_norm = torch.empty_like(q) + q_norm[:, :] = q + q_norm /= torch.clamp(torch.norm(q_norm, dim=1, keepdim=True), min=1e-9) + return q_norm + + def root_q_yaw_rel(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + # Return quaternion with startup yaw removed while preserving current pitch/roll. + if env_idxs is None: + ws = self._root_q_yaw_rel_ws[robot_name] + q_abs = self._root_q[robot_name] + yaw_start = self._root_q_offsets_yaw[robot_name] + + self._normalize_quat_wxyz(q=q_abs, out=ws["q_abs_unit"]) + self._quat_to_yaw_wxyz(q=ws["q_abs_unit"], out=ws["yaw_abs"]) + + torch.sub(ws["yaw_abs"], yaw_start, out=ws["yaw_rel"]) + torch.sin(ws["yaw_rel"], out=ws["yaw_sin"]) + torch.cos(ws["yaw_rel"], out=ws["yaw_cos"]) + torch.atan2(ws["yaw_sin"], ws["yaw_cos"], out=ws["yaw_rel"]) + + # Build pure-yaw quaternions for: + # 1) the current absolute heading and 2) the startup-relative heading. + self._yaw_to_quat_wxyz(yaw=ws["yaw_abs"], like_q=ws["q_abs_unit"], out=ws["q_yaw_abs"]) + self._yaw_to_quat_wxyz(yaw=ws["yaw_rel"], like_q=ws["q_abs_unit"], out=ws["q_yaw_rel"]) + + # Isolate pitch/roll by removing the absolute yaw from the current orientation. + # For unit quaternions q_pr = q_yaw_abs^{-1} * q_abs. + self._quat_conjugate_wxyz(q=ws["q_yaw_abs"], out=ws["q_yaw_abs_conj"]) + self._quat_multiply_wxyz(q1=ws["q_yaw_abs_conj"], q2=ws["q_abs_unit"], out=ws["q_pr"]) + # Recompose orientation with relative yaw + current pitch/roll. 
+ self._quat_multiply_wxyz(q1=ws["q_yaw_rel"], q2=ws["q_pr"], out=ws["q_rel"]) + + return self._normalize_quat_wxyz(q=ws["q_rel"], out=ws["q_rel"]) + + q_abs = self.root_q(robot_name=robot_name, env_idxs=env_idxs) + q_abs = self._normalize_quat_wxyz(q=q_abs, out=q_abs) + + yaw_abs = self._quat_to_yaw_wxyz(q_abs) + yaw_start = self._root_q_offsets_yaw[robot_name][env_idxs] + yaw_rel = yaw_abs - yaw_start + yaw_rel = torch.atan2(torch.sin(yaw_rel), torch.cos(yaw_rel)) + + q_yaw_abs = self._yaw_to_quat_wxyz(yaw_abs, like_q=q_abs) + q_yaw_rel = self._yaw_to_quat_wxyz(yaw_rel, like_q=q_abs) + q_pr = self._quat_multiply_wxyz(self._quat_conjugate_wxyz(q_yaw_abs), q_abs) + q_rel = self._quat_multiply_wxyz(q_yaw_rel, q_pr) + + return self._normalize_quat_wxyz(q_rel) + + def root_v(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_v=self._root_v[robot_name] + if base_loc: + root_v=self._root_v_base_loc[robot_name] + if env_idxs is None: + return root_v + else: + return root_v[env_idxs, :] + + def root_omega(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_omega=self._root_omega[robot_name] + if base_loc: + root_omega=self._root_omega_base_loc[robot_name] + if env_idxs is None: + return root_omega + else: + return root_omega[env_idxs, :] + + def root_a(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_a=self._root_a[robot_name] + if base_loc: + root_a=self._root_a_base_loc[robot_name] + if env_idxs is None: + return root_a + else: + return root_a[env_idxs, :] + + def root_alpha(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): + + root_alpha=self._root_alpha[robot_name] + if base_loc: + root_alpha=self._root_alpha_base_loc[robot_name] + if env_idxs is None: + return root_alpha + else: + return root_alpha[env_idxs, :] + + def gravity(self, + robot_name: str, + env_idxs: torch.Tensor = None, + base_loc: bool = True): 
+ + gravity_loc=self._gravity_normalized[robot_name] + if base_loc: + gravity_loc=self._gravity_normalized_base_loc[robot_name] + if env_idxs is None: + return gravity_loc + else: + return gravity_loc[env_idxs, :] + + def jnts_q(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._jnts_q[robot_name] + else: + return self._jnts_q[robot_name][env_idxs, :] + + def jnts_v(self, + robot_name: str, + env_idxs: torch.Tensor = None): + + if env_idxs is None: + return self._jnts_v[robot_name] + else: + return self._jnts_v[robot_name][env_idxs, :] + + def jnts_eff(self, + robot_name: str, + env_idxs: torch.Tensor = None): # (measured) efforts + + if env_idxs is None: + return self._jnts_eff[robot_name] + else: + return self._jnts_eff[robot_name][env_idxs, :] + + def _wait_for_remote_step_req(self, + robot_name: str): + if not self._remote_steppers[robot_name].wait(self._timeout): + Journal.log(self.__class__.__name__, + "_wait_for_remote_step_req", + "Didn't receive any remote step req within timeout!", + LogType.EXCEP, + throw_when_excep = False) + return False + return True + + def _process_remote_reset_req(self, + robot_name: str): + + if not self._remote_resetters[robot_name].wait(self._timeout): + Journal.log(self.__class__.__name__, + "_process_remote_reset_req", + "Didn't receive any remote reset req within timeout!", + LogType.EXCEP, + throw_when_excep = False) + return False + + reset_requests = self._remote_reset_requests[robot_name] + reset_requests.synch_all(read=True, retry=True) # read reset requests from shared mem + to_be_reset = reset_requests.to_be_reset(gpu=self._use_gpu) + if to_be_reset is not None: + reset_ok=self._reset(env_indxs=to_be_reset, + robot_name=robot_name, + reset_cluster=True, + reset_cluster_counter=False, + randomize=True) + if not reset_ok: + return False + self._set_startup_jnt_imp_gains(robot_name=robot_name, + env_indxs=to_be_reset) # set gains to startup config (usually lower gains) + + 
control_cluster = self.cluster_servers[robot_name] + control_cluster.activate_controllers(idxs=to_be_reset) # activate controllers + # (necessary if failed) + + self._remote_resetters[robot_name].ack() # signal reset performed + + return True + + def _update_jnt_imp_cntrl_shared_data(self): + if self._debug: + for i in range(0, len(self._robot_names)): + robot_name = self._robot_names[i] + # updating all the jnt impedance data - > this may introduce some overhead + imp_data = self._jnt_imp_cntrl_shared_data[robot_name].imp_data_view + # set data + imp_data.set(data_type="pos_err", + data=self._jnt_imp_controllers[robot_name].pos_err(), + gpu=self._use_gpu) + imp_data.set(data_type="vel_err", + data=self._jnt_imp_controllers[robot_name].vel_err(), + gpu=self._use_gpu) + imp_data.set(data_type="pos_gains", + data=self._jnt_imp_controllers[robot_name].pos_gains(), + gpu=self._use_gpu) + imp_data.set(data_type="vel_gains", + data=self._jnt_imp_controllers[robot_name].vel_gains(), + gpu=self._use_gpu) + imp_data.set(data_type="eff_ff", + data=self._jnt_imp_controllers[robot_name].eff_ref(), + gpu=self._use_gpu) + imp_data.set(data_type="pos", + data=self._jnt_imp_controllers[robot_name].pos(), + gpu=self._use_gpu) + imp_data.set(data_type="pos_ref", + data=self._jnt_imp_controllers[robot_name].pos_ref(), + gpu=self._use_gpu) + imp_data.set(data_type="vel", + data=self._jnt_imp_controllers[robot_name].vel(), + gpu=self._use_gpu) + imp_data.set(data_type="vel_ref", + data=self._jnt_imp_controllers[robot_name].vel_ref(), + gpu=self._use_gpu) + imp_data.set(data_type="eff", + data=self._jnt_imp_controllers[robot_name].eff(), + gpu=self._use_gpu) + imp_data.set(data_type="imp_eff", + data=self._jnt_imp_controllers[robot_name].imp_eff(), + gpu=self._use_gpu) + # copy from GPU to CPU if using gpu + if self._use_gpu: + imp_data.synch_mirror(from_gpu=True,non_blocking=True) + # even if it's from GPU->CPu we can use non-blocking since it's just for db + # purposes + # write 
copies to shared memory + imp_data.synch_all(read=False, retry=False) + + def _set_startup_jnt_imp_gains(self, + robot_name:str, + env_indxs: torch.Tensor = None): + + startup_p_gains=self._jnt_imp_controllers[robot_name].startup_p_gains() + startup_d_gains=self._jnt_imp_controllers[robot_name].startup_d_gains() + + if env_indxs is not None: + self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs, + pos_gains=startup_p_gains[env_indxs, :], + vel_gains=startup_d_gains[env_indxs, :]) + else: + self._jnt_imp_controllers[robot_name].set_gains(robot_indxs=env_indxs, + pos_gains=startup_p_gains[:, :], + vel_gains=startup_d_gains[:, :]) + + def _write_state_to_jnt_imp(self, + robot_name: str): + + # always update ,imp. controller internal state (jnt imp control is supposed to be + # always running) + self._jnt_imp_controllers[robot_name].update_state(pos=self.jnts_q(robot_name=robot_name), + vel = self.jnts_v(robot_name=robot_name), + eff = self.jnts_eff(robot_name=robot_name)) + + def _set_cluster_actions(self, + robot_name): + control_cluster = self.cluster_servers[robot_name] + actions=control_cluster.get_actions() + active_controllers=control_cluster.get_active_controllers(gpu=self._use_gpu) + + if active_controllers is not None: + self._jnt_imp_controllers[robot_name].set_refs( + pos_ref=actions.jnts_state.get(data_type="q", gpu=self._use_gpu)[active_controllers, :], + vel_ref=actions.jnts_state.get(data_type="v", gpu=self._use_gpu)[active_controllers, :], + eff_ref=actions.jnts_state.get(data_type="eff", gpu=self._use_gpu)[active_controllers, :], + robot_indxs=active_controllers) + + def _jnt_imp_reset_overrride(self, robot_name:str): + # to be overriden + pass + + def _apply_cmds_to_jnt_imp_control(self, robot_name:str): + + self._jnt_imp_controllers[robot_name].apply_cmds() + + def _update_root_offsets(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + if self._debug: + for_robots = "" + if env_indxs is not None: + if not 
isinstance(env_indxs, torch.Tensor): + msg = "Provided env_indxs should be a torch tensor of indexes!" + Journal.log(self.__class__.__name__, + "update_root_offsets", + msg, + LogType.EXCEP, + throw_when_excep = True) + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist()) + if self._verbose: + Journal.log(self.__class__.__name__, + "update_root_offsets", + f"updating root offsets " + for_robots, + LogType.STAT, + throw_when_excep = True) + + # only planar position used + if env_indxs is None: + self._root_pos_offsets[robot_name][:, 0:2] = self._root_p[robot_name][:, 0:2] + self._normalize_quat_wxyz(q=self._root_q[robot_name], out=self._root_q_offsets[robot_name]) + self._quat_to_yaw_wxyz(q=self._root_q_offsets[robot_name], + out=self._root_q_offsets_yaw[robot_name]) + + else: + self._root_pos_offsets[robot_name][env_indxs, 0:2] = self._root_p[robot_name][env_indxs, 0:2] + q_root_norm=self._normalize_quat_wxyz(self._root_q[robot_name][env_indxs, :]) + self._root_q_offsets[robot_name][env_indxs, :] = q_root_norm + self._root_q_offsets_yaw[robot_name][env_indxs] = self._quat_to_yaw_wxyz(q=q_root_norm) + + def _reset_jnt_imp_control(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + if self._debug: + for_robots = "" + if env_indxs is not None: + if not isinstance(env_indxs, torch.Tensor): + Journal.log(self.__class__.__name__, + "reset_jnt_imp_control", + "Provided env_indxs should be a torch tensor of indexes!", + LogType.EXCEP, + throw_when_excep = True) + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = 
"Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs) + + if self._verbose: + Journal.log(self.__class__.__name__, + "reset_jnt_imp_control", + f"resetting joint impedances " + for_robots, + LogType.STAT, + throw_when_excep = True) + + # resets all internal data, refs to defaults + self._jnt_imp_controllers[robot_name].reset(robot_indxs=env_indxs) + + #restore jnt imp refs to homing + if env_indxs is None: + self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[:, :], + robot_indxs = None) + else: + self._jnt_imp_controllers[robot_name].set_refs(pos_ref=self._homing[env_indxs, :], + robot_indxs = env_indxs) + + # self._write_state_to_jnt_imp(robot_name=robot_name) + # actually applies reset commands to the articulation + self._write_state_to_jnt_imp(robot_name=robot_name) + self._jnt_imp_reset_overrride(robot_name=robot_name) + self._apply_cmds_to_jnt_imp_control(robot_name=robot_name) + + def _synch_default_root_states(self, + robot_name: str, + env_indxs: torch.Tensor = None): + + if self._debug: + for_robots = "" + if env_indxs is not None: + if not isinstance(env_indxs, torch.Tensor): + msg = "Provided env_indxs should be a torch tensor of indexes!" + Journal.log(self.__class__.__name__, + "synch_default_root_states", + msg, + LogType.EXCEP, + throw_when_excep = True) + if self._use_gpu: + if not env_indxs.device.type == "cuda": + error = "Provided env_indxs should be on GPU!" + Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + else: + if not env_indxs.device.type == "cpu": + error = "Provided env_indxs should be on CPU!" 
+ Journal.log(self.__class__.__name__, + "_step_jnt_imp_control", + error, + LogType.EXCEP, + True) + for_robots = f"for robot {robot_name}, indexes: " + str(env_indxs.tolist()) + if self._verbose: + Journal.log(self.__class__.__name__, + "synch_default_root_states", + f"updating default root states " + for_robots, + LogType.STAT, + throw_when_excep = True) + + if env_indxs is None: + self._root_p_default[robot_name][:, :] = self._root_p[robot_name] + self._root_q_default[robot_name][:, :] = self._root_q[robot_name] + else: + self._root_p_default[robot_name][env_indxs, :] = self._root_p[robot_name][env_indxs, :] + self._root_q_default[robot_name][env_indxs, :] = self._root_q[robot_name][env_indxs, :] + + def _generate_rob_descriptions(self, + robot_name: str, + urdf_path: str, + srdf_path: str): + + custom_xacro_args=extract_custom_xacro_args(self._env_opts) + Journal.log(self.__class__.__name__, + "_generate_rob_descriptions", + "generating URDF for robot "+ f"{robot_name}, from URDF {urdf_path}...", + LogType.STAT, + throw_when_excep = True) + xrdf_cmds=self._xrdf_cmds(robot_name=robot_name) + xrdf_cmds=merge_xacro_cmds(prev_cmds=xrdf_cmds, + new_cmds=custom_xacro_args) + self._urdf_dump_paths[robot_name]=generate_urdf(robot_name=robot_name, + xacro_path=urdf_path, + dump_path=self._descr_dump_path, + xrdf_cmds=xrdf_cmds) + Journal.log(self.__class__.__name__, + "_generate_rob_descriptions", + "generating SRDF for robot "+ f"{robot_name}, from SRDF {srdf_path}...", + LogType.STAT, + throw_when_excep = True) + # we also generate SRDF files, which are useful for control + self._srdf_dump_paths[robot_name]=generate_srdf(robot_name=robot_name, + xacro_path=srdf_path, + dump_path=self._descr_dump_path, + xrdf_cmds=xrdf_cmds) + + def _xrdf_cmds(self, robot_name:str): + urdfpath=self._robot_urdf_paths[robot_name] + # we assume directory tree of the robot package is like + # robot-ros-pkg/robot_urdf/urdf/robot.urdf.xacro + parts = urdfpath.split('/') + 
urdf_descr_root_path = '/'.join(parts[:-2]) + cmds = get_xrdf_cmds(urdf_descr_root_path=urdf_descr_root_path) + return cmds + + @abstractmethod + def current_tstep(self) -> int: + pass + + @abstractmethod + def world_time(self, robot_name: str) -> float: + return self.cluster_sim_step_counters[robot_name]*self.physics_dt() + + @abstractmethod + def is_running(self) -> bool: + pass + + @abstractmethod + def _get_contact_f(self, + robot_name: str, + contact_link: str, + env_indxs: torch.Tensor) -> torch.Tensor: + return None + + @abstractmethod + def physics_dt(self) -> float: + pass + + @abstractmethod + def rendering_dt(self) -> float: + pass + + @abstractmethod + def set_physics_dt(self, physics_dt:float): + pass + + @abstractmethod + def set_rendering_dt(self, rendering_dt:float): + pass + + @abstractmethod + def _robot_jnt_names(self, robot_name: str) -> List[str]: + pass + + @abstractmethod + def _read_root_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + # IMPORTANT: Child interfaces should provide root quaternions in w, x, y, z convention. 
+ pass + + @abstractmethod + def _read_jnts_state_from_robot(self, + robot_name: str, + env_indxs: torch.Tensor = None): + pass + + @abstractmethod + def _init_robots_state(self): + pass + + @abstractmethod + def _reset_state(self, + robot_name: str, + env_indxs: torch.Tensor = None, + randomize: bool = False): + pass + + @abstractmethod + def _init_world(self): + pass + + @abstractmethod + def _reset_sim(self) -> None: + pass + + @abstractmethod + def _set_jnts_to_homing(self, robot_name: str): + pass + + @abstractmethod + def _set_root_to_defconfig(self, robot_name: str): + pass + + @abstractmethod + def _parse_env_opts(self): + pass + + @abstractmethod + def _pre_setup(self): + pass + + @abstractmethod + def _generate_jnt_imp_control(self) -> JntImpCntrlChild: + pass + + @abstractmethod + def _render_sim(self, mode:str="human") -> None: + pass + + @abstractmethod + def _close(self) -> None: + pass + + @abstractmethod + def _step_world(self) -> None: + pass diff --git a/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/xbot2_basic.yaml b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/xbot2_basic.yaml new file mode 100644 index 0000000000000000000000000000000000000000..142e3d84db18838f5fa449a9edc97d5b8c76b382 --- /dev/null +++ b/bundles/centauro/d2026_03_07_h19_m22_s30-CentauroCloopPartialNoYawUb_FakePosTrackingEnv/xbot2_basic.yaml @@ -0,0 +1,86 @@ +XBotInterface: + urdf_path: $PWD/centauro.urdf + srdf_path: $PWD/centauro_old.srdf + # urdf_path: $PWD/centauro_dagana_right.urdf + # srdf_path: $PWD/centauro_dagana_right.srdf + +ModelInterface: + model_type: RBDL + is_model_floating_base: true + +motor_pd: + "j_arm*_1": [500, 10] + "j_arm*_2": [500, 10] + "j_arm*_3": [500, 10] + "j_arm*_4": [500, 10] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [3000, 30] + "hip_pitch_*": [3000, 30] + "knee_pitch_*": [3000, 30] + "ankle_pitch_*": [1000, 10] + 
"ankle_yaw_*": [300, 10] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [1000, 30] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] + +startup_motor_pd: + "j_arm*_1": [500, 30] + "j_arm*_2": [500, 30] + "j_arm*_3": [500, 30] + "j_arm*_4": [500, 30] + "j_arm*_5": [100, 5] + "j_arm*_6": [100, 5] + "j_arm*_7": [100, 5] + "hip_yaw_*": [800, 80] + "hip_pitch_*": [800, 80] + "knee_pitch_*": [800, 80] + "ankle_pitch_*": [800, 80] + "ankle_yaw_*": [480, 50] + "neck_pitch": [10, 1] + "neck_yaw": [10, 1] + "torso_yaw": [800, 80] + "j_wheel_*": [0, 30] + "velodyne_*": [10, 1] + "d435_*": [10, 1] + "dagana_*": [50, 1] + +motor_vel: + j_wheel_*: [1] + neck_velodyne: [1] + +# hal +xbotcore_device_configs: + sim: $PWD/hal/centauro_gz.yaml + dummy: $PWD/hal/centauro_dummy.yaml + +# threads +xbotcore_threads: + rt_main: {sched: fifo , prio: 60, period: 0.001} + nrt_main: {sched: other, prio: 0 , period: 0.005} + +# plugins +xbotcore_plugins: + + homing: + thread: rt_main + type: homing + + ros_io: {thread: nrt_main, type: ros_io} + + ros_control: {thread: nrt_main, type: ros_control, params: {autostart: {type: bool, value: true}}} + +# global parameters +xbotcore_param: + /xbot/hal/joint_safety/delta_check_dt: 0.02 + /xbot/hal/joint_safety/filter_autostart: {value: true, type: bool} + # /xbot/hal/joint_safety/filter_cutoff_hz: {value: 1.0, type: double} + /xbot/hal/joint_safety/filter_safe_cutoff_hz: {value: 5.0, type: double} + /xbot/hal/joint_safety/filter_medium_cutoff_hz: {value: 15.0, type: double} + /xbot/hal/joint_safety/filter_fast_cutoff_hz: {value: 25.0, type: double} + /xbot/hal/enable_safety: {value: true, type: bool} \ No newline at end of file