feat: multi-mode control (server/rans_environment.py)
Browse files- server/rans_environment.py +55 -16
server/rans_environment.py
CHANGED
|
@@ -100,10 +100,15 @@ class RANSEnvironment(Environment):
|
|
| 100 |
self._initial_pos_range = initial_pos_range
|
| 101 |
self._initial_vel_range = initial_vel_range
|
| 102 |
|
| 103 |
-
# Physics simulator
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
# Task
|
| 109 |
self._task = TASK_REGISTRY[task](task_config or {})
|
|
@@ -136,23 +141,57 @@ class RANSEnvironment(Environment):
|
|
| 136 |
return self._make_observation(reward=0.0, done=False, info=task_info)
|
| 137 |
|
| 138 |
def step(self, action: Action) -> Observation:
|
| 139 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
if not hasattr(action, "thrusters"):
|
| 141 |
raise ValueError(
|
| 142 |
-
f"Expected SpacecraftAction
|
| 143 |
-
f"received {type(action).__name__}."
|
| 144 |
)
|
| 145 |
|
| 146 |
-
#
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
|
| 154 |
-
#
|
| 155 |
-
|
|
|
|
| 156 |
self._step_count += 1
|
| 157 |
|
| 158 |
# Compute task reward
|
|
|
|
| 100 |
self._initial_pos_range = initial_pos_range
|
| 101 |
self._initial_vel_range = initial_vel_range
|
| 102 |
|
| 103 |
+
# Physics simulator β RANS_NUM_THRUSTERS overrides spacecraft_config
|
| 104 |
+
if spacecraft_config is None:
|
| 105 |
+
n_env = os.environ.get("RANS_NUM_THRUSTERS")
|
| 106 |
+
if n_env is not None:
|
| 107 |
+
n = int(n_env)
|
| 108 |
+
spacecraft_config = SpacecraftConfig.from_num_thrusters(n)
|
| 109 |
+
else:
|
| 110 |
+
spacecraft_config = SpacecraftConfig.default_8_thruster()
|
| 111 |
+
self._spacecraft = Spacecraft2D(spacecraft_config)
|
| 112 |
|
| 113 |
# Task
|
| 114 |
self._task = TASK_REGISTRY[task](task_config or {})
|
|
|
|
| 141 |
return self._make_observation(reward=0.0, done=False, info=task_info)
|
| 142 |
|
| 143 |
def step(self, action: Action) -> Observation:
|
| 144 |
+
"""
|
| 145 |
+
Advance the simulation by one step.
|
| 146 |
+
|
| 147 |
+
Control mode is selected automatically based on which fields are set:
|
| 148 |
+
|
| 149 |
+
1. **Thrusters** (``action.thrusters`` is not None):
|
| 150 |
+
List of per-thruster activations β [0, 1].
|
| 151 |
+
2. **Force/torque** (``action.fx``, ``action.fy``, or ``action.torque``
|
| 152 |
+
is not None):
|
| 153 |
+
Direct world-frame force/torque β bypasses thruster geometry.
|
| 154 |
+
3. **Velocity target** (``action.vx_target``, ``action.vy_target``, or
|
| 155 |
+
``action.omega_target`` is not None):
|
| 156 |
+
PD controller drives the spacecraft toward the requested velocities.
|
| 157 |
+
"""
|
| 158 |
if not hasattr(action, "thrusters"):
|
| 159 |
raise ValueError(
|
| 160 |
+
f"Expected SpacecraftAction, received {type(action).__name__}."
|
|
|
|
| 161 |
)
|
| 162 |
|
| 163 |
+
# ββ Mode 1: thruster activations βββββββββββββββββββββββββββββββββ
|
| 164 |
+
if action.thrusters is not None:
|
| 165 |
+
activations = np.array(action.thrusters, dtype=np.float64)
|
| 166 |
+
n = self._spacecraft.n_thrusters
|
| 167 |
+
if len(activations) != n:
|
| 168 |
+
padded = np.zeros(n, dtype=np.float64)
|
| 169 |
+
padded[: min(len(activations), n)] = activations[:n]
|
| 170 |
+
activations = padded
|
| 171 |
+
self._spacecraft.step(activations)
|
| 172 |
+
|
| 173 |
+
# ββ Mode 2: direct force / torque ββββββββββββββββββββββββββββββββ
|
| 174 |
+
elif any(v is not None for v in [action.fx, action.fy, action.torque]):
|
| 175 |
+
self._spacecraft.step_force_torque(
|
| 176 |
+
fx_world=float(action.fx or 0.0),
|
| 177 |
+
fy_world=float(action.fy or 0.0),
|
| 178 |
+
torque=float(action.torque or 0.0),
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
# ββ Mode 3: velocity target βββββββββββββββββββββββββββββββββββββββ
|
| 182 |
+
elif any(
|
| 183 |
+
v is not None
|
| 184 |
+
for v in [action.vx_target, action.vy_target, action.omega_target]
|
| 185 |
+
):
|
| 186 |
+
self._spacecraft.step_velocity_target(
|
| 187 |
+
vx_target=float(action.vx_target or 0.0),
|
| 188 |
+
vy_target=float(action.vy_target or 0.0),
|
| 189 |
+
omega_target=float(action.omega_target or 0.0),
|
| 190 |
+
)
|
| 191 |
|
| 192 |
+
# ββ No action β advance with zero forces ββββββββββββββββββββββββββ
|
| 193 |
+
else:
|
| 194 |
+
self._spacecraft.step(np.zeros(self._spacecraft.n_thrusters))
|
| 195 |
self._step_count += 1
|
| 196 |
|
| 197 |
# Compute task reward
|