privateboss
/

Lunar_Lander-V3_Discrete

Model card Files Files and versions

xet

Community

privateboss committed on Nov 25, 2025

Commit

2545370

verified ·

1 Parent(s): 1349907

Update reward_shaping.py

Browse files

Files changed (1) hide show

reward_shaping.py +1 -19

reward_shaping.py CHANGED Viewed

@@ -3,7 +3,6 @@ import numpy as np
 from gymnasium.spaces import Box
 from gymnasium import Wrapper
-# --- CONFIGURATION FOR REWARD SHAPING ---
 TIME_PENALTY = -0.05
 # ----------------------------------------
@@ -19,7 +18,6 @@ class LunarLanderRewardShaping(Wrapper):
     def step(self, action):
         observation, reward, terminated, truncated, info = self.env.step(action)
-        # 1. Unpack relevant variables from observation
         x_pos = observation[0]
         y_pos = observation[1]
         x_vel = observation[2]
@@ -27,61 +25,45 @@ class LunarLanderRewardShaping(Wrapper):
         angle = observation[4]
         angular_vel = observation[5]
-        # Leg contact boolean/float values
         left_leg_contact = observation[6]
         right_leg_contact = observation[7]
-        # Determine if the main engine (action 2) was fired
         main_engine_fired = 1 if action == 2 else 0
-        # Determine if *any* thruster (actions 1, 2, or 3) was fired
         any_thruster_fired = 1 if action != 0 else 0
-        # 2. Calculate the total current shaping value
         current_shaping_reward = 0.0
-        # A. Horizontal Position Penalty (Forces agent toward X=0)
-        # This ensures landing between the flags.
         current_shaping_reward += -30 * np.abs(x_pos)
-        # B. Sharpened Vertical velocity penalty near the ground
         y_factor = 1.0 - y_pos
         current_shaping_reward += -20 * np.abs(y_vel) * y_factor
-        # C. Scaled back Horizontal speed penalty
         current_shaping_reward += -10 * np.abs(x_vel)
-        # D. Penalize severe angle
         current_shaping_reward += -5 * np.abs(angle)
-        # E. Penalize high spin rate
         current_shaping_reward += -10 * np.abs(angular_vel)
-        # F. Main Engine usage penalty near the ground
         current_shaping_reward += -50 * main_engine_fired * (1.0 - y_pos)
-        # G. Height Penalty (Penalizes distance from the ground)
         current_shaping_reward += -20 * y_pos
-        # H. Post-Contact Thrust Penalty
         contact_sum = left_leg_contact + right_leg_contact
         if contact_sum > 0:
             current_shaping_reward += -900 * any_thruster_fired * contact_sum
-        # I. Aggressive Landing Leg Use Incentive
         current_shaping_reward += 15 * contact_sum
-        # 3. Calculate the differential shaping reward (reward for improvement)
         if self.last_shaping_reward is not None:
             shaping_reward_diff = current_shaping_reward - self.last_shaping_reward
-            # Clip differential reward to prevent massive, unstable jumps
             reward += np.clip(shaping_reward_diff, -10.0, 10.0)
         self.last_shaping_reward = current_shaping_reward
         reward += TIME_PENALTY
-        # ---------------------------------------------
         return observation, reward, terminated, truncated, info

 from gymnasium.spaces import Box
 from gymnasium import Wrapper
 TIME_PENALTY = -0.05
 # ----------------------------------------
     def step(self, action):
         """Step the wrapped env and add a clipped, differential shaping reward.

         A scalar "shaping value" is computed from the new observation and the
         action taken. The change in that value since the previous step,
         clipped to [-10, 10], is added to the environment's reward, followed
         by the constant ``TIME_PENALTY``.

         Args:
             action: Action forwarded to ``self.env.step``. It is compared
                 against 2 (treated as "main engine fired") and 0 (treated as
                 "no thruster fired").

         Returns:
             The ``(observation, reward, terminated, truncated, info)`` tuple
             from the wrapped environment, with only ``reward`` modified.
         """
         observation, reward, terminated, truncated, info = self.env.step(action)
         # Unpack the state components used by the shaping terms.
         x_pos = observation[0]
         y_pos = observation[1]
         x_vel = observation[2]
         # NOTE(review): `y_vel` is used below but is not assigned in the lines
         # visible here — presumably `observation[3]`, elided by the diff hunk
         # boundary. Confirm against the full file.
         angle = observation[4]
         angular_vel = observation[5]
         # Leg contact flags (boolean/float values).
         left_leg_contact = observation[6]
         right_leg_contact = observation[7]
         # 1 when the main engine (action 2) was fired this step.
         main_engine_fired = 1 if action == 2 else 0
         # 1 when any non-zero action (i.e. any thruster) was chosen.
         any_thruster_fired = 1 if action != 0 else 0
         # Accumulate the current shaping value; each term below adds to it.
         current_shaping_reward = 0.0
         # Horizontal position penalty: pulls the agent toward x = 0.
         current_shaping_reward += -30 * np.abs(x_pos)
         # Vertical speed penalty, weighted by (1 - y_pos) so it grows as
         # y_pos decreases (i.e. nearer the ground).
         y_factor = 1.0 - y_pos
         current_shaping_reward += -20 * np.abs(y_vel) * y_factor
         # Horizontal speed penalty.
         current_shaping_reward += -10 * np.abs(x_vel)
         # Tilt penalty.
         current_shaping_reward += -5 * np.abs(angle)
         # Spin-rate penalty.
         current_shaping_reward += -10 * np.abs(angular_vel)
         # Main-engine usage penalty, using the same (1 - y_pos) weighting.
         current_shaping_reward += -50 * main_engine_fired * (1.0 - y_pos)
         # Height penalty: proportional to y_pos.
         current_shaping_reward += -20 * y_pos
         # Post-contact thrust penalty: applies only while at least one leg
         # reports contact and a thruster was fired.
         contact_sum = left_leg_contact + right_leg_contact
         if contact_sum > 0:
             current_shaping_reward += -900 * any_thruster_fired * contact_sum
         # Leg-contact incentive.
         current_shaping_reward += 15 * contact_sum
         # Reward the *change* in shaping value rather than its absolute level.
         # When no previous value is stored (None) the differential is skipped.
         # NOTE(review): where `last_shaping_reward` is initialised/reset is not
         # visible here — presumably in `__init__`/`reset`; confirm.
         if self.last_shaping_reward is not None:
             shaping_reward_diff = current_shaping_reward - self.last_shaping_reward
             # Clip so a single step cannot contribute more than +/-10; this also
             # bounds the effect of the large -900 term above.
             reward += np.clip(shaping_reward_diff, -10.0, 10.0)
         self.last_shaping_reward = current_shaping_reward
         # Constant per-step penalty (module-level TIME_PENALTY).
         reward += TIME_PENALTY
         return observation, reward, terminated, truncated, info