privateboss committed on
Commit
2545370
·
verified ·
1 Parent(s): 1349907

Update reward_shaping.py

Browse files
Files changed (1) hide show
  1. reward_shaping.py +1 -19
reward_shaping.py CHANGED
@@ -3,7 +3,6 @@ import numpy as np
3
  from gymnasium.spaces import Box
4
  from gymnasium import Wrapper
5
 
6
- # --- CONFIGURATION FOR REWARD SHAPING ---
7
  TIME_PENALTY = -0.05
8
  # ----------------------------------------
9
 
@@ -19,7 +18,6 @@ class LunarLanderRewardShaping(Wrapper):
19
  def step(self, action):
20
  observation, reward, terminated, truncated, info = self.env.step(action)
21
 
22
- # 1. Unpack relevant variables from observation
23
  x_pos = observation[0]
24
  y_pos = observation[1]
25
  x_vel = observation[2]
@@ -27,61 +25,45 @@ class LunarLanderRewardShaping(Wrapper):
27
  angle = observation[4]
28
  angular_vel = observation[5]
29
 
30
- # Leg contact boolean/float values
31
  left_leg_contact = observation[6]
32
  right_leg_contact = observation[7]
33
 
34
- # Determine if the main engine (action 2) was fired
35
  main_engine_fired = 1 if action == 2 else 0
36
 
37
- # Determine if *any* thruster (actions 1, 2, or 3) was fired
38
  any_thruster_fired = 1 if action != 0 else 0
39
 
40
- # 2. Calculate the total current shaping value
41
  current_shaping_reward = 0.0
42
 
43
- # A. Horizontal Position Penalty (Forces agent toward X=0)
44
- # This ensures landing between the flags.
45
  current_shaping_reward += -30 * np.abs(x_pos)
46
 
47
- # B. Sharpened Vertical velocity penalty near the ground
48
 
49
  y_factor = 1.0 - y_pos
50
  current_shaping_reward += -20 * np.abs(y_vel) * y_factor
51
 
52
- # C. Scaled back Horizontal speed penalty
53
  current_shaping_reward += -10 * np.abs(x_vel)
54
 
55
- # D. Penalize severe angle
56
  current_shaping_reward += -5 * np.abs(angle)
57
 
58
- # E. Penalize high spin rate
59
  current_shaping_reward += -10 * np.abs(angular_vel)
60
 
61
- # F. Main Engine usage penalty near the ground
62
  current_shaping_reward += -50 * main_engine_fired * (1.0 - y_pos)
63
 
64
- # G. Height Penalty (Penalizes distance from the ground)
65
  current_shaping_reward += -20 * y_pos
66
 
67
- # H. Post-Contact Thrust Penalty
68
  contact_sum = left_leg_contact + right_leg_contact
69
  if contact_sum > 0:
70
  current_shaping_reward += -900 * any_thruster_fired * contact_sum
71
 
72
- # I. Aggressive Landing Leg Use Incentive
73
  current_shaping_reward += 15 * contact_sum
74
 
75
- # 3. Calculate the differential shaping reward (reward for improvement)
76
  if self.last_shaping_reward is not None:
77
  shaping_reward_diff = current_shaping_reward - self.last_shaping_reward
78
- # Clip differential reward to prevent massive, unstable jumps
79
  reward += np.clip(shaping_reward_diff, -10.0, 10.0)
80
 
81
  self.last_shaping_reward = current_shaping_reward
82
 
83
  reward += TIME_PENALTY
84
- # ---------------------------------------------
85
 
86
  return observation, reward, terminated, truncated, info
87
 
 
3
  from gymnasium.spaces import Box
4
  from gymnasium import Wrapper
5
 
 
6
  TIME_PENALTY = -0.05
7
  # ----------------------------------------
8
 
 
18
  def step(self, action):
19
  observation, reward, terminated, truncated, info = self.env.step(action)
20
 
 
21
  x_pos = observation[0]
22
  y_pos = observation[1]
23
  x_vel = observation[2]
 
25
  angle = observation[4]
26
  angular_vel = observation[5]
27
 
 
28
  left_leg_contact = observation[6]
29
  right_leg_contact = observation[7]
30
 
 
31
  main_engine_fired = 1 if action == 2 else 0
32
 
 
33
  any_thruster_fired = 1 if action != 0 else 0
34
 
 
35
  current_shaping_reward = 0.0
36
 
 
 
37
  current_shaping_reward += -30 * np.abs(x_pos)
38
 
 
39
 
40
  y_factor = 1.0 - y_pos
41
  current_shaping_reward += -20 * np.abs(y_vel) * y_factor
42
 
 
43
  current_shaping_reward += -10 * np.abs(x_vel)
44
 
 
45
  current_shaping_reward += -5 * np.abs(angle)
46
 
 
47
  current_shaping_reward += -10 * np.abs(angular_vel)
48
 
 
49
  current_shaping_reward += -50 * main_engine_fired * (1.0 - y_pos)
50
 
 
51
  current_shaping_reward += -20 * y_pos
52
 
 
53
  contact_sum = left_leg_contact + right_leg_contact
54
  if contact_sum > 0:
55
  current_shaping_reward += -900 * any_thruster_fired * contact_sum
56
 
 
57
  current_shaping_reward += 15 * contact_sum
58
 
 
59
  if self.last_shaping_reward is not None:
60
  shaping_reward_diff = current_shaping_reward - self.last_shaping_reward
61
+
62
  reward += np.clip(shaping_reward_diff, -10.0, 10.0)
63
 
64
  self.last_shaping_reward = current_shaping_reward
65
 
66
  reward += TIME_PENALTY
 
67
 
68
  return observation, reward, terminated, truncated, info
69