"""
测试模块 04 —— step() 基础行为
需求覆盖
--------
* R5:step 返回 (obs, reward, terminated, truncated, info)
* R6:撞墙惩罚与位置不变
* R7:普通移动奖励
* R8:到达终点奖励与 terminated=True
对应用例
--------
TC-11, TC-12, TC-13
"""
from __future__ import annotations
import numpy as np
import pytest
from maze_env import MazeEnv
class TestStep:
    """Check the return format, reward values and position updates of step().

    Each test resets an environment, issues one or more actions and inspects
    the five values unpacked from ``step()``:
    ``(obs, reward, terminated, truncated, info)``.

    Action codes used below (taken from the inline comments of this file):
    0 = up, 1 = down, 3 = right.
    """

    # ------------------------------------------------------------------ #
    # TC-11  Wall-hit behaviour                                           #
    # ------------------------------------------------------------------ #
    @pytest.mark.unit
    def test_wall_hit_reward(self, env_zero_rewards: MazeEnv) -> None:
        """TC-11a: moving up from (1, 1) hits the border wall; reward is -11.

        Input:    env_zero_rewards.reset(), action=0 (up)
        Expected: reward == -11.0, i.e. reward_step (-1) + reward_wall_hit
                  (-10); agent_pos stays at (1, 1)
        Measured: values returned by step()
        """
        env_zero_rewards.reset()
        _, reward, _, _, info = env_zero_rewards.step(0)  # 0 = UP
        # The step penalty is expected on top of the wall penalty, hence -11.
        assert reward == -11.0, (
            f"撞墙奖励应为 reward_step(-1)+reward_wall_hit(-10)=-11,实际 {reward}"
        )
        assert info["agent_pos"] == (1, 1), "撞墙后位置不应改变"

    @pytest.mark.unit
    def test_wall_hit_count_increments(self, env_zero_rewards: MazeEnv) -> None:
        """TC-11b: a wall hit increases hit_wall_count by one.

        Input:    reset(), action=0 (runs into the border wall)
        Expected: info["hit_wall_count"] == 1
        Measured: info returned by step()
        """
        env_zero_rewards.reset()
        _, _, _, _, info = env_zero_rewards.step(0)
        assert info["hit_wall_count"] == 1

    @pytest.mark.unit
    def test_wall_hit_obs_shape(self, env_zero_rewards: MazeEnv) -> None:
        """TC-11c: the observation keeps shape (4, 6, 6) after a wall hit.

        Input:    reset(), action=0
        Expected: obs.shape == (4, 6, 6)
        Measured: obs returned by step()
        """
        env_zero_rewards.reset()
        obs, _, _, _, _ = env_zero_rewards.step(0)
        assert obs.shape == (4, 6, 6)

    # ------------------------------------------------------------------ #
    # TC-12  Normal move behaviour                                        #
    # ------------------------------------------------------------------ #
    @pytest.mark.unit
    def test_normal_move_reward(self, env_zero_rewards: MazeEnv) -> None:
        """TC-12a: moving right from (1, 1) lands on (1, 2) with reward -1.

        Input:    reset(), action=3 (right)
        Expected: reward == -1.0, agent_pos == (1, 2)
        Measured: values returned by step()
        """
        env_zero_rewards.reset()
        _, reward, _, _, info = env_zero_rewards.step(3)  # 3 = RIGHT
        assert reward == -1.0, f"正常移动奖励应为 -1,实际 {reward}"
        assert info["agent_pos"] == (1, 2), "移动后位置应为 (1,2)"

    @pytest.mark.unit
    def test_normal_move_agent_channel(self, env_zero_rewards: MazeEnv) -> None:
        """TC-12b: after a move, obs[1] is set at the new cell and cleared at the old one.

        Input:    reset(), action=3 (right, to (1, 2))
        Expected: obs[1][1, 2] == 1.0, obs[1][1, 1] == 0.0, and exactly one
                  unit of mass in channel 1
        Measured: values of channel obs[1]
        """
        env_zero_rewards.reset()
        obs, _, _, _, info = env_zero_rewards.step(3)  # 3 = RIGHT
        # Pin the destination first so the channel checks cannot pass on a
        # wrong cell reported by info.
        assert info["agent_pos"] == (1, 2)
        agent_row, agent_col = info["agent_pos"]
        assert obs[1, agent_row, agent_col] == 1.0
        # The previously occupied start cell must no longer be marked.
        assert obs[1, 1, 1] == 0.0
        assert float(obs[1].sum()) == 1.0

    @pytest.mark.unit
    def test_normal_move_step_count(self, env_zero_rewards: MazeEnv) -> None:
        """TC-12c: step_count grows by one on every step() call.

        Input:    reset(), then two consecutive step() calls
        Expected: step_count == 1 after the first call, == 2 after the second
        Measured: info["step_count"]
        """
        env_zero_rewards.reset()
        _, _, _, _, first_info = env_zero_rewards.step(3)
        assert first_info["step_count"] == 1
        _, _, _, _, second_info = env_zero_rewards.step(3)
        assert second_info["step_count"] == 2

    # ------------------------------------------------------------------ #
    # TC-13  Reaching the goal                                            #
    # ------------------------------------------------------------------ #
    @pytest.mark.integration
    def test_reach_goal_terminated(self, env_zero_rewards: MazeEnv) -> None:
        """TC-13a: stepping onto (N-2, N-2) terminates the episode with the goal reward.

        Input:    grid_size=6, obstacle_density=0.0, seed=0; the agent is
                  steered by hand to (4, 4)
        Expected: on the final step terminated is True, truncated is False,
                  reward == 100 + (-1) == 99.0, info["success"] is True
        Measured: values returned by step()

        NOTE(review): the ``env_zero_rewards`` fixture is requested but not
        used; the test builds its own environment with explicit rewards.
        """
        env = MazeEnv(
            grid_size=6,
            obstacle_density=0.0,
            seed=0,
            reward_goal=100.0,
            reward_wall_hit=-10.0,
            reward_step=-1.0,
        )
        env.reset()
        # Walk right along row 1, then down along column 4.
        for _ in range(3):  # (1,1) -> (1,2) -> (1,3) -> (1,4)
            env.step(3)
        for _ in range(2):  # (1,4) -> (2,4) -> (3,4)
            env.step(1)
        # Only the final step, (3,4) -> (4,4), is inspected.
        _, reward, terminated, truncated, info = env.step(1)
        assert terminated is True, "到达终点应 terminated=True"
        assert truncated is False, "到达终点不应 truncated"
        assert reward == 99.0, f"终点奖励应为 99.0,实际 {reward}"
        assert info["success"] is True

    @pytest.mark.integration
    def test_reach_goal_agent_channel(self, env_zero_rewards: MazeEnv) -> None:
        """TC-13b: at the goal, obs[1] and obs[2] are both 1.0 on the same cell.

        Input:    grid_size=6, agent steered to the goal at (4, 4)
        Expected: obs[1][4, 4] == 1.0, obs[2][4, 4] == 1.0
        Measured: channel values of the observation from the final step

        NOTE(review): the ``env_zero_rewards`` fixture is requested but not
        used; the test builds its own environment.
        """
        env = MazeEnv(grid_size=6, obstacle_density=0.0, seed=0)
        env.reset()
        for _ in range(3):  # right: (1,1) -> (1,4)
            env.step(3)
        for _ in range(2):  # down: (1,4) -> (3,4)
            env.step(1)
        obs, _, _, _, _ = env.step(1)  # final step reaches (4,4)
        assert obs[1, 4, 4] == 1.0
        assert obs[2, 4, 4] == 1.0
|