"""
Test module 04 -- basic behaviour of step().

Requirements covered
--------------------
* R5: step returns (obs, reward, terminated, truncated, info)
* R6: wall-hit penalty, position unchanged
* R7: reward for an ordinary move
* R8: goal reward and terminated=True on reaching the goal

Corresponding test cases
------------------------
TC-11, TC-12, TC-13
"""

from __future__ import annotations

from typing import Any

import numpy as np
import pytest

from maze_env import MazeEnv


class TestStep:
    """Verify the return format, reward computation and position updates of ``step()``.

    The tests assume a 6x6 grid in which the agent stands on (1, 1) after
    ``reset()`` and the goal is on (4, 4).  Action 0 is "up", action 3 is
    "right", and action 1 is used to walk down towards the goal.

    NOTE(review): the ``env_zero_rewards`` fixture is defined outside this
    file.  These tests rely on it applying a step reward of -1 and a wall-hit
    reward of -10 -- confirm against the fixture definition.
    """

    @staticmethod
    def _walk_to_goal(env: MazeEnv) -> tuple[Any, Any, Any, Any, Any]:
        """Drive a freshly reset env along the fixed path used by the TC-13 tests.

        Issues action 3 three times and then action 1 three times, which the
        TC-13 tests expect to end on the goal cell (4, 4).

        Args:
            env: Environment on which ``reset()`` has already been called.

        Returns:
            The 5-tuple returned by the final ``step()`` call.
        """
        # All but the last step are only needed for their effect on the env.
        for action in (3, 3, 3, 1, 1):
            env.step(action)
        return env.step(1)

    # ------------------------------------------------------------------
    # TC-11: running into a wall
    # ------------------------------------------------------------------

    @pytest.mark.unit
    def test_wall_hit_reward(self, env_zero_rewards: MazeEnv) -> None:
        """TC-11a: moving up from (1, 1) hits the boundary wall and is penalised.

        Input:    ``reset()``, then action 0 (up).
        Expected: reward == -11.0, i.e. reward_step (-1) + reward_wall_hit (-10);
                  ``info["agent_pos"]`` stays at (1, 1).
        Measured: the values returned by ``step()``.
        """
        env_zero_rewards.reset()
        _, reward, _, _, info = env_zero_rewards.step(0)
        assert reward == -11.0, f"撞墙奖励应为 reward_step(-1)+reward_wall_hit(-10)=-11,实际 {reward}"
        assert info["agent_pos"] == (1, 1), "撞墙后位置不应改变"

    @pytest.mark.unit
    def test_wall_hit_count_increments(self, env_zero_rewards: MazeEnv) -> None:
        """TC-11b: a wall hit increments ``hit_wall_count`` by one.

        Input:    ``reset()``, then action 0 (hits the boundary wall).
        Expected: ``info["hit_wall_count"] == 1``.
        Measured: the ``info`` dict returned by ``step()``.
        """
        env_zero_rewards.reset()
        _, _, _, _, info = env_zero_rewards.step(0)
        assert info["hit_wall_count"] == 1

    @pytest.mark.unit
    def test_wall_hit_obs_shape(self, env_zero_rewards: MazeEnv) -> None:
        """TC-11c: the observation keeps shape (4, 6, 6) after a wall hit.

        Input:    ``reset()``, then action 0.
        Expected: ``obs.shape == (4, 6, 6)``.
        Measured: the ``obs`` returned by ``step()``.
        """
        env_zero_rewards.reset()
        obs, _, _, _, _ = env_zero_rewards.step(0)
        assert obs.shape == (4, 6, 6)

    # ------------------------------------------------------------------
    # TC-12: ordinary move
    # ------------------------------------------------------------------

    @pytest.mark.unit
    def test_normal_move_reward(self, env_zero_rewards: MazeEnv) -> None:
        """TC-12a: moving right from (1, 1) lands on (1, 2) with reward -1.

        Input:    ``reset()``, then action 3 (right).
        Expected: reward == -1.0 and ``info["agent_pos"] == (1, 2)``.
        Measured: the values returned by ``step()``.
        """
        env_zero_rewards.reset()
        _, reward, _, _, info = env_zero_rewards.step(3)
        assert reward == -1.0, f"正常移动奖励应为 -1,实际 {reward}"
        assert info["agent_pos"] == (1, 2), "移动后位置应为 (1,2)"

    @pytest.mark.unit
    def test_normal_move_agent_channel(self, env_zero_rewards: MazeEnv) -> None:
        """TC-12b: after a move, ``obs[1]`` marks the new cell and clears the old one.

        Input:    ``reset()``, then action 3 (right, towards (1, 2)).
        Expected: ``obs[1]`` is 1.0 at the position reported in ``info``,
                  0.0 at the start cell (1, 1), and sums to 1.0 overall.
        Measured: the values of channel ``obs[1]``.
        """
        env_zero_rewards.reset()
        obs, _, _, _, info = env_zero_rewards.step(3)
        ar, ac = info["agent_pos"]
        assert obs[1, ar, ac] == 1.0
        # Without this check the test would also pass if the agent never
        # left the start cell, because the position is read back from info.
        assert obs[1, 1, 1] == 0.0
        assert float(obs[1].sum()) == 1.0

    @pytest.mark.unit
    def test_normal_move_step_count(self, env_zero_rewards: MazeEnv) -> None:
        """TC-12c: every ``step()`` call increments ``step_count`` by one.

        Input:    ``reset()``, then two consecutive ``step(3)`` calls.
        Expected: ``step_count == 1`` after the first call and ``== 2`` after
                  the second.
        Measured: ``info["step_count"]``.

        Only ordinary moves are exercised here; the wall-hit case mentioned
        in the test-case description is not covered by this test.
        """
        env_zero_rewards.reset()
        _, _, _, _, info1 = env_zero_rewards.step(3)
        assert info1["step_count"] == 1
        _, _, _, _, info2 = env_zero_rewards.step(3)
        assert info2["step_count"] == 2

    # ------------------------------------------------------------------
    # TC-13: reaching the goal
    # ------------------------------------------------------------------

    @pytest.mark.integration
    def test_reach_goal_terminated(self, env_zero_rewards: MazeEnv) -> None:
        """TC-13a: stepping onto the goal (N-2, N-2) terminates with reward +100 - 1.

        Input:    grid_size=6, no obstacles, seed=0; the agent is steered
                  manually to (4, 4).
        Expected: on the goal step ``terminated is True``,
                  ``truncated is False``, reward == 100 + (-1) == 99.0 and
                  ``info["success"] is True``.
        Measured: the values returned by the final ``step()``.

        The ``env_zero_rewards`` fixture is requested but not used: the test
        builds its own environment with explicit reward settings.
        """
        env = MazeEnv(grid_size=6, obstacle_density=0.0, seed=0,
                      reward_goal=100.0, reward_wall_hit=-10.0, reward_step=-1.0)
        env.reset()

        _, reward, terminated, truncated, info = self._walk_to_goal(env)

        assert terminated is True, "到达终点应 terminated=True"
        assert truncated is False, "到达终点不应 truncated"
        assert reward == 99.0, f"终点奖励应为 99.0,实际 {reward}"
        assert info["success"] is True

    @pytest.mark.integration
    def test_reach_goal_agent_channel(self, env_zero_rewards: MazeEnv) -> None:
        """TC-13b: on the goal, ``obs[1]`` and ``obs[2]`` are both 1.0 at (4, 4).

        Input:    grid_size=6, no obstacles, seed=0; the agent is steered to
                  the goal (4, 4).
        Expected: ``obs[1][4, 4] == 1.0`` and ``obs[2][4, 4] == 1.0``.
        Measured: the observation returned by the goal step.

        The ``env_zero_rewards`` fixture is requested but not used: the test
        builds its own environment.
        """
        env = MazeEnv(grid_size=6, obstacle_density=0.0, seed=0)
        env.reset()
        obs, _, _, _, _ = self._walk_to_goal(env)
        assert obs[1, 4, 4] == 1.0
        assert obs[2, 4, 4] == 1.0