# interview/tests/test_04_step.py
# Lee93whut
# feat(env): Gymnasium maze env, 3-channel obs, BFS reachability
# fe0625d
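"""
Test module 04 -- basic behaviour of step().

Requirements covered
--------------------
* R5: step() returns (obs, reward, terminated, truncated, info)
* R6: wall-hit penalty, with the agent position left unchanged
* R7: reward for an ordinary move
* R8: goal reward and terminated=True on reaching the goal

Test cases
----------
TC-11, TC-12, TC-13
"""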
from __future__ import annotations
import numpy as np
import pytest
from maze_env import MazeEnv
class TestStep:
    """Verify the return format, reward computation and position update of step().

    NOTE(review): the ``env_zero_rewards`` fixture is defined outside this
    file. Despite its name, the assertions below expect it to charge -1 per
    step and a further -10 per wall hit, and to start the agent at (1, 1) on
    a 6x6 grid -- confirm against the fixture definition.
    """

    # Action codes, as stated by the inline comments of the original tests.
    _UP = 0
    _DOWN = 1
    _RIGHT = 3

    @staticmethod
    def _walk_to_goal(env):
        """Walk ``env`` from (1, 1) to (4, 4) and return the last step() result.

        The route is three moves right, (1,1)->(1,4), followed by three moves
        down, (1,4)->(4,4). It assumes a freshly reset 6x6 grid without
        obstacles, which is how both TC-13 tests build their environment.
        """
        for _ in range(3):  # (1,1)->(1,2)->(1,3)->(1,4)
            env.step(TestStep._RIGHT)
        for _ in range(2):  # (1,4)->(2,4)->(3,4)
            env.step(TestStep._DOWN)
        return env.step(TestStep._DOWN)  # (3,4)->(4,4)

    # ------------------------------------------------------------------ #
    # TC-11  wall-hit behaviour                                          #
    # ------------------------------------------------------------------ #
    @pytest.mark.unit
    def test_wall_hit_reward(self, env_zero_rewards: MazeEnv) -> None:
        """TC-11a: moving up from (1, 1) hits the border wall.

        Input:    env_zero_rewards.reset(), then action=0 (up)
        Expected: reward == -11.0, i.e. the step cost (-1) plus the wall-hit
                  penalty (-10); agent_pos stays at (1, 1)
        Measured: the values returned by step()
        """
        env_zero_rewards.reset()
        _, reward, _, _, info = env_zero_rewards.step(self._UP)
        assert reward == -11.0, f"撞墙奖励应为 reward_step(-1)+reward_wall_hit(-10)=-11,实际 {reward}"
        assert info["agent_pos"] == (1, 1), "撞墙后位置不应改变"

    @pytest.mark.unit
    def test_wall_hit_count_increments(self, env_zero_rewards: MazeEnv) -> None:
        """TC-11b: a wall hit increments hit_wall_count by one.

        Input:    reset(), then action=0 (into the border wall)
        Expected: info["hit_wall_count"] == 1
        Measured: the info dict returned by step()
        """
        env_zero_rewards.reset()
        _, _, _, _, info = env_zero_rewards.step(self._UP)
        assert info["hit_wall_count"] == 1

    @pytest.mark.unit
    def test_wall_hit_obs_shape(self, env_zero_rewards: MazeEnv) -> None:
        """TC-11c: the observation keeps shape (4, 6, 6) after a wall hit.

        Input:    reset(), then action=0
        Expected: obs.shape == (4, 6, 6)
        Measured: the obs returned by step()

        NOTE(review): the commit summary in the file header mentions a
        3-channel observation while this test expects 4 channels -- confirm
        which one reflects the current environment.
        """
        env_zero_rewards.reset()
        obs, _, _, _, _ = env_zero_rewards.step(self._UP)
        assert obs.shape == (4, 6, 6)

    # ------------------------------------------------------------------ #
    # TC-12  ordinary move behaviour                                     #
    # ------------------------------------------------------------------ #
    @pytest.mark.unit
    def test_normal_move_reward(self, env_zero_rewards: MazeEnv) -> None:
        """TC-12a: moving right from (1, 1) lands on (1, 2) with reward -1.

        Input:    reset(), then action=3 (right)
        Expected: reward == -1.0, agent_pos == (1, 2)
        Measured: the values returned by step()
        """
        env_zero_rewards.reset()
        _, reward, _, _, info = env_zero_rewards.step(self._RIGHT)
        assert reward == -1.0, f"正常移动奖励应为 -1,实际 {reward}"
        assert info["agent_pos"] == (1, 2), "移动后位置应为 (1,2)"

    @pytest.mark.unit
    def test_normal_move_agent_channel(self, env_zero_rewards: MazeEnv) -> None:
        """TC-12b: after a move obs[1] is set at the new cell and cleared at the old one.

        Input:    reset(), then action=3 (right, to (1, 2))
        Expected: obs[1][1, 2] == 1.0, obs[1][1, 1] == 0.0, and exactly one
                  active cell in obs[1]
        Measured: the values of channel obs[1]
        """
        env_zero_rewards.reset()
        obs, _, _, _, info = env_zero_rewards.step(self._RIGHT)
        ar, ac = info["agent_pos"]
        # Pin the expected coordinates instead of trusting info["agent_pos"]:
        # otherwise an environment that never moves the agent would still pass.
        assert (ar, ac) == (1, 2)
        assert obs[1, 1, 2] == 1.0
        assert obs[1, 1, 1] == 0.0
        assert float(obs[1].sum()) == 1.0

    @pytest.mark.unit
    def test_normal_move_step_count(self, env_zero_rewards: MazeEnv) -> None:
        """TC-12c: step_count grows by one on every step() call.

        Input:    reset(), then two consecutive step() calls
        Expected: step_count == 1 after the first call, == 2 after the second
        Measured: info["step_count"]
        """
        env_zero_rewards.reset()
        _, _, _, _, first_info = env_zero_rewards.step(self._RIGHT)
        assert first_info["step_count"] == 1
        _, _, _, _, second_info = env_zero_rewards.step(self._RIGHT)
        assert second_info["step_count"] == 2

    # ------------------------------------------------------------------ #
    # TC-13  reaching the goal                                           #
    # ------------------------------------------------------------------ #
    @pytest.mark.integration
    def test_reach_goal_terminated(self, env_zero_rewards: MazeEnv) -> None:
        """TC-13a: reaching (N-2, N-2) terminates the episode with the goal reward.

        Input:    grid_size=6, no obstacles, seed=0; the agent is walked by
                  hand from (1, 1) to (4, 4)
        Expected: on the final step terminated is True, truncated is False,
                  reward == 100 + (-1) == 99.0 and info["success"] is True
        Measured: the values returned by step()

        NOTE(review): the ``env_zero_rewards`` fixture is requested but not
        used; the test builds its own environment with explicit rewards.
        """
        env = MazeEnv(grid_size=6, obstacle_density=0.0, seed=0,
                      reward_goal=100.0, reward_wall_hit=-10.0, reward_step=-1.0)
        env.reset()
        _, reward, terminated, truncated, info = self._walk_to_goal(env)
        assert terminated is True, "到达终点应 terminated=True"
        assert truncated is False, "到达终点不应 truncated"
        assert reward == 99.0, f"终点奖励应为 99.0,实际 {reward}"
        assert info["success"] is True

    @pytest.mark.integration
    def test_reach_goal_agent_channel(self, env_zero_rewards: MazeEnv) -> None:
        """TC-13b: at the goal, obs[1] and obs[2] are both 1.0 on the same cell.

        Input:    grid_size=6, no obstacles, seed=0; the agent is walked to
                  the goal at (4, 4)
        Expected: obs[1][4, 4] == 1.0 and obs[2][4, 4] == 1.0
        Measured: the observation channels returned by the final step

        NOTE(review): the ``env_zero_rewards`` fixture is requested but not
        used; the test builds its own environment.
        """
        env = MazeEnv(grid_size=6, obstacle_density=0.0, seed=0)
        env.reset()
        obs, _, _, _, _ = self._walk_to_goal(env)  # last step lands on (4, 4)
        assert obs[1, 4, 4] == 1.0
        assert obs[2, 4, 4] == 1.0