# Tests for the discrete apple environment (phase1, phase2, and full tasks).
| import numpy as np | |
| from apple.envs.discrete_apple import get_apple_env | |
def test_discrete_apple_phase1():
    """Phase 1: check per-step rewards and observations under a random policy.

    Rolls out a mostly-right random policy (P(action=1)=0.8) and verifies the
    env's outputs against closed-form expectations: per-step reward is
    2 * action - 1 (i.e. +1 for action 1, -1 for action 0), the final step is
    overridden by a +100 success bonus when ``info["success"]`` is set, and
    every observation equals [1, -c] -- TODO confirm state layout against the
    env implementation.
    """
    c = 0.5
    timelimit = 30
    env = get_apple_env("phase1", start_x=0, goal_x=10, c=c, time_limit=timelimit)
    # Local generator with a fixed seed: makes the test deterministic and
    # avoids mutating the global numpy RNG state shared with other tests.
    rng = np.random.default_rng(0)
    observations, actions, rewards, done = [], [], [], False
    env.reset()
    for _ in range(timelimit):
        action = rng.choice([0, 1], p=[0.2, 0.8])
        obs, reward, done, info = env.step(action)
        observations.append(obs)
        actions.append(action)
        rewards.append(reward)
        if done:
            break
    observations = np.array(observations)
    actions = np.array(actions)
    rewards = np.array(rewards)
    # Expected reward: +1 when action == 1, -1 when action == 0.
    target_rewards = np.ones(len(actions)) * actions * 2 - 1
    if info["success"]:
        # Terminal step pays the success bonus instead of the step reward.
        target_rewards[-1] = 100
    # Every phase-1 observation is expected to be the pair [1, -c].
    target_states = np.stack([np.ones(len(actions)), np.ones(len(actions)) * -c], axis=1)
    assert (rewards == target_rewards).all()
    assert (observations == target_states).all()
def test_discrete_apple_phase2():
    """Phase 2: check per-step rewards and observations under a random policy.

    Mirror of the phase-1 test with the action semantics flipped: a mostly-
    "stay" random policy (P(action=0)=0.8) is rolled out, and the expected
    per-step reward is 2 * (1 - action) - 1 (i.e. +1 for action 0, -1 for
    action 1), again with a +100 success bonus on the terminal step. Every
    observation is expected to equal [1, c] -- TODO confirm state layout
    against the env implementation.
    """
    c = 0.5
    timelimit = 30
    env = get_apple_env("phase2", start_x=0, goal_x=10, c=c, time_limit=timelimit)
    # Local generator with a fixed seed: makes the test deterministic and
    # avoids mutating the global numpy RNG state shared with other tests.
    rng = np.random.default_rng(0)
    observations, actions, rewards, done = [], [], [], False
    env.reset()
    for _ in range(timelimit):
        action = rng.choice([0, 1], p=[0.8, 0.2])
        obs, reward, done, info = env.step(action)
        observations.append(obs)
        actions.append(action)
        rewards.append(reward)
        if done:
            break
    observations = np.array(observations)
    actions = np.array(actions)
    rewards = np.array(rewards)
    # Expected reward: +1 when action == 0, -1 when action == 1.
    target_rewards = np.ones(len(actions)) * (1 - actions) * 2 - 1
    if info["success"]:
        # Terminal step pays the success bonus instead of the step reward.
        target_rewards[-1] = 100
    # Every phase-2 observation is expected to be the pair [1, c].
    target_states = np.stack([np.ones(len(actions)), np.ones(len(actions)) * c], axis=1)
    assert (rewards == target_rewards).all()
    assert (observations == target_states).all()
def test_discrete_apple_full():
    """Full task: a scripted 20-step trajectory (10 right, then 10 back).

    Verifies that the env yields reward +1 on every step with a +100 bonus on
    the final one, and that observations are [1, -c] for the first ten steps
    and [1, c] for the last ten.
    """
    c = 0.5
    expected_rewards = np.ones(20)
    expected_rewards[-1] = 100
    cost_column = np.concatenate([np.full(10, -c), np.full(10, c)])
    expected_states = np.stack([np.ones(20), cost_column], axis=1)
    env = get_apple_env("full", start_x=0, goal_x=10, c=c, time_limit=30)
    observations, actions, rewards = [], [], []
    env.reset()
    # Scripted policy: ten steps of action 1 followed by ten steps of action 0.
    for action in [1] * 10 + [0] * 10:
        obs, reward, done, info = env.step(action)
        observations.append(obs)
        actions.append(action)
        rewards.append(reward)
    rewards = np.array(rewards)
    observations = np.array(observations)
    assert (rewards == expected_rewards).all()
    assert (observations == expected_states).all()