# Apple/tests/test_discrete_apple.py
# Author: New Author Name
# init (commit 4b714e2)
import numpy as np
from apple.envs.discrete_apple import get_apple_env
def test_discrete_apple_phase1():
    """Phase 1: action 1 yields +1 per step, action 0 yields -1.

    The observation is expected to stay at [1, -c] for every step, and the
    final step's reward is replaced by the success bonus (100) when the
    episode ends with ``info["success"]``.
    """
    c = 0.5
    timelimit = 30
    env = get_apple_env("phase1", start_x=0, goal_x=10, c=c, time_limit=timelimit)
    # Seeded generator so the sampled trajectory is reproducible across runs
    # (the global np.random state made this test non-deterministic).
    rng = np.random.default_rng(0)
    observations, actions, rewards, done = [], [], [], False
    obs = env.reset()
    for _ in range(timelimit):
        # Mostly pick action 1 (p=0.8) so the agent tends toward the goal.
        action = int(rng.choice([0, 1], p=[0.2, 0.8]))
        obs, reward, done, info = env.step(action)
        observations.append(obs)
        actions.append(action)
        rewards.append(reward)
        if done:
            break
    observations = np.array(observations)
    actions = np.array(actions)
    rewards = np.array(rewards)
    # Per-step reward in phase 1: +1 for action 1, -1 for action 0.
    target_rewards = actions * 2 - 1
    if info["success"]:
        # Reaching the goal replaces the final step's reward with the bonus.
        target_rewards[-1] = 100
    # Observation is constant throughout phase 1: [1, -c].
    target_states = np.stack([np.ones(len(actions)), np.ones(len(actions)) * -c], axis=1)
    assert (rewards == target_rewards).all()
    assert (observations == target_states).all()
def test_discrete_apple_phase2():
    """Phase 2: action 0 yields +1 per step, action 1 yields -1.

    Mirror of the phase-1 test: the observation stays at [1, +c], and the
    final step's reward becomes the success bonus (100) when the episode
    ends with ``info["success"]``.
    """
    c = 0.5
    timelimit = 30
    env = get_apple_env("phase2", start_x=0, goal_x=10, c=c, time_limit=timelimit)
    # Seeded generator so the sampled trajectory is reproducible across runs
    # (the global np.random state made this test non-deterministic).
    rng = np.random.default_rng(0)
    observations, actions, rewards, done = [], [], [], False
    obs = env.reset()
    for _ in range(timelimit):
        # Mostly pick action 0 (p=0.8) — phase 2 rewards the opposite action.
        action = int(rng.choice([0, 1], p=[0.8, 0.2]))
        obs, reward, done, info = env.step(action)
        observations.append(obs)
        actions.append(action)
        rewards.append(reward)
        if done:
            break
    observations = np.array(observations)
    actions = np.array(actions)
    rewards = np.array(rewards)
    # Per-step reward in phase 2: +1 for action 0, -1 for action 1.
    target_rewards = (1 - actions) * 2 - 1
    if info["success"]:
        # Reaching the goal replaces the final step's reward with the bonus.
        target_rewards[-1] = 100
    # Observation is constant throughout phase 2: [1, +c].
    target_states = np.stack([np.ones(len(actions)), np.ones(len(actions)) * c], axis=1)
    assert (rewards == target_rewards).all()
    assert (observations == target_states).all()
def test_discrete_apple_full():
    """Full task: 10 steps of action 1 then 10 of action 0 complete the episode.

    Every step earns +1, the last step earns the success bonus (100), and the
    observation flips from [1, -c] in the first half to [1, +c] in the second.
    """
    c = 0.5
    n_steps = 20
    # Expected rewards: +1 each step, success bonus on the final step.
    target_rewards = np.ones(n_steps)
    target_rewards[-1] = 100
    # Expected observations: [1, -c] for the first 10 steps, [1, +c] after.
    target_states = np.stack(
        [np.ones(n_steps), np.concatenate([np.full(10, -c), np.full(10, c)])],
        axis=1,
    )
    env = get_apple_env("full", start_x=0, goal_x=10, c=c, time_limit=30)
    observations, rewards = [], []
    env.reset()
    # Scripted policy: ten "1" actions followed by ten "0" actions.
    for action in [1] * 10 + [0] * 10:
        obs, reward, done, info = env.step(action)
        observations.append(obs)
        rewards.append(reward)
    assert (np.array(rewards) == target_rewards).all()
    assert (np.array(observations) == target_states).all()