# Tests for the discrete apple environment (phase1, phase2, and full tasks).
| import numpy as np | |
| from apple.envs.discrete_apple import get_apple_env | |
def test_discrete_apple_phase1():
    """Phase 1: check per-step rewards and observations under a random policy.

    Rolls out a mostly-right random policy (P(action=1)=0.8) and verifies the
    env's outputs against closed-form expectations: per-step reward is
    2 * action - 1 (i.e. +1 for action 1, -1 for action 0), the final step is
    overridden by a +100 success bonus when ``info["success"]`` is set, and
    every observation equals [1, -c] -- TODO confirm state layout against the
    env implementation.
    """
    c = 0.5
    timelimit = 30
    env = get_apple_env("phase1", start_x=0, goal_x=10, c=c, time_limit=timelimit)
    # Local generator with a fixed seed: makes the test deterministic and
    # avoids mutating the global numpy RNG state shared with other tests.
    rng = np.random.default_rng(0)
    observations, actions, rewards, done = [], [], [], False
    env.reset()
    for _ in range(timelimit):
        action = rng.choice([0, 1], p=[0.2, 0.8])
        obs, reward, done, info = env.step(action)
        observations.append(obs)
        actions.append(action)
        rewards.append(reward)
        if done:
            break
    observations = np.array(observations)
    actions = np.array(actions)
    rewards = np.array(rewards)
    # Expected reward: +1 when action == 1, -1 when action == 0.
    target_rewards = np.ones(len(actions)) * actions * 2 - 1
    if info["success"]:
        # Terminal step pays the success bonus instead of the step reward.
        target_rewards[-1] = 100
    # Every phase-1 observation is expected to be the pair [1, -c].
    target_states = np.stack([np.ones(len(actions)), np.ones(len(actions)) * -c], axis=1)
    assert (rewards == target_rewards).all()
    assert (observations == target_states).all()
def test_discrete_apple_phase2():
    """Phase 2: check per-step rewards and observations under a random policy.

    Mirror of the phase-1 test with the action semantics flipped: a mostly-
    "stay" random policy (P(action=0)=0.8) is rolled out, and the expected
    per-step reward is 2 * (1 - action) - 1 (i.e. +1 for action 0, -1 for
    action 1), again with a +100 success bonus on the terminal step. Every
    observation is expected to equal [1, c] -- TODO confirm state layout
    against the env implementation.
    """
    c = 0.5
    timelimit = 30
    env = get_apple_env("phase2", start_x=0, goal_x=10, c=c, time_limit=timelimit)
    # Local generator with a fixed seed: makes the test deterministic and
    # avoids mutating the global numpy RNG state shared with other tests.
    rng = np.random.default_rng(0)
    observations, actions, rewards, done = [], [], [], False
    env.reset()
    for _ in range(timelimit):
        action = rng.choice([0, 1], p=[0.8, 0.2])
        obs, reward, done, info = env.step(action)
        observations.append(obs)
        actions.append(action)
        rewards.append(reward)
        if done:
            break
    observations = np.array(observations)
    actions = np.array(actions)
    rewards = np.array(rewards)
    # Expected reward: +1 when action == 0, -1 when action == 1.
    target_rewards = np.ones(len(actions)) * (1 - actions) * 2 - 1
    if info["success"]:
        # Terminal step pays the success bonus instead of the step reward.
        target_rewards[-1] = 100
    # Every phase-2 observation is expected to be the pair [1, c].
    target_states = np.stack([np.ones(len(actions)), np.ones(len(actions)) * c], axis=1)
    assert (rewards == target_rewards).all()
    assert (observations == target_states).all()
def test_discrete_apple_full():
    """Full task: a scripted 20-step trajectory (10 right, then 10 back).

    Verifies that the env yields reward +1 on every step with a +100 bonus on
    the final one, and that observations are [1, -c] for the first ten steps
    and [1, c] for the last ten.
    """
    c = 0.5
    expected_rewards = np.ones(20)
    expected_rewards[-1] = 100
    cost_column = np.concatenate([np.full(10, -c), np.full(10, c)])
    expected_states = np.stack([np.ones(20), cost_column], axis=1)
    env = get_apple_env("full", start_x=0, goal_x=10, c=c, time_limit=30)
    observations, actions, rewards = [], [], []
    env.reset()
    # Scripted policy: ten steps of action 1 followed by ten steps of action 0.
    for action in [1] * 10 + [0] * 10:
        obs, reward, done, info = env.step(action)
        observations.append(obs)
        actions.append(action)
        rewards.append(reward)
    rewards = np.array(rewards)
    observations = np.array(observations)
    assert (rewards == expected_rewards).all()
    assert (observations == expected_states).all()