|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| """Defines many boolean functions indicating when to step and reset.
|
| """
|
|
|
| import tensorflow as tf
|
| import gin.tf
|
|
|
|
|
@gin.configurable
def env_transition(agent, state, action, transition_type, environment_steps,
                   num_episodes):
  """Signals an ordinary (non-restart) environment transition.

  Evaluates to True whenever the transition is TRANSITION or
  FINAL_TRANSITION, i.e. anything other than RESTARTING.

  Args:
    agent: RL agent (unused).
    state: A [num_state_dims] tensor representing a state (unused).
    action: Action performed (unused).
    transition_type: Boolean-like tensor that is truthy iff the transition
      is RESTARTING.
    environment_steps: Number of steps performed by environment (unused).
    num_episodes: Number of episodes (unused).
  Returns:
    An op that evaluates to True iff the transition type is not RESTARTING.
  """
  del agent, state, action, num_episodes, environment_steps
  return tf.logical_not(transition_type)
|
|
|
|
|
@gin.configurable
def env_restart(agent, state, action, transition_type, environment_steps,
                num_episodes):
  """Signals a RESTARTING environment transition.

  Args:
    agent: RL agent (unused).
    state: A [num_state_dims] tensor representing a state (unused).
    action: Action performed (unused).
    transition_type: Boolean-like tensor that is truthy iff the transition
      is RESTARTING.
    environment_steps: Number of steps performed by environment (unused).
    num_episodes: Number of episodes (unused).
  Returns:
    An op that evaluates to True iff the transition type equals RESTARTING.
  """
  del agent, state, action, num_episodes, environment_steps
  # tf.identity wraps the input so callers always receive a fresh op.
  return tf.identity(transition_type)
|
|
|
|
|
@gin.configurable
def every_n_steps(agent,
                  state,
                  action,
                  transition_type,
                  environment_steps,
                  num_episodes,
                  n=150):
  """Evaluates to True once every n environment steps.

  The step counter is incremented before this condition is checked, so
  environment_steps is used as-is (no +1 offset is needed).

  Args:
    agent: RL agent (unused).
    state: A [num_state_dims] tensor representing a state (unused).
    action: Action performed (unused).
    transition_type: Type of transition after action (unused).
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes (unused).
    n: Return True once every n steps.
  Returns:
    An op that evaluates to True iff environment_steps == 0 (mod n).
  """
  del agent, state, action, transition_type, num_episodes
  steps_mod_n = tf.mod(environment_steps, n)
  return tf.equal(steps_mod_n, 0)
|
|
|
|
|
@gin.configurable
def every_n_episodes(agent,
                     state,
                     action,
                     transition_type,
                     environment_steps,
                     num_episodes,
                     n=2,
                     steps_per_episode=None):
  """Evaluates to True on the first step of every nth episode.

  Unlike environment_steps, num_episodes starts at 0, so one is added to it
  to avoid triggering on the very first call. Additionally triggers at an
  episode boundary whenever the agent appears to have fallen over.

  Args:
    agent: RL agent (unused).
    state: A [num_state_dims] tensor representing a state.
    action: Action performed (unused).
    transition_type: Type of transition after action (unused).
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes.
    n: Return True once every n episodes.
    steps_per_episode: How many steps per episode. Needed to determine when a
      new episode starts.
  Returns:
    An op that evaluates to True at an episode boundary when either
    (num_episodes + 1) == 0 (mod n) or the fall heuristic fires.
  """
  assert steps_per_episode is not None
  del agent, action, transition_type
  # NOTE(review): state[2] is presumably the torso z-height of an Ant-style
  # agent; outside [0.2, 1.0] it is treated as fallen -- confirm against the
  # environment's observation layout.
  ant_fell = tf.logical_or(state[2] < 0.2, state[2] > 1.0)
  is_nth_episode = tf.equal(tf.mod(num_episodes + 1, n), 0)
  at_episode_boundary = tf.equal(
      tf.mod(environment_steps, steps_per_episode), 0)
  return tf.logical_and(
      tf.logical_or(ant_fell, is_nth_episode), at_episode_boundary)
|
|
|
|
|
@gin.configurable
def failed_reset_after_n_episodes(agent,
                                  state,
                                  action,
                                  transition_type,
                                  environment_steps,
                                  num_episodes,
                                  steps_per_episode=None,
                                  reset_state=None,
                                  max_dist=1.0,
                                  epsilon=1e-10):
  """At each episode boundary, returns True if the reset agent failed.

  Specifically, evaluates to True if the distance between the state and the
  reset state is greater than max_dist at the end of the episode.

  Args:
    agent: RL agent (unused).
    state: A [num_state_dims] tensor representing a state.
    action: Action performed (unused).
    transition_type: Type of transition after action (unused).
    environment_steps: Number of steps performed by environment.
    num_episodes: Number of episodes (unused).
    steps_per_episode: How many steps per episode. Needed to determine when a
      new episode starts.
    reset_state: State to which the reset controller should return.
    max_dist: Agent is considered to have successfully reset if its distance
      from the reset_state is less than max_dist.
    epsilon: Small offset inside the sqrt to keep its argument strictly
      positive (avoids a zero/negative gradient-unfriendly sqrt input).
  Returns:
    cond: An op that evaluates to True at an episode boundary
    (environment_steps == 0 mod steps_per_episode) when the Euclidean
    distance between state and reset_state exceeds max_dist.
  """
  assert steps_per_episode is not None
  assert reset_state is not None
  # BUG FIX: the original also deleted `state`, which is used below to
  # compute the distance; that raised a NameError at graph-build time.
  del agent, action, transition_type, num_episodes
  dist = tf.sqrt(
      tf.reduce_sum(tf.squared_difference(state, reset_state)) + epsilon)
  cond = tf.logical_and(
      tf.greater(dist, tf.constant(max_dist)),
      tf.equal(tf.mod(environment_steps, steps_per_episode), 0))
  return cond
|
|
|
|
|
@gin.configurable
def q_too_small(agent,
                state,
                action,
                transition_type,
                environment_steps,
                num_episodes,
                q_min=0.5):
  """True if the critic's Q-value for (state, action) is below q_min.

  Args:
    agent: RL agent; must expose BASE_AGENT_CLASS.critic_net.
    state: A [num_state_dims] tensor representing a state.
    action: Action performed.
    transition_type: Type of transition after action (unused).
    environment_steps: Number of steps performed by environment (unused).
    num_episodes: Number of episodes (unused).
    q_min: Returns True if the qval is less than q_min.
  Returns:
    cond: An op that evaluates to True if qval is less than q_min.
  """
  del transition_type, environment_steps, num_episodes
  # BUG FIX: the original called
  #   tf.stack(state[:-1], tf.constant([0], dtype=tf.float))
  # which fails twice: `tf.float` does not exist, and tf.stack's second
  # positional argument is `axis`, not another value to join. The apparent
  # intent is to replace the final state dimension with 0 before querying
  # the reset agent's critic -- presumably a task/context slot the reset
  # critic expects zeroed; TODO(review): confirm against the agent's
  # state spec.
  state_for_reset_agent = tf.concat(
      [state[:-1], tf.constant([0], dtype=state.dtype)], axis=0)
  qval = agent.BASE_AGENT_CLASS.critic_net(
      tf.expand_dims(state_for_reset_agent, 0), tf.expand_dims(action, 0))[0, :]
  cond = tf.greater(tf.constant(q_min), qval)
  return cond
|
|
|
|
|
@gin.configurable
def true_fn(agent, state, action, transition_type, environment_steps,
            num_episodes):
  """Returns an op that always evaluates to True.

  Args:
    agent: RL agent (unused).
    state: A [num_state_dims] tensor representing a state (unused).
    action: Action performed (unused).
    transition_type: Type of transition after action (unused).
    environment_steps: Number of steps performed by environment (unused).
    num_episodes: Number of episodes (unused).
  Returns:
    An op that always evaluates to True.
  """
  del agent, state, action, transition_type, environment_steps, num_episodes
  return tf.constant(True, dtype=tf.bool)
|
|
|
|
|
@gin.configurable
def false_fn(agent, state, action, transition_type, environment_steps,
             num_episodes):
  """Returns an op that always evaluates to False.

  Args:
    agent: RL agent (unused).
    state: A [num_state_dims] tensor representing a state (unused).
    action: Action performed (unused).
    transition_type: Type of transition after action (unused).
    environment_steps: Number of steps performed by environment (unused).
    num_episodes: Number of episodes (unused).
  Returns:
    An op that always evaluates to False.
  """
  del agent, state, action, transition_type, environment_steps, num_episodes
  return tf.constant(False, dtype=tf.bool)
|
|
|