import numpy as np
from gym import Env
from gym.spaces import Discrete


class FixedSequenceEnv(Env):
    """A trivial memorization environment.

    A fixed random sequence of actions is drawn once; at each timestep the
    agent receives reward 1 for emitting the action at the current position
    of the sequence and 0 otherwise. The observation is always 0 (the agent
    must memorize the sequence, not infer it from observations).
    """

    def __init__(self, n_actions=10, episode_len=100):
        """Create the environment.

        Args:
            n_actions: size of the discrete action space.
            episode_len: number of steps per episode (and sequence length).
        """
        self.action_space = Discrete(n_actions)
        self.observation_space = Discrete(1)  # single dummy observation
        self.np_random = np.random.RandomState(0)
        self.episode_len = episode_len
        # Target sequence the agent must reproduce. Regenerated by seed().
        self.sequence = self._generate_sequence()
        self.time = 0

    def _generate_sequence(self):
        """Draw a fresh target sequence from the current RNG state."""
        return [self.np_random.randint(0, self.action_space.n)
                for _ in range(self.episode_len)]

    def reset(self):
        """Start a new episode; returns the constant observation 0."""
        self.time = 0
        return 0

    def step(self, actions):
        """Advance one step.

        Args:
            actions: a single action (int) to compare against the sequence.

        Returns:
            (observation, reward, done, info) per the gym API; observation
            is always 0 and reward is 1 iff the action matched.
        """
        rew = self._get_reward(actions)
        self._choose_next_state()
        done = False
        if self.episode_len and self.time >= self.episode_len:
            done = True
        return 0, rew, done, {}

    def seed(self, seed=None):
        """Reseed the RNG and regenerate the target sequence.

        BUGFIX: previously only the RNG was reseeded, but the sequence had
        already been drawn in __init__, so seeding had no observable effect.
        """
        self.np_random.seed(seed)
        self.sequence = self._generate_sequence()

    def _choose_next_state(self):
        # State is just the timestep counter; observations never change.
        self.time += 1

    def _get_reward(self, actions):
        # Reward 1 only when the emitted action matches the target at the
        # current position (read before _choose_next_state increments time).
        return 1 if actions == self.sequence[self.time] else 0