def render_episode(env, model):
    """Run one episode of *model* in *env*, yielding a record for every step.

    This is a generator: the environment is only reset once iteration
    starts, and it is only stepped as the caller pulls items.

    Args:
        env: Environment object. ``env.reset()`` must return the initial
            observation, ``env.step(action)`` must return a 4-tuple
            ``(new_obs, reward, done, info)``, and the object must expose
            ``env.x`` and ``env.unwrapped.render()``.
        model: Policy object exposing ``get_action(obs)``.

    Yields:
        dict with the keys ``x``, ``obs``, ``action``, ``reward``, ``done``,
        ``episode_len``, ``episode_return`` and ``pixel_array``.

        ``obs`` is the observation the action was chosen from, whereas
        ``x`` and ``pixel_array`` are read *after* ``env.step`` has been
        called, so they describe the environment following the action.
    """
    obs = env.reset()
    done = False
    episode_return = 0
    episode_len = 0

    while not done:
        action = model.get_action(obs)
        # The fourth element (info) returned by step is not used here.
        new_obs, reward, done, _ = env.step(action)

        episode_return += reward
        episode_len += 1

        # Key order matches the original record layout.
        data = {
            "x": env.x,
            "obs": obs,
            "action": action,
            "reward": reward,
            "done": done,
            "episode_len": episode_len,
            "episode_return": episode_return,
            "pixel_array": env.unwrapped.render(),
        }
        yield data

        # Advance only after yielding so the record keeps the pre-step obs.
        obs = new_obs