| |
| """ |
| A simple dummy environment for testing the RL agent. |
| State: Position (1D) |
| Action: Move left (-1), Stay (0), Move right (+1) (Discrete actions) |
| Goal: Reach position 0 |
| Reward: -abs(position), +10 if at goal |
| """ |
| from typing import Tuple, Dict |
| import torch |
| import random |
| import numpy as np |
|
|
class DummyEnv:
    """A minimal 1-D environment for smoke-testing RL agents.

    State:  the agent's scalar position, returned as a (1,)-shaped
            float32 tensor.
    Action: 0 = move left (-0.5), 1 = stay, 2 = move right (+0.5).
    Goal:   reach ``goal_pos`` (within 0.1).
    Reward: ``-0.1 * |position - goal|`` each step, plus +10 on
            reaching the goal.
    """

    def __init__(self, max_steps: int = 50):
        """
        Args:
            max_steps: episode length cap; ``done`` is forced True once
                this many steps have been taken.
        """
        self.state_dim = 1    # 1-D position
        self.action_dim = 3   # left / stay / right
        self.max_steps = max_steps
        self.current_pos = 0.0
        self.steps_taken = 0
        self.goal_pos = 0.0
        self.max_pos = 5.0    # positions are clamped to [-max_pos, max_pos]

    def reset(self) -> torch.Tensor:
        """Reset to a uniformly random position in [-max_pos, max_pos].

        Returns:
            The initial state as a (1,)-shaped float32 tensor.
        """
        self.current_pos = random.uniform(-self.max_pos, self.max_pos)
        self.steps_taken = 0
        return torch.tensor([self.current_pos], dtype=torch.float32)

    def step(self, action: int) -> Tuple[torch.Tensor, float, bool, Dict]:
        """Apply one action and advance the environment by one step.

        Args:
            action: 0 (left), 1 (stay) or 2 (right). Plain Python ints
                and numpy integer scalars (e.g. ``np.int64`` from an
                argmax) are both accepted.

        Returns:
            Tuple ``(next_state, reward, done, info)``: next_state is a
            (1,)-shaped float32 tensor, reward is a float, done flags
            episode end (goal reached or step budget exhausted), and
            info carries ``"truncated"`` — True only when the episode
            ended purely because of the step limit.

        Raises:
            ValueError: if ``action`` is not one of 0, 1, 2.
        """
        # Accept numpy integer scalars in addition to plain ints; agents
        # frequently emit np.int64 actions.
        if not isinstance(action, (int, np.integer)) or action not in (0, 1, 2):
            raise ValueError(f"Invalid action: {action}. Must be 0, 1, or 2.")
        action = int(action)  # normalize numpy scalars to a plain int

        if action == 0:
            self.current_pos -= 0.5
        elif action == 2:
            self.current_pos += 0.5
        # action == 1: stay put

        # Keep the position inside the playing field. float() prevents
        # np.clip from silently promoting current_pos to np.float64.
        self.current_pos = float(np.clip(self.current_pos, -self.max_pos, self.max_pos))

        self.steps_taken += 1

        # Dense shaping: small penalty proportional to distance from goal.
        reward = -abs(self.current_pos - self.goal_pos) * 0.1
        done = False

        at_goal = abs(self.current_pos - self.goal_pos) < 0.1
        if at_goal:
            reward += 10.0
            done = True

        # Time-limit truncation; reported separately in info so callers
        # can distinguish it from genuine goal termination.
        truncated = self.steps_taken >= self.max_steps
        if truncated:
            done = True

        next_state = torch.tensor([self.current_pos], dtype=torch.float32)
        info = {"truncated": truncated and not at_goal}

        return next_state, float(reward), done, info
|
|
| |
| if __name__ == "__main__": |
| env = DummyEnv() |
| state = env.reset() |
| print(f"Initial state: {state.item()}") |
| done = False |
| total_reward = 0 |
| steps = 0 |
|
|
| while not done: |
| action = random.choice([0, 1, 2]) |
| next_state, reward, done, _ = env.step(action) |
| print(f"Step {steps+1}: Action={action}, Next State={next_state.item():.2f}, Reward={reward:.2f}, Done={done}") |
| state = next_state |
| total_reward += reward |
| steps += 1 |
| if steps > env.max_steps + 5: |
| print("Exceeded max steps significantly, breaking.") |
| break |
|
|
|
|
| print(f"\nEpisode finished after {steps} steps. Total reward: {total_reward:.2f}") |