# dummy_env.py
"""
A simple dummy environment for testing the RL agent.
State: Position (1D)
Action: Move left (-1), Stay (0), Move right (+1) (Discrete actions)
Goal: Reach position 0
Reward: -0.1 * abs(position) per step, +10 bonus if at goal
"""
from typing import Tuple, Dict
import torch
import random
import numpy as np # Use numpy for state representation convenience
class DummyEnv:
    """A minimal 1-D environment for smoke-testing RL agents.

    State: a single float position in [-max_pos, max_pos].
    Actions: 0 = move left (-0.5), 1 = stay, 2 = move right (+0.5).
    Reward: -0.1 * |position - goal| each step, +10 bonus on reaching the goal.
    An episode ends when the goal is reached (|pos - goal| < 0.1) or after
    max_steps steps.
    """

    def __init__(self, max_steps: int = 50):
        self.state_dim = 1   # position is the only state variable
        self.action_dim = 3  # actions: 0 (left), 1 (stay), 2 (right)
        self.max_steps = max_steps
        self.current_pos = 0.0
        self.steps_taken = 0
        self.goal_pos = 0.0
        self.max_pos = 5.0   # position is clipped to [-max_pos, max_pos]

    def reset(self) -> torch.Tensor:
        """Reset to a uniformly random start position and return the state tensor."""
        self.current_pos = random.uniform(-self.max_pos, self.max_pos)
        self.steps_taken = 0
        # State is returned as a 1-element float32 tensor for the agent.
        return torch.tensor([self.current_pos], dtype=torch.float32)

    def step(self, action: int) -> Tuple[torch.Tensor, float, bool, Dict]:
        """Apply `action`, advance one step, and return (next_state, reward, done, info).

        Accepts plain Python ints as well as numpy integer scalars (e.g. the
        result of `np.argmax`), which a bare `isinstance(action, int)` check
        would wrongly reject.

        Raises:
            ValueError: if `action` is not one of 0, 1, 2.
        """
        # Coerce numpy integer scalars to a plain int; reject everything else.
        if isinstance(action, (int, np.integer)):
            action = int(action)
        else:
            raise ValueError(f"Invalid action: {action}. Must be 0, 1, or 2.")
        if action not in (0, 1, 2):
            raise ValueError(f"Invalid action: {action}. Must be 0, 1, or 2.")

        # Update position based on action; action 1 (stay) leaves it unchanged.
        if action == 0:    # move left
            self.current_pos -= 0.5
        elif action == 2:  # move right
            self.current_pos += 0.5

        # Clip to the track boundaries. float(...) keeps current_pos a plain
        # Python float instead of letting np.clip promote it to np.float64.
        self.current_pos = float(np.clip(self.current_pos, -self.max_pos, self.max_pos))
        self.steps_taken += 1

        # Distance-based shaping: small penalty proportional to distance from goal.
        reward = -abs(self.current_pos - self.goal_pos) * 0.1

        done = False
        # Goal reached within a small tolerance → bonus and terminate.
        if abs(self.current_pos - self.goal_pos) < 0.1:
            reward += 10.0
            done = True
        # Time limit: terminate once max_steps is reached (no extra penalty).
        if self.steps_taken >= self.max_steps:
            done = True

        next_state = torch.tensor([self.current_pos], dtype=torch.float32)
        info = {}  # reserved for diagnostics; empty for now
        return next_state, float(reward), done, info
# Example usage: roll out a single episode under a uniform-random policy.
if __name__ == "__main__":
    env = DummyEnv()
    state = env.reset()
    print(f"Initial state: {state.item()}")

    total_reward = 0
    steps = 0
    done = False
    while not done:
        # Sample an action uniformly at random.
        action = random.choice((0, 1, 2))
        next_state, reward, done, _ = env.step(action)
        steps += 1
        print(f"Step {steps}: Action={action}, Next State={next_state.item():.2f}, Reward={reward:.2f}, Done={done}")
        total_reward += reward
        state = next_state
        # Safety net in case the env somehow never signals done.
        if steps > env.max_steps + 5:
            print("Exceeded max steps significantly, breaking.")
            break
    print(f"\nEpisode finished after {steps} steps. Total reward: {total_reward:.2f}")