Upload folder using huggingface_hub
Browse files- README.md +26 -0
- __init__.py +0 -0
- __pycache__/__init__.cpython-312.pyc +0 -0
- __pycache__/models.cpython-312.pyc +0 -0
- models.py +11 -0
- server/__init__.py +0 -0
- server/__pycache__/__init__.cpython-312.pyc +0 -0
- server/__pycache__/app.cpython-312.pyc +0 -0
- server/__pycache__/guess_environment.cpython-312.pyc +0 -0
- server/app.py +20 -0
- server/guess_environment.py +48 -0
README.md
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- reinforcement-learning
|
| 4 |
+
- openenv
|
| 5 |
+
- environment
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
# Number Guessing Game Environment
|
| 9 |
+
|
| 10 |
+
A simple RL environment where an agent tries to guess a randomly chosen number between 1 and 100.
|
| 11 |
+
|
| 12 |
+
## Action
|
| 13 |
+
|
| 14 |
+
- `guess` (int): A number between 1-100
|
| 15 |
+
|
| 16 |
+
## Observation
|
| 17 |
+
|
| 18 |
+
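- `last_guess_feedback` (str): "Too low!", "Too high!", or "Correct! You've guessed the number!"; "Game over! The correct number was N." when the last guess is used up without a hit; "Game reset. Start guessing!" right after a reset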
|
| 19 |
+
- `guesses_remaining` (int): How many guesses are left
|
| 20 |
+
- `reward` (float): +10 for a correct guess, -1 for a wrong guess, 0 on reset
|
| 21 |
+
- `done` (bool): Whether the episode is over
|
| 22 |
+
|
| 23 |
+
## Rules
|
| 24 |
+
|
| 25 |
+
- The agent has 5 guesses to find the number
|
| 26 |
+
- Optimal strategy is binary search, but it needs up to 7 guesses in the worst case for the range 1-100, so with only 5 guesses the agent cannot win every episode
|
__init__.py
ADDED
|
File without changes
|
__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (177 Bytes). View file
|
|
|
__pycache__/models.cpython-312.pyc
ADDED
|
Binary file (790 Bytes). View file
|
|
|
models.py
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from openenv.core.env_server.types import Action, Observation
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
class GuessAction(Action):
    """Action submitted by the agent: a single guessed number."""

    # The number the agent is guessing.
    guess: int
|
| 6 |
+
|
| 7 |
+
class GuessObservation(Observation):
    """What the environment reports back after a reset or a guess."""

    # Human-readable outcome of the latest guess, or the reset banner.
    last_guess_feedback: str
    # How many guesses are still available in the current episode.
    guesses_remaining: int
    # Reward earned by the latest action.
    reward: float
    # True once the episode has ended.
    done: bool
|
server/__init__.py
ADDED
|
File without changes
|
server/__pycache__/__init__.cpython-312.pyc
ADDED
|
Binary file (184 Bytes). View file
|
|
|
server/__pycache__/app.cpython-312.pyc
ADDED
|
Binary file (1.23 kB). View file
|
|
|
server/__pycache__/guess_environment.cpython-312.pyc
ADDED
|
Binary file (2.2 kB). View file
|
|
|
server/app.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from fastapi import FastAPI
|
| 2 |
+
from pydantic import BaseModel
|
| 3 |
+
|
| 4 |
+
from guess_env.server.guess_environment import GuessEnvironment
|
| 5 |
+
from guess_env.models import GuessAction
|
| 6 |
+
|
| 7 |
+
# One game instance is created at import time; both endpoints in this module
# operate on this same object.
env = GuessEnvironment(max_guesses=5)

# FastAPI application that exposes the environment over HTTP.
app = FastAPI()
|
| 9 |
+
|
| 10 |
+
class StepRequest(BaseModel):
    """Request body accepted by the ``/step`` endpoint."""

    # The guess to apply to the environment.
    action: GuessAction
|
| 12 |
+
|
| 13 |
+
@app.post("/reset")
def reset():
    """Start a new episode on the shared environment.

    Returns:
        The initial observation produced by ``env.reset()``.
    """
    initial_observation = env.reset()
    return initial_observation
|
| 16 |
+
|
| 17 |
+
@app.post("/step")
def step(request: StepRequest):
    """Apply one guess to the shared environment and return the observation.

    Args:
        request: Body carrying the ``GuessAction`` to play.

    Returns:
        The observation produced by ``env.step`` for this guess.

    Raises:
        HTTPException: 409 when no episode is in progress, i.e. the game has
            not been started yet or has already finished. Without this guard
            the environment's own exception would surface as an HTTP 500.
    """
    # Imported locally so this handler does not rely on changes to the
    # module's import block.
    from fastapi import HTTPException

    # The target only exists once an episode has been started.
    if getattr(env, "target_number", None) is None:
        raise HTTPException(
            status_code=409,
            detail="Game has not been started. Please reset the environment.",
        )
    if env.done:
        raise HTTPException(
            status_code=409,
            detail="Game is over. Please reset the environment.",
        )

    return env.step(request.action)
|
server/guess_environment.py
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from guess_env.models import GuessObservation
|
| 2 |
+
import random
|
| 3 |
+
|
| 4 |
+
class GuessEnvironment:
    """Number-guessing game with a limited number of guesses.

    A hidden integer between ``MIN_NUMBER`` and ``MAX_NUMBER`` (inclusive) is
    drawn for every episode. Each call to :meth:`step` consumes one guess and
    reports whether it was too low, too high, or correct. The episode ends on
    a correct guess or once ``max_guesses`` guesses have been made.
    """

    # Inclusive bounds of the hidden number.
    MIN_NUMBER = 1
    MAX_NUMBER = 100
    # Rewards handed out per guess.
    REWARD_CORRECT = 10.0
    REWARD_WRONG = -1.0

    def __init__(self, max_guesses: int):
        """Create an environment that allows ``max_guesses`` guesses per episode."""
        self.max_guesses = max_guesses
        # Draw a target immediately so step() is usable even when the caller
        # has not called reset() yet; previously that raised AttributeError
        # because target_number was only assigned in reset().
        self._start_episode()

    def _start_episode(self) -> None:
        """Clear the per-episode counters and draw a new hidden number."""
        self.guesses_made = 0
        self.done = False
        self.target_number = random.randint(self.MIN_NUMBER, self.MAX_NUMBER)

    def reset(self) -> "GuessObservation":
        """Start a new episode.

        Returns:
            An observation with the reset banner, the full guess budget,
            zero reward and ``done=False``.
        """
        self._start_episode()
        return GuessObservation(
            last_guess_feedback="Game reset. Start guessing!",
            guesses_remaining=self.max_guesses,
            reward=0.0,
            done=self.done,
        )

    def step(self, action) -> "GuessObservation":
        """Play one guess.

        Args:
            action: Object exposing the guessed number as ``action.guess``.

        Returns:
            An observation carrying the feedback text, the remaining guesses,
            the reward for this guess and whether the episode is over.

        Raises:
            RuntimeError: If the episode has already ended; call
                :meth:`reset` to start a new one.
        """
        if self.done:
            # RuntimeError is an Exception subclass, so callers that caught
            # the previous bare Exception keep working.
            raise RuntimeError("Game is over. Please reset the environment.")

        self.guesses_made += 1
        guess = action.guess

        if guess < self.target_number:
            feedback = "Too low!"
            reward = self.REWARD_WRONG
        elif guess > self.target_number:
            feedback = "Too high!"
            reward = self.REWARD_WRONG
        else:
            feedback = "Correct! You've guessed the number!"
            reward = self.REWARD_CORRECT
            self.done = True

        # Out of guesses without a hit: reveal the number and end the episode.
        # The reward stays at the wrong-guess penalty.
        if self.guesses_made >= self.max_guesses and not self.done:
            feedback = f"Game over! The correct number was {self.target_number}."
            self.done = True

        return GuessObservation(
            last_guess_feedback=feedback,
            guesses_remaining=self.max_guesses - self.guesses_made,
            reward=reward,
            done=self.done,
        )
|