thomasm6m6 committed on
Commit
b003bf0
·
verified ·
1 Parent(s): 3650272

Switch Space to minimal OpenEnv demo

Browse files
Files changed (1) hide show
  1. minimal_space_app.py +96 -0
minimal_space_app.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Literal
4
+ from uuid import uuid4
5
+
6
+ from fastapi.responses import RedirectResponse
7
+ from openenv.core.env_server import create_app
8
+ from openenv.core.env_server.interfaces import Environment
9
+ from openenv.core.env_server.types import Action, Observation, State
10
+ from pydantic import Field
11
+
12
+
13
class MinimalAction(Action):
    # Action payload for the minimal demo environment.
    #
    # action_type selects what a step does; amount is only read by the
    # environment when action_type is "increment".

    # One of the three recognised action names; "noop" when omitted.
    action_type: Literal["noop", "increment", "finish"] = "noop"
    # Increment size, validated to the inclusive range 1..3; defaults to 1.
    amount: int = Field(1, ge=1, le=3)
16
+
17
+
18
class MinimalObservation(Observation):
    # Observation returned by the minimal demo environment after reset/step.

    # --- required fields -------------------------------------------------
    # Short status tag; the environment emits "ready", "ok", "finished"
    # or "done".
    status: str
    # Counter value at the time the observation was built.
    counter: int
    # Human-readable description of the current counter and step.
    summary: str

    # --- fields with defaults --------------------------------------------
    # Reward earned by the step that produced this observation.
    reward: float = 0.0
    # True once the episode has ended.
    done: bool = False
24
+
25
+
26
class MinimalState(State):
    # Episode state for the minimal demo: the base State fields plus a counter.

    # Running total accumulated by "increment" actions; starts at zero.
    counter: int = 0
28
+
29
+
30
class MinimalEnvironment(Environment[MinimalAction, MinimalObservation, MinimalState]):
    """Counter-based demo environment with three actions.

    ``increment`` adds ``action.amount`` to a counter and pays that amount as
    reward, ``noop`` only consumes a step, and ``finish`` ends the episode.
    An episode also ends once ``MAX_STEPS`` steps have been taken.
    """

    SUPPORTS_CONCURRENT_SESSIONS = False

    # Step budget per episode; the step that reaches it is reported as done.
    # Kept as a class attribute so a subclass can change the limit.
    MAX_STEPS = 8

    def __init__(self):
        """Create the environment with a fresh, unfinished episode."""
        super().__init__()
        self._done = False
        self._state = MinimalState(episode_id=str(uuid4()), step_count=0, counter=0)

    def reset(
        self,
        seed: int | None = None,
        episode_id: str | None = None,
        **kwargs,
    ) -> MinimalObservation:
        """Begin a new episode and return its initial observation.

        Args:
            seed: Accepted for interface compatibility; unused because the
                environment has no randomness beyond the episode id.
            episode_id: Identifier for the new episode. A random UUID string
                is generated when this is ``None`` or empty.
            **kwargs: Ignored.

        Returns:
            An observation with status ``"ready"``, zero reward, ``done``
            false, and the counter and step count both reset to zero.
        """
        del seed, kwargs  # intentionally unused
        self._done = False
        self._state = MinimalState(
            episode_id=episode_id or str(uuid4()),
            step_count=0,
            counter=0,
        )
        return self._observation(status="ready", reward=0.0, done=False)

    def step(
        self,
        action: MinimalAction,
        timeout_s: float | None = None,
        **kwargs,
    ) -> MinimalObservation:
        """Apply one action and return the resulting observation.

        Args:
            action: The action to apply. ``amount`` is only used for
                ``"increment"``.
            timeout_s: Accepted for interface compatibility; unused.
            **kwargs: Ignored.

        Returns:
            The observation after the action. If the episode has already
            ended, the state is left untouched and an observation with
            status ``"done"``, zero reward and ``done`` true is returned.
        """
        del timeout_s, kwargs  # intentionally unused

        # A finished episode stays finished until reset(); no step is counted.
        if self._done:
            return self._observation(status="done", reward=0.0, done=True)

        self._state.step_count += 1
        reward = 0.0
        status = "ok"

        if action.action_type == "increment":
            self._state.counter += action.amount
            reward = float(action.amount)
        elif action.action_type == "finish":
            self._done = True
            status = "finished"

        # The step budget ends the episode regardless of the action taken;
        # any reward earned on this final step is still reported.
        if self._state.step_count >= self.MAX_STEPS:
            self._done = True
            status = "finished"

        return self._observation(status=status, reward=reward, done=self._done)

    @property
    def state(self) -> MinimalState:
        """Return the current episode state object (not a copy)."""
        return self._state

    def close(self) -> None:
        """Do nothing; this environment holds no resources to release."""
        return None

    def _observation(self, *, status: str, reward: float, done: bool) -> MinimalObservation:
        """Build an observation from the current state and the given outcome."""
        return MinimalObservation(
            status=status,
            counter=self._state.counter,
            summary=(
                f"Minimal demo environment. Counter={self._state.counter}. "
                f"Step={self._state.step_count}. "
                "Choose noop, increment, or finish."
            ),
            reward=reward,
            done=done,
        )
89
+
90
+
91
# Build the application object for the demo environment. The environment
# class itself (not an instance) is passed, together with its action and
# observation models.
app = create_app(
    MinimalEnvironment,
    MinimalAction,
    MinimalObservation,
    env_name="minimal_space",
)
92
+
93
+
94
@app.get("/", include_in_schema=False)
def root() -> RedirectResponse:
    """Redirect requests for the site root to ``/web``."""
    web_path = "/web"
    return RedirectResponse(url=web_path)