Veer15 committed on
Commit
0da1902
·
verified ·
1 Parent(s): b641d3d

Upload folder using huggingface_hub

Browse files
Files changed (5) hide show
  1. Dockerfile +1 -0
  2. README.md +11 -0
  3. __init__.py +13 -0
  4. client.py +56 -0
  5. models.py +9 -0
Dockerfile CHANGED
@@ -37,4 +37,5 @@ RUN chmod +x ./start.sh
37
  RUN ln -sfn /home/user/app/mesh /mesh
38
 
39
  EXPOSE 8000
 
40
  CMD ["./start.sh"]
 
37
  RUN ln -sfn /home/user/app/mesh /mesh
38
 
39
  EXPOSE 8000
40
+ ENV ENABLE_WEB_INTERFACE=true
41
  CMD ["./start.sh"]
README.md CHANGED
@@ -1,3 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
1
  # Distributed Systems Debug Environment
2
 
3
  ## Overview
 
1
+ ---
2
+ title: distributed-systems-debug-env
3
+ sdk: docker
4
+ app_port: 8000
5
+ colorFrom: blue
6
+ colorTo: indigo
7
+ short_description: OpenEnv RL env for debugging distributed systems failures.
8
+ base_path: /web
9
+ ---
10
+
11
+
12
  # Distributed Systems Debug Environment
13
 
14
  ## Overview
__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Distributed systems debug OpenEnv package."""
2
+
3
+ from .client import DistributedSystemsDebugEnv
4
+ from .models import Action, Observation, Reward, StepResult, SystemMetrics
5
+
6
+ __all__ = [
7
+ "DistributedSystemsDebugEnv",
8
+ "Action",
9
+ "Observation",
10
+ "Reward",
11
+ "StepResult",
12
+ "SystemMetrics",
13
+ ]
client.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any
2
+
3
+ from openenv.core import EnvClient
4
+ from openenv.core.client_types import StepResult as ClientStepResult
5
+ from openenv.core.env_server.types import State
6
+
7
+ from .models import Action, Observation, SystemMetrics
8
+
9
+
10
def _float_or_default(value: Any, default: float = 0.0) -> float:
    """Return ``float(value)``, or *default* when *value* is ``None``."""
    return default if value is None else float(value)


def _int_or_default(value: Any, default: int = 0) -> int:
    """Return ``int(value)``, or *default* when *value* is ``None``."""
    return default if value is None else int(value)


class DistributedSystemsDebugEnv(EnvClient[Action, Observation]):
    """Client wrapper around the environment HTTP API.

    Implements the three ``EnvClient`` hooks: serialising an ``Action`` for
    a step request, and turning the step/state response payloads back into
    typed objects.
    """

    def _step_payload(self, action: Action) -> dict[str, Any]:
        """Serialise *action* into the dict sent with a step request."""
        return action.model_dump()

    def _parse_result(self, payload: dict[str, Any]) -> ClientStepResult[Observation]:
        """Build a typed step result from a response *payload*.

        Missing or ``None`` sections and fields fall back to neutral
        defaults (empty string, ``0``/``0.0``, empty mapping) instead of
        raising, so a partially populated response still parses.

        Args:
            payload: Decoded response body; read keys are ``observation``,
                ``reward`` and ``done``.

        Returns:
            A ``ClientStepResult`` whose ``reward`` is ``None`` when the
            payload carries no reward.
        """
        observation_payload = payload.get("observation") or {}
        metrics_payload = observation_payload.get("metrics") or {}
        process_status_payload = observation_payload.get("process_status") or {}

        # A key that is present but null must behave like a missing key;
        # ``dict.get(key, default)`` alone only covers the missing case.
        metrics = SystemMetrics(
            gateway_success_rate=_float_or_default(
                metrics_payload.get("gateway_success_rate")
            ),
            gateway_p99_latency_ms=_float_or_default(
                metrics_payload.get("gateway_p99_latency_ms")
            ),
            queue_depth=_int_or_default(metrics_payload.get("queue_depth")),
            worker_restart_count=_int_or_default(
                metrics_payload.get("worker_restart_count")
            ),
            consumer_stall_count=_int_or_default(
                metrics_payload.get("consumer_stall_count")
            ),
        )

        observation = Observation(
            command_output=str(observation_payload.get("command_output") or ""),
            metrics=metrics,
            process_status={
                str(key): str(value)
                for key, value in dict(process_status_payload).items()
            },
        )

        reward = payload.get("reward")
        return ClientStepResult(
            observation=observation,
            reward=float(reward) if reward is not None else None,
            done=bool(payload.get("done", False)),
        )

    def _parse_state(self, payload: dict[str, Any]) -> State:
        """Build a ``State`` from a state response *payload*.

        The payload's ``task`` value is used as the episode identifier; a
        missing or ``None`` ``step_count`` is treated as ``0``.
        """
        return State(
            episode_id=payload.get("task"),
            step_count=_int_or_default(payload.get("step_count")),
        )
models.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from server.models import Action, Observation, Reward, StepResult, SystemMetrics
2
+
3
+ __all__ = [
4
+ "Action",
5
+ "Observation",
6
+ "Reward",
7
+ "StepResult",
8
+ "SystemMetrics",
9
+ ]