Spaces:

Veer15
/

openenv-distributed-systems-debugging

Sleeping

App Files Files Community

Veer15 committed on Apr 6

Commit

0da1902

verified ·

1 Parent(s): b641d3d

Upload folder using huggingface_hub

Browse files

Files changed (5) hide show

Dockerfile +1 -0
README.md +11 -0
__init__.py +13 -0
client.py +56 -0
models.py +9 -0

Dockerfile CHANGED Viewed

@@ -37,4 +37,5 @@ RUN chmod +x ./start.sh
 RUN ln -sfn /home/user/app/mesh /mesh
 EXPOSE 8000
 CMD ["./start.sh"]

 RUN ln -sfn /home/user/app/mesh /mesh
 EXPOSE 8000
+ENV ENABLE_WEB_INTERFACE=true
 CMD ["./start.sh"]

README.md CHANGED Viewed

@@ -1,3 +1,14 @@
 # Distributed Systems Debug Environment
 ## Overview

+---
+title: distributed-systems-debug-env
+sdk: docker
+app_port: 8000
+colorFrom: blue
+colorTo: indigo
+short_description: OpenEnv RL env for debugging distributed systems failures.
+base_path: /web
+---
 # Distributed Systems Debug Environment
 ## Overview

__init__.py ADDED Viewed

	@@ -0,0 +1,13 @@

+"""Distributed systems debug OpenEnv package."""
+from .client import DistributedSystemsDebugEnv
+from .models import Action, Observation, Reward, StepResult, SystemMetrics
+__all__ = [  # public names re-exported at package level
+    "DistributedSystemsDebugEnv",
+    "Action",
+    "Observation",
+    "Reward",
+    "StepResult",
+    "SystemMetrics",
+]

client.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from typing import Any
+from openenv.core import EnvClient
+from openenv.core.client_types import StepResult as ClientStepResult
+from openenv.core.env_server.types import State
+from .models import Action, Observation, SystemMetrics
+class DistributedSystemsDebugEnv(EnvClient[Action, Observation]):
+    """Client wrapper around the environment HTTP API."""
+    def _step_payload(self, action: Action) -> dict[str, Any]:
+        return action.model_dump()
+    def _parse_result(self, payload: dict[str, Any]) -> ClientStepResult[Observation]:
+        observation_payload = payload.get("observation") or {}
+        metrics_payload = observation_payload.get("metrics") or {}
+        observation = Observation(
+            command_output=str(observation_payload.get("command_output") or ""),
+            metrics=SystemMetrics(
+                gateway_success_rate=float(
+                    metrics_payload.get("gateway_success_rate", 0.0)
+                ),
+                gateway_p99_latency_ms=float(
+                    metrics_payload.get("gateway_p99_latency_ms", 0.0)
+                ),
+                queue_depth=int(metrics_payload.get("queue_depth", 0)),
+                worker_restart_count=int(
+                    metrics_payload.get("worker_restart_count", 0)
+                ),
+                consumer_stall_count=int(
+                    metrics_payload.get("consumer_stall_count", 0)
+                ),
+            ),
+            process_status={
+                str(key): str(value)
+                for key, value in dict(
+                    observation_payload.get("process_status") or {}
+                ).items()
+            },
+        )
+        reward = payload.get("reward")
+        return ClientStepResult(
+            observation=observation,
+            reward=float(reward) if reward is not None else None,
+            done=bool(payload.get("done", False)),
+        )
+    def _parse_state(self, payload: dict[str, Any]) -> State:
+        return State(
+            episode_id=payload.get("task"),
+            step_count=int(payload.get("step_count", 0)),
+        )

models.py ADDED Viewed

	@@ -0,0 +1,9 @@

+from server.models import Action, Observation, Reward, StepResult, SystemMetrics
+__all__ = [  # model names re-exported from server.models
+    "Action",
+    "Observation",
+    "Reward",
+    "StepResult",
+    "SystemMetrics",
+]