Spaces:

Veer15
/

openenv-distributed-systems-debugging

Sleeping

App Files Files Community

Veer15 commited on Apr 6

Commit

b641d3d

verified ·

1 Parent(s): 80ee7f5

chore: deploy distributed systems debug environment

Browse files

Files changed (48) hide show

.gitignore +36 -0
.pytest_cache/.gitignore +2 -0
.pytest_cache/CACHEDIR.TAG +4 -0
.pytest_cache/README.md +8 -0
.pytest_cache/v/cache/nodeids +34 -0
Dockerfile +40 -0
README.md +242 -7
inference.py +418 -0
mesh/auth/bun.lock +19 -0
mesh/auth/config.json +3 -0
mesh/auth/index.ts +83 -0
mesh/auth/package.json +10 -0
mesh/auth/tsconfig.json +10 -0
mesh/gateway/blocked_routes.json +3 -0
mesh/gateway/bun.lock +44 -0
mesh/gateway/config.json +3 -0
mesh/gateway/index.ts +213 -0
mesh/gateway/package.json +13 -0
mesh/gateway/tsconfig.json +10 -0
mesh/registry.json +19 -0
mesh/worker/bun.lock +44 -0
mesh/worker/config.json +4 -0
mesh/worker/index.ts +198 -0
mesh/worker/job_generator.ts +94 -0
mesh/worker/job_generator_config.json +3 -0
mesh/worker/package.json +14 -0
mesh/worker/tsconfig.json +10 -0
openenv.yaml +86 -0
pyproject.toml +32 -0
requirements.txt +7 -0
server/__init__.py +0 -0
server/api.py +63 -0
server/app.py +11 -0
server/constants.py +54 -0
server/env.py +467 -0
server/fault_injector.py +95 -0
server/graders.py +109 -0
server/metrics_poller.py +96 -0
server/models.py +41 -0
server/process_manager.py +164 -0
setup-dev.sh +86 -0
start.sh +50 -0
tests/test_api_reset.py +65 -0
tests/test_env_task_context.py +57 -0
tests/test_graders.py +132 -0
tests/test_inference_format.py +186 -0
tests/test_models.py +42 -0
uv.lock +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,36 @@

+docs/
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+.venv/
+venv/
+.env
+*.egg-info/
+dist/
+build/
+# Bun / Node
+node_modules/
+bun.lockb
+# SQLite
+*.db
+*.sqlite
+*.sqlite3
+# Logs
+*.log
+/tmp/
+# OS
+.DS_Store
+Thumbs.db
+# IDE
+.cursor/
+.vscode/
+*.swp
+inference.out

.pytest_cache/.gitignore ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ # Created by pytest automatically.
2	+ *

.pytest_cache/CACHEDIR.TAG ADDED Viewed

	@@ -0,0 +1,4 @@

+Signature: 8a477f597d28d172789f06886806bc55
+# This file is a cache directory tag created by pytest.
+# For information about cache directory tags, see:
+#	https://bford.info/cachedir/spec.html

.pytest_cache/README.md ADDED Viewed

	@@ -0,0 +1,8 @@

+# pytest cache directory #
+This directory contains data from the pytest's cache plugin,
+which provides the `--lf` and `--ff` options, as well as the `cache` fixture.
+**Do not** commit this to version control.
+See [the docs](https://docs.pytest.org/en/stable/how-to/cache.html) for more information.

.pytest_cache/v/cache/nodeids ADDED Viewed

	@@ -0,0 +1,34 @@

+[
+  "tests/test_api_reset.py::test_reset_defaults_to_cascading_timeout_when_task_missing",
+  "tests/test_api_reset.py::test_reset_rejects_unknown_explicit_task",
+  "tests/test_env_task_context.py::test_job_generator_rate_resolved_uses_config",
+  "tests/test_env_task_context.py::test_registry_auth_matches_default_detects_corruption",
+  "tests/test_env_task_context.py::test_restore_defaults_adds_job_generator_config",
+  "tests/test_graders.py::test_grade_backpressure_cascade_continuous",
+  "tests/test_graders.py::test_grade_byzantine_queue_fault_cases",
+  "tests/test_graders.py::test_grade_cascading_timeout_boundaries",
+  "tests/test_graders.py::test_grade_distributed_lock_starvation_cases",
+  "tests/test_graders.py::test_grade_job_generator_runaway_thresholds",
+  "tests/test_graders.py::test_grade_registry_corruption_thresholds",
+  "tests/test_graders.py::test_grade_route_partition_threshold",
+  "tests/test_inference_format.py::test_attempt_history_block_renders_all_attempts",
+  "tests/test_inference_format.py::test_build_prompt_contains_symptoms_and_history",
+  "tests/test_inference_format.py::test_end_log_line_includes_score_and_reward_list",
+  "tests/test_inference_format.py::test_episode_score_clamps_terminal_reward_to_unit_interval",
+  "tests/test_inference_format.py::test_extract_command_prefers_first_json_object_with_command",
+  "tests/test_inference_format.py::test_extract_command_reads_fenced_json_payload",
+  "tests/test_inference_format.py::test_extract_command_reads_json_after_reasoning_preamble",
+  "tests/test_inference_format.py::test_extract_command_reads_json_embedded_in_text",
+  "tests/test_inference_format.py::test_extract_command_reads_json_payload",
+  "tests/test_inference_format.py::test_extract_command_rejects_non_json_code_fence",
+  "tests/test_inference_format.py::test_extract_command_requires_command_even_with_reasoning",
+  "tests/test_inference_format.py::test_extract_command_returns_none_when_empty",
+  "tests/test_inference_format.py::test_extract_reasoning_when_present",
+  "tests/test_inference_format.py::test_parse_tasks_default_and_override",
+  "tests/test_inference_format.py::test_single_line_removes_newlines",
+  "tests/test_inference_format.py::test_task_symptom_block_includes_new_tasks",
+  "tests/test_inference_format.py::test_task_symptom_block_is_non_empty",
+  "tests/test_models.py::test_action_rejects_empty_command",
+  "tests/test_models.py::test_observation_roundtrip",
+  "tests/test_models.py::test_system_metrics_rejects_success_rate_above_one"
+]

Dockerfile ADDED Viewed

	@@ -0,0 +1,40 @@

+# Single-container image: Python API server plus the Bun-based mesh services and Redis.
+FROM python:3.12-slim
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    redis-server \
+    jq \
+    procps \
+    sqlite3 \
+    unzip \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+RUN useradd -m -u 1000 user
+WORKDIR /home/user/app
+# Publish the mesh tree at /mesh via a symlink, created while still root.
+# Creating /mesh as a real directory first would make a later `ln -sfn ... /mesh`
+# place the link inside it (/mesh/mesh), leaving /mesh/gateway etc. missing.
+# The link target does not exist yet; it is filled in by the COPY of mesh/ below.
+RUN ln -sfn /home/user/app/mesh /mesh && chown -R user:user /home/user
+USER user
+ENV HOME=/home/user
+ENV PATH="/home/user/.bun/bin:/home/user/.local/bin:${PATH}"
+RUN curl -fsSL https://bun.sh/install | bash
+COPY --chown=user:user mesh/ ./mesh/
+RUN cd mesh/gateway && bun install
+RUN cd mesh/auth && bun install
+RUN cd mesh/worker && bun install
+COPY --chown=user:user requirements.txt ./
+RUN pip install --no-cache-dir -r requirements.txt
+COPY --chown=user:user *.py ./
+COPY --chown=user:user server/ ./server/
+COPY --chown=user:user openenv.yaml ./
+COPY --chown=user:user start.sh ./
+RUN chmod +x ./start.sh
+EXPOSE 8000
+CMD ["./start.sh"]

README.md CHANGED Viewed

@@ -1,9 +1,244 @@
----
-title: openenv-distributed-systems-debugging
-sdk: docker
-short_description: OpenEnv RL env for debugging distributed systems failures.
----
-# openenv-distributed-systems-debugging
-OpenEnv-compatible RL environment for debugging distributed systems failures.

+# Distributed Systems Debug Environment
+## Overview
+This project provides an OpenEnv-compatible RL environment for debugging distributed systems failures.
+The environment simulates a production-style pipeline:
+- Gateway service (sync HTTP orchestration)
+- Auth service (sync dependency)
+- Redis queue (message bus)
+- Worker service (async consumer + lock handling)
+- SQLite sink (persistence simulation)
+An agent interacts only through shell commands and must diagnose/fix injected faults.
+## Why this environment
+Most RL environments focus on games or synthetic workflows. This one targets bugs that I have personally faced at my job, focusing on the debugging skills used in real systems engineering:
+- reading logs under uncertainty
+- triaging latency and queue symptoms
+- fixing misconfigurations safely
+- validating recovery from metrics
+## Architecture
+```
+Agent command -> /step (FastAPI)
+                  |
+                  +-> executes shell command (sandboxed, 10s timeout)
+                  +-> polls metrics
+                  +-> grades progress
+Services (same container):
+  gateway:3000 -> auth:3001 -> redis:6379 -> worker -> sqlite
+```
+## Observation Space
+| Field | Type | Description |
+|---|---|---|
+| `command_output` | string | stdout+stderr of last command |
+| `metrics.gateway_success_rate` | float [0,1] | rolling gateway success rate |
+| `metrics.gateway_p99_latency_ms` | float | rolling p99 latency |
+| `metrics.queue_depth` | int | Redis queue depth |
+| `metrics.worker_restart_count` | int | simulated crash-loop count |
+| `metrics.consumer_stall_count` | int | lock-starvation stall count |
+| `process_status` | object | runtime status by service |
+## Action Space
+Single command action:
+```json
+{ "command": "<bash command>" }
+```
+Examples:
+- `tail -20 /tmp/worker.log`
+- `redis-cli DEL LOCK:job_processor`
+- `cat /mesh/gateway/blocked_routes.json`
+- `kill -HUP $(cat /tmp/worker.pid)`
+## Tasks
+| Task | Difficulty | Goal |
+|---|---|---|
+| `cascading-timeout` | easy | restore successful sync flow (auth delay vs gateway timeout) |
+| `byzantine-queue-fault` | medium | remove poison message and stabilize worker |
+| `distributed-lock-starvation` | hard | clear stale lock and resume consumption |
+| `backpressure-cascade` | hard | recover throughput and reduce queue growth |
+| `route-partition` | hard | unblock gateway->redis route policy |
+| `registry-corruption` | medium | repair corrupted auth registry entry and restore request flow |
+| `job-generator-runaway` | hard | reduce enqueue pressure so the queue drains sustainably |
+## Reward Function
+- Terminal reward: `1.0` when grader score >= `0.95`
+- Dense shaping from grader progress + investigation command bonus + metric improvements
+- Penalties for blocked/damaging actions and repeated non-productive behavior
+- Reward clamped to `[0.0, 1.0]`
+## Baseline Inference policy (3 of 7 by default)
+All seven tasks are implemented in the environment.
+`inference.py` runs these default tasks for runtime reliability:
+1. `cascading-timeout` (easy)
+2. `byzantine-queue-fault` (medium)
+3. `distributed-lock-starvation` (hard)
+Override with:
+```bash
+TASKS_CSV=cascading-timeout,route-partition python inference.py
+```
+## Setup
+### Local
+```bash
+python3.12 -m venv .venv
+. .venv/bin/activate
+pip install -r requirements.txt
+bun install --cwd mesh/gateway
+bun install --cwd mesh/auth
+bun install --cwd mesh/worker
+APP_ROOT=$(pwd) MESH_ROOT=$(pwd)/mesh ./start.sh
+```
+### Docker
+```bash
+docker build -t dist-debug-env .
+docker run -p 8000:8000 dist-debug-env
+```
+### API smoke check
+```bash
+curl http://localhost:8000/health
+curl -X POST "http://localhost:8000/reset?task_name=cascading-timeout"
+curl -X POST http://localhost:8000/step \
+  -H "Content-Type: application/json" \
+  -d '{"command":"ls /tmp"}'
+```
+## Inference script contract
+`inference.py` emits strict logs:
+```text
+[START] task=<task_name> env=<benchmark> model=<model_name>
+[STEP]  step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
+[END]   success=<true|false> steps=<n> score=<0.00> rewards=<r1,r2,...,rn>
+```
+## Logging
+Service logs (JSON-lines):
+- `/tmp/gateway.log`
+- `/tmp/auth.log`
+- `/tmp/worker.log`
+Common fields:
+- `ts`, `level`, `service`, `event`, `pattern`
+Example investigation commands:
+```bash
+tail -30 /tmp/worker.log
+jq 'select(.level=="ERROR")' /tmp/worker.log
+redis-cli LLEN job_queue
+```
+## Baseline scores
+Baseline scores depend on endpoint/model latency and quality. Reproduce with:
+```bash
+HF_TOKEN=<token> API_BASE_URL=<endpoint> MODEL_NAME=<model> python inference.py
+```
+## Run this locally
+Use this checklist when running the full baseline end-to-end on your machine.
+1. Install dependencies and validate project setup:
+```bash
+./setup-dev.sh
+```
+2. Activate the project virtual environment (required so `uvicorn` and Python deps are on PATH):
+```bash
+source .venv/bin/activate
+```
+3. Start the environment API (leave this terminal running):
+```bash
+APP_ROOT=$(pwd) MESH_ROOT=$(pwd)/mesh ./start.sh
+```
+4. In another terminal, activate venv again and export required inference variables:
+```bash
+source .venv/bin/activate
+export API_BASE_URL="https://openrouter.ai/api/v1"
+export MODEL_NAME="<your-model>"
+export HF_TOKEN="<your-api-key>"
+# Optional override; default already runs 3 baseline tasks
+export TASKS_CSV="cascading-timeout,byzantine-queue-fault,distributed-lock-starvation"
+```
+If you have a `.env` file, you can load the variables from it with these commands:
+```bash
+set -a
+source .env
+set +a
+```
+5. Run inference with a 20 minute cap and capture output:
+```bash
+# On macOS (with coreutils installed) use gtimeout; on Linux replace it with timeout
+gtimeout 1200 python inference.py | tee inference.out
+```
+6. Validate structured stdout format quickly:
+```bash
+python - <<'PY'
+import re, sys
+from pathlib import Path
+lines = Path("inference.out").read_text(encoding="utf-8").splitlines()
+if not lines:
+    print("FAIL: no output")
+    raise SystemExit(1)
+start_re = re.compile(r'^\[START\] task=\S+ env=\S+ model=.+$')
+step_re = re.compile(r'^\[STEP\]\s{2}step=\d+ action=.* reward=\d+\.\d{2} done=(true|false) error=.*$')
+end_re = re.compile(r'^\[END\]\s{3}success=(true|false) steps=\d+ score=\d+\.\d{2} rewards=.*$')
+for i, line in enumerate(lines, 1):
+    if line.startswith("[START]") and not start_re.match(line):
+        print(f"FAIL: bad START line {i}: {line}")
+        raise SystemExit(1)
+    if line.startswith("[STEP]") and not step_re.match(line):
+        print(f"FAIL: bad STEP line {i}: {line}")
+        raise SystemExit(1)
+    if line.startswith("[END]") and not end_re.match(line):
+        print(f"FAIL: bad END line {i}: {line}")
+        raise SystemExit(1)
+print("PASS: stdout format valid")
+PY
+```
+7. Re-run required submission gates:
+```bash
+openenv validate .
+docker build -t dist-debug-env:local .
+```
+## Benchmarks Between Models
+### 3-Task Benchmark
+<img width="1177" height="752" alt="Screenshot 2026-04-04 at 11 54 25 PM" src="https://github.com/user-attachments/assets/3dbfa87a-6696-4589-a908-baa3f498bda8" />
+### 7-Task Benchmark
+<img width="1294" height="240" alt="Screenshot 2026-04-05 at 12 30 45 AM" src="https://github.com/user-attachments/assets/1d0d3847-212e-46ba-967f-f79be3f9067c" />

inference.py ADDED Viewed

	@@ -0,0 +1,418 @@

+import json
+import os
+import re
+from typing import Any, Iterator
+import httpx
+from server.constants import (
+    DEFAULT_BASELINE_TASK_ENUMS,
+    NO_COMMAND_PROVIDED_SENTINEL,
+    TASK_MAX_STEPS,
+    TaskName,
+)
+from server.models import Action, Observation, StepResult
+# Runtime configuration, read once at import time from environment variables.
+# Chat endpoint base URL and the model name requested from it.
+API_BASE_URL = os.getenv("API_BASE_URL", "https://router.huggingface.co/v1")
+MODEL_NAME = os.getenv("MODEL_NAME", "Qwen/Qwen2.5-72B-Instruct")
+# HF_TOKEN takes precedence; API_KEY is the fallback. May be None when neither is set.
+API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
+# Base URL of the environment server that exposes /reset and /step.
+ENV_URL = os.getenv("ENV_URL", "http://localhost:8000")
+# Benchmark name printed in the [START] log line.
+BENCHMARK = "distributed-systems-debug-env"
+# Optional cap on steps per episode; 0 (the default) means "use the per-task budget".
+MAX_STEPS_CAP = int(os.getenv("MAX_STEPS", "0"))
+TEMPERATURE = float(os.getenv("TEMPERATURE", "0.2"))
+MAX_COMPLETION_TOKENS = int(os.getenv("MAX_COMPLETION_TOKENS", "2048"))
+# Shared decoder used to pull JSON objects out of free-form model output.
+_JSON_DECODER = json.JSONDecoder()
+def _chat_token_limit_kwargs() -> dict[str, int]:
+    """OpenAI `gpt-5.*` / some models require `max_completion_tokens`, not `max_tokens`."""
+    override = os.getenv("CHAT_TOKEN_LIMIT_PARAM", "").strip().lower()
+    if override == "max_tokens":
+        return {"max_tokens": MAX_COMPLETION_TOKENS}
+    if override == "max_completion_tokens":
+        return {"max_completion_tokens": MAX_COMPLETION_TOKENS}
+    base = API_BASE_URL or ""
+    if "api.openai.com" in base:
+        return {"max_completion_tokens": MAX_COMPLETION_TOKENS}
+    return {"max_tokens": MAX_COMPLETION_TOKENS}
+# System message sent as the first chat message of every episode. It fixes the reply
+# contract: one JSON object per turn with a required `command` and optional `reasoning`.
+SYSTEM_PROMPT = """You have bash access to a distributed job processing pipeline that is experiencing a failure.
+Use bash commands to investigate system behavior and narrow down likely fault conditions.
+Standard Unix tools are available: ps, ls, cat, grep, tail, curl, jq, redis-cli, kill, sed.
+Work iteratively across multiple steps; each response must provide the next bash command only.
+Respond with compact JSON where `command` is required: {"command":"<bash command>","reasoning":"optional concise reason"}.
+No markdown. No explanation outside JSON."""
+# Per-task symptom descriptions rendered as a bullet list in each step prompt.
+# The wording describes observable behavior only and does not name the injected fault.
+TASK_SYMPTOMS: dict[TaskName, tuple[str, ...]] = {
+    TaskName.CASCADING_TIMEOUT: (
+        "Requests intermittently fail even when services appear up.",
+        "Latency spikes sharply during traffic bursts.",
+    ),
+    TaskName.BYZANTINE_QUEUE_FAULT: (
+        "Worker throughput degrades after specific jobs enter the queue.",
+        "Queue backlog grows despite workers being alive.",
+    ),
+    TaskName.DISTRIBUTED_LOCK_STARVATION: (
+        "One or more workers appear blocked for extended periods.",
+        "Work completion remains low without full service outage.",
+    ),
+    TaskName.BACKPRESSURE_CASCADE: (
+        "Queue depth trends upward over time under steady load.",
+    ),
+    TaskName.ROUTE_PARTITION: (
+        "Gateway requests intermittently fail despite local process health.",
+        "Signals point to a connectivity path issue rather than a full service outage.",
+    ),
+    TaskName.REGISTRY_CORRUPTION: (
+        "Gateway requests fail even though the gateway process is still healthy.",
+        "Logs and config inspection suggest a bad upstream registry entry.",
+    ),
+    TaskName.JOB_GENERATOR_RUNAWAY: (
+        "Queue backlog grows while the worker stays alive.",
+        "Producer pressure appears higher than the system can sustainably drain.",
+    ),
+}
+class DistributedDebugEnvClient:
+    def __init__(self, base_url: str) -> None:
+        self._client = httpx.Client(base_url=base_url, timeout=45.0)
+    def close(self) -> None:
+        self._client.close()
+    def reset(self, task_name: str) -> Observation:
+        response = self._client.post("/reset", params={"task_name": task_name})
+        response.raise_for_status()
+        return Observation.model_validate(response.json())
+    def step(self, action: Action) -> StepResult:
+        response = self._client.post("/step", json=action.model_dump())
+        response.raise_for_status()
+        return StepResult.model_validate(response.json())
+def _parse_tasks() -> list[TaskName]:
+    csv = os.getenv("TASKS_CSV", "").strip()
+    if not csv:
+        return list(DEFAULT_BASELINE_TASK_ENUMS)
+    tasks: list[TaskName] = []
+    for value in csv.split(","):
+        task_str = value.strip()
+        if not task_str:
+            continue
+        tasks.append(TaskName.parse(task_str))
+    return tasks
+def _bool(value: bool) -> str:
+    return "true" if value else "false"
+def _single_line(text: str) -> str:
+    return " ".join(text.replace("\t", " ").splitlines()).strip()
+def _command_from_dict(payload: dict[str, Any]) -> tuple[str | None, str | None]:
+    command_value = payload.get("command")
+    command = command_value.strip() if isinstance(command_value, str) else ""
+    if not command:
+        return None, None
+    reasoning_value = payload.get("reasoning")
+    reasoning = reasoning_value.strip() if isinstance(reasoning_value, str) else ""
+    return command, (reasoning or None)
+def _parse_action_payload(text: str) -> tuple[str | None, str | None]:
+    try:
+        payload = json.loads(text)
+    except json.JSONDecodeError:
+        return None, None
+    if not isinstance(payload, dict):
+        return None, None
+    return _command_from_dict(payload)
+def _iter_decoded_json_objects(text: str) -> Iterator[Any]:
+    i = 0
+    while i < len(text):
+        if text[i] != "{":
+            i += 1
+            continue
+        try:
+            obj, end = _JSON_DECODER.raw_decode(text, i)
+        except json.JSONDecodeError:
+            i += 1
+            continue
+        yield obj
+        i = end
+def _assistant_message_text(message: Any) -> str:
+    content = getattr(message, "content", None)
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content
+    if isinstance(content, list):
+        parts: list[str] = []
+        for part in content:
+            if isinstance(part, dict):
+                text_val = part.get("text")
+                if text_val is not None:
+                    parts.append(str(text_val))
+            else:
+                text_attr = getattr(part, "text", None)
+                if text_attr is not None:
+                    parts.append(str(text_attr))
+        return "\n".join(parts)
+    return str(content)
+def extract_action_payload(llm_response: str) -> tuple[str | None, str | None]:
+    response = llm_response.strip()
+    if not response:
+        return None, None
+    if response.startswith("```"):
+        lines = response.split("\n")
+        if len(lines) > 2:
+            response = "\n".join(lines[1:-1]).strip()
+    direct_command, direct_reasoning = _parse_action_payload(response)
+    if direct_command:
+        return direct_command, direct_reasoning
+    for obj in _iter_decoded_json_objects(response):
+        if isinstance(obj, dict):
+            embedded_command, embedded_reasoning = _command_from_dict(obj)
+            if embedded_command:
+                return embedded_command, embedded_reasoning
+    for match in re.finditer(r"\{[^{}]*\}", response, flags=re.DOTALL):
+        embedded_command, embedded_reasoning = _parse_action_payload(match.group(0))
+        if embedded_command:
+            return embedded_command, embedded_reasoning
+    first_line = response.split("\n")[0].strip()
+    return _parse_action_payload(first_line)
+def extract_command(llm_response: str) -> str | None:
+    return extract_action_payload(llm_response)[0]
+def extract_reasoning(llm_response: str) -> str | None:
+    return extract_action_payload(llm_response)[1]
+def _sanitize_reasoning_for_step(reasoning: str) -> str:
+    sanitized = _single_line(reasoning)
+    sanitized = sanitized.replace(" reward=", " reward:")
+    sanitized = sanitized.replace(" done=", " done:")
+    sanitized = sanitized.replace(" error=", " error:")
+    return sanitized[:160]
+def _format_step_action(command: str, reasoning: str | None) -> str:
+    action = _single_line(command)
+    if not reasoning:
+        return action
+    sanitized_reasoning = _sanitize_reasoning_for_step(reasoning)
+    if not sanitized_reasoning:
+        return action
+    return f"{action} | reasoning={sanitized_reasoning}"
+def _episode_score(rewards: list[float]) -> float:
+    # Score is terminal task progress signal and must stay normalized for evaluator checks.
+    if not rewards:
+        return 0.0
+    return max(0.0, min(1.0, float(rewards[-1])))
+def _format_end_line(
+    *, success: bool, steps: int, score: float, rewards: list[float]
+ ) -> str:
+    rewards_csv = ",".join(f"{reward:.2f}" for reward in rewards)
+    return (
+        f"[END]   success={_bool(success)} steps={steps} "
+        f"score={score:.2f} rewards={rewards_csv}"
+    )
+def _task_symptom_block(task_name: TaskName) -> str:
+    return "\n".join(f"- {symptom}" for symptom in TASK_SYMPTOMS[task_name])
+def _attempt_history_block(attempt_history: list[dict[str, Any]]) -> str:
+    if not attempt_history:
+        return "- none"
+    lines: list[str] = []
+    for attempt in attempt_history:
+        command = _single_line(str(attempt["command"]))[:120]
+        reasoning = _single_line(str(attempt.get("reasoning") or ""))[:120]
+        output_preview = _single_line(str(attempt.get("output") or ""))[:140]
+        error = attempt.get("error")
+        error_text = _single_line(str(error))[:80] if error else "none"
+        line = f"- step {attempt['step']}: command={command}; error={error_text}"
+        if reasoning:
+            line = f"{line}; reasoning={reasoning}"
+        if output_preview:
+            line = f"{line}; output={output_preview}"
+        lines.append(line)
+    return "\n".join(lines)
+def build_prompt(
+    obs: Observation,
+    step_num: int,
+    task_name: TaskName,
+    attempt_history: list[dict[str, Any]],
+) -> str:
+    return (
+        f"Step {step_num}. Current system state:\n\n"
+        "TASK SYMPTOMS:\n"
+        f"{_task_symptom_block(task_name)}\n\n"
+        "PREVIOUS ATTEMPTS:\n"
+        f"{_attempt_history_block(attempt_history)}\n\n"
+        "METRICS:\n"
+        f"- Gateway success rate: {obs.metrics.gateway_success_rate:.1%}\n"
+        f"- Gateway P99 latency: {obs.metrics.gateway_p99_latency_ms:.0f}ms\n"
+        f"- Queue depth: {obs.metrics.queue_depth}\n"
+        f"- Worker restarts: {obs.metrics.worker_restart_count}\n"
+        f"- Consumer stall count: {obs.metrics.consumer_stall_count}\n\n"
+        "SERVICE STATUS:\n"
+        f"{obs.process_status}\n\n"
+        "LATEST COMMAND OUTPUT:\n"
+        f"{obs.command_output[:2000]}\n\n"
+        "Solve this over multiple steps as needed. For this step, return only the single next bash command.\n"
+        'Respond with compact JSON where command is required: {"command":"<bash command>","reasoning":"optional concise reason"}.'
+    )
+def _run_episode(
+    client: Any, env: DistributedDebugEnvClient, task_name: TaskName
+) -> None:
+    """Run one episode of `task_name` and print its [START]/[STEP]/[END] log lines.
+    The environment is reset, then on each step the model is asked for the next
+    command, the command is sent to the environment, and the result is logged.
+    The loop ends when the environment reports `done` or the step budget is used up.
+    Any exception is printed as an [ERROR] line and not re-raised; the [END] line is
+    always printed.
+    """
+    messages: list[dict[str, str]] = [{"role": "system", "content": SYSTEM_PROMPT}]
+    rewards: list[float] = []
+    done = False
+    step = 0
+    last_error: str | None = None
+    attempt_history: list[dict[str, Any]] = []
+    print(
+        f"[START] task={task_name.value} env={BENCHMARK} model={MODEL_NAME}", flush=True
+    )
+    task_budget = TASK_MAX_STEPS[task_name]
+    # MAX_STEPS_CAP can only lower the per-task budget; a value <= 0 disables the cap.
+    max_steps = min(task_budget, MAX_STEPS_CAP) if MAX_STEPS_CAP > 0 else task_budget
+    try:
+        obs = env.reset(task_name=task_name.value)
+        while not done and step < max_steps:
+            next_step = step + 1
+            user_prompt = build_prompt(obs, next_step, task_name, attempt_history)
+            messages.append({"role": "user", "content": user_prompt})
+            completion = client.chat.completions.create(
+                model=MODEL_NAME,
+                messages=messages,
+                temperature=TEMPERATURE,
+                **_chat_token_limit_kwargs(),
+            )
+            raw_response = _assistant_message_text(completion.choices[0].message)
+            command, reasoning = extract_action_payload(raw_response)
+            if not command:
+                # Keep the unusable reply in the transcript and add a corrective user
+                # message. The sentinel command is still sent to the environment below,
+                # so this turn counts as a step.
+                messages.append({"role": "assistant", "content": raw_response})
+                messages.append(
+                    {
+                        "role": "user",
+                        "content": (
+                            "No command was provided. Respond with compact JSON where command is required: "
+                            '{"command":"<bash command>","reasoning":"optional concise reason"}.'
+                        ),
+                    }
+                )
+                command = NO_COMMAND_PROVIDED_SENTINEL
+                reasoning = None
+            else:
+                # Record the normalized JSON payload rather than the raw model text.
+                assistant_payload: dict[str, str] = {"command": command}
+                if reasoning:
+                    assistant_payload["reasoning"] = reasoning
+                messages.append(
+                    {"role": "assistant", "content": json.dumps(assistant_payload)}
+                )
+            result = env.step(Action(command=command))
+            obs = result.observation
+            rewards.append(result.reward)
+            done = result.done
+            error_value = result.info.get("error")
+            # None, an empty string and the literal string "None" all mean "no error".
+            last_error = None if error_value in (None, "", "None") else str(error_value)
+            error_field = "null" if last_error is None else _single_line(last_error)
+            attempt_history.append(
+                {
+                    "step": next_step,
+                    "command": command,
+                    "reasoning": reasoning,
+                    "output": obs.command_output,
+                    "error": last_error,
+                }
+            )
+            print(
+                f"[STEP]  step={next_step} action={_format_step_action(command, reasoning)} "
+                f"reward={result.reward:.2f} done={_bool(done)} error={error_field}",
+                flush=True,
+            )
+            # The step counter advances only after the step has been logged.
+            step = next_step
+    except Exception as exc:
+        last_error = str(exc)
+        print(
+            f"[ERROR] task={task_name.value} {type(exc).__name__}: {exc}",
+            flush=True,
+        )
+    finally:
+        # Success requires both a finished episode and a final score of at least 0.95.
+        score = _episode_score(rewards)
+        success = bool(done and score >= 0.95)
+        print(
+            _format_end_line(success=success, steps=step, score=score, rewards=rewards),
+            flush=True,
+        )
+def main() -> None:
+    if not API_KEY:
+        raise RuntimeError("HF_TOKEN (or API_KEY) must be set")
+    tasks = _parse_tasks()
+    from openai import OpenAI
+    client = OpenAI(
+        api_key=API_KEY,
+        base_url=API_BASE_URL,
+        timeout=30.0,
+        max_retries=2,
+    )
+    env = DistributedDebugEnvClient(base_url=ENV_URL)
+    try:
+        for task_name in tasks:
+            _run_episode(client, env, task_name)
+    finally:
+        env.close()
+if __name__ == "__main__":
+    main()

mesh/auth/bun.lock ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "lockfileVersion": 1,
+  "configVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "auth",
+      "devDependencies": {
+        "bun-types": "latest",
+      },
+    },
+  },
+  "packages": {
+    "@types/node": ["@types/node@25.5.2", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg=="],
+    "bun-types": ["bun-types@1.3.11", "", { "dependencies": { "@types/node": "*" } }, "sha512-1KGPpoxQWl9f6wcZh57LvrPIInQMn2TQ7jsgxqpRzg+l0QPOFvJVH7HmvHo/AiPgwXy+/Thf6Ov3EdVn1vOabg=="],
+    "undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],
+  }
+}

mesh/auth/config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "delay_ms": 200
+}

mesh/auth/index.ts ADDED Viewed

	@@ -0,0 +1,83 @@

+import { existsSync } from "node:fs";
+type AuthConfig = {
+  delay_ms: number;
+};
+// Root of the mesh tree; taken from MESH_ROOT when set, otherwise /mesh.
+const MESH_ROOT = process.env.MESH_ROOT || "/mesh";
+const CONFIG_PATH = `${MESH_ROOT}/auth/config.json`;
+// File whose contents are reported as the `pattern` field of every log line.
+const CURRENT_TASK_PATH = "/tmp/current_task";
+const PORT = 3001;
+// Current configuration; assigned by loadConfig() before the server starts.
+let config: AuthConfig;
+// Promise that resolves after `ms` milliseconds.
+const sleep = (ms: number) => new Promise((resolve) => setTimeout(resolve, ms));
+// Read and parse the JSON file at `path`; return `fallback` when reading or parsing throws.
+// The parsed value is cast to T without any runtime validation of its shape.
+const readJson = async <T>(path: string, fallback: T): Promise<T> => {
+  try {
+    return (await Bun.file(path).json()) as T;
+  } catch {
+    return fallback;
+  }
+};
+// Return the trimmed contents of CURRENT_TASK_PATH, or "unknown" when the file is
+// missing, empty after trimming, or cannot be read.
+const currentPattern = async (): Promise<string> => {
+  if (!existsSync(CURRENT_TASK_PATH)) return "unknown";
+  try {
+    return (await Bun.file(CURRENT_TASK_PATH).text()).trim() || "unknown";
+  } catch {
+    return "unknown";
+  }
+};
+// Write one JSON log line via console.log with the fields ts, level, service, event
+// and pattern. Keys in `details` are spread last, so they can override those fields.
+const log = async (event: string, level: "INFO" | "WARN" | "ERROR", details: Record<string, unknown> = {}) => {
+  console.log(
+    JSON.stringify({
+      ts: new Date().toISOString(),
+      level,
+      service: "auth",
+      event,
+      // Re-read on every call, so the value follows changes to the task file.
+      pattern: await currentPattern(),
+      ...details,
+    }),
+  );
+};
+// Load the auth config from CONFIG_PATH, falling back to a 200 ms delay when the file
+// cannot be read or parsed.
+const loadConfig = async () => {
+  config = await readJson<AuthConfig>(CONFIG_PATH, { delay_ms: 200 });
+};
+// Initial load at startup, before the server begins handling requests.
+await loadConfig();
+// SIGHUP reloads the config from disk without restarting the process.
+process.on("SIGHUP", async () => {
+  await loadConfig();
+  await log("config_reloaded", "INFO", { config });
+});
+// HTTP server on PORT: GET /health reports the configured delay, POST /verify waits
+// `delay_ms` before answering, and every other request gets a 404.
+Bun.serve({
+  port: PORT,
+  async fetch(req) {
+    const url = new URL(req.url);
+    if (req.method === "GET" && url.pathname === "/health") {
+      return Response.json({ status: "ok", delay_ms: config.delay_ms });
+    }
+    if (req.method === "POST" && url.pathname === "/verify") {
+      const started = Date.now();
+      await log("verify_start", "INFO", { delay_ms: config.delay_ms });
+      // Negative delays are clamped to zero.
+      await sleep(Math.max(0, config.delay_ms));
+      // Time since the request started, including the verify_start log call above.
+      const elapsed = Date.now() - started;
+      await log("verify_complete", "INFO", {
+        delay_ms: config.delay_ms,
+        elapsed_ms: elapsed,
+      });
+      return Response.json({ verified: true, elapsed_ms: elapsed });
+    }
+    return new Response("not found", { status: 404 });
+  },
+});

mesh/auth/package.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "name": "auth",
+  "version": "1.0.0",
+  "scripts": {
+    "start": "bun run index.ts"
+  },
+  "devDependencies": {
+    "bun-types": "latest"
+  }
+}

mesh/auth/tsconfig.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "Bundler",
+    "strict": true,
+    "skipLibCheck": true,
+    "types": ["bun-types"]
+  }
+}

mesh/gateway/blocked_routes.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "blocked": []
+}

mesh/gateway/bun.lock ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "lockfileVersion": 1,
+  "configVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "gateway",
+      "dependencies": {
+        "ioredis": "^5.4.1",
+      },
+      "devDependencies": {
+        "bun-types": "latest",
+      },
+    },
+  },
+  "packages": {
+    "@ioredis/commands": ["@ioredis/commands@1.5.1", "", {}, "sha512-JH8ZL/ywcJyR9MmJ5BNqZllXNZQqQbnVZOqpPQqE1vHiFgAw4NHbvE0FOduNU8IX9babitBT46571OnPTT0Zcw=="],
+    "@types/node": ["@types/node@25.5.2", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg=="],
+    "bun-types": ["bun-types@1.3.11", "", { "dependencies": { "@types/node": "*" } }, "sha512-1KGPpoxQWl9f6wcZh57LvrPIInQMn2TQ7jsgxqpRzg+l0QPOFvJVH7HmvHo/AiPgwXy+/Thf6Ov3EdVn1vOabg=="],
+    "cluster-key-slot": ["cluster-key-slot@1.1.2", "", {}, "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA=="],
+    "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="],
+    "denque": ["denque@2.1.0", "", {}, "sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw=="],
+    "ioredis": ["ioredis@5.10.1", "", { "dependencies": { "@ioredis/commands": "1.5.1", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-HuEDBTI70aYdx1v6U97SbNx9F1+svQKBDo30o0b9fw055LMepzpOOd0Ccg9Q6tbqmBSJaMuY0fB7yw9/vjBYCA=="],
+    "lodash.defaults": ["lodash.defaults@4.2.0", "", {}, "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ=="],
+    "lodash.isarguments": ["lodash.isarguments@3.1.0", "", {}, "sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg=="],
+    "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="],
+    "redis-errors": ["redis-errors@1.2.0", "", {}, "sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w=="],
+    "redis-parser": ["redis-parser@3.0.0", "", { "dependencies": { "redis-errors": "^1.0.0" } }, "sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A=="],
+    "standard-as-callback": ["standard-as-callback@2.1.0", "", {}, "sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A=="],
+    "undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],
+  }
+}

mesh/gateway/config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "auth_timeout_ms": 500
+}

mesh/gateway/index.ts ADDED Viewed

	@@ -0,0 +1,213 @@

+import { existsSync } from "node:fs";
+import Redis from "ioredis";
+// Gateway settings loaded from CONFIG_PATH.
+type GatewayConfig = {
+  // Abort deadline, in milliseconds, applied to the auth /verify call.
+  auth_timeout_ms: number;
+};
+// Shape of the service registry file read from REGISTRY_PATH.
+type Registry = {
+  services: {
+    auth: { host: string; port: number; protocol: string };
+    redis: { host: string; port: number; protocol: string };
+    // The worker entry is the only one whose port may be null.
+    worker: { host: string; port: number | null; protocol: string };
+  };
+};
+// Shape of the route policy file; the gateway looks for "gateway->redis" in `blocked`.
+type BlockedRoutes = {
+  blocked: string[];
+};
+// Root directory of the mesh tree; overridable through the MESH_ROOT environment variable.
+const MESH_ROOT = process.env.MESH_ROOT || "/mesh";
+// Files read by the gateway: its own config, the route policy, and the service registry.
+const CONFIG_PATH = `${MESH_ROOT}/gateway/config.json`;
+const BLOCKED_ROUTES_PATH = `${MESH_ROOT}/gateway/blocked_routes.json`;
+const REGISTRY_PATH = `${MESH_ROOT}/registry.json`;
+// File whose trimmed contents are attached to every log line as `pattern`.
+const CURRENT_TASK_PATH = "/tmp/current_task";
+// TCP port the gateway HTTP server listens on.
+const PORT = 3000;
+// Runtime state (re)assigned by loadRuntimeState().
+let config: GatewayConfig;
+let registry: Registry;
+let redisClient: Redis;
+// Rolling windows of the most recent request outcomes (1 = success, 0 = failure)
+// and latencies in milliseconds; each is capped at WINDOW_SIZE entries.
+const successWindow: number[] = [];
+const latencyWindow: number[] = [];
+const WINDOW_SIZE = 20;
+// Load and parse a JSON file, substituting `fallback` if anything goes wrong.
+const readJson = async <T>(path: string, fallback: T): Promise<T> => {
+  try {
+    const file = Bun.file(path);
+    const data = await file.json();
+    return data as T;
+  } catch {
+    return fallback;
+  }
+};
+// Name stored in CURRENT_TASK_PATH, or "unknown" when the file is absent,
+// unreadable, or empty after trimming.
+const currentPattern = async (): Promise<string> => {
+  const missing = !existsSync(CURRENT_TASK_PATH);
+  if (missing) return "unknown";
+  let label = "";
+  try {
+    label = (await Bun.file(CURRENT_TASK_PATH).text()).trim();
+  } catch {
+    return "unknown";
+  }
+  return label ? label : "unknown";
+};
+// Write one JSON log line for the gateway; keys in `details` are spread last
+// and therefore override the standard fields on collision.
+const log = async (event: string, level: "INFO" | "WARN" | "ERROR", details: Record<string, unknown> = {}) => {
+  console.log(
+    JSON.stringify({
+      ts: new Date().toISOString(),
+      level,
+      service: "gateway",
+      event,
+      pattern: await currentPattern(),
+      ...details,
+    }),
+  );
+};
+// Reload the gateway config and the service registry (with built-in defaults
+// when a file cannot be read), then replace the Redis client with one pointing
+// at the registry's redis entry.
+const loadRuntimeState = async () => {
+  const defaultConfig: GatewayConfig = { auth_timeout_ms: 500 };
+  const defaultRegistry: Registry = {
+    services: {
+      auth: { host: "localhost", port: 3001, protocol: "http" },
+      redis: { host: "localhost", port: 6379, protocol: "tcp" },
+      worker: { host: "localhost", port: null, protocol: "internal" },
+    },
+  };
+  config = await readJson<GatewayConfig>(CONFIG_PATH, defaultConfig);
+  registry = await readJson<Registry>(REGISTRY_PATH, defaultRegistry);
+  // Drop the previous connection, if any, before opening a new one.
+  redisClient?.disconnect();
+  const { host, port } = registry.services.redis;
+  redisClient = new Redis({ host, port, maxRetriesPerRequest: 1, lazyConnect: false });
+};
+// fetch() that is aborted through an AbortController once `timeoutMs` elapses.
+// The timer is always cleared, whether the request settles or throws.
+const fetchWithTimeout = async (
+  url: string,
+  init: RequestInit,
+  timeoutMs: number,
+): Promise<Response> => {
+  const aborter = new AbortController();
+  const handle = setTimeout(() => {
+    aborter.abort();
+  }, timeoutMs);
+  try {
+    const response = await fetch(url, { ...init, signal: aborter.signal });
+    return response;
+  } finally {
+    clearTimeout(handle);
+  }
+};
+// 99th-percentile pick from `values`: sort a copy ascending and take the
+// element at floor(0.99 * n), clamped to the last index. Empty input gives 0.
+const percentile99 = (values: number[]): number => {
+  const count = values.length;
+  if (count === 0) return 0;
+  const ascending = values.slice().sort((lhs, rhs) => lhs - rhs);
+  const rank = Math.floor(0.99 * count);
+  return ascending[rank < count - 1 ? rank : count - 1];
+};
+// Append one request result to both rolling windows and evict the oldest
+// sample from any window that has grown beyond WINDOW_SIZE.
+const recordOutcome = (success: boolean, elapsedMs: number) => {
+  successWindow.push(Number(success));
+  latencyWindow.push(elapsedMs);
+  for (const samples of [successWindow, latencyWindow]) {
+    if (samples.length > WINDOW_SIZE) samples.shift();
+  }
+};
+// Fraction of successful requests in the rolling window; 1 while it is empty.
+const getSuccessRate = (): number => {
+  const total = successWindow.length;
+  if (total === 0) return 1;
+  let successes = 0;
+  for (const flag of successWindow) {
+    successes += flag;
+  }
+  return successes / total;
+};
+// Load config and registry and open the Redis client before the server below is created.
+await loadRuntimeState();
+// SIGHUP reloads config and registry, which also replaces the Redis client.
+process.on("SIGHUP", async () => {
+  await loadRuntimeState();
+  await log("config_reloaded", "INFO", { config });
+});
+// Gateway HTTP server.
+//   GET  /health  -> rolling success rate and p99 latency.
+//   POST /process -> check the route policy, verify with auth, enqueue one job.
+//   anything else -> 404.
+// Every /process request records exactly one outcome in the rolling windows.
+Bun.serve({
+  port: PORT,
+  async fetch(req) {
+    const url = new URL(req.url);
+    if (req.method === "GET" && url.pathname === "/health") {
+      return Response.json({
+        status: "ok",
+        success_rate: getSuccessRate(),
+        p99_latency_ms: percentile99(latencyWindow),
+      });
+    }
+    if (req.method === "POST" && url.pathname === "/process") {
+      const start = Date.now();
+      try {
+        // The policy file is re-read on every request, so edits apply without a reload.
+        const blockedRoutes = await readJson<BlockedRoutes>(BLOCKED_ROUTES_PATH, { blocked: [] });
+        if (blockedRoutes.blocked.includes("gateway->redis")) {
+          await log("route_blocked", "ERROR", {
+            route: "gateway->redis",
+            policy_file: BLOCKED_ROUTES_PATH,
+          });
+          throw new Error("redis_unreachable");
+        }
+        const authUrl = `http://${registry.services.auth.host}:${registry.services.auth.port}/verify`;
+        const authResponse = await fetchWithTimeout(
+          authUrl,
+          {
+            method: "POST",
+            headers: { "content-type": "application/json" },
+            body: JSON.stringify({ token: "allow" }),
+          },
+          config.auth_timeout_ms,
+        );
+        if (!authResponse.ok) {
+          throw new Error(`auth_status_${authResponse.status}`);
+        }
+        const job = JSON.stringify({ id: crypto.randomUUID(), payload: "ok" });
+        await redisClient.rpush("job_queue", job);
+        const elapsedMs = Date.now() - start;
+        // The queue depth is only a log hint. Read it best-effort and before
+        // the outcome is recorded: the job is already enqueued, so a failing
+        // LLEN must not reach the catch block below, which would record a
+        // second (failed) outcome for this request and answer 500.
+        let queueDepthHint: number | null = null;
+        try {
+          queueDepthHint = await redisClient.llen("job_queue");
+        } catch {
+          queueDepthHint = null;
+        }
+        recordOutcome(true, elapsedMs);
+        await log("request_complete", "INFO", {
+          elapsed_ms: elapsedMs,
+          upstream: "auth",
+          queue_depth_hint: queueDepthHint,
+        });
+        return Response.json({ ok: true, elapsed_ms: elapsedMs });
+      } catch (error) {
+        const elapsedMs = Date.now() - start;
+        recordOutcome(false, elapsedMs);
+        const reason = error instanceof Error ? error.message : String(error);
+        // "AbortError" is an error *name*, so check the name as well as the
+        // message text; the message checks are kept for compatibility.
+        const abortedByName = error instanceof Error && error.name === "AbortError";
+        if (abortedByName || reason === "AbortError" || reason.includes("aborted") || reason.includes("timeout")) {
+          await log("upstream_timeout", "ERROR", {
+            elapsed_ms: elapsedMs,
+            upstream: "auth",
+            threshold_ms: config.auth_timeout_ms,
+          });
+        }
+        await log("request_failed", "ERROR", {
+          path: "/process",
+          status: 500,
+          reason,
+        });
+        return new Response(JSON.stringify({ error: reason }), {
+          status: 500,
+          headers: { "content-type": "application/json" },
+        });
+      }
+    }
+    return new Response("not found", { status: 404 });
+  },
+});

mesh/gateway/package.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "name": "gateway",
+  "version": "1.0.0",
+  "scripts": {
+    "start": "bun run index.ts"
+  },
+  "dependencies": {
+    "ioredis": "^5.4.1"
+  },
+  "devDependencies": {
+    "bun-types": "latest"
+  }
+}

mesh/gateway/tsconfig.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "Bundler",
+    "strict": true,
+    "skipLibCheck": true,
+    "types": ["bun-types"]
+  }
+}

mesh/registry.json ADDED Viewed

	@@ -0,0 +1,19 @@

+{
+  "services": {
+    "auth": {
+      "host": "localhost",
+      "port": 3001,
+      "protocol": "http"
+    },
+    "redis": {
+      "host": "localhost",
+      "port": 6379,
+      "protocol": "tcp"
+    },
+    "worker": {
+      "host": "localhost",
+      "port": null,
+      "protocol": "internal"
+    }
+  }
+}

mesh/worker/bun.lock ADDED Viewed

	@@ -0,0 +1,44 @@

+{
+  "lockfileVersion": 1,
+  "configVersion": 1,
+  "workspaces": {
+    "": {
+      "name": "worker",
+      "dependencies": {
+        "ioredis": "^5.4.1",
+      },
+      "devDependencies": {
+        "bun-types": "latest",
+      },
+    },
+  },
+  "packages": {
+    "@ioredis/commands": ["@ioredis/commands@1.5.1", "", {}, "sha512-JH8ZL/ywcJyR9MmJ5BNqZllXNZQqQbnVZOqpPQqE1vHiFgAw4NHbvE0FOduNU8IX9babitBT46571OnPTT0Zcw=="],
+    "@types/node": ["@types/node@25.5.2", "", { "dependencies": { "undici-types": "~7.18.0" } }, "sha512-tO4ZIRKNC+MDWV4qKVZe3Ql/woTnmHDr5JD8UI5hn2pwBrHEwOEMZK7WlNb5RKB6EoJ02gwmQS9OrjuFnZYdpg=="],
+    "bun-types": ["bun-types@1.3.11", "", { "dependencies": { "@types/node": "*" } }, "sha512-1KGPpoxQWl9f6wcZh57LvrPIInQMn2TQ7jsgxqpRzg+l0QPOFvJVH7HmvHo/AiPgwXy+/Thf6Ov3EdVn1vOabg=="],
+    "cluster-key-slot": ["cluster-key-slot@1.1.2", "", {}, "sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA=="],
+    "debug": ["debug@4.4.3", "", { "dependencies": { "ms": "^2.1.3" } }, "sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA=="],
+    "denque": ["denque@2.1.0", "", {}, "sha512-HVQE3AAb/pxF8fQAoiqpvg9i3evqug3hoiwakOyZAwJm+6vZehbkYXZ0l4JxS+I3QxM97v5aaRNhj8v5oBhekw=="],
+    "ioredis": ["ioredis@5.10.1", "", { "dependencies": { "@ioredis/commands": "1.5.1", "cluster-key-slot": "^1.1.0", "debug": "^4.3.4", "denque": "^2.1.0", "lodash.defaults": "^4.2.0", "lodash.isarguments": "^3.1.0", "redis-errors": "^1.2.0", "redis-parser": "^3.0.0", "standard-as-callback": "^2.1.0" } }, "sha512-HuEDBTI70aYdx1v6U97SbNx9F1+svQKBDo30o0b9fw055LMepzpOOd0Ccg9Q6tbqmBSJaMuY0fB7yw9/vjBYCA=="],
+    "lodash.defaults": ["lodash.defaults@4.2.0", "", {}, "sha512-qjxPLHd3r5DnsdGacqOMU6pb/avJzdh9tFX2ymgoZE27BmjXrNy/y4LoaiTeAb+O3gL8AfpJGtqfX/ae2leYYQ=="],
+    "lodash.isarguments": ["lodash.isarguments@3.1.0", "", {}, "sha512-chi4NHZlZqZD18a0imDHnZPrDeBbTtVN7GXMwuGdRH9qotxAjYs3aVLKc7zNOG9eddR5Ksd8rvFEBc9SsggPpg=="],
+    "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="],
+    "redis-errors": ["redis-errors@1.2.0", "", {}, "sha512-1qny3OExCf0UvUV/5wpYKf2YwPcOqXzkwKKSmKHiE6ZMQs5heeE/c8eXK+PNllPvmjgAbfnsbpkGZWy8cBpn9w=="],
+    "redis-parser": ["redis-parser@3.0.0", "", { "dependencies": { "redis-errors": "^1.0.0" } }, "sha512-DJnGAeenTdpMEH6uAJRK/uiyEIH9WVsUmoLwzudwGJUwZPp80PDBWPHXSAGNPwNvIXAbe7MSUB1zQFugFml66A=="],
+    "standard-as-callback": ["standard-as-callback@2.1.0", "", {}, "sha512-qoRRSyROncaz1z0mvYqIE4lCd9p2R90i6GxW3uZv5ucSu8tU7B5HXUP1gG8pVZsYNVaXjk8ClXHPttLyxAL48A=="],
+    "undici-types": ["undici-types@7.18.2", "", {}, "sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w=="],
+  }
+}

mesh/worker/config.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "db_pool_size": 10,
+  "db_write_delay_ms": 0
+}

mesh/worker/index.ts ADDED Viewed

	@@ -0,0 +1,198 @@

+import { existsSync } from "node:fs";
+import Redis from "ioredis";
+import { Database } from "bun:sqlite";
+// Worker settings loaded from CONFIG_PATH.
+type WorkerConfig = {
+  // Multiplier in estimatedProcessingRate(); also reported as `pool_size` in logs.
+  db_pool_size: number;
+  // Milliseconds the loop sleeps before each database write.
+  db_write_delay_ms: number;
+};
+// Root directory of the mesh tree; overridable through the MESH_ROOT environment variable.
+const MESH_ROOT = process.env.MESH_ROOT || "/mesh";
+const CONFIG_PATH = `${MESH_ROOT}/worker/config.json`;
+// File whose trimmed contents are attached to every log line as `pattern`.
+const CURRENT_TASK_PATH = "/tmp/current_task";
+// Redis key the loop must hold (SET ... NX) while it handles one job.
+const LOCK_KEY = "LOCK:job_processor";
+// Enqueue rate the estimated processing rate is compared against; defaults to 3.0 per second.
+const ENQUEUE_RATE_PER_S = Number(process.env.ENQUEUE_RATE_PER_S || "3.0");
+// Active configuration; assigned by loadConfig().
+let config: WorkerConfig;
+// Cleared by SIGTERM/SIGINT to let processLoop() finish.
+let running = true;
+// Delay after a parse failure; doubles up to 10000 ms and resets to 1000 after a successful write.
+let backoffMs = 1000;
+// Unlike the gateway, the worker connects to a fixed localhost:6379 rather than the registry entry.
+const redis = new Redis({ host: "localhost", port: 6379, maxRetriesPerRequest: 1 });
+// SQLite store for processed jobs; the table is created on first start.
+const db = new Database("/tmp/worker_jobs.sqlite");
+db.exec(
+  "CREATE TABLE IF NOT EXISTS jobs (id TEXT PRIMARY KEY, payload TEXT, created_at TEXT DEFAULT CURRENT_TIMESTAMP)",
+);
+// Promise that settles once `ms` milliseconds have passed.
+const sleep = (ms: number) =>
+  new Promise((wake) => {
+    setTimeout(wake, ms);
+  });
+// Read `path` as JSON; any read or parse failure yields `fallback` instead.
+const readJson = async <T>(path: string, fallback: T): Promise<T> => {
+  let result: T = fallback;
+  try {
+    result = (await Bun.file(path).json()) as T;
+  } catch {
+    result = fallback;
+  }
+  return result;
+};
+// Trimmed contents of CURRENT_TASK_PATH, falling back to "unknown" when the
+// file does not exist, cannot be read, or is blank.
+const currentPattern = async (): Promise<string> => {
+  const fallback = "unknown";
+  if (!existsSync(CURRENT_TASK_PATH)) return fallback;
+  try {
+    const text = await Bun.file(CURRENT_TASK_PATH).text();
+    const trimmed = text.trim();
+    return trimmed || fallback;
+  } catch {
+    return fallback;
+  }
+};
+// Add one to the integer stored in the file at `path` and return the new
+// value. A missing, unreadable, or non-numeric file counts as 0.
+const incrementCounter = async (path: string): Promise<number> => {
+  let previous: number;
+  try {
+    const text = await Bun.file(path).text();
+    previous = Number(text) || 0;
+  } catch {
+    previous = 0;
+  }
+  const updated = previous + 1;
+  await Bun.write(path, String(updated));
+  return updated;
+};
+// Emit one JSON log line for the worker. `details` is spread last, so its
+// keys win over the standard fields when names collide.
+const log = async (event: string, level: "INFO" | "WARN" | "ERROR", details: Record<string, unknown> = {}) => {
+  const ts = new Date().toISOString();
+  const pattern = await currentPattern();
+  const line = JSON.stringify({ ts, level, service: "worker", event, pattern, ...details });
+  console.log(line);
+};
+// Refresh `config` from CONFIG_PATH; pool size 10 and no write delay are used
+// when the file cannot be read.
+const loadConfig = async () => {
+  const defaults: WorkerConfig = { db_pool_size: 10, db_write_delay_ms: 0 };
+  config = await readJson<WorkerConfig>(CONFIG_PATH, defaults);
+};
+// Estimated jobs per second from the current config: each job is assumed to
+// cost db_write_delay_ms + 120 ms (never less than 50 ms), scaled by db_pool_size.
+const estimatedProcessingRate = (): number => {
+  const perJobMs = Math.max(50, config.db_write_delay_ms + 120);
+  const perSlotRate = 1000 / perJobMs;
+  return config.db_pool_size * perSlotRate;
+};
+// Main consumer loop: while `running`, take the job lock, pop one entry from
+// "job_queue", store it in SQLite, log progress, and release the lock.
+const processLoop = async () => {
+  while (running) {
+    try {
+      // SET ... EX 30 NX: the key is created only if absent and expires after 30 s.
+      const acquired = await redis.set(LOCK_KEY, String(process.pid), "EX", 30, "NX");
+      if (!acquired) {
+        // The lock already exists: count the stall, log it, and retry shortly.
+        const stallCount = await incrementCounter("/tmp/consumer_stall_count");
+        await log("lock_acquire_failed", "WARN", {
+          lock_key: LOCK_KEY,
+          stall_count: stallCount,
+        });
+        await sleep(120);
+        continue;
+      }
+      const raw = await redis.lpop("job_queue");
+      if (!raw) {
+        // Nothing to do: release the lock and poll again.
+        await redis.del(LOCK_KEY);
+        await sleep(120);
+        continue;
+      }
+      let parsed: { id?: string; payload?: unknown };
+      try {
+        parsed = JSON.parse(raw) as { id?: string; payload?: unknown };
+      } catch (error) {
+        // Unparseable entry: bump the restart counter, log the failure, put the
+        // entry back at the head of the queue, release the lock, and back off.
+        const restartCount = await incrementCounter("/tmp/worker_restart_count");
+        await log("job_dequeued", "INFO", { raw });
+        await log("parse_failed", "ERROR", {
+          error: error instanceof Error ? error.message : String(error),
+          raw,
+        });
+        await log("consumer_backoff", "WARN", {
+          restart_count: restartCount,
+          backoff_ms: backoffMs,
+        });
+        // LPUSH returns the entry to the head, the same end LPOP reads from.
+        await redis.lpush("job_queue", raw);
+        await redis.del(LOCK_KEY);
+        await sleep(backoffMs);
+        // Exponential backoff, capped at 10 s.
+        backoffMs = Math.min(10000, backoffMs * 2);
+        continue;
+      }
+      const start = Date.now();
+      // Optional configured delay before the database write.
+      if (config.db_write_delay_ms > 0) {
+        await sleep(config.db_write_delay_ms);
+      }
+      // Jobs without an id get a fresh UUID; a missing payload is stored as JSON null.
+      db.query("INSERT OR REPLACE INTO jobs (id, payload) VALUES (?, ?)").run(
+        parsed.id || crypto.randomUUID(),
+        JSON.stringify(parsed.payload ?? null),
+      );
+      // A successful write resets the parse-failure backoff.
+      backoffMs = 1000;
+      const elapsedMs = Date.now() - start;
+      const queueDepth = Number(await redis.llen("job_queue"));
+      await log("db_write_complete", "INFO", {
+        elapsed_ms: elapsedMs,
+        pool_size: config.db_pool_size,
+      });
+      const processingRate = estimatedProcessingRate();
+      // Warn when the estimated rate is below the enqueue rate or the backlog exceeds 10.
+      if (processingRate < ENQUEUE_RATE_PER_S || queueDepth > 10) {
+        await log("throughput_lag", "WARN", {
+          processing_rate_per_s: Number(processingRate.toFixed(2)),
+          enqueue_rate_per_s: ENQUEUE_RATE_PER_S,
+          queue_depth: queueDepth,
+        });
+      }
+      await log("job_processed", "INFO", {
+        job_id: parsed.id || null,
+        queue_depth: queueDepth,
+      });
+      await redis.del(LOCK_KEY);
+      await sleep(80);
+    } catch (error) {
+      // NOTE(review): this path does not delete LOCK_KEY, so an error raised
+      // after the lock was taken leaves it in place until the 30 s expiry.
+      // Confirm whether that is intended.
+      await log("loop_error", "ERROR", {
+        error: error instanceof Error ? error.message : String(error),
+      });
+      await sleep(250);
+    }
+  }
+};
+// Create the two counter files with "0" if they do not exist yet.
+if (!existsSync("/tmp/worker_restart_count")) await Bun.write("/tmp/worker_restart_count", "0");
+if (!existsSync("/tmp/consumer_stall_count")) await Bun.write("/tmp/consumer_stall_count", "0");
+// Record this process's pid in /tmp/worker.pid.
+await Bun.write("/tmp/worker.pid", String(process.pid));
+await loadConfig();
+// SIGHUP re-reads the config file and logs the values now in effect.
+process.on("SIGHUP", async () => {
+  await loadConfig();
+  await log("config_reloaded", "INFO", { config });
+});
+// SIGTERM/SIGINT only clear `running`; the loop exits after its current iteration.
+process.on("SIGTERM", () => {
+  running = false;
+});
+process.on("SIGINT", () => {
+  running = false;
+});
+await processLoop();
+// After the loop returns: close Redis (hard disconnect if quit() throws), then the database.
+try {
+  await redis.quit();
+} catch {
+  redis.disconnect();
+}
+db.close();

mesh/worker/job_generator.ts ADDED Viewed

	@@ -0,0 +1,94 @@

+import Redis from "ioredis";
+// Settings loaded from CONFIG_PATH for the job generator.
+type JobGeneratorConfig = {
+  // Pause between two enqueues, in milliseconds (loadConfig enforces a minimum of 10).
+  interval_ms: number;
+};
+// Redis connection used for enqueueing; fixed to localhost:6379.
+const redis = new Redis({ host: "localhost", port: 6379, maxRetriesPerRequest: 1 });
+// Root directory of the mesh tree; overridable through the MESH_ROOT environment variable.
+const MESH_ROOT = process.env.MESH_ROOT || "/mesh";
+const CONFIG_PATH = `${MESH_ROOT}/worker/job_generator_config.json`;
+// Cleared by SIGTERM/SIGINT to let loop() finish.
+let running = true;
+// Current pause between enqueues; overwritten by loadConfig().
+let intervalMs = 333;
+// Wait for `ms` milliseconds.
+const sleep = (ms: number) =>
+  new Promise((finish) => {
+    setTimeout(finish, ms);
+  });
+// JSON-decode the file at `path`, or return `fallback` when that fails.
+const readJson = async <T>(path: string, fallback: T): Promise<T> => {
+  try {
+    const decoded = await Bun.file(path).json();
+    return decoded as T;
+  } catch {
+    return fallback;
+  }
+};
+// Refresh `intervalMs` from CONFIG_PATH. A missing, zero, or non-numeric
+// value becomes 333; the result is never below 10 ms.
+const loadConfig = async () => {
+  const defaults: JobGeneratorConfig = { interval_ms: 333 };
+  const loaded = await readJson<JobGeneratorConfig>(CONFIG_PATH, defaults);
+  const requested = Number(loaded.interval_ms) || 333;
+  intervalMs = Math.max(10, requested);
+};
+// Producer loop: while `running`, push one "normal" job onto "job_queue",
+// log the result, and sleep `intervalMs` before the next one.
+const loop = async () => {
+  // Print one JSON log line for the generator.
+  const emit = (level: "INFO" | "ERROR", event: string, extra: Record<string, unknown> = {}) => {
+    console.log(
+      JSON.stringify({
+        ts: new Date().toISOString(),
+        level,
+        service: "job_generator",
+        event,
+        ...extra,
+      }),
+    );
+  };
+  while (running) {
+    const id = crypto.randomUUID();
+    const ts = new Date().toISOString();
+    const job = JSON.stringify({ id, payload: { kind: "normal", ts } });
+    try {
+      await redis.rpush("job_queue", job);
+      emit("INFO", "job_enqueued");
+    } catch (error) {
+      // A failed enqueue is logged and the loop carries on.
+      emit("ERROR", "enqueue_failed", {
+        error: error instanceof Error ? error.message : String(error),
+      });
+    }
+    await sleep(intervalMs);
+  }
+};
+// Read the configured interval once before the loop starts.
+await loadConfig();
+// SIGHUP re-reads the config file and logs the interval now in effect.
+process.on("SIGHUP", async () => {
+  await loadConfig();
+  console.log(
+    JSON.stringify({
+      ts: new Date().toISOString(),
+      level: "INFO",
+      service: "job_generator",
+      event: "config_reloaded",
+      interval_ms: intervalMs,
+    }),
+  );
+});
+// SIGTERM/SIGINT only clear `running`; the loop exits after its current iteration.
+process.on("SIGTERM", () => {
+  running = false;
+});
+process.on("SIGINT", () => {
+  running = false;
+});
+await loop();
+// After the loop returns: close Redis, falling back to a hard disconnect if quit() throws.
+try {
+  await redis.quit();
+} catch {
+  redis.disconnect();
+}

mesh/worker/job_generator_config.json ADDED Viewed

	@@ -0,0 +1,3 @@

+{
+  "interval_ms": 10
+}

mesh/worker/package.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "name": "worker",
+  "version": "1.0.0",
+  "scripts": {
+    "start": "bun run index.ts",
+    "job-generator": "bun run job_generator.ts"
+  },
+  "dependencies": {
+    "ioredis": "^5.4.1"
+  },
+  "devDependencies": {
+    "bun-types": "latest"
+  }
+}

mesh/worker/tsconfig.json ADDED Viewed

	@@ -0,0 +1,10 @@

+{
+  "compilerOptions": {
+    "target": "ES2022",
+    "module": "ESNext",
+    "moduleResolution": "Bundler",
+    "strict": true,
+    "skipLibCheck": true,
+    "types": ["bun-types"]
+  }
+}

openenv.yaml ADDED Viewed

	@@ -0,0 +1,86 @@

+name: distributed-systems-debug-env
+version: "1.0.0"
+description: >
+  An RL environment for debugging a distributed job processing pipeline with
+  seven deterministic fault scenarios: cascading timeout, byzantine queue fault,
+  distributed lock starvation, backpressure cascade, route partition, registry
+  corruption, and job generator runaway.
+author: Team Rocket
+tags:
+  - openenv
+  - distributed-systems
+  - debugging
+  - reinforcement-learning
+reward_range: [0.0, 1.0]
+tasks:
+  - name: cascading-timeout
+    description: Synchronous upstream delay exceeds gateway timeout.
+    difficulty: easy
+    max_steps: 15
+  - name: byzantine-queue-fault
+    description: Poison pill message causes worker parse crash-loop.
+    difficulty: medium
+    max_steps: 18
+  - name: distributed-lock-starvation
+    description: Stale distributed lock prevents queue consumption.
+    difficulty: hard
+    max_steps: 20
+  - name: backpressure-cascade
+    description: Consumer throughput lower than producer enqueue rate.
+    difficulty: hard
+    max_steps: 20
+  - name: route-partition
+    description: Route policy blocks gateway to redis communication.
+    difficulty: hard
+    max_steps: 20
+  - name: registry-corruption
+    description: Gateway reloads a corrupted auth registry entry and fails requests.
+    difficulty: medium
+    max_steps: 18
+  - name: job-generator-runaway
+    description: Runaway enqueue rate overwhelms worker throughput and grows backlog.
+    difficulty: hard
+    max_steps: 20
+observation_space:
+  type: object
+  properties:
+    command_output:
+      type: string
+    metrics:
+      type: object
+      properties:
+        gateway_success_rate:
+          type: number
+          minimum: 0.0
+          maximum: 1.0
+        gateway_p99_latency_ms:
+          type: number
+          minimum: 0.0
+        queue_depth:
+          type: integer
+          minimum: 0
+        worker_restart_count:
+          type: integer
+          minimum: 0
+        consumer_stall_count:
+          type: integer
+          minimum: 0
+    process_status:
+      type: object
+      additionalProperties:
+        type: string
+action_space:
+  type: object
+  properties:
+    command:
+      type: string
+      description: Single bash command executed in the debug sandbox.

pyproject.toml ADDED Viewed

	@@ -0,0 +1,32 @@

+[project]
+name = "distributed-systems-debug-env"
+version = "1.0.0"
+description = "OpenEnv distributed systems debugging environment"
+readme = "README.md"
+requires-python = "==3.12.*"
+dependencies = [
+  "fastapi==0.115.0",
+  "uvicorn[standard]==0.30.0",
+  "pydantic>=2.11.0",
+  "openenv-core>=0.2.0",
+  "openai>=1.30.0",
+  "httpx>=0.27.0"
+]
+[project.scripts]
+server = "server.app:main"
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+pythonpath = ["."]
+[build-system]
+requires = ["setuptools>=68", "wheel"]
+build-backend = "setuptools.build_meta"
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["server*"]
+exclude = ["mesh*", "tests*"]

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+fastapi==0.115.0
+uvicorn[standard]==0.30.0
+pydantic>=2.11.0
+openenv-core>=0.2.0
+openai>=1.30.0
+httpx>=0.27.0
+pytest>=8.0.0

server/__init__.py ADDED Viewed

File without changes

server/api.py ADDED Viewed

	@@ -0,0 +1,63 @@

+from contextlib import asynccontextmanager
+from fastapi import FastAPI, HTTPException
+from .constants import TaskName
+from .env import DistributedDebugEnv
+from .models import Action, Observation, StepResult
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    env = DistributedDebugEnv()
+    env.start()
+    app.state.env = env
+    try:
+        yield
+    finally:
+        env.close()
+app = FastAPI(title="Distributed Systems Debug Environment", version="1.0.0", lifespan=lifespan)
+@app.post("/reset", response_model=Observation)
+async def reset(task_name: str | None = None) -> Observation:
+    # Validator and sample inference call /reset without task input.
+    # Use a deterministic default task for reproducible episode bootstrapping.
+    selected_task_name = task_name or TaskName.CASCADING_TIMEOUT.value
+    try:
+        task = TaskName.parse(selected_task_name)
+    except ValueError as exc:
+        raise HTTPException(status_code=400, detail=str(exc)) from exc
+    try:
+        env: DistributedDebugEnv = app.state.env
+        return env.reset(task_name=task)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+@app.post("/step", response_model=StepResult)
+async def step(action: Action) -> StepResult:
+    try:
+        env: DistributedDebugEnv = app.state.env
+        return env.step(action)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+@app.get("/state")
+async def state() -> dict:
+    try:
+        env: DistributedDebugEnv = app.state.env
+        return env.state()
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc)) from exc
+@app.get("/health")
+async def health() -> dict:
+    return {"status": "ok", "version": "1.0.0"}

server/app.py ADDED Viewed

	@@ -0,0 +1,11 @@

+from .api import app
+def main() -> None:
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
+# Start the server when this module is run as a script.
+if __name__ == "__main__":
+    main()

server/constants.py ADDED Viewed

	@@ -0,0 +1,54 @@

+from enum import Enum
+class TaskName(str, Enum):
+    CASCADING_TIMEOUT = "cascading-timeout"
+    BYZANTINE_QUEUE_FAULT = "byzantine-queue-fault"
+    DISTRIBUTED_LOCK_STARVATION = "distributed-lock-starvation"
+    BACKPRESSURE_CASCADE = "backpressure-cascade"
+    ROUTE_PARTITION = "route-partition"
+    REGISTRY_CORRUPTION = "registry-corruption"
+    JOB_GENERATOR_RUNAWAY = "job-generator-runaway"
+    @classmethod
+    def parse(cls, value: str) -> "TaskName":
+        try:
+            return cls(value)
+        except ValueError as exc:
+            raise ValueError(f"Unknown task: {value}") from exc
+# Wire names of every task, in TaskName declaration order.
+ALL_TASKS = [task.value for task in TaskName]
+# Baseline default: easy + medium + hard (deadlock/starvation included).
+DEFAULT_BASELINE_TASKS = [
+    TaskName.CASCADING_TIMEOUT.value,
+    TaskName.BYZANTINE_QUEUE_FAULT.value,
+    TaskName.DISTRIBUTED_LOCK_STARVATION.value,
+]
+# The same three baseline tasks as enum members rather than strings.
+DEFAULT_BASELINE_TASK_ENUMS = [
+    TaskName.CASCADING_TIMEOUT,
+    TaskName.BYZANTINE_QUEUE_FAULT,
+    TaskName.DISTRIBUTED_LOCK_STARVATION,
+]
+# NOTE(review): presumably a placeholder for a missing command; its use is not
+# visible in this module, so confirm against the callers.
+NO_COMMAND_PROVIDED_SENTINEL = "__NO_COMMAND_PROVIDED__"
+# Step budget per task; the numbers match max_steps in openenv.yaml.
+TASK_MAX_STEPS = {
+    TaskName.CASCADING_TIMEOUT: 15,
+    TaskName.BYZANTINE_QUEUE_FAULT: 18,
+    TaskName.DISTRIBUTED_LOCK_STARVATION: 20,
+    TaskName.BACKPRESSURE_CASCADE: 20,
+    TaskName.ROUTE_PARTITION: 20,
+    TaskName.REGISTRY_CORRUPTION: 18,
+    TaskName.JOB_GENERATOR_RUNAWAY: 20,
+}
+# Default contents of the mesh config files, keyed by the component they belong to.
+DEFAULT_CONFIGS = {
+    "auth": {"delay_ms": 200},
+    "gateway": {"auth_timeout_ms": 500},
+    "worker": {"db_pool_size": 10, "db_write_delay_ms": 0},
+    "job_generator": {"interval_ms": 333},
+    "blocked_routes": {"blocked": []},
+}

server/env.py ADDED Viewed

	@@ -0,0 +1,467 @@

+import json
+import os
+import subprocess
+import time
+from pathlib import Path
+from typing import Any
+from .constants import (
+    DEFAULT_CONFIGS,
+    NO_COMMAND_PROVIDED_SENTINEL,
+    TASK_MAX_STEPS,
+    TaskName,
+)
+from .fault_injector import inject_fault
+from .graders import grade_task
+from .metrics_poller import MetricsPoller
+from .models import Action, Observation, StepResult
+from .process_manager import ProcessManager
+class DistributedDebugEnv:
+    """OpenEnv-compatible distributed systems debugging environment."""
+    def __init__(
+        self, project_root: Path | None = None, mesh_root: Path | None = None
+    ) -> None:
+        self.project_root = (
+            project_root or Path(__file__).resolve().parent.parent
+        ).resolve()
+        self.mesh_root = (
+            mesh_root or Path(os.getenv("MESH_ROOT", self.project_root / "mesh"))
+        ).resolve()
+        self._process_manager = ProcessManager(
+            project_root=self.project_root, mesh_root=self.mesh_root
+        )
+        self._metrics_poller = MetricsPoller(poll_interval_s=2.0)
+        self.current_task: TaskName | None = None
+        self.max_steps: int = 0
+        self.step_count: int = 0
+        self.last_exit_code: int = 0
+        self.prev_observation: Observation | None = None
+        self._baselines: dict[str, int] = {
+            "baseline_worker_restart_count": 0,
+            "baseline_consumer_stall_count": 0,
+        }
+        self._seen_diagnostic_signatures: set[str] = set()
+        self._command_counts: dict[str, int] = {}
+        self._last_grader_score: float = 0.0
+    def start(self) -> None:
+        if not self._metrics_poller.is_alive():
+            self._metrics_poller.start()
+    def close(self) -> None:
+        self._metrics_poller.stop()
+    def _write_json(self, path: Path, payload: dict[str, Any]) -> None:
+        path.parent.mkdir(parents=True, exist_ok=True)
+        path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
+    def _restore_defaults(self) -> None:
+        self._write_json(
+            self.mesh_root / "registry.json",
+            {
+                "services": {
+                    "auth": {"host": "localhost", "port": 3001, "protocol": "http"},
+                    "redis": {"host": "localhost", "port": 6379, "protocol": "tcp"},
+                    "worker": {
+                        "host": "localhost",
+                        "port": None,
+                        "protocol": "internal",
+                    },
+                }
+            },
+        )
+        self._write_json(
+            self.mesh_root / "auth" / "config.json", DEFAULT_CONFIGS["auth"]
+        )
+        self._write_json(
+            self.mesh_root / "gateway" / "config.json", DEFAULT_CONFIGS["gateway"]
+        )
+        self._write_json(
+            self.mesh_root / "gateway" / "blocked_routes.json",
+            DEFAULT_CONFIGS["blocked_routes"],
+        )
+        self._write_json(
+            self.mesh_root / "worker" / "config.json", DEFAULT_CONFIGS["worker"]
+        )
+        self._write_json(
+            self.mesh_root / "worker" / "job_generator_config.json",
+            DEFAULT_CONFIGS["job_generator"],
+        )
+    def _truncate_logs(self) -> None:
+        for service in ["gateway", "auth", "worker", "job_gen"]:
+            Path(f"/tmp/{service}.log").write_text("", encoding="utf-8")
+    def _reset_runtime_counters(self) -> None:
+        Path("/tmp/worker_restart_count").write_text("0", encoding="utf-8")
+        Path("/tmp/consumer_stall_count").write_text("0", encoding="utf-8")
+    def _redis_flush(self) -> None:
+        subprocess.run(
+            ["redis-cli", "FLUSHDB"], check=True, capture_output=True, text=True
+        )
+    def _read_float(self, value: str, default: float = 0.0) -> float:
+        try:
+            return float(value)
+        except (TypeError, ValueError):
+            return default
+    def _is_route_blocked(self) -> bool:
+        blocked_file = self.mesh_root / "gateway" / "blocked_routes.json"
+        try:
+            payload = json.loads(blocked_file.read_text(encoding="utf-8"))
+            blocked = payload.get("blocked", [])
+            return "gateway->redis" in blocked
+        except Exception:
+            return False
+    def _is_lock_present(self) -> bool:
+        result = subprocess.run(
+            ["redis-cli", "EXISTS", "LOCK:job_processor"],
+            capture_output=True,
+            text=True,
+            timeout=2,
+            check=False,
+        )
+        return result.stdout.strip() == "1"
+    def _is_cascading_timeout_resolved(self) -> bool:
+        auth_config_file = self.mesh_root / "auth" / "config.json"
+        gateway_config_file = self.mesh_root / "gateway" / "config.json"
+        try:
+            auth_payload = json.loads(auth_config_file.read_text(encoding="utf-8"))
+            gateway_payload = json.loads(
+                gateway_config_file.read_text(encoding="utf-8")
+            )
+        except Exception:
+            return False
+        auth_delay_ms = self._read_float(auth_payload.get("delay_ms"), default=0.0)
+        auth_timeout_ms = self._read_float(
+            gateway_payload.get("auth_timeout_ms"), default=0.0
+        )
+        if auth_timeout_ms <= 0:
+            return False
+        return auth_delay_ms <= auth_timeout_ms
+    def _is_registry_auth_default(self) -> bool:
+        registry_file = self.mesh_root / "registry.json"
+        try:
+            payload = json.loads(registry_file.read_text(encoding="utf-8"))
+            auth_service = payload["services"]["auth"]
+        except Exception:
+            return False
+        return (
+            auth_service.get("host") == "localhost"
+            and int(auth_service.get("port", 0)) == 3001
+            and auth_service.get("protocol") == "http"
+        )
+    def _job_generator_interval_ms(self) -> int:
+        config_file = self.mesh_root / "worker" / "job_generator_config.json"
+        try:
+            payload = json.loads(config_file.read_text(encoding="utf-8"))
+        except Exception:
+            return 0
+        try:
+            return int(payload.get("interval_ms", 0))
+        except (TypeError, ValueError):
+            return 0
+    def _is_job_generator_rate_resolved(self) -> bool:
+        return self._job_generator_interval_ms() >= int(
+            DEFAULT_CONFIGS["job_generator"]["interval_ms"]
+        )
+    def _build_grader_context(self) -> dict[str, Any]:
+        return {
+            **self._baselines,
+            "route_blocked": self._is_route_blocked(),
+            "lock_exists": self._is_lock_present(),
+            "cascading_timeout_resolved": self._is_cascading_timeout_resolved(),
+            "registry_auth_matches_default": self._is_registry_auth_default(),
+            "job_generator_interval_ms": self._job_generator_interval_ms(),
+            "job_generator_rate_resolved": self._is_job_generator_rate_resolved(),
+        }
+    def _blocked_command(self, command: str) -> bool:
+        dangerous_patterns = [
+            "rm -rf /",
+            "kill -9 1",
+            "pkill -f uvicorn",
+            "> /tmp/gateway.log",
+            "> /tmp/auth.log",
+            "> /tmp/worker.log",
+        ]
+        normalized = command.strip().lower()
+        return any(pattern in normalized for pattern in dangerous_patterns)
+    def _run_command(self, command: str) -> tuple[str, str | None]:
+        if command.strip() == NO_COMMAND_PROVIDED_SENTINEL:
+            self.last_exit_code = 2
+            return (
+                "No command provided by model. Expected JSON with a command field.",
+                "no_command_provided",
+            )
+        if self._blocked_command(command):
+            self.last_exit_code = 1
+            return (
+                "BLOCKED: This command would damage the environment infrastructure.",
+                "blocked_command",
+            )
+        try:
+            result = subprocess.run(
+                command,
+                shell=True,
+                capture_output=True,
+                text=True,
+                timeout=10,
+                cwd="/",
+                env={
+                    **os.environ,
+                    "PATH": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
+                },
+                check=False,
+            )
+            self.last_exit_code = result.returncode
+            output = (result.stdout + result.stderr).strip() or "(no output)"
+            return output, None
+        except subprocess.TimeoutExpired:
+            self.last_exit_code = 124
+            return "Command timed out after 10 seconds.", "timeout"
+        except Exception as exc:
+            self.last_exit_code = 1
+            return f"Command execution error: {exc}", str(exc)
+    def _command_signature(self, command: str) -> str:
+        return " ".join(command.strip().lower().split())
+    def _is_diagnostic_command(self, command: str) -> bool:
+        diagnostic_keywords = [
+            "cat",
+            "curl",
+            "redis-cli",
+            "ps",
+            "ls",
+            "grep",
+            "tail",
+            "jq",
+            "lrange",
+            "llen",
+            "keys",
+            "ttl",
+            "get",
+        ]
+        normalized = command.lower()
+        return any(keyword in normalized for keyword in diagnostic_keywords)
+    def _is_state_change_command(self, command: str) -> bool:
+        normalized = command.lower()
+        state_change_patterns = [
+            "kill -hup",
+            "redis-cli del",
+            "redis-cli lrem",
+            "redis-cli set",
+            "redis-cli flushdb",
+            "echo '{",
+            "> /mesh/",
+            "tee /mesh/",
+        ]
+        return any(pattern in normalized for pattern in state_change_patterns)
+    def _compute_reward(
+        self,
+        command: str,
+        current: Observation,
+        previous: Observation,
+        grader_score: float,
+        previous_grader_score: float,
+        command_error: str | None,
+    ) -> float:
+        """Compute the shaped per-step reward, clamped to ``[0.0, 1.0]``.
+
+        Returns 0.0 outright when no command was provided and 1.0 outright when
+        ``grader_score`` is at least 0.95. Otherwise the reward starts at
+        ``grader_score * 0.75`` and is adjusted by the bonuses and penalties
+        below. Also updates ``self._command_counts`` and
+        ``self._seen_diagnostic_signatures`` as a side effect.
+        """
+        if command_error == "no_command_provided":
+            return 0.0
+        if grader_score >= 0.95:
+            return 1.0
+        reward = grader_score * 0.75
+        # Track how many times this normalized command has been issued.
+        signature = self._command_signature(command)
+        signature_count = self._command_counts.get(signature, 0) + 1
+        self._command_counts[signature] = signature_count
+        # Small bonus the first time a given diagnostic command is seen.
+        if (
+            self._is_diagnostic_command(command)
+            and signature not in self._seen_diagnostic_signatures
+        ):
+            reward += 0.02
+            self._seen_diagnostic_signatures.add(signature)
+        if self._is_state_change_command(command):
+            reward += 0.03
+        # Reward grader progress; penalize a step that did not improve the score.
+        if grader_score > previous_grader_score + 1e-4:
+            reward += 0.15
+        else:
+            reward -= 0.05
+        # Bonuses for metric improvements relative to the previous observation.
+        if (
+            current.metrics.gateway_success_rate
+            > previous.metrics.gateway_success_rate + 1e-3
+        ):
+            reward += 0.05
+        if current.metrics.queue_depth < previous.metrics.queue_depth:
+            reward += 0.05
+        if current.metrics.worker_restart_count < previous.metrics.worker_restart_count:
+            reward += 0.03
+        if current.metrics.consumer_stall_count < previous.metrics.consumer_stall_count:
+            reward += 0.03
+        # Repetition penalty: 0.04 per repeat, capped at 0.12.
+        if signature_count > 1:
+            reward -= min(0.12, 0.04 * (signature_count - 1))
+        # Penalize bare no-op commands.
+        if command.strip().lower() in {
+            "echo",
+            "pwd",
+            "whoami",
+            "date",
+            "true",
+            "false",
+        }:
+            reward -= 0.08
+        # Penalize commands that ran but exited non-zero (reads self.last_exit_code).
+        if self.last_exit_code != 0 and command_error not in {
+            "blocked_command",
+            "no_command_provided",
+        }:
+            reward -= 0.08
+        if command_error == "blocked_command":
+            reward -= 0.25
+        return max(0.0, min(1.0, reward))
+    def _status_block(self, metrics: Any) -> str:
+        return (
+            "=== pipeline status after reset ===\n"
+            "gateway:  running\n"
+            "auth:     running\n"
+            "worker:   running\n"
+            f"queue_depth: {metrics.queue_depth}\n"
+            f"gateway_success_rate: {metrics.gateway_success_rate:.2f}"
+        )
+    def reset(self, task_name: TaskName | str) -> Observation:
+        """Restore a clean pipeline, inject the fault for ``task_name``, and return the first observation.
+
+        Raises:
+            RuntimeError: if the services do not pass health checks within 30 seconds.
+        """
+        task = TaskName.parse(task_name) if isinstance(task_name, str) else task_name
+        # Clear per-episode bookkeeping.
+        self.current_task = task
+        self.max_steps = TASK_MAX_STEPS[task]
+        self.step_count = 0
+        self._seen_diagnostic_signatures = set()
+        self._command_counts = {}
+        self._last_grader_score = 0.0
+        # Put files, Redis and counters back to their defaults before restarting.
+        self._truncate_logs()
+        self._restore_defaults()
+        self._redis_flush()
+        self._reset_runtime_counters()
+        Path("/tmp/current_task").write_text(task.value, encoding="utf-8")
+        self._process_manager.restart_all()
+        if not self._process_manager.wait_healthy(timeout_s=30):
+            raise RuntimeError("Services failed health checks after reset")
+        # The fault is injected only after the services are confirmed healthy.
+        inject_fault(task, self._process_manager)
+        # Pause for one second before taking the baseline metrics sample.
+        time.sleep(1.0)
+        self._metrics_poller.poll_once()
+        metrics = self._metrics_poller.get_current_metrics()
+        # Baselines let graders measure restarts/stalls relative to episode start.
+        self._baselines = {
+            "baseline_worker_restart_count": metrics.worker_restart_count,
+            "baseline_consumer_stall_count": metrics.consumer_stall_count,
+        }
+        # Seed the previous score so the first step's progress is measured against it.
+        self._last_grader_score = grade_task(
+            task, metrics, self._build_grader_context()
+        )
+        observation = Observation(
+            command_output=self._status_block(metrics),
+            metrics=metrics,
+            process_status=self._process_manager.get_status(),
+        )
+        self.prev_observation = observation
+        return observation
+    def step(self, action: Action) -> StepResult:
+        """Run one agent command, re-poll metrics, grade, and return the step result.
+
+        Raises:
+            RuntimeError: if ``reset`` has not been called yet.
+        """
+        if not self.current_task:
+            raise RuntimeError(
+                "Environment not initialized. Call reset(task_name) first."
+            )
+        self.step_count += 1
+        command_output, command_error = self._run_command(action.command)
+        # Take a fresh metrics sample after the command has run.
+        self._metrics_poller.poll_once()
+        metrics = self._metrics_poller.get_current_metrics()
+        observation = Observation(
+            command_output=command_output,
+            metrics=metrics,
+            process_status=self._process_manager.get_status(),
+        )
+        # Fall back to the current observation when no previous one is stored.
+        previous = self.prev_observation or observation
+        previous_grader_score = self._last_grader_score
+        grader_score = grade_task(
+            self.current_task, metrics, self._build_grader_context()
+        )
+        reward = self._compute_reward(
+            action.command,
+            observation,
+            previous,
+            grader_score,
+            previous_grader_score,
+            command_error,
+        )
+        # A step without a command can only end the episode by exhausting steps.
+        if command_error == "no_command_provided":
+            done = self.step_count >= self.max_steps
+        else:
+            done = grader_score >= 0.95 or self.step_count >= self.max_steps
+        self._last_grader_score = grader_score
+        self.prev_observation = observation
+        info: dict[str, Any] = {
+            "grader_score": round(grader_score, 4),
+            "error": command_error,
+            "exit_code": self.last_exit_code,
+            "task": self.current_task.value if self.current_task else None,
+        }
+        return StepResult(observation=observation, reward=reward, done=done, info=info)
+    def state(self) -> dict[str, Any]:
+        self._metrics_poller.poll_once()
+        metrics = self._metrics_poller.get_current_metrics()
+        return {
+            "task": self.current_task.value if self.current_task else None,
+            "step_count": self.step_count,
+            "max_steps": self.max_steps,
+            "metrics": metrics.model_dump(),
+            "process_status": self._process_manager.get_status(),
+            "baselines": dict(self._baselines),
+        }

server/fault_injector.py ADDED Viewed

	@@ -0,0 +1,95 @@

+import json
+import subprocess
+from pathlib import Path
+from .constants import TaskName
+from .process_manager import ProcessManager
+def _write_json(path: Path, payload: dict) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
+def inject_cascading_timeout(pm: ProcessManager) -> None:
+    _write_json(pm.mesh_root / "auth" / "config.json", {"delay_ms": 1500})
+    _write_json(pm.mesh_root / "gateway" / "config.json", {"auth_timeout_ms": 500})
+    pm.sighup("auth")
+    pm.sighup("gateway")
+def inject_byzantine_queue_fault(pm: ProcessManager) -> None:
+    subprocess.run(
+        ["redis-cli", "LPUSH", "job_queue", '{"id":"poison-001","payload":{{BROKEN'],
+        check=True,
+    )
+def inject_distributed_lock_starvation(pm: ProcessManager) -> None:
+    subprocess.run(
+        ["redis-cli", "SET", "LOCK:job_processor", "dead-worker-pid-9999"], check=True
+    )
+def inject_backpressure_cascade(pm: ProcessManager) -> None:
+    _write_json(
+        pm.mesh_root / "worker" / "config.json",
+        {"db_pool_size": 1, "db_write_delay_ms": 800},
+    )
+    pm.sighup("worker")
+def inject_route_partition(pm: ProcessManager) -> None:
+    _write_json(
+        pm.mesh_root / "gateway" / "blocked_routes.json",
+        {"blocked": ["gateway->redis"]},
+    )
+    pm.sighup("gateway")
+def inject_registry_corruption(pm: ProcessManager) -> None:
+    _write_json(
+        pm.mesh_root / "registry.json",
+        {
+            "services": {
+                "auth": {"host": "invalid-auth-host", "port": 3001, "protocol": "http"},
+                "redis": {"host": "localhost", "port": 6379, "protocol": "tcp"},
+                "worker": {"host": "localhost", "port": None, "protocol": "internal"},
+            }
+        },
+    )
+    pm.sighup("gateway")
+def inject_job_generator_runaway(pm: ProcessManager) -> None:
+    _write_json(
+        pm.mesh_root / "worker" / "job_generator_config.json", {"interval_ms": 10}
+    )
+    pm.sighup("job_generator")
+def inject_fault(task_name: TaskName | str, pm: ProcessManager) -> None:
+    task = TaskName.parse(task_name) if isinstance(task_name, str) else task_name
+    if task is TaskName.CASCADING_TIMEOUT:
+        inject_cascading_timeout(pm)
+        return
+    if task is TaskName.BYZANTINE_QUEUE_FAULT:
+        inject_byzantine_queue_fault(pm)
+        return
+    if task is TaskName.DISTRIBUTED_LOCK_STARVATION:
+        inject_distributed_lock_starvation(pm)
+        return
+    if task is TaskName.BACKPRESSURE_CASCADE:
+        inject_backpressure_cascade(pm)
+        return
+    if task is TaskName.ROUTE_PARTITION:
+        inject_route_partition(pm)
+        return
+    if task is TaskName.REGISTRY_CORRUPTION:
+        inject_registry_corruption(pm)
+        return
+    if task is TaskName.JOB_GENERATOR_RUNAWAY:
+        inject_job_generator_runaway(pm)
+        return
+    raise ValueError(f"Unknown task: {task_name}")

server/graders.py ADDED Viewed

	@@ -0,0 +1,109 @@

+from typing import Any
+from .constants import TaskName
+from .models import SystemMetrics
+def _clamp(score: float) -> float:
+    return max(0.0, min(1.0, score))
+def grade_cascading_timeout(metrics: SystemMetrics, context: dict[str, Any]) -> float:
+    timeout_resolved = bool(context.get("cascading_timeout_resolved", False))
+    if timeout_resolved and metrics.gateway_success_rate >= 0.99:
+        return 1.0
+    if not timeout_resolved:
+        # Prevent instant pass while the injected timeout fault is still active.
+        return _clamp(metrics.gateway_success_rate * 0.25)
+    return _clamp(0.4 + metrics.gateway_success_rate * 0.4)
+def grade_byzantine_queue_fault(
+    metrics: SystemMetrics, context: dict[str, Any]
+) -> float:
+    baseline_restart = int(context.get("baseline_worker_restart_count", 0))
+    restart_delta = max(0, metrics.worker_restart_count - baseline_restart)
+    if metrics.queue_depth == 0 and restart_delta <= 1:
+        return 1.0
+    if metrics.queue_depth == 0:
+        return 0.6
+    queue_component = max(0.0, 1.0 - metrics.queue_depth / 50.0)
+    stability_penalty = min(0.4, restart_delta * 0.05)
+    return _clamp(queue_component - stability_penalty)
+def grade_distributed_lock_starvation(
+    metrics: SystemMetrics, context: dict[str, Any]
+) -> float:
+    lock_exists = bool(context.get("lock_exists", True))
+    baseline_stall = int(context.get("baseline_consumer_stall_count", 0))
+    stall_delta = max(0, metrics.consumer_stall_count - baseline_stall)
+    if not lock_exists and metrics.queue_depth <= 3:
+        return 1.0
+    if not lock_exists:
+        return 0.6
+    # If lock still exists, reward slight progress only when stalls don't explode.
+    return 0.2 if stall_delta <= 1 else 0.0
+def grade_backpressure_cascade(metrics: SystemMetrics, _: dict[str, Any]) -> float:
+    return _clamp(1.0 - (metrics.queue_depth / 200.0))
+def grade_route_partition(metrics: SystemMetrics, context: dict[str, Any]) -> float:
+    route_blocked = bool(context.get("route_blocked", True))
+    if not route_blocked and metrics.gateway_success_rate >= 0.95:
+        return 1.0
+    if not route_blocked:
+        return _clamp(metrics.gateway_success_rate)
+    return 0.0
+def grade_registry_corruption(metrics: SystemMetrics, context: dict[str, Any]) -> float:
+    registry_auth_matches_default = bool(
+        context.get("registry_auth_matches_default", False)
+    )
+    if registry_auth_matches_default and metrics.gateway_success_rate >= 0.99:
+        return 1.0
+    if registry_auth_matches_default:
+        return _clamp(0.5 + metrics.gateway_success_rate * 0.5)
+    return _clamp(metrics.gateway_success_rate * 0.3)
+def grade_job_generator_runaway(
+    metrics: SystemMetrics, context: dict[str, Any]
+) -> float:
+    rate_resolved = bool(context.get("job_generator_rate_resolved", False))
+    if rate_resolved and metrics.queue_depth <= 5:
+        return 1.0
+    if rate_resolved and metrics.queue_depth <= 30:
+        return 0.7
+    if rate_resolved:
+        return _clamp(0.7 - (metrics.queue_depth - 30) / 100.0)
+    return 0.2 if metrics.queue_depth <= 30 else 0.0
+def grade_task(
+    task_name: TaskName | str, metrics: SystemMetrics, context: dict[str, Any]
+) -> float:
+    task = TaskName.parse(task_name) if isinstance(task_name, str) else task_name
+    if task is TaskName.CASCADING_TIMEOUT:
+        return grade_cascading_timeout(metrics, context)
+    if task is TaskName.BYZANTINE_QUEUE_FAULT:
+        return grade_byzantine_queue_fault(metrics, context)
+    if task is TaskName.DISTRIBUTED_LOCK_STARVATION:
+        return grade_distributed_lock_starvation(metrics, context)
+    if task is TaskName.BACKPRESSURE_CASCADE:
+        return grade_backpressure_cascade(metrics, context)
+    if task is TaskName.ROUTE_PARTITION:
+        return grade_route_partition(metrics, context)
+    if task is TaskName.REGISTRY_CORRUPTION:
+        return grade_registry_corruption(metrics, context)
+    if task is TaskName.JOB_GENERATOR_RUNAWAY:
+        return grade_job_generator_runaway(metrics, context)
+    return 0.0

server/metrics_poller.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import subprocess
+import threading
+from pathlib import Path
+import httpx
+from .models import SystemMetrics
+class MetricsPoller(threading.Thread):
+    """Background metrics poller with last-known-good caching."""
+    def __init__(self, poll_interval_s: float = 2.0) -> None:
+        super().__init__(daemon=True)
+        self.poll_interval_s = poll_interval_s
+        self._stop_event = threading.Event()
+        self._lock = threading.Lock()
+        self._latest: dict[str, float | int] = {
+            "gateway_success_rate": 0.0,
+            "gateway_p99_latency_ms": 0.0,
+            "queue_depth": 0,
+            "worker_restart_count": 0,
+            "consumer_stall_count": 0,
+        }
+    def stop(self) -> None:
+        self._stop_event.set()
+    def _read_counter(self, path: str) -> int:
+        file_path = Path(path)
+        if not file_path.exists():
+            return 0
+        try:
+            return int(file_path.read_text().strip() or "0")
+        except ValueError:
+            return 0
+    def _poll_gateway(self) -> dict[str, float]:
+        with httpx.Client(timeout=1.0) as client:
+            response = client.get("http://localhost:3000/health")
+            response.raise_for_status()
+            payload = response.json()
+        success_rate = float(
+            payload.get("success_rate", payload.get("gateway_success_rate", 0.0))
+        )
+        p99 = float(
+            payload.get("p99_latency_ms", payload.get("gateway_p99_latency_ms", 0.0))
+        )
+        return {
+            "gateway_success_rate": max(0.0, min(1.0, success_rate)),
+            "gateway_p99_latency_ms": max(0.0, p99),
+        }
+    def _poll_queue_depth(self) -> int:
+        result = subprocess.run(
+            ["redis-cli", "LLEN", "job_queue"],
+            capture_output=True,
+            text=True,
+            timeout=2,
+            check=False,
+        )
+        if result.returncode != 0:
+            return int(self._latest["queue_depth"])
+        try:
+            return max(0, int(result.stdout.strip() or "0"))
+        except ValueError:
+            return int(self._latest["queue_depth"])
+    def poll_once(self) -> None:
+        snapshot = dict(self._latest)
+        try:
+            snapshot.update(self._poll_gateway())
+        except Exception:
+            pass
+        snapshot["queue_depth"] = self._poll_queue_depth()
+        snapshot["worker_restart_count"] = self._read_counter(
+            "/tmp/worker_restart_count"
+        )
+        snapshot["consumer_stall_count"] = self._read_counter(
+            "/tmp/consumer_stall_count"
+        )
+        with self._lock:
+            self._latest = snapshot
+    def run(self) -> None:
+        while not self._stop_event.is_set():
+            self.poll_once()
+            self._stop_event.wait(self.poll_interval_s)
+    def get_current_metrics(self) -> SystemMetrics:
+        with self._lock:
+            snapshot = dict(self._latest)
+        return SystemMetrics.model_validate(snapshot)

server/models.py ADDED Viewed

	@@ -0,0 +1,41 @@

+from typing import Any
+from pydantic import BaseModel, Field, field_validator
+# Validated snapshot of pipeline health metrics.
+class SystemMetrics(BaseModel):
+    # Fraction in [0.0, 1.0].
+    gateway_success_rate: float = Field(..., ge=0.0, le=1.0)
+    # Non-negative latency in milliseconds.
+    gateway_p99_latency_ms: float = Field(..., ge=0.0)
+    # Non-negative queue length.
+    queue_depth: int = Field(..., ge=0)
+    # Non-negative counters.
+    worker_restart_count: int = Field(..., ge=0)
+    consumer_stall_count: int = Field(..., ge=0)
+# What the agent sees after a reset or a step: command output, metrics, process status.
+class Observation(BaseModel):
+    command_output: str = Field(
+        ..., description="stdout+stderr from the last executed command"
+    )
+    metrics: SystemMetrics
+    # Mapping of string keys to status strings; defaults to an empty dict.
+    process_status: dict[str, str] = Field(default_factory=dict)
+# A single agent action: one bash command string.
+class Action(BaseModel):
+    command: str = Field(..., description="Single bash command to execute")
+    # Reject empty or whitespace-only commands; the value is returned unmodified.
+    @field_validator("command")
+    @classmethod
+    def command_must_not_be_empty(cls, value: str) -> str:
+        if not value.strip():
+            raise ValueError("command must not be empty")
+        return value
+# Scalar reward constrained to [0.0, 1.0].
+class Reward(BaseModel):
+    value: float = Field(..., ge=0.0, le=1.0)
+# Result of one environment step: observation, reward, termination flag, extra info.
+class StepResult(BaseModel):
+    observation: Observation
+    # Reward must lie in [0.0, 1.0] or validation fails.
+    reward: float = Field(..., ge=0.0, le=1.0)
+    done: bool
+    # Free-form extra data; defaults to an empty dict.
+    info: dict[str, Any] = Field(default_factory=dict)

server/process_manager.py ADDED Viewed

	@@ -0,0 +1,164 @@

+import os
+import signal
+import subprocess
+import time
+from pathlib import Path
+import httpx
+class ProcessManager:
+    """Tracks Bun service processes and lifecycle state."""
+    def __init__(
+        self, project_root: Path | None = None, mesh_root: Path | None = None
+    ) -> None:
+        self.project_root = (project_root or Path(__file__).resolve().parent.parent).resolve()
+        self.mesh_root = (
+            mesh_root or Path(os.getenv("MESH_ROOT", self.project_root / "mesh"))
+        ).resolve()
+        self._service_scripts = {
+            "gateway": self.project_root / "mesh" / "gateway" / "index.ts",
+            "auth": self.project_root / "mesh" / "auth" / "index.ts",
+            "worker": self.project_root / "mesh" / "worker" / "index.ts",
+        }
+        self._job_generator_script = (
+            self.project_root / "mesh" / "worker" / "job_generator.ts"
+        )
+        self._health_urls = {
+            "gateway": "http://localhost:3000/health",
+            "auth": "http://localhost:3001/health",
+        }
+        self._processes: dict[str, subprocess.Popen[str]] = {}
+        self._log_handles: dict[str, object] = {}
+    @staticmethod
+    def _pid_path(service: str) -> Path:
+        return Path(f"/tmp/{service}.pid")
+    @staticmethod
+    def _is_pid_alive(pid: int) -> bool:
+        try:
+            os.kill(pid, 0)
+            return True
+        except OSError:
+            return False
+    def _read_pid(self, service: str) -> int | None:
+        path = self._pid_path(service)
+        if not path.exists():
+            return None
+        try:
+            pid = int(path.read_text().strip())
+        except (TypeError, ValueError):
+            return None
+        return pid if self._is_pid_alive(pid) else None
+    def _write_pid(self, service: str, pid: int) -> None:
+        self._pid_path(service).write_text(str(pid))
+    def _spawn_service(self, service: str, script: Path, log_path: Path) -> None:
+        log_handle = open(log_path, "a", encoding="utf-8")
+        env = {
+            **os.environ,
+            "MESH_ROOT": str(self.mesh_root),
+        }
+        process = subprocess.Popen(
+            ["bun", "run", str(script)],
+            cwd=str(self.project_root),
+            stdout=log_handle,
+            stderr=subprocess.STDOUT,
+            text=True,
+            env=env,
+        )
+        self._processes[service] = process
+        self._log_handles[service] = log_handle
+        self._write_pid(service, process.pid)
+    def start_all(self) -> None:
+        for service, script in self._service_scripts.items():
+            existing_pid = self._read_pid(service)
+            if existing_pid:
+                continue
+            self._spawn_service(service, script, Path(f"/tmp/{service}.log"))
+        if not self._read_pid("job_generator"):
+            self._spawn_service(
+                "job_generator", self._job_generator_script, Path("/tmp/job_gen.log")
+            )
+    def _terminate_pid(self, pid: int, timeout_s: float = 0.5) -> None:
+        try:
+            os.kill(pid, signal.SIGTERM)
+        except ProcessLookupError:
+            return
+        deadline = time.time() + timeout_s
+        while time.time() < deadline:
+            if not self._is_pid_alive(pid):
+                return
+            time.sleep(0.05)
+        try:
+            os.kill(pid, signal.SIGKILL)
+        except ProcessLookupError:
+            return
+    def stop_all(self) -> None:
+        for service in ["gateway", "auth", "worker", "job_generator"]:
+            pid = self._read_pid(service)
+            if pid:
+                self._terminate_pid(pid)
+            pid_path = self._pid_path(service)
+            if pid_path.exists():
+                pid_path.unlink(missing_ok=True)
+        for handle in self._log_handles.values():
+            try:
+                handle.close()
+            except Exception:
+                pass
+        self._processes.clear()
+        self._log_handles.clear()
+    def restart_all(self) -> None:
+        self.stop_all()
+        self.start_all()
+    def sighup(self, service: str) -> None:
+        pid = self._read_pid(service)
+        if not pid:
+            raise RuntimeError(f"Service not running: {service}")
+        os.kill(pid, signal.SIGHUP)
+    def wait_healthy(self, timeout_s: int = 30) -> bool:
+        deadline = time.time() + timeout_s
+        with httpx.Client(timeout=1.0) as client:
+            while time.time() < deadline:
+                try:
+                    gateway_ok = (
+                        client.get(self._health_urls["gateway"]).status_code == 200
+                    )
+                    auth_ok = client.get(self._health_urls["auth"]).status_code == 200
+                    if gateway_ok and auth_ok:
+                        return True
+                except Exception:
+                    pass
+                time.sleep(1)
+        return False
+    def get_status(self) -> dict[str, str]:
+        status: dict[str, str] = {}
+        for service in ["gateway", "auth", "worker", "job_generator"]:
+            pid = self._read_pid(service)
+            status[service] = f"running pid={pid}" if pid else "stopped"
+        return status
+    def get_pid(self, service: str) -> int | None:
+        return self._read_pid(service)
+    def close(self) -> None:
+        self.stop_all()

setup-dev.sh ADDED Viewed

	@@ -0,0 +1,86 @@

+#!/usr/bin/env bash
+# Developer setup: checks prerequisites, creates a Python 3.12 virtualenv,
+# installs Python and Bun dependencies, and runs OpenEnv validation.
+set -euo pipefail
+# Directory containing this script.
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+VENV_DIR="${ROOT_DIR}/.venv"
+REQUIRED_PYTHON="3.12"
+PYTHON_BIN="python3.12"
+# Fall back to plain python3 only when python3.12 is absent and python3 is itself 3.12.
+if ! command -v "$PYTHON_BIN" >/dev/null && command -v python3 >/dev/null; then
+  if [ "$(python3 -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')" = "$REQUIRED_PYTHON" ]; then
+    PYTHON_BIN="python3"
+  fi
+fi
+# require_cmd NAME HINT: exit with status 1 and print HINT when NAME is not on PATH.
+require_cmd() {
+  local name="$1"
+  local install_hint="$2"
+  if ! command -v "$name" >/dev/null; then
+    echo "[ERROR] Missing required command: $name"
+    echo "        Install hint: $install_hint"
+    exit 1
+  fi
+}
+# optional_cmd_note NAME NOTE: print a warning with NOTE when NAME is not on PATH; never exits.
+optional_cmd_note() {
+  local name="$1"
+  local note="$2"
+  if ! command -v "$name" >/dev/null; then
+    echo "[WARN] Optional command not found: $name"
+    echo "       $note"
+  fi
+}
+# --- Preflight: required tools abort the script, optional tools only warn. ---
+echo "[INFO] Preflight checks"
+require_cmd "$PYTHON_BIN" "Install Python 3.12 (binary: python3.12 or python3==3.12)"
+require_cmd bun "Install Bun: https://bun.sh"
+require_cmd redis-server "Install Redis server"
+require_cmd redis-cli "Install Redis CLI"
+require_cmd curl "Install curl"
+require_cmd jq "Install jq"
+optional_cmd_note docker "Needed for local container validation and HF parity checks."
+optional_cmd_note uv "Used for regenerating uv.lock when dependencies change."
+# --- Virtualenv: recreate it when the existing interpreter is not Python 3.12. ---
+echo "[INFO] Creating virtual environment"
+if [ -x "$VENV_DIR/bin/python" ]; then
+  # The interpreter path is quoted so a checkout path containing spaces works.
+  EXISTING_PY_VERSION="$("$VENV_DIR/bin/python" -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")')"
+  if [ "$EXISTING_PY_VERSION" != "$REQUIRED_PYTHON" ]; then
+    echo "[INFO] Recreating .venv with Python 3.12 (found $EXISTING_PY_VERSION)"
+    rm -rf "$VENV_DIR"
+  fi
+fi
+if [ ! -d "$VENV_DIR" ]; then
+  "$PYTHON_BIN" -m venv "$VENV_DIR"
+fi
+# shellcheck disable=SC1091
+source "$VENV_DIR/bin/activate"
+python -m pip install --upgrade pip
+pip install -r "$ROOT_DIR/requirements.txt"
+# --- Bun dependencies, installed per service in a subshell. ---
+echo "[INFO] Installing Bun dependencies"
+( cd "$ROOT_DIR/mesh/gateway" && bun install )
+( cd "$ROOT_DIR/mesh/auth" && bun install )
+( cd "$ROOT_DIR/mesh/worker" && bun install )
+chmod +x "$ROOT_DIR/start.sh"
+# Tolerate a chmod failure on inference.py.
+chmod +x "$ROOT_DIR/inference.py" || true
+# Generate uv.lock only when uv is available and no lock file exists yet.
+if command -v uv >/dev/null && [ ! -f "$ROOT_DIR/uv.lock" ]; then
+  echo "[INFO] Generating uv.lock"
+  ( cd "$ROOT_DIR" && uv lock )
+fi
+echo "[INFO] Running OpenEnv validation"
+openenv validate "$ROOT_DIR"
+echo "[INFO] Setup complete"
+echo "[NEXT] Export required inference vars:"
+echo "       API_BASE_URL=<endpoint>"
+echo "       MODEL_NAME=<model>"
+echo "       HF_TOKEN=<api_key>"
+echo "[NEXT] Start services: APP_ROOT=$ROOT_DIR MESH_ROOT=$ROOT_DIR/mesh ./start.sh"
+echo "[NEXT] Run baseline: HF_TOKEN=... API_BASE_URL=... MODEL_NAME=... python inference.py"

start.sh ADDED Viewed

	@@ -0,0 +1,50 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export APP_ROOT="${APP_ROOT:-/home/user/app}"
+export MESH_ROOT="${MESH_ROOT:-/mesh}"
+mkdir -p /tmp
+redis-server --daemonize yes --logfile /tmp/redis.log --port 6379
+until redis-cli ping >/dev/null; do sleep 0.2; done
+rm -f /tmp/*.pid /tmp/*.log /tmp/worker_restart_count /tmp/consumer_stall_count /tmp/current_task
+printf '0' > /tmp/worker_restart_count
+printf '0' > /tmp/consumer_stall_count
+cat > "${MESH_ROOT}/registry.json" <<'EOF'
+{
+  "services": {
+    "auth": {"host": "localhost", "port": 3001, "protocol": "http"},
+    "redis": {"host": "localhost", "port": 6379, "protocol": "tcp"},
+    "worker": {"host": "localhost", "port": null, "protocol": "internal"}
+  }
+}
+EOF
+: > /tmp/gateway.log
+: > /tmp/auth.log
+: > /tmp/worker.log
+: > /tmp/job_gen.log
+bun run "${APP_ROOT}/mesh/gateway/index.ts" >> /tmp/gateway.log &
+echo $! > /tmp/gateway.pid
+bun run "${APP_ROOT}/mesh/auth/index.ts" >> /tmp/auth.log &
+echo $! > /tmp/auth.pid
+bun run "${APP_ROOT}/mesh/worker/index.ts" >> /tmp/worker.log &
+echo $! > /tmp/worker.pid
+bun run "${APP_ROOT}/mesh/worker/job_generator.ts" >> /tmp/job_gen.log &
+echo $! > /tmp/job_generator.pid
+for _ in $(seq 1 45); do
+  if curl -sf http://localhost:3000/health >/dev/null && curl -sf http://localhost:3001/health >/dev/null; then
+    break
+  fi
+  sleep 1
+done
+exec uvicorn server.api:app --host 0.0.0.0 --port 8000

tests/test_api_reset.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from fastapi.testclient import TestClient
+from server import api
+from server.constants import TaskName
+from server.models import Observation, SystemMetrics
+class _FakeEnv:
+    def __init__(self) -> None:
+        self.reset_calls: list[TaskName] = []
+    def start(self) -> None:
+        return None
+    def close(self) -> None:
+        return None
+    def reset(self, task_name: TaskName) -> Observation:
+        self.reset_calls.append(task_name)
+        return Observation(
+            command_output="ready",
+            metrics=SystemMetrics(
+                gateway_success_rate=0.0,
+                gateway_p99_latency_ms=0.0,
+                queue_depth=0,
+                worker_restart_count=0,
+                consumer_stall_count=0,
+            ),
+            process_status={"gateway": "running"},
+        )
+def test_reset_defaults_to_cascading_timeout_when_task_missing(monkeypatch) -> None:
+    holder: dict[str, _FakeEnv] = {}
+    def fake_env_factory() -> _FakeEnv:
+        env = _FakeEnv()
+        holder["env"] = env
+        return env
+    monkeypatch.setattr(api, "DistributedDebugEnv", fake_env_factory)
+    with TestClient(api.app) as client:
+        response = client.post("/reset", json={})
+    assert response.status_code == 200
+    assert holder["env"].reset_calls == [TaskName.CASCADING_TIMEOUT]
+def test_reset_rejects_unknown_explicit_task(monkeypatch) -> None:
+    holder: dict[str, _FakeEnv] = {}
+    def fake_env_factory() -> _FakeEnv:
+        env = _FakeEnv()
+        holder["env"] = env
+        return env
+    monkeypatch.setattr(api, "DistributedDebugEnv", fake_env_factory)
+    with TestClient(api.app) as client:
+        response = client.post("/reset", params={"task_name": "not-a-task"}, json={})
+    assert response.status_code == 400
+    assert response.json()["detail"] == "Unknown task: not-a-task"
+    assert holder["env"].reset_calls == []

tests/test_env_task_context.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import json
+from pathlib import Path
+from server.constants import DEFAULT_CONFIGS
+from server.env import DistributedDebugEnv
+def _write_json(path: Path, payload: dict) -> None:
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
+def test_restore_defaults_adds_job_generator_config(tmp_path: Path) -> None:
+    mesh_root = tmp_path / "mesh"
+    env = DistributedDebugEnv(project_root=tmp_path, mesh_root=mesh_root)
+    env._restore_defaults()
+    payload = json.loads(
+        (mesh_root / "worker" / "job_generator_config.json").read_text()
+    )
+    assert payload == DEFAULT_CONFIGS["job_generator"]
+def test_registry_auth_matches_default_detects_corruption(tmp_path: Path) -> None:
+    mesh_root = tmp_path / "mesh"
+    env = DistributedDebugEnv(project_root=tmp_path, mesh_root=mesh_root)
+    env._restore_defaults()
+    assert env._is_registry_auth_default() is True
+    _write_json(
+        mesh_root / "registry.json",
+        {
+            "services": {
+                "auth": {"host": "invalid-host", "port": 3001, "protocol": "http"},
+                "redis": {"host": "localhost", "port": 6379, "protocol": "tcp"},
+                "worker": {"host": "localhost", "port": None, "protocol": "internal"},
+            }
+        },
+    )
+    assert env._is_registry_auth_default() is False
+def test_job_generator_rate_resolved_uses_config(tmp_path: Path) -> None:
+    mesh_root = tmp_path / "mesh"
+    env = DistributedDebugEnv(project_root=tmp_path, mesh_root=mesh_root)
+    env._restore_defaults()
+    assert env._job_generator_interval_ms() == 333
+    assert env._is_job_generator_rate_resolved() is True
+    _write_json(mesh_root / "worker" / "job_generator_config.json", {"interval_ms": 10})
+    assert env._job_generator_interval_ms() == 10
+    assert env._is_job_generator_rate_resolved() is False

tests/test_graders.py ADDED Viewed

	@@ -0,0 +1,132 @@

+from server.graders import (
+    grade_backpressure_cascade,
+    grade_byzantine_queue_fault,
+    grade_cascading_timeout,
+    grade_job_generator_runaway,
+    grade_registry_corruption,
+    grade_distributed_lock_starvation,
+    grade_route_partition,
+)
+from server.models import SystemMetrics
+def _metrics(
+    *,
+    success_rate: float = 0.0,
+    p99: float = 1000.0,
+    depth: int = 10,
+    restarts: int = 0,
+    stalls: int = 0,
+) -> SystemMetrics:
+    return SystemMetrics(
+        gateway_success_rate=success_rate,
+        gateway_p99_latency_ms=p99,
+        queue_depth=depth,
+        worker_restart_count=restarts,
+        consumer_stall_count=stalls,
+    )
+def test_grade_cascading_timeout_boundaries() -> None:
+    assert (
+        grade_cascading_timeout(
+            _metrics(success_rate=1.0), {"cascading_timeout_resolved": True}
+        )
+        == 1.0
+    )
+    assert (
+        grade_cascading_timeout(
+            _metrics(success_rate=1.0), {"cascading_timeout_resolved": False}
+        )
+        == 0.25
+    )
+    assert (
+        grade_cascading_timeout(
+            _metrics(success_rate=0.5), {"cascading_timeout_resolved": False}
+        )
+        == 0.125
+    )
+def test_grade_byzantine_queue_fault_cases() -> None:
+    ctx = {"baseline_worker_restart_count": 3}
+    assert grade_byzantine_queue_fault(_metrics(depth=0, restarts=3), ctx) == 1.0
+    assert grade_byzantine_queue_fault(_metrics(depth=0, restarts=8), ctx) == 0.6
+    assert grade_byzantine_queue_fault(_metrics(depth=40, restarts=10), ctx) == 0.0
+def test_grade_distributed_lock_starvation_cases() -> None:
+    ctx_locked = {"baseline_consumer_stall_count": 0, "lock_exists": True}
+    ctx_unlocked = {"baseline_consumer_stall_count": 0, "lock_exists": False}
+    assert (
+        grade_distributed_lock_starvation(_metrics(depth=2, stalls=0), ctx_unlocked)
+        == 1.0
+    )
+    assert (
+        grade_distributed_lock_starvation(_metrics(depth=10, stalls=0), ctx_unlocked)
+        == 0.6
+    )
+    assert (
+        grade_distributed_lock_starvation(_metrics(depth=10, stalls=3), ctx_locked)
+        == 0.0
+    )
+def test_grade_backpressure_cascade_continuous() -> None:
+    assert grade_backpressure_cascade(_metrics(depth=0), {}) == 1.0
+    assert grade_backpressure_cascade(_metrics(depth=100), {}) == 0.5
+    assert grade_backpressure_cascade(_metrics(depth=200), {}) == 0.0
+def test_grade_route_partition_threshold() -> None:
+    assert (
+        grade_route_partition(_metrics(success_rate=0.96), {"route_blocked": False})
+        == 1.0
+    )
+    assert (
+        grade_route_partition(_metrics(success_rate=0.8), {"route_blocked": True})
+        == 0.0
+    )
+def test_grade_registry_corruption_thresholds() -> None:
+    assert (
+        grade_registry_corruption(
+            _metrics(success_rate=0.99), {"registry_auth_matches_default": True}
+        )
+        == 1.0
+    )
+    assert (
+        grade_registry_corruption(
+            _metrics(success_rate=0.8), {"registry_auth_matches_default": True}
+        )
+        == 0.9
+    )
+    assert (
+        grade_registry_corruption(
+            _metrics(success_rate=1.0), {"registry_auth_matches_default": False}
+        )
+        == 0.3
+    )
+def test_grade_job_generator_runaway_thresholds() -> None:
+    assert (
+        grade_job_generator_runaway(
+            _metrics(depth=4), {"job_generator_rate_resolved": True}
+        )
+        == 1.0
+    )
+    assert (
+        grade_job_generator_runaway(
+            _metrics(depth=20), {"job_generator_rate_resolved": True}
+        )
+        == 0.7
+    )
+    assert (
+        grade_job_generator_runaway(
+            _metrics(depth=20), {"job_generator_rate_resolved": False}
+        )
+        == 0.2
+    )

tests/test_inference_format.py ADDED Viewed

	@@ -0,0 +1,186 @@

+import os
+from server.constants import TaskName
+from inference import (
+    _attempt_history_block,
+    _episode_score,
+    _format_end_line,
+    _parse_tasks,
+    _single_line,
+    _task_symptom_block,
+    build_prompt,
+    extract_command,
+    extract_reasoning,
+)
+from server.models import Observation, SystemMetrics
+def test_extract_command_rejects_non_json_code_fence() -> None:
+    raw = "```bash\nredis-cli LLEN job_queue\n```"
+    assert extract_command(raw) is None
+def test_extract_command_returns_none_when_empty() -> None:
+    assert extract_command("   ") is None
+def test_extract_command_reads_json_payload() -> None:
+    raw = '{"command":"redis-cli LLEN job_queue"}'
+    assert extract_command(raw) == "redis-cli LLEN job_queue"
+def test_extract_command_reads_fenced_json_payload() -> None:
+    raw = '```json\n{"command":"ps -ef"}\n```'
+    assert extract_command(raw) == "ps -ef"
+def test_extract_command_reads_json_embedded_in_text() -> None:
+    raw = 'Use this command: {"command":"redis-cli LLEN job_queue"} thanks.'
+    assert extract_command(raw) == "redis-cli LLEN job_queue"
+def test_extract_command_reads_json_after_reasoning_preamble() -> None:
+    raw = (
+        "I'll start by checking process state.\n"
+        '{"command":"ps aux","reasoning":"list processes"}'
+    )
+    assert extract_command(raw) == "ps aux"
+    assert extract_reasoning(raw) == "list processes"
+def test_extract_command_prefers_first_json_object_with_command() -> None:
+    raw = '{"meta":"skip"} then {"command":"ls -la","reasoning":"explore"}'
+    assert extract_command(raw) == "ls -la"
+def test_extract_reasoning_when_present() -> None:
+    raw = '{"command":"redis-cli LLEN job_queue","reasoning":"check queue depth first"}'
+    assert extract_command(raw) == "redis-cli LLEN job_queue"
+    assert extract_reasoning(raw) == "check queue depth first"
+def test_extract_command_requires_command_even_with_reasoning() -> None:
+    raw = '{"reasoning":"i should inspect logs"}'
+    assert extract_command(raw) is None
+    assert extract_reasoning(raw) is None
+def test_single_line_removes_newlines() -> None:
+    assert _single_line("echo a\necho b") == "echo a echo b"
+def test_task_symptom_block_is_non_empty() -> None:
+    block = _task_symptom_block(TaskName.ROUTE_PARTITION)
+    assert "connectivity path issue" in block
+    assert "route-partition" not in block
+def test_task_symptom_block_includes_new_tasks() -> None:
+    registry_block = _task_symptom_block(TaskName.REGISTRY_CORRUPTION)
+    runaway_block = _task_symptom_block(TaskName.JOB_GENERATOR_RUNAWAY)
+    assert "registry" in registry_block.lower()
+    assert "queue" in runaway_block.lower()
+    assert "job-generator-runaway" not in runaway_block
+def test_attempt_history_block_renders_all_attempts() -> None:
+    attempts = [
+        {
+            "step": 1,
+            "command": "redis-cli LLEN job_queue",
+            "reasoning": "check backlog",
+            "reward": 0.12,
+            "error": None,
+        },
+        {
+            "step": 2,
+            "command": "curl -s localhost:3000/health",
+            "reasoning": None,
+            "reward": 0.08,
+            "error": "timeout",
+        },
+    ]
+    block = _attempt_history_block(attempts)
+    assert "step 1: command=redis-cli LLEN job_queue" in block
+    assert "step 2: command=curl -s localhost:3000/health" in block
+    assert "reasoning=check backlog" in block
+    assert "error=timeout" in block
+    assert "reward=" not in block
+def test_build_prompt_contains_symptoms_and_history() -> None:
+    obs = Observation(
+        command_output="service checks show partial failures",
+        metrics=SystemMetrics(
+            gateway_success_rate=0.32,
+            gateway_p99_latency_ms=1500.0,
+            queue_depth=412,
+            worker_restart_count=3,
+            consumer_stall_count=2,
+        ),
+        process_status={"gateway": "running", "worker": "running"},
+    )
+    prompt = build_prompt(
+        obs=obs,
+        step_num=3,
+        task_name=TaskName.BACKPRESSURE_CASCADE,
+        attempt_history=[
+            {
+                "step": 1,
+                "command": "redis-cli LLEN job_queue",
+                "reasoning": "measure backlog",
+                "reward": 0.10,
+                "error": None,
+            }
+        ],
+    )
+    assert "TASK SYMPTOMS:" in prompt
+    assert "PREVIOUS ATTEMPTS:" in prompt
+    assert "step 1: command=redis-cli LLEN job_queue" in prompt
+    assert "LATEST COMMAND OUTPUT:" in prompt
+    assert "reward=" not in prompt
+def test_parse_tasks_default_and_override() -> None:
+    previous = os.getenv("TASKS_CSV")
+    try:
+        os.environ.pop("TASKS_CSV", None)
+        default_tasks = _parse_tasks()
+        assert default_tasks == [
+            TaskName.CASCADING_TIMEOUT,
+            TaskName.BYZANTINE_QUEUE_FAULT,
+            TaskName.DISTRIBUTED_LOCK_STARVATION,
+        ]
+        os.environ["TASKS_CSV"] = "route-partition,backpressure-cascade"
+        assert _parse_tasks() == [
+            TaskName.ROUTE_PARTITION,
+            TaskName.BACKPRESSURE_CASCADE,
+        ]
+        os.environ["TASKS_CSV"] = "registry-corruption,job-generator-runaway"
+        assert _parse_tasks() == [
+            TaskName.REGISTRY_CORRUPTION,
+            TaskName.JOB_GENERATOR_RUNAWAY,
+        ]
+    finally:
+        if previous is None:
+            os.environ.pop("TASKS_CSV", None)
+        else:
+            os.environ["TASKS_CSV"] = previous
+def test_episode_score_clamps_terminal_reward_to_unit_interval() -> None:
+    assert _episode_score([]) == 0.0
+    assert _episode_score([0.2, 0.8]) == 0.8
+    assert _episode_score([1.2]) == 1.0
+    assert _episode_score([-0.1]) == 0.0
+def test_end_log_line_includes_score_and_reward_list() -> None:
+    line = _format_end_line(success=True, steps=3, score=0.987, rewards=[0.0, 0.125, 1.0])
+    assert line == (
+        "[END]   success=true steps=3 score=0.99 rewards=0.00,0.12,1.00"
+    )

tests/test_models.py ADDED Viewed

	@@ -0,0 +1,42 @@

+from pydantic import ValidationError
+from server.models import Action, Observation, SystemMetrics
+def test_system_metrics_rejects_success_rate_above_one() -> None:
+    try:
+        SystemMetrics(
+            gateway_success_rate=1.2,
+            gateway_p99_latency_ms=20,
+            queue_depth=0,
+            worker_restart_count=0,
+            consumer_stall_count=0,
+        )
+    except ValidationError:
+        return
+    raise AssertionError("Expected ValidationError for success rate > 1.0")
+def test_observation_roundtrip() -> None:
+    original = Observation(
+        command_output="ok",
+        metrics=SystemMetrics(
+            gateway_success_rate=0.7,
+            gateway_p99_latency_ms=123,
+            queue_depth=3,
+            worker_restart_count=1,
+            consumer_stall_count=2,
+        ),
+        process_status={"gateway": "running pid=42"},
+    )
+    restored = Observation.model_validate_json(original.model_dump_json())
+    assert restored == original
+def test_action_rejects_empty_command() -> None:
+    try:
+        Action(command="   ")
+    except ValidationError:
+        return
+    raise AssertionError("Expected ValidationError for empty command")

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff