"""HTTP front-end for the Conflict Arbitration environment.

Exposes a single module-level ``ConflictArbitrationEnv`` instance through a
small FastAPI application: a landing page, ``/reset``, ``/step``, ``/state``
and ``/health``.
"""

from fastapi import FastAPI
from fastapi.responses import HTMLResponse
from env.openenv_wrapper import ConflictArbitrationEnv
from pydantic import BaseModel

app = FastAPI(title="Conflict Arbitration Environment")

# One environment instance is created at import time and shared by every
# request handler below.
env = ConflictArbitrationEnv()

# Landing-page body returned by ``GET /``.
# NOTE(review): as visible here the literal contains no HTML tags although it
# is served through HTMLResponse -- confirm the markup was not lost.
HOMEPAGE = """ Conflict Arbitration Environment
● running

Conflict Arbitration Environment

OpenEnv FastAPI GRPO Team WooshiWooshi

What this is

Three agents, one task, one conflict, one arbitrator. Two frozen worker agents build the same spec in parallel. A third agent (Agent C) sees both outputs plus the original spec, decides who drifted, and stops the wrong one before merge fails. Agent C is trained via GRPO on programmatic, contrastive rewards — no LLM as judge, no hardcoded rules.

Endpoints

GET /healthliveness checkreturns status + env name
POST /resetstart a new episodereturns spec + Agent A/B outputs
POST /stepsubmit Agent C decisionreturns reward + merge result
GET /statefull episode ground truthlogging/debug only
GET /docsinteractive OpenAPI UItry every endpoint live

Quick test

curl https://testingaccc-conflict-arbitration-env.hf.space/health

curl -X POST https://testingaccc-conflict-arbitration-env.hf.space/reset

curl -X POST https://testingaccc-conflict-arbitration-env.hf.space/step \\
  -H "Content-Type: application/json" \\
  -d '{"conflict_detected": true, "action": "stop_a",
       "reason": "A drifted", "correction_request": "use canonical name"}'

Action schema

{
  "conflict_detected": true | false,
  "action": "stop_a" | "stop_b" | "nothing",
  "reason": "one sentence describing the conflict",
  "correction_request": "specific instruction to the stopped agent"
}
"""


@app.get("/", response_class=HTMLResponse)
def homepage():
    """Return the static landing page text."""
    return HOMEPAGE


class Action(BaseModel):
    """Request body accepted by ``POST /step``."""

    # Whether the arbitrator believes the two worker outputs conflict.
    conflict_detected: bool
    # The landing page lists "stop_a", "stop_b" and "nothing"; the value is
    # not restricted here and is forwarded to the environment as-is.
    action: str
    # Free-text description of the conflict.
    reason: str
    # Optional instruction for the stopped agent; defaults to an empty string.
    correction_request: str = ""


@app.post("/reset")
def reset():
    """Call ``env.reset()`` and return whatever it produces."""
    return env.reset()


@app.post("/step")
def step(action: Action):
    """Forward the validated action to ``env.step()`` as a plain dict."""
    # Pydantic v2 deprecates ``.dict()`` in favour of ``.model_dump()``;
    # prefer the new name when it exists and fall back on v1 installs.
    if hasattr(action, "model_dump"):
        payload = action.model_dump()
    else:
        payload = action.dict()
    return env.step(payload)


@app.get("/state")
def state():
    """Return the result of ``env.state()``."""
    return env.state()


@app.get("/health")
def health():
    """Liveness probe: report a fixed status and the environment name."""
    return {"status": "ok", "env": "ConflictArbitrationEnv"}