File size: 3,458 Bytes
fda6582
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# server/app.py – OpenEnv HTTP server
import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))

from fastapi import FastAPI, HTTPException
from environment import CodeReviewEnv
from models import AnyAction, Observation, Reward, State, action_adapter

# FastAPI application object; the route decorators in this module register handlers on it.
app = FastAPI(title="Code Review Environment", version="1.0.0")
# Single module-level environment instance used by the /reset, /step and /state handlers.
env = CodeReviewEnv()

# ----------------------------------------------------------------------
# Health & metadata endpoints
# ----------------------------------------------------------------------
@app.get("/")
def root():
    """Handle ``GET /``: log the hit and return a fixed status payload."""
    payload = {"status": "crazy good"}
    print("[ROOT] Health check hit")
    return payload
    
@app.get("/health")
def health():
    """Handle ``GET /health``: log the probe and report a healthy status."""
    print("[HEALTH] Service is healthy")
    return dict(status="healthy")

@app.get("/metadata")
def metadata():
    """Handle ``GET /metadata``: return the environment's name and description."""
    print("[METADATA] Requested")

    # The description is assembled from sentence-sized fragments; each fragment
    # carries its own trailing space so the joined text reads as one paragraph.
    description_parts = (
        "Multi‑turn code review environment for professional‑level bug fixing. ",
        "The agent must inspect, test, lint, query documentation, and negotiate with ",
        "a simulated (persona‑driven) author to get a fix accepted. ",
        "Includes 25 bugs across 5 difficulty levels, AST‑based injection, ",
        "a reward‑shaping system (full/core profiles), and curriculum learning. ",
        "Designed for RL training (PPO, DPO, or any policy‑gradient method).",
    )
    info = {"name": "Code Review Professional Workflow"}
    info["description"] = "".join(description_parts)
    return info

@app.get("/schema")
def schema():
    """Handle ``GET /schema``: return JSON schemas for action, observation and state."""
    print("[SCHEMA] Requested")
    # Insertion order matches the response key order: action, observation, state.
    schema_sources = {
        "action": AnyAction,
        "observation": Observation,
        "state": State,
    }
    return {
        key: model.model_json_schema()
        for key, model in schema_sources.items()
    }

@app.post("/mcp")
def mcp():
    """Handle ``POST /mcp``: log the ping and reply with a JSON-RPC 2.0 null result."""
    print("[MCP] Ping received")
    reply = {"jsonrpc": "2.0"}
    reply["result"] = None
    return reply

# ----------------------------------------------------------------------
# Environment endpoints
# ----------------------------------------------------------------------
@app.post("/reset")
def reset(task: str = "easy"):
    """Start a new episode on the shared environment.

    Args:
        task: Task identifier handed to ``env.set_task``; defaults to ``"easy"``.

    Returns:
        The ``__dict__`` of the observation returned by ``env.reset()``.

    Raises:
        HTTPException: Status 400 with the error text as ``detail`` when any
            step of the handler raises.
    """
    try:
        print(f"[RESET] Starting new episode | task={task}")

        env.set_task(task)
        obs = env.reset()

        print(f"[RESET DONE] step={env._step_count}")

        return obs.__dict__
    except Exception as e:
        print(f"[RESET ERROR] {e}")
        # Chain the original error so the underlying failure stays attached
        # to the HTTP error as its explicit cause.
        raise HTTPException(status_code=400, detail=str(e)) from e

@app.post("/step")
def step(action: dict):
    """Validate an action payload and advance the shared environment by one step.

    Args:
        action: Raw action payload; it is validated with
            ``action_adapter.validate_python`` before being passed to ``env.step``.

    Returns:
        A dict with keys ``observation`` (the observation's ``__dict__``),
        ``reward`` (``reward.value``), ``done`` and ``info``.

    Raises:
        HTTPException: Status 400 with the error text as ``detail`` when
            validation or the environment step raises.
    """
    try:
        print(f"[STEP INPUT] {action}")

        parsed_action = action_adapter.validate_python(action)
        obs, reward, done, info = env.step(parsed_action)

        print(f"[STEP OUTPUT] reward={reward.value:.4f} | done={done}")

        return {
            "observation": obs.__dict__,
            "reward": reward.value,
            "done": done,
            "info": info,
        }
    except Exception as e:
        print(f"[STEP ERROR] {e}")
        # Chain the original error so the underlying failure stays attached
        # to the HTTP error as its explicit cause.
        raise HTTPException(status_code=400, detail=str(e)) from e

@app.get("/state")
def state():
    """Handle ``GET /state``: return the current observation's attribute dict."""
    print("[STATE] Requested")
    # NOTE(review): this returns the observation, while /schema advertises a
    # separate ``State`` model — confirm which shape clients expect here.
    current_obs = env._get_observation()
    return current_obs.__dict__

# ----------------------------------------------------------------------
# Main entry point (for local testing)
# ----------------------------------------------------------------------
if __name__ == "__main__":
    import uvicorn

    # Bind address and port are named once so the log line and the server agree.
    bind_host, bind_port = "0.0.0.0", 7860
    print(f"[SERVER START] Running on http://{bind_host}:{bind_port}")
    uvicorn.run(app, host=bind_host, port=bind_port)