File size: 7,171 Bytes
cacd58c
 
 
 
 
 
 
 
 
 
 
 
 
 
d9f4cc2
 
 
 
509448b
f60ce5a
d9f4cc2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
361a2d6
d9f4cc2
 
 
 
 
 
 
 
 
361a2d6
d9f4cc2
361a2d6
d9f4cc2
 
 
 
 
 
 
 
 
 
 
 
 
 
361a2d6
 
d9f4cc2
 
361a2d6
 
 
 
 
d9f4cc2
 
 
 
361a2d6
d9f4cc2
361a2d6
d9f4cc2
 
 
 
361a2d6
 
 
 
d9f4cc2
 
 
 
 
361a2d6
 
 
 
 
 
d9f4cc2
 
 
f60ce5a
 
cacd58c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94140d6
cacd58c
94140d6
 
 
 
cacd58c
94140d6
cacd58c
 
1021aa7
 
 
0df2253
 
1021aa7
 
0df2253
 
 
 
1021aa7
0df2253
1021aa7
cacd58c
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# server/app.py
from fastapi import FastAPI, HTTPException
from openenv.core.env_server import create_fastapi_app
from ..models import Action, Observation, TaskInfo
from .environment import CodeDebugEnvironment
from .tasks import TASK_REGISTRY
from .grader import grade

# Core OpenEnv app (provides /reset, /step, /state, /ws, /health)
# Built by ``create_fastapi_app`` from the environment class and the
# Action / Observation models; the extra routes defined further down in this
# module are registered on this same ``app`` object.
app = create_fastapi_app(CodeDebugEnvironment, Action, Observation)


# ── Additional required hackathon endpoints ────────────────────────────

from fastapi.responses import HTMLResponse

@app.get("/", response_class=HTMLResponse)
@app.get("/web", response_class=HTMLResponse)
@app.get("/web/", response_class=HTMLResponse)
def home() -> str:
    """Serve the static landing page.

    The same handler is registered for ``/``, ``/web`` and ``/web/``.

    Returns:
        The complete HTML document as a string. It links to ``/health``,
        ``/tasks``, ``/baseline`` and the external documentation page.
    """
    # Non-ASCII glyphs (button icons, the multiplication sign) are written as
    # HTML character references so the page renders correctly no matter how
    # this source file is encoded or re-encoded; a literal "&" is escaped too.
    return """
    <!DOCTYPE html>
    <html>
    <head>
        <title>Code Debug Env | OpenEnv</title>
        <style>
            @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;800&display=swap');
            body { 
                font-family: 'Inter', sans-serif; 
                background: linear-gradient(135deg, #0f172a 0%, #1e1b4b 100%); 
                color: #f8fafc; 
                display: flex; 
                flex-direction: column; 
                align-items: center; 
                justify-content: center; 
                min-height: 100vh; 
                margin: 0; 
                text-align: center;
                overflow-x: hidden;
            }
            .container { 
                background: rgba(30, 41, 59, 0.7); 
                backdrop-filter: blur(12px); 
                padding: 3rem; 
                border-radius: 24px; 
                border: 1px solid rgba(255, 255, 255, 0.1);
                box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.5);
                max-width: 600px;
                animation: fadeIn 0.8s ease-out;
            }
            @keyframes fadeIn { from { opacity: 0; transform: translateY(20px); } to { opacity: 1; transform: translateY(0); } }
            h1 { font-size: 3rem; margin-bottom: 0.5rem; background: linear-gradient(to right, #60a5fa, #a78bfa); -webkit-background-clip: text; -webkit-text-fill-color: transparent; }
            p { color: #94a3b8; font-size: 1.1rem; line-height: 1.6; }
            .badge { background: #334155; padding: 4px 12px; border-radius: 20px; font-size: 0.8rem; vertical-align: middle; border: 1px solid #475569; }
            .grid { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin-top: 2rem; }
            .btn { 
                display: block; 
                padding: 1rem; 
                border-radius: 12px; 
                background: rgba(255, 255, 255, 0.05); 
                border: 1px solid rgba(255, 255, 255, 0.1); 
                color: #fff; 
                text-decoration: none; 
                transition: all 0.2s;
                font-weight: 600;
                position: relative;
                overflow: hidden;
            }
            .btn:hover { background: rgba(255, 255, 255, 0.1); border-color: #60a5fa; transform: translateY(-2px); }
            .btn.loading { pointer-events: none; opacity: 0.7; }
            .loading-text { display: none; }
            .btn.loading .loading-text { display: inline-block; animation: pulse 1.5s infinite; }
            .btn.loading span:not(.loading-text) { display: none; }
            @keyframes pulse { 0% { opacity: 0.4; } 50% { opacity: 1; } 100% { opacity: 0.4; } }
            footer { margin-top: 2rem; color: #64748b; font-size: 0.9rem; }
        </style>
    </head>
    <body>
        <div class="container" id="main-card">
            <h1>Code Debug Env</h1>
            <p>A production-grade <strong>OpenEnv</strong> for training frontier reasoning agents on code repair tasks. <span class="badge">v1.1.0</span></p>
            
            <div class="grid">
                <a href="/health" class="btn">&#x1F4E1; Server Health</a>
                <a href="/tasks" class="btn">&#x1F4CB; Task Registry</a>
                <a id="baseline-btn" href="/baseline" class="btn">
                    <span>&#x1F916; Run Baseline</span>
                    <span class="loading-text">⏳ Evaluating (~2 mins)...</span>
                </a>
                <a href="https://huggingface.co/spaces/luciferai-devil/code-debug-env" class="btn">&#x1F4D6; Documentation</a>
            </div>
            
            <footer>Built for Meta &amp; PyTorch &times; Scaler Hackathon</footer>
        </div>

        <script>
            document.getElementById('baseline-btn').addEventListener('click', function(e) {
                this.classList.add('loading');
            });
        </script>
    </body>
    </html>
    """


@app.get("/tasks")
def list_tasks() -> list[TaskInfo]:
    """List every registered task.

    Each entry carries the task id, the ``difficulty`` and ``description``
    stored in ``TASK_REGISTRY`` for that id, and the JSON schema of the
    ``Action`` model so clients know what an action looks like.
    """
    catalogue: list[TaskInfo] = []
    for task_key, spec in TASK_REGISTRY.items():
        entry = TaskInfo(
            task_id=task_key,
            difficulty=spec["difficulty"],
            description=spec["description"],
            action_schema=Action.model_json_schema(),
        )
        catalogue.append(entry)
    return catalogue


@app.get("/grader")
def get_grader_score(task_id: str, submitted_code: str) -> dict:
    """
    Grade one submission against a task's test suite (testing / evaluation).

    Args:
        task_id: Key of the task in ``TASK_REGISTRY``.
        submitted_code: Source code to grade.

    Returns:
        { task_id: str, score, passed, total, test_results: list } where
        ``test_results`` holds the ``model_dump()`` of each graded test.

    Raises:
        HTTPException: 404 when ``task_id`` is not a registered task.
    """
    if task_id in TASK_REGISTRY:
        spec = TASK_REGISTRY[task_id]
        outcome = grade(submitted_code, task_id, spec["test_suite"])

        payload = {
            "task_id": task_id,
            "score": outcome["score"],
            "passed": outcome["passed"],
            "total": outcome["total"],
        }
        # Convert each per-test result model into a plain dict for the response.
        dumped = []
        for single in outcome["test_results"]:
            dumped.append(single.model_dump())
        payload["test_results"] = dumped
        return payload

    raise HTTPException(status_code=404, detail=f"Unknown task_id: {task_id}")


def _last_json_object(text: str) -> "dict | None":
    """Return the last top-level JSON object embedded in *text*, or ``None``.

    Every ``{`` is tried as the start of a JSON document. Braces that do not
    begin valid JSON (log noise, reprs, ...) are skipped, and a successfully
    decoded object is jumped over so its nested braces are not rescanned.
    """
    import json

    decoder = json.JSONDecoder()
    found = None
    pos = text.find("{")
    while pos != -1:
        try:
            obj, consumed = decoder.raw_decode(text[pos:])
        except ValueError:
            # Not valid JSON from this brace; move on to the next candidate.
            pos = text.find("{", pos + 1)
        else:
            found = obj
            pos = text.find("{", pos + consumed)
    return found


@app.get("/baseline")
def run_baseline() -> dict:
    """
    Run the baseline agent on all tasks and return scores.

    Launches ``baseline/run_baseline.py`` (resolved relative to the parent of
    this file's directory) with ``--output json`` in a subprocess, waits up to
    120 seconds, and returns the JSON object found in its stdout.

    Returns:
        The decoded JSON object printed by the baseline script.

    Raises:
        HTTPException: 500 for any failure (timeout, missing or invalid JSON,
            subprocess errors); ``detail`` carries the underlying message.
    """
    import json
    import os
    import re
    import subprocess
    import sys

    try:
        # Get absolute path to the baseline script
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        baseline_script = os.path.join(base_dir, "baseline", "run_baseline.py")

        result = subprocess.run(
            [sys.executable, baseline_script, "--output", "json"],
            capture_output=True, text=True, timeout=120,
        )

        stdout = result.stdout.strip()
        stderr = result.stderr.strip()

        # Fast path: treat everything from the first "{" to the last "}" as
        # the JSON payload.
        match = re.search(r'(\{.*\})', stdout, re.DOTALL)
        if match:
            try:
                return json.loads(match.group(1))
            except Exception as j_err:
                # The greedy span breaks as soon as log noise contains a brace
                # of its own; fall back to scanning for a decodable object.
                recovered = _last_json_object(stdout)
                if recovered is not None:
                    return recovered
                raise ValueError(f"JSON Decode Error: {j_err}. Raw Match: {match.group(1)}")

        raise ValueError(f"No JSON found. Stdout: {stdout[:100]}. Stderr: {stderr[:100]}. ReturnCode: {result.returncode}")

    except Exception as e:
        # Top-level boundary: report every failure as a 500, keeping the cause.
        raise HTTPException(status_code=500, detail=str(e)) from e


def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """Entry point for the server.

    Args:
        host: Address to bind to; the default listens on all interfaces.
        port: TCP port to listen on.

    Calling ``main()`` with no arguments behaves exactly as before
    (``0.0.0.0:8000``).
    """
    # Local import: uvicorn is only needed when actually serving the app.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


# Start the server only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()