codearena-rl / server /raw_runner.py
havinashpatil
Finalizing CodeArena RL Benchmark: frontend improvements, GRPO training scripts, and cleaned environment
03a7eb9
raw
history blame
3.82 kB
import subprocess
import time
import os
import tempfile
import sys
from pydantic import BaseModel
from typing import Optional
class RawRunResult(BaseModel):
    # Outcome of one run_raw_code() call: captured output, timing, and a
    # complexity hint for the submitted code.

    # Text captured from the child process's standard output (partial output
    # when the run timed out).
    stdout: str
    # Text captured from standard error, or a fixed timeout message when the
    # run exceeded its time limit.
    stderr: str
    # Duration in seconds; on timeout this is set to the timeout limit itself.
    execution_time: float
    # Human-readable complexity note: AI analysis text, the heuristic fallback
    # hint, or a fixed message for timed-out runs.
    time_complexity_hint: str
def analyze_complexity_hint_fallback(code: str, exec_time: float) -> str:
    """Return a rough time-complexity hint from loop nesting and run time.

    The code is parsed with ``ast`` and the deepest nesting of ``for`` /
    ``while`` loops (comprehension clauses count as loops too) selects the
    hint. Counting real nesting avoids two problems of plain substring
    matching: identifiers such as ``format`` or ``before`` are not mistaken
    for loops, and two sequential loops are not reported as nested.

    If the code cannot be parsed, nesting cannot be determined, so any
    ``for``/``while`` keyword is treated as a single loop level.

    Args:
        code: Python source text to inspect.
        exec_time: Measured execution time in seconds.

    Returns:
        ``"N/A (No function defined)"`` when the code defines no function,
        otherwise a Big-O style hint, optionally followed by a remark when
        the run was slow (> 1.0 s) or very fast (< 0.01 s).
    """
    # Local imports, matching the lazy-import style used elsewhere in this module.
    import ast
    import re

    loop_nodes = (ast.For, ast.AsyncFor, ast.While)
    comprehension_nodes = (ast.ListComp, ast.SetComp, ast.DictComp, ast.GeneratorExp)
    function_nodes = (ast.FunctionDef, ast.AsyncFunctionDef)

    try:
        tree = ast.parse(code)
    except (SyntaxError, ValueError, RecursionError):
        tree = None

    if tree is None:
        # Unparsable code: fall back to whole-word keyword checks.
        has_function = "def " in code
        loop_depth = 1 if re.search(r"\b(?:for|while)\b", code) else 0
    else:
        has_function = False
        loop_depth = 0
        # Iterative traversal so deeply nested input cannot exhaust the
        # recursion limit; each entry carries the loop depth enclosing it.
        pending = [(tree, 0)]
        while pending:
            node, depth = pending.pop()
            if isinstance(node, function_nodes):
                has_function = True
            if isinstance(node, loop_nodes):
                depth += 1
            elif isinstance(node, comprehension_nodes):
                # Each "for" clause of a comprehension is one loop level.
                depth += len(node.generators)
            loop_depth = max(loop_depth, depth)
            pending.extend((child, depth) for child in ast.iter_child_nodes(node))

    if not has_function:
        return "N/A (No function defined)"

    if loop_depth >= 2:
        hint = "O(N^2) or O(N^3) detected"
    elif loop_depth == 1:
        hint = "O(N) or O(N log N) detected"
    else:
        hint = "O(1) or O(N)"

    # The em dash is written as an escape so it survives any re-encoding of
    # this file (the previous literal had been corrupted into mojibake).
    if exec_time > 1.0:
        hint += " \u2014 High execution time, consider optimizing!"
    elif exec_time < 0.01:
        hint += " \u2014 Runs very fast!"
    return hint
def analyze_complexity_ai(
    code: str,
    exec_time: float,
    *,
    model: str = "codearena",
    endpoint: str = "http://localhost:11434/api/generate",
    request_timeout: float = 10.0,
) -> str:
    """Ask an Ollama model for a 5-step complexity analysis of ``code``.

    A non-streaming generate request is POSTed to ``endpoint``. If the
    request fails for any reason, or the model returns an empty answer, the
    heuristic ``analyze_complexity_hint_fallback`` is used instead, so this
    function always returns a hint and never raises on service problems.

    Args:
        code: Python source text to analyze.
        exec_time: Measured execution time in seconds; only used by the
            heuristic fallback.
        model: Name of the model to query.
        endpoint: URL of the generate API.
        request_timeout: Seconds to wait for the HTTP response.

    Returns:
        The model's analysis prefixed with a header line, or the fallback
        hint when no analysis could be obtained.
    """
    import json
    import urllib.request

    # Built from explicit lines so the prompt text does not depend on how
    # this function happens to be indented.
    prompt = "\n".join([
        "You are an expert Python performance analyst.",
        "Analyze the following code using these 5 steps:",
        "1. Identify the core algorithm.",
        "2. Calculate current Time Complexity (Big-O).",
        "3. Calculate current Space Complexity (Big-O).",
        "4. Identify bottlenecks.",
        "5. Propose a more efficient time complexity if possible.",
        "CODE:",
        f"{code}",
        "Return a concise 5-line summary (one line per step). No markdown blocks.",
    ])

    try:
        payload = json.dumps({
            "model": model,
            "prompt": prompt,
            "stream": False,
            "options": {"temperature": 0.1, "num_predict": 256},
        }).encode()
        req = urllib.request.Request(
            endpoint,
            data=payload,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        with urllib.request.urlopen(req, timeout=request_timeout) as resp:
            data = json.loads(resp.read())
        result = data.get("response", "").strip()
        if result:
            # Robot emoji (U+1F916) written as an escape; the previous literal
            # had been corrupted into mojibake by an encoding round-trip.
            return f"\n\U0001F916 AI Complexity Analysis:\n{result}"
    except Exception as e:
        # Deliberately broad: the AI analysis is best-effort, and any failure
        # (service down, timeout, malformed reply) must degrade to the
        # heuristic below rather than break the caller.
        print(f"Ollama complexity failed: {e}")

    return analyze_complexity_hint_fallback(code, exec_time)
def run_raw_code(code: str, timeout: float = 5.0) -> RawRunResult:
    """Run Python source in a child interpreter and report its outcome.

    The code is written to a temporary ``.py`` file and executed with the
    same interpreter that runs this process (``sys.executable``). The
    temporary file is always removed afterwards.

    SECURITY: ``code`` is executed as-is with the privileges of this process;
    there is no sandboxing here. Only call this in an environment that is
    isolated from anything the submitted code must not reach.

    Args:
        code: Python source text to execute.
        timeout: Maximum run time in seconds before the child is killed.

    Returns:
        A ``RawRunResult`` with the captured stdout/stderr, the elapsed time
        and a complexity hint. On timeout, ``stdout`` holds whatever output
        was captured, ``stderr`` holds a fixed explanation, and
        ``execution_time`` is set to ``timeout``.
    """
    temp_file = None
    try:
        # Python reads source files as UTF-8 by default, so write UTF-8
        # explicitly instead of relying on the platform's locale encoding.
        with tempfile.NamedTemporaryFile(
            mode="w", suffix=".py", delete=False, encoding="utf-8"
        ) as f:
            temp_file = f.name
            f.write(code)

        # perf_counter is monotonic, so the measurement cannot be skewed by
        # wall-clock adjustments during the run.
        start_time = time.perf_counter()
        try:
            process = subprocess.run(
                [sys.executable, temp_file],
                capture_output=True,
                text=True,
                # Arbitrary code may emit bytes that are not valid text;
                # replace them rather than fail while decoding the output.
                errors="replace",
                timeout=timeout,
            )
        except subprocess.TimeoutExpired as e:
            # Depending on platform, the partial output attached to the
            # exception may be bytes or already-decoded str (or None).
            partial_stdout = e.stdout
            if isinstance(partial_stdout, bytes):
                partial_stdout = partial_stdout.decode("utf-8", errors="replace")
            return RawRunResult(
                stdout=partial_stdout or "",
                stderr="Execution timed out! The code took too long to run or entered an infinite loop.",
                execution_time=timeout,
                time_complexity_hint="O(∞) - Infinite loop or very high complexity.",
            )

        exec_time = time.perf_counter() - start_time
        return RawRunResult(
            stdout=process.stdout,
            stderr=process.stderr,
            execution_time=exec_time,
            time_complexity_hint=analyze_complexity_ai(code, exec_time),
        )
    finally:
        # Remove the temp file even if writing or running failed; tolerate it
        # already being gone instead of checking for existence first.
        if temp_file is not None:
            try:
                os.remove(temp_file)
            except FileNotFoundError:
                pass