#!/usr/bin/env python3
"""Solve a coding task with a hosted LLM via Hugging Face Inference.
This script mirrors ``textarena_wordle_inference.py`` but targets the Coding
environment. It launches the CodingEnv Docker image locally and asks an
OpenAI-compatible model served through Hugging Face's router to iteratively
produce Python code until the task is solved.
Prerequisites
-------------
1. Build the Coding environment Docker image::
docker build \
-f envs/coding_env/server/Dockerfile \
-t coding-env:latest .
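2. Set your Hugging Face token, or any other API key that is compatible with
   the OpenAI API. Only one of the two variables is required; ``API_KEY``
   takes precedence over ``HF_TOKEN`` when both are set::

       export HF_TOKEN=your_token_here
       # or
       export API_KEY=your_api_key_here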
3. Run the script::
python examples/coding_env_inference.py
The script keeps sending execution feedback to the model until the generated
code exits with code 0 and prints ``Result: 338350``, or until the configured
step limit (``MAX_STEPS``) is reached.
"""
from __future__ import annotations
import os
import re
from typing import List, Tuple
from openai import OpenAI
from coding_env import CodeAction, CodingEnv
# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
# Endpoint, credentials and model for the OpenAI-compatible router.
API_BASE_URL = "https://router.huggingface.co/v1"
# API_KEY wins when both variables are set; HF_TOKEN is the fallback.
API_KEY = os.environ.get("API_KEY") or os.environ.get("HF_TOKEN")
MODEL = "openai/gpt-oss-120b:novita"

# Upper bound on request/execute rounds before the session gives up.
MAX_STEPS = 5
# When true, each step's code and execution result are echoed to stdout.
VERBOSE = True

# Task statement sent to the model, and the stdout marker that counts as solved.
CODING_TASK = (
    "Write Python code that prints the sum of squares of the integers "
    "from 1 to 100 inclusive. "
    "The final line must be exactly `Result: <value>` "
    "with the correct number substituted."
)
EXPECTED_SUBSTRING = "Result: 338350"

SYSTEM_PROMPT = (
    "You are an expert Python programmer. "
    "Respond with valid Python code that solves the user's task. "
    "Always wrap your final answer in a fenced code block "
    "starting with ```python. "
    "Provide a complete script that can be executed as-is, "
    "with no commentary outside the code block."
)
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
# Matches a fenced block whose optional language tag is a Python alias
# ("python", "py", "python3", any letter case). The closing fence is optional
# so that a reply cut off before the final ``` still yields its code rather
# than the raw text with the fence line in it.
_CODE_FENCE_RE = re.compile(
    r"```[ \t]*(?:(?:python|py)3?\b)?\s*(.*?)(?:```|\Z)",
    re.IGNORECASE | re.DOTALL,
)


def extract_python_code(text: str) -> str:
    """Extract the first fenced code block from the model output.

    Args:
        text: Raw assistant reply. ``None`` or an empty string is accepted and
            yields an empty result.

    Returns:
        The stripped contents of the first fenced block, with a leading
        ``python`` / ``py`` / ``python3`` language tag removed. If the reply
        contains no fence at all, the whole reply is returned stripped. If the
        first fence is never closed, everything after it is returned.
    """
    if not text:
        return ""
    match = _CODE_FENCE_RE.search(text)
    if match is None:
        # No fence anywhere: assume the model replied with bare code.
        return text.strip()
    return match.group(1).strip()
def format_feedback(
    step: int,
    stdout: str,
    stderr: str,
    exit_code: int,
) -> str:
    """Render the outcome of the previous execution as a follow-up prompt.

    A stream that is empty or whitespace-only is shown as ``<empty>``;
    otherwise it is included unmodified.
    """
    placeholder = "<empty>"
    shown_stdout = stdout if stdout.strip() else placeholder
    shown_stderr = stderr if stderr.strip() else placeholder
    sections = [
        f"Execution feedback for step {step}:",
        f"exit_code={exit_code}",
        "stdout:",
        shown_stdout,
        "stderr:",
        shown_stderr,
        "If the task is not solved, return an improved Python script.",
    ]
    return "\n".join(sections)
def build_initial_prompt(task: str) -> str:
    """Wrap *task* in the instructions sent as the first user message."""
    preamble = (
        "You must write Python code to satisfy the following task. "
        "When executed, your script should behave exactly as described."
    )
    task_section = f"Task:\n{task}"
    closing = "Reply with the full script in a single ```python code block."
    # Sections are separated by one blank line each.
    return "\n\n".join((preamble, task_section, closing))
# ---------------------------------------------------------------------------
# Task-solving loop
# ---------------------------------------------------------------------------
def solve_coding_task(
    env: CodingEnv,
    client: OpenAI,
) -> Tuple[bool, List[str]]:
    """Iteratively ask the model for code until the task is solved.

    Each round sends the conversation so far to ``MODEL``, extracts the code
    from the reply, executes it in ``env`` and, when the run did not succeed,
    appends the execution feedback as the next user message. At most
    ``MAX_STEPS`` rounds are attempted.

    Args:
        env: Coding environment. It is reset once up front and then stepped
            with every extracted script.
        client: OpenAI-compatible client used for the chat completion calls.

    Returns:
        ``(solved, transcripts)``. ``solved`` is True when a run exited with
        code 0 and its stdout contained ``EXPECTED_SUBSTRING``.
        ``transcripts`` holds one formatted entry per executed step.
    """
    history = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": build_initial_prompt(CODING_TASK)},
    ]
    # The system prompt and the task statement must survive history trimming;
    # without the task statement the model no longer knows what to solve.
    pinned = len(history)
    max_history = 20

    # Only the reset itself matters here; its observation is not used.
    env.reset()
    transcripts: List[str] = []

    for step in range(1, MAX_STEPS + 1):
        response = client.chat.completions.create(
            model=MODEL,
            messages=history,
            max_tokens=2048,
            temperature=0.2,
        )
        # The API types ``content`` as optional, so treat a missing reply as
        # an empty one instead of failing on ``None.strip()``.
        assistant_message = (response.choices[0].message.content or "").strip()
        history.append({"role": "assistant", "content": assistant_message})

        code = extract_python_code(assistant_message)
        if VERBOSE:
            print(f"\n🛠️ Step {step}: executing model-produced code")
            print(code)

        result = env.step(CodeAction(code=code))
        obs = result.observation
        transcripts.append(
            (
                f"Step {step} | exit_code={obs.exit_code}\nstdout:\n{obs.stdout}\nstderr:\n{obs.stderr}\n"
            )
        )
        if VERBOSE:
            print(" ▶ exit_code:", obs.exit_code)
            if obs.stdout:
                print(" ▶ stdout:\n" + obs.stdout)
            if obs.stderr:
                print(" ▶ stderr:\n" + obs.stderr)

        solved = obs.exit_code == 0 and EXPECTED_SUBSTRING in obs.stdout
        if solved:
            return True, transcripts

        history.append(
            {
                "role": "user",
                "content": format_feedback(
                    step,
                    obs.stdout,
                    obs.stderr,
                    obs.exit_code,
                ),
            }
        )
        # Keep conversation history compact to avoid exceeding context limits:
        # retain the pinned prefix plus the most recent exchanges.
        if len(history) > max_history:
            history = history[:pinned] + history[-(max_history - pinned):]

    return False, transcripts
# ---------------------------------------------------------------------------
# Entrypoint
# ---------------------------------------------------------------------------
def main() -> None:
    """Run one solve session and print its outcome and transcripts.

    Exits with an error message when neither API key variable is set. The
    environment is started from the ``coding-env:latest`` Docker image and is
    closed again whether or not solving raised.
    """
    if not API_KEY:
        raise SystemExit(
            "HF_TOKEN (or API_KEY) must be set to query the model."
        )

    llm_client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
    coding_env = CodingEnv.from_docker_image(
        "coding-env:latest",
        ports={8000: 8000},
    )
    try:
        success, transcripts = solve_coding_task(coding_env, llm_client)
    finally:
        # Release the environment even when the solve loop fails.
        coding_env.close()

    if success:
        verdict = "\n✅ Session complete"
    else:
        verdict = "\n⚠️ Session finished without solving the task"
    print(verdict)

    print("--- Execution transcripts ---")
    for transcript in transcripts:
        print(transcript)


if __name__ == "__main__":
    main()
|