File size: 3,161 Bytes
2182d10 62f081e 2182d10 1d48da1 2182d10 1d48da1 2182d10 78c01fe 2182d10 78c01fe 2182d10 3d38d37 0263e79 2182d10 3d38d37 2182d10 3d38d37 2182d10 3d38d37 0263e79 3d38d37 2182d10 3d38d37 0263e79 2182d10 3d38d37 2182d10 3d38d37 0263e79 3d38d37 0263e79 3d38d37 0263e79 2182d10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | import os
import sys
import traceback
from openai import OpenAI
from server.environment import CodeReviewEnv
# -------------------------------------------------------------------
# Configuration & Environment Variables
# -------------------------------------------------------------------
# Endpoint and model name come from the environment and fall back to the
# defaults below; the token has no fallback.
API_BASE_URL = os.environ.get("API_BASE_URL", "https://api.openai.com/v1")
MODEL_NAME = os.environ.get("MODEL_NAME", "gpt-4.1-mini")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Fail at import time when the token variable is absent, before any client
# is built with it.
if HF_TOKEN is None:
    raise ValueError("HF_TOKEN environment variable is required")
# -------------------------------------------------------------------
# Main Inference Loop
# -------------------------------------------------------------------
def _next_action(client, observation):
    """Ask the model for the next review action and return it on one line.

    Args:
        client: OpenAI client used to issue the chat-completion request.
        observation: Current environment observation, sent as the user turn.

    Returns:
        The model's reply with surrounding whitespace stripped and newlines
        replaced by spaces, so it fits on a single ``[STEP]`` log line. An
        empty string is returned when the reply carries no text content.
    """
    response = client.chat.completions.create(
        model=MODEL_NAME,
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a precise code reviewer. Your ONLY allowed outputs are: "
                    "'COMMENT <line_number> <text>', 'APPROVE', or 'REQUEST_CHANGES'."
                ),
            },
            {"role": "user", "content": observation},
        ],
        max_tokens=100,
    )
    # The chat-completions API types message.content as optional; treat a
    # missing body as an empty action instead of crashing on None.strip().
    content = response.choices[0].message.content or ""
    return content.strip().replace("\n", " ")


def main():
    """Run one episode per difficulty level and print the run log to stdout.

    For each of "easy", "medium" and "hard" a fresh ``CodeReviewEnv`` is
    created and driven until it reports ``done``. One ``[START]`` line, one
    ``[STEP]`` line per environment step and one ``[END]`` line are printed
    per episode. Exceptions raised while an episode is running are reported
    as a final ``[STEP]`` line with ``action=error`` and are not re-raised,
    so the remaining difficulties still run.
    """
    # Maximum optimal reward for this environment: 0.8 (comment) + 1.0
    # (request_changes). Used to normalise the episode score into [0, 1].
    max_episode_reward = 1.8

    client = OpenAI(
        base_url=API_BASE_URL,
        api_key=HF_TOKEN,
    )

    for diff in ["easy", "medium", "hard"]:
        env = CodeReviewEnv(difficulty=diff)

        print(f"[START] task={env.task_name} env={env.benchmark_name} model={MODEL_NAME}", flush=True)

        success = False
        try:
            obs = env.reset()
            done = False
            while not done:
                action_str = _next_action(client, obs)
                obs, reward_str, done, error = env.step(action_str)

                error_str = error if error else "null"
                done_str = "true" if done else "false"
                print(f"[STEP] step={env.steps_taken} action={action_str} reward={reward_str} done={done_str} error={error_str}", flush=True)
            # Reached only when the loop ended without an exception.
            success = True
        except Exception as e:
            # Deliberately broad: any failure ends this episode with an error
            # step so that the [END] line below is still produced.
            error_msg = str(e).replace('\n', ' ')
            print(f"[STEP] step={env.steps_taken} action=error reward=0.00 done=true error={error_msg}", flush=True)
            success = False
        finally:
            # [END] must always be emitted, even on exceptions. Normalise the
            # reward list once so an empty/None value cannot raise in here.
            rewards = env.rewards or []
            success_str = "true" if success else "false"
            score = max(0.0, min(sum(rewards) / max_episode_reward, 1.0))
            score_str = f"{score:.3f}"
            rewards_str = ",".join(f"{r:.2f}" for r in rewards)
            print(f"[END] success={success_str} steps={env.steps_taken} score={score_str} rewards={rewards_str}", flush=True)
# Start the inference loop only when this file is executed as a script;
# importing it as a module does not call main().
if __name__ == "__main__":
    main()
|