import sys # Redirect all print calls from imported modules to stderr _original_print = print def print(*args, **kwargs): kwargs.setdefault('file', sys.stderr) _original_print(*args, **kwargs) import os import textwrap API_BASE_URL = os.getenv("API_BASE_URL", "https://dummy.api") API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY", "dummy-key") MODEL_NAME = os.getenv("MODEL_NAME", "dummy-model") MAX_STEPS = 5 FALLBACK_ACTION = "skip" from environment import CodeReviewEnv from models import Action SYSTEM_PROMPT = textwrap.dedent( """ You are an AI code reviewer. Reply with one of: - write_comment: [comment] - ask_question: [question] - propose_fix: [code] - skip - done """ ).strip() def build_user_prompt(step, obs, history): return f"Step {step}\nCode:\n{obs.code_snippet}\nComments:\n{obs.comments}\nHistory:\n{history}\nYour response:" def parse_model_action(text): if not text: return Action(action_type=FALLBACK_ACTION) lower = text.strip().lower() if lower.startswith("skip"): return Action(action_type="skip") if lower.startswith("done"): return Action(action_type="done") if lower.startswith("write_comment"): comment = text.split(":", 1)[1].strip() if ":" in text else text[14:].strip() return Action(action_type="write_comment", comment_text=comment) if lower.startswith("ask_question"): question = text.split(":", 1)[1].strip() if ":" in text else text[12:].strip() return Action(action_type="ask_question", question=question) if lower.startswith("propose_fix"): fix = text.split(":", 1)[1].strip() if ":" in text else text[11:].strip() return Action(action_type="propose_fix", fix_code=fix) return Action(action_type="write_comment", comment_text=text) def main(): try: from openai import OpenAI client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY) if API_BASE_URL != "https://dummy.api" else None except Exception: client = None env = CodeReviewEnv() tasks = ["easy", "medium", "hard", "harder", "hardest"] EPS = 0.001 for task in tasks: env.set_task(task) obs = env.reset() history = [] done = False step = 0 final_reward = 0.0 sys.stdout.write(f"[START] task={task}\n") sys.stdout.flush() while not done and step < MAX_STEPS: step += 1 prompt = build_user_prompt(step, obs, history) messages = [{"role": "system", "content": SYSTEM_PROMPT}, {"role": "user", "content": prompt}] response_text = FALLBACK_ACTION if client is not None: try: resp = client.chat.completions.create( model=MODEL_NAME, messages=messages, temperature=0.2, max_tokens=200, ) response_text = resp.choices[0].message.content or FALLBACK_ACTION except Exception: pass action = parse_model_action(response_text) obs, reward, done, _ = env.step(action) final_reward = reward.value sys.stdout.write(f"[STEP] step={step} reward={final_reward:.3f}\n") sys.stdout.flush() history.append(f"Step {step}: {action.action_type}") # Clamp the final reward to be strictly between 0 and 1 if final_reward <= 0.0: final_reward = EPS elif final_reward >= 1.0: final_reward = 1.0 - EPS sys.stdout.write(f"[END] task={task} score={final_reward:.3f} steps={step}\n") sys.stdout.flush() if __name__ == "__main__": main()