Spaces:
Sleeping
Sleeping
Update inference.py
Browse files - inference.py +14 -34
inference.py
CHANGED
|
@@ -1,17 +1,3 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Inference Script for Code Review Environment
|
| 3 |
-
===================================
|
| 4 |
-
MANDATORY
|
| 5 |
-
- Before submitting, ensure the following variables are defined in your environment configuration:
|
| 6 |
-
API_BASE_URL The API endpoint for the LLM.
|
| 7 |
-
MODEL_NAME The model identifier to use for inference.
|
| 8 |
-
HF_TOKEN Your Hugging Face / API key.
|
| 9 |
-
|
| 10 |
-
- The inference script must be named `inference.py` and placed in the root directory of the project
|
| 11 |
-
- Participants must use OpenAI Client for all LLM calls using above variables
|
| 12 |
-
- Participants must emit structured stdout logs strictly following the [START], [STEP], and [END] format.
|
| 13 |
-
"""
|
| 14 |
-
|
| 15 |
import os
|
| 16 |
import json
|
| 17 |
import textwrap
|
|
@@ -38,7 +24,6 @@ SYSTEM_PROMPT = textwrap.dedent(
|
|
| 38 |
- propose a fixed code snippet (prefixed with "propose_fix:")
|
| 39 |
- skip if you cannot help (just "skip")
|
| 40 |
- end the episode if the code is perfect (just "done")
|
| 41 |
-
|
| 42 |
Be constructive, specific, and focus on improving code quality.
|
| 43 |
Do not add any other text.
|
| 44 |
"""
|
|
@@ -54,16 +39,12 @@ def build_user_prompt(step: int, obs: Observation, history: List[str]) -> str:
|
|
| 54 |
Step: {step}
|
| 55 |
PR Title: {obs.pr_title}
|
| 56 |
Description: {obs.pr_description}
|
| 57 |
-
|
| 58 |
Code to review:
|
| 59 |
{obs.code_snippet}
|
| 60 |
-
|
| 61 |
Conversation so far:
|
| 62 |
{comments_str}
|
| 63 |
-
|
| 64 |
Previous actions:
|
| 65 |
{history_str}
|
| 66 |
-
|
| 67 |
Your response (choose one of the following formats):
|
| 68 |
- write_comment: [your comment]
|
| 69 |
- ask_question: [your question]
|
|
@@ -106,18 +87,16 @@ def parse_model_action(response_text: str) -> Action:
|
|
| 106 |
|
| 107 |
|
| 108 |
def main() -> None:
|
| 109 |
-
# Check mandatory environment variables
|
| 110 |
if not API_BASE_URL or not API_KEY or not MODEL_NAME:
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
| 112 |
return
|
| 113 |
|
| 114 |
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 115 |
env = CodeReviewEnv()
|
| 116 |
tasks = ["easy", "medium", "hard", "harder", "hardest"]
|
| 117 |
-
scores = {}
|
| 118 |
-
|
| 119 |
-
# Emit START log
|
| 120 |
-
print("[START] code_review_env")
|
| 121 |
|
| 122 |
for task in tasks:
|
| 123 |
env.set_task(task)
|
|
@@ -127,6 +106,9 @@ def main() -> None:
|
|
| 127 |
step = 0
|
| 128 |
final_reward = 0.0
|
| 129 |
|
|
|
|
|
|
|
|
|
|
| 130 |
while not done and step < MAX_STEPS:
|
| 131 |
step += 1
|
| 132 |
user_prompt = build_user_prompt(step, obs, history)
|
|
@@ -143,24 +125,22 @@ def main() -> None:
|
|
| 143 |
)
|
| 144 |
response_text = completion.choices[0].message.content or ""
|
| 145 |
except Exception as exc:
|
| 146 |
-
|
|
|
|
|
|
|
| 147 |
response_text = FALLBACK_ACTION
|
| 148 |
|
| 149 |
action = parse_model_action(response_text)
|
| 150 |
obs, reward, done, info = env.step(action)
|
| 151 |
final_reward = reward.value
|
| 152 |
|
| 153 |
-
#
|
| 154 |
-
|
| 155 |
-
print(f"[STEP] {step} {action.action_type} {final_reward:.3f} {int(done)}")
|
| 156 |
|
| 157 |
-
# Optionally store history for prompt (not part of log)
|
| 158 |
history.append(f"Step {step}: {action.action_type}")
|
| 159 |
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
# Emit END log with JSON scores
|
| 163 |
-
print("[END] " + json.dumps(scores, separators=(',', ':')))
|
| 164 |
|
| 165 |
|
| 166 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
import json
|
| 3 |
import textwrap
|
|
|
|
| 24 |
- propose a fixed code snippet (prefixed with "propose_fix:")
|
| 25 |
- skip if you cannot help (just "skip")
|
| 26 |
- end the episode if the code is perfect (just "done")
|
|
|
|
| 27 |
Be constructive, specific, and focus on improving code quality.
|
| 28 |
Do not add any other text.
|
| 29 |
"""
|
|
|
|
| 39 |
Step: {step}
|
| 40 |
PR Title: {obs.pr_title}
|
| 41 |
Description: {obs.pr_description}
|
|
|
|
| 42 |
Code to review:
|
| 43 |
{obs.code_snippet}
|
|
|
|
| 44 |
Conversation so far:
|
| 45 |
{comments_str}
|
|
|
|
| 46 |
Previous actions:
|
| 47 |
{history_str}
|
|
|
|
| 48 |
Your response (choose one of the following formats):
|
| 49 |
- write_comment: [your comment]
|
| 50 |
- ask_question: [your question]
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
def main() -> None:
|
|
|
|
| 90 |
if not API_BASE_URL or not API_KEY or not MODEL_NAME:
|
| 91 |
+
# Report the missing configuration on stderr; it must not appear on stdout.
|
| 92 |
+
# Use sys.stderr to avoid polluting structured output.
|
| 93 |
+
import sys
|
| 94 |
+
print("Error: API_BASE_URL, HF_TOKEN/API_KEY, and MODEL_NAME must be set.", file=sys.stderr)
|
| 95 |
return
|
| 96 |
|
| 97 |
client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
|
| 98 |
env = CodeReviewEnv()
|
| 99 |
tasks = ["easy", "medium", "hard", "harder", "hardest"]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
|
| 101 |
for task in tasks:
|
| 102 |
env.set_task(task)
|
|
|
|
| 106 |
step = 0
|
| 107 |
final_reward = 0.0
|
| 108 |
|
| 109 |
+
# START marker
|
| 110 |
+
print(f"[START] task={task}", flush=True)
|
| 111 |
+
|
| 112 |
while not done and step < MAX_STEPS:
|
| 113 |
step += 1
|
| 114 |
user_prompt = build_user_prompt(step, obs, history)
|
|
|
|
| 125 |
)
|
| 126 |
response_text = completion.choices[0].message.content or ""
|
| 127 |
except Exception as exc:
|
| 128 |
+
# Print error to stderr only
|
| 129 |
+
import sys
|
| 130 |
+
print(f"Request failed: {exc}. Using fallback.", file=sys.stderr)
|
| 131 |
response_text = FALLBACK_ACTION
|
| 132 |
|
| 133 |
action = parse_model_action(response_text)
|
| 134 |
obs, reward, done, info = env.step(action)
|
| 135 |
final_reward = reward.value
|
| 136 |
|
| 137 |
+
# STEP marker with required key=value format
|
| 138 |
+
print(f"[STEP] step={step} reward={final_reward:.3f}", flush=True)
|
|
|
|
| 139 |
|
|
|
|
| 140 |
history.append(f"Step {step}: {action.action_type}")
|
| 141 |
|
| 142 |
+
# END marker
|
| 143 |
+
print(f"[END] task={task} score={final_reward:.3f} steps={step}", flush=True)
|
|
|
|
|
|
|
| 144 |
|
| 145 |
|
| 146 |
if __name__ == "__main__":
|