Fix final OpenEnv validator compliance: inference stdout format, Dockerfile deps, API vars, and grader bounds
Browse files
- Dockerfile +3 -3
- env/environment.py +7 -8
- evaluate.py +1 -1
- inference.py +9 -5
Dockerfile
CHANGED
|
@@ -2,12 +2,12 @@ FROM python:3.11-slim
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
# Install dependencies directly to be lightweight
|
| 6 |
-
RUN pip install --no-cache-dir pydantic openai fastapi uvicorn
|
| 7 |
-
|
| 8 |
# Copy project files
|
| 9 |
COPY . .
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
# Set default env vars
|
| 12 |
ENV PYTHONUNBUFFERED=1
|
| 13 |
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
|
|
|
|
|
|
|
|
|
| 5 |
# Copy project files
|
| 6 |
COPY . .
|
| 7 |
|
| 8 |
+
# Install dependencies directly to be lightweight
|
| 9 |
+
RUN pip install --no-cache-dir -r requirements.txt && pip install --no-cache-dir .
|
| 10 |
+
|
| 11 |
# Set default env vars
|
| 12 |
ENV PYTHONUNBUFFERED=1
|
| 13 |
|
env/environment.py
CHANGED
|
@@ -104,17 +104,16 @@ class SupportTicketEnv:
|
|
| 104 |
system_message = "Max steps reached."
|
| 105 |
|
| 106 |
# Calculate intermediate/final reward
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
if self.state.is_done:
|
| 108 |
-
|
| 109 |
-
reward = self.state.final_reward
|
| 110 |
-
print(f"Final reward calculated: {reward}")
|
| 111 |
-
else:
|
| 112 |
-
intermediate_reward = grade(self.state) # Add intermediate reward dynamically
|
| 113 |
-
self.state.final_reward += intermediate_reward
|
| 114 |
-
reward = self.state.final_reward
|
| 115 |
|
| 116 |
info = {
|
| 117 |
-
"current_reward":
|
| 118 |
"step_count": self.state.step_count
|
| 119 |
}
|
| 120 |
|
|
|
|
| 104 |
system_message = "Max steps reached."
|
| 105 |
|
| 106 |
# Calculate intermediate/final reward
|
| 107 |
+
new_total_reward = grade(self.state)
|
| 108 |
+
step_reward = new_total_reward - self.state.final_reward
|
| 109 |
+
self.state.final_reward = new_total_reward
|
| 110 |
+
reward = step_reward
|
| 111 |
+
|
| 112 |
if self.state.is_done:
|
| 113 |
+
print(f"Final reward calculated: {self.state.final_reward}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
info = {
|
| 116 |
+
"current_reward": self.state.final_reward,
|
| 117 |
"step_count": self.state.step_count
|
| 118 |
}
|
| 119 |
|
evaluate.py
CHANGED
|
@@ -19,8 +19,8 @@ EXPECTED_ACTIONS = {
|
|
| 19 |
],
|
| 20 |
"task_hard_1": [
|
| 21 |
Action(action_type="fetch_user_data", parameters={"user_id": "USR-C3"}),
|
| 22 |
-
Action(action_type="escalate", parameters={"reason": "billing_tier2"}),
|
| 23 |
Action(action_type="reply_to_customer", parameters={"message": "We're escalating this to billing tier 2 and will follow up."}),
|
|
|
|
| 24 |
],
|
| 25 |
}
|
| 26 |
|
|
|
|
| 19 |
],
|
| 20 |
"task_hard_1": [
|
| 21 |
Action(action_type="fetch_user_data", parameters={"user_id": "USR-C3"}),
|
|
|
|
| 22 |
Action(action_type="reply_to_customer", parameters={"message": "We're escalating this to billing tier 2 and will follow up."}),
|
| 23 |
+
Action(action_type="escalate", parameters={"reason": "billing_tier2"}),
|
| 24 |
],
|
| 25 |
}
|
| 26 |
|
inference.py
CHANGED
|
@@ -23,11 +23,14 @@ def log_start(task: str, env: str, model: str):
|
|
| 23 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 24 |
|
| 25 |
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None):
|
| 26 |
-
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
def log_end(success: bool, steps: int, score: float, rewards: list):
|
| 30 |
-
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def parse_action(text: str) -> Action:
|
| 33 |
# Robustly extract the first JSON object from text and validate with Pydantic
|
|
@@ -47,7 +50,8 @@ def parse_action(text: str) -> Action:
|
|
| 47 |
logger.warning("Action validation failed: %s", val_err)
|
| 48 |
# Fallback to manual construction with validation
|
| 49 |
action_type = obj.get("action_type", "close_ticket")
|
| 50 |
-
|
|
|
|
| 51 |
logger.error("Invalid action_type: %s. Defaulting to 'close_ticket'.", action_type)
|
| 52 |
action_type = "close_ticket"
|
| 53 |
return Action(action_type=action_type, parameters=obj.get("parameters", {}))
|
|
@@ -164,7 +168,7 @@ async def run_task(task_id: str, client: OpenAI) -> None:
|
|
| 164 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 165 |
|
| 166 |
async def main() -> None:
|
| 167 |
-
api_key = os.getenv("
|
| 168 |
client = OpenAI(base_url=API_BASE_URL, api_key=api_key)
|
| 169 |
|
| 170 |
tasks = ["task_easy_1", "task_medium_1", "task_hard_1"]
|
|
|
|
| 23 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 24 |
|
| 25 |
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None):
|
| 26 |
+
error_val = error if error else "null"
|
| 27 |
+
done_val = str(done).lower()
|
| 28 |
+
print(f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}", flush=True)
|
| 29 |
|
| 30 |
def log_end(success: bool, steps: int, score: float, rewards: list):
|
| 31 |
+
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 32 |
+
success_val = str(success).lower()
|
| 33 |
+
print(f"[END] success={success_val} steps={steps} score={score:.2f} rewards={rewards_str}", flush=True)
|
| 34 |
|
| 35 |
def parse_action(text: str) -> Action:
|
| 36 |
# Robustly extract the first JSON object from text and validate with Pydantic
|
|
|
|
| 50 |
logger.warning("Action validation failed: %s", val_err)
|
| 51 |
# Fallback to manual construction with validation
|
| 52 |
action_type = obj.get("action_type", "close_ticket")
|
| 53 |
+
valid_actions = ["fetch_user_data", "check_policy", "issue_refund", "reply_to_customer", "escalate", "close_ticket"]
|
| 54 |
+
if action_type not in valid_actions:
|
| 55 |
logger.error("Invalid action_type: %s. Defaulting to 'close_ticket'.", action_type)
|
| 56 |
action_type = "close_ticket"
|
| 57 |
return Action(action_type=action_type, parameters=obj.get("parameters", {}))
|
|
|
|
| 168 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 169 |
|
| 170 |
async def main() -> None:
|
| 171 |
+
api_key = os.getenv("HF_TOKEN")
|
| 172 |
client = OpenAI(base_url=API_BASE_URL, api_key=api_key)
|
| 173 |
|
| 174 |
tasks = ["task_easy_1", "task_medium_1", "task_hard_1"]
|