Fix final OpenEnv validator compliance: inference stdout format, Dockerfile deps, API vars, and grader bounds
Browse files
- Dockerfile +3 -3
- env/environment.py +7 -8
- evaluate.py +1 -1
- inference.py +9 -5
Dockerfile
CHANGED
|
@@ -2,12 +2,12 @@ FROM python:3.11-slim
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
| 5 |
-
# Install dependencies directly to be lightweight
|
| 6 |
-
RUN pip install --no-cache-dir pydantic openai fastapi uvicorn
|
| 7 |
-
|
| 8 |
# Copy project files
|
| 9 |
COPY . .
|
| 10 |
|
|
|
|
|
|
|
|
|
|
| 11 |
# Set default env vars
|
| 12 |
ENV PYTHONUNBUFFERED=1
|
| 13 |
|
|
|
|
| 2 |
|
| 3 |
WORKDIR /app
|
| 4 |
|
|
|
|
|
|
|
|
|
|
| 5 |
# Copy project files
|
| 6 |
COPY . .
|
| 7 |
|
| 8 |
+
# Install dependencies directly to be lightweight
|
| 9 |
+
RUN pip install --no-cache-dir -r requirements.txt && pip install --no-cache-dir .
|
| 10 |
+
|
| 11 |
# Set default env vars
|
| 12 |
ENV PYTHONUNBUFFERED=1
|
| 13 |
|
env/environment.py
CHANGED
|
@@ -104,17 +104,16 @@ class SupportTicketEnv:
|
|
| 104 |
system_message = "Max steps reached."
|
| 105 |
|
| 106 |
# Calculate intermediate/final reward
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
if self.state.is_done:
|
| 108 |
-
|
| 109 |
-
reward = self.state.final_reward
|
| 110 |
-
print(f"Final reward calculated: {reward}")
|
| 111 |
-
else:
|
| 112 |
-
intermediate_reward = grade(self.state) # Add intermediate reward dynamically
|
| 113 |
-
self.state.final_reward += intermediate_reward
|
| 114 |
-
reward = self.state.final_reward
|
| 115 |
|
| 116 |
info = {
|
| 117 |
-
"current_reward":
|
| 118 |
"step_count": self.state.step_count
|
| 119 |
}
|
| 120 |
|
|
|
|
| 104 |
system_message = "Max steps reached."
|
| 105 |
|
| 106 |
# Calculate intermediate/final reward
|
| 107 |
+
new_total_reward = grade(self.state)
|
| 108 |
+
step_reward = new_total_reward - self.state.final_reward
|
| 109 |
+
self.state.final_reward = new_total_reward
|
| 110 |
+
reward = step_reward
|
| 111 |
+
|
| 112 |
if self.state.is_done:
|
| 113 |
+
print(f"Final reward calculated: {self.state.final_reward}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 114 |
|
| 115 |
info = {
|
| 116 |
+
"current_reward": self.state.final_reward,
|
| 117 |
"step_count": self.state.step_count
|
| 118 |
}
|
| 119 |
|
evaluate.py
CHANGED
|
@@ -19,8 +19,8 @@ EXPECTED_ACTIONS = {
|
|
| 19 |
],
|
| 20 |
"task_hard_1": [
|
| 21 |
Action(action_type="fetch_user_data", parameters={"user_id": "USR-C3"}),
|
| 22 |
-
Action(action_type="escalate", parameters={"reason": "billing_tier2"}),
|
| 23 |
Action(action_type="reply_to_customer", parameters={"message": "We're escalating this to billing tier 2 and will follow up."}),
|
|
|
|
| 24 |
],
|
| 25 |
}
|
| 26 |
|
|
|
|
| 19 |
],
|
| 20 |
"task_hard_1": [
|
| 21 |
Action(action_type="fetch_user_data", parameters={"user_id": "USR-C3"}),
|
|
|
|
| 22 |
Action(action_type="reply_to_customer", parameters={"message": "We're escalating this to billing tier 2 and will follow up."}),
|
| 23 |
+
Action(action_type="escalate", parameters={"reason": "billing_tier2"}),
|
| 24 |
],
|
| 25 |
}
|
| 26 |
|
inference.py
CHANGED
|
@@ -23,11 +23,14 @@ def log_start(task: str, env: str, model: str):
|
|
| 23 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 24 |
|
| 25 |
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None):
|
| 26 |
-
|
| 27 |
-
|
|
|
|
| 28 |
|
| 29 |
def log_end(success: bool, steps: int, score: float, rewards: list):
|
| 30 |
-
|
|
|
|
|
|
|
| 31 |
|
| 32 |
def parse_action(text: str) -> Action:
|
| 33 |
# Robustly extract the first JSON object from text and validate with Pydantic
|
|
@@ -47,7 +50,8 @@ def parse_action(text: str) -> Action:
|
|
| 47 |
logger.warning("Action validation failed: %s", val_err)
|
| 48 |
# Fallback to manual construction with validation
|
| 49 |
action_type = obj.get("action_type", "close_ticket")
|
| 50 |
-
|
|
|
|
| 51 |
logger.error("Invalid action_type: %s. Defaulting to 'close_ticket'.", action_type)
|
| 52 |
action_type = "close_ticket"
|
| 53 |
return Action(action_type=action_type, parameters=obj.get("parameters", {}))
|
|
@@ -164,7 +168,7 @@ async def run_task(task_id: str, client: OpenAI) -> None:
|
|
| 164 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 165 |
|
| 166 |
async def main() -> None:
|
| 167 |
-
api_key = os.getenv("
|
| 168 |
client = OpenAI(base_url=API_BASE_URL, api_key=api_key)
|
| 169 |
|
| 170 |
tasks = ["task_easy_1", "task_medium_1", "task_hard_1"]
|
|
|
|
| 23 |
print(f"[START] task={task} env={env} model={model}", flush=True)
|
| 24 |
|
| 25 |
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None):
|
| 26 |
+
error_val = error if error else "null"
|
| 27 |
+
done_val = str(done).lower()
|
| 28 |
+
print(f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}", flush=True)
|
| 29 |
|
| 30 |
def log_end(success: bool, steps: int, score: float, rewards: list):
|
| 31 |
+
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 32 |
+
success_val = str(success).lower()
|
| 33 |
+
print(f"[END] success={success_val} steps={steps} score={score:.2f} rewards={rewards_str}", flush=True)
|
| 34 |
|
| 35 |
def parse_action(text: str) -> Action:
|
| 36 |
# Robustly extract the first JSON object from text and validate with Pydantic
|
|
|
|
| 50 |
logger.warning("Action validation failed: %s", val_err)
|
| 51 |
# Fallback to manual construction with validation
|
| 52 |
action_type = obj.get("action_type", "close_ticket")
|
| 53 |
+
valid_actions = ["fetch_user_data", "check_policy", "issue_refund", "reply_to_customer", "escalate", "close_ticket"]
|
| 54 |
+
if action_type not in valid_actions:
|
| 55 |
logger.error("Invalid action_type: %s. Defaulting to 'close_ticket'.", action_type)
|
| 56 |
action_type = "close_ticket"
|
| 57 |
return Action(action_type=action_type, parameters=obj.get("parameters", {}))
|
|
|
|
| 168 |
log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
|
| 169 |
|
| 170 |
async def main() -> None:
|
| 171 |
+
api_key = os.getenv("HF_TOKEN")
|
| 172 |
client = OpenAI(base_url=API_BASE_URL, api_key=api_key)
|
| 173 |
|
| 174 |
tasks = ["task_easy_1", "task_medium_1", "task_hard_1"]
|