Sid8421 committed on
Commit
ba2722e
·
1 Parent(s): 0c24081

Fix final OpenEnv validator compliance: inference stdout format, Dockerfile deps, API vars, and grader bounds

Browse files
Files changed (4) hide show
  1. Dockerfile +3 -3
  2. env/environment.py +7 -8
  3. evaluate.py +1 -1
  4. inference.py +9 -5
Dockerfile CHANGED
@@ -2,12 +2,12 @@ FROM python:3.11-slim
2
 
3
  WORKDIR /app
4
 
5
- # Install dependencies directly to be lightweight
6
- RUN pip install --no-cache-dir pydantic openai fastapi uvicorn
7
-
8
  # Copy project files
9
  COPY . .
10
 
 
 
 
11
  # Set default env vars
12
  ENV PYTHONUNBUFFERED=1
13
 
 
2
 
3
  WORKDIR /app
4
 
 
 
 
5
  # Copy project files
6
  COPY . .
7
 
8
+ # Install dependencies directly to be lightweight
9
+ RUN pip install --no-cache-dir -r requirements.txt && pip install --no-cache-dir .
10
+
11
  # Set default env vars
12
  ENV PYTHONUNBUFFERED=1
13
 
env/environment.py CHANGED
@@ -104,17 +104,16 @@ class SupportTicketEnv:
104
  system_message = "Max steps reached."
105
 
106
  # Calculate intermediate/final reward
 
 
 
 
 
107
  if self.state.is_done:
108
- self.state.final_reward += grade(self.state) # Add final reward
109
- reward = self.state.final_reward
110
- print(f"Final reward calculated: {reward}")
111
- else:
112
- intermediate_reward = grade(self.state) # Add intermediate reward dynamically
113
- self.state.final_reward += intermediate_reward
114
- reward = self.state.final_reward
115
 
116
  info = {
117
- "current_reward": reward,
118
  "step_count": self.state.step_count
119
  }
120
 
 
104
  system_message = "Max steps reached."
105
 
106
  # Calculate intermediate/final reward
107
+ new_total_reward = grade(self.state)
108
+ step_reward = new_total_reward - self.state.final_reward
109
+ self.state.final_reward = new_total_reward
110
+ reward = step_reward
111
+
112
  if self.state.is_done:
113
+ print(f"Final reward calculated: {self.state.final_reward}")
 
 
 
 
 
 
114
 
115
  info = {
116
+ "current_reward": self.state.final_reward,
117
  "step_count": self.state.step_count
118
  }
119
 
evaluate.py CHANGED
@@ -19,8 +19,8 @@ EXPECTED_ACTIONS = {
19
  ],
20
  "task_hard_1": [
21
  Action(action_type="fetch_user_data", parameters={"user_id": "USR-C3"}),
22
- Action(action_type="escalate", parameters={"reason": "billing_tier2"}),
23
  Action(action_type="reply_to_customer", parameters={"message": "We're escalating this to billing tier 2 and will follow up."}),
 
24
  ],
25
  }
26
 
 
19
  ],
20
  "task_hard_1": [
21
  Action(action_type="fetch_user_data", parameters={"user_id": "USR-C3"}),
 
22
  Action(action_type="reply_to_customer", parameters={"message": "We're escalating this to billing tier 2 and will follow up."}),
23
+ Action(action_type="escalate", parameters={"reason": "billing_tier2"}),
24
  ],
25
  }
26
 
inference.py CHANGED
@@ -23,11 +23,14 @@ def log_start(task: str, env: str, model: str):
23
  print(f"[START] task={task} env={env} model={model}", flush=True)
24
 
25
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None):
26
- err_str = f" error={error}" if error else ""
27
- print(f"[STEP] step={step} action={action!r} reward={reward} done={done}{err_str}", flush=True)
 
28
 
29
  def log_end(success: bool, steps: int, score: float, rewards: list):
30
- print(f"[END] success={success} steps={steps} score={score} rewards={rewards}", flush=True)
 
 
31
 
32
  def parse_action(text: str) -> Action:
33
  # Robustly extract the first JSON object from text and validate with Pydantic
@@ -47,7 +50,8 @@ def parse_action(text: str) -> Action:
47
  logger.warning("Action validation failed: %s", val_err)
48
  # Fallback to manual construction with validation
49
  action_type = obj.get("action_type", "close_ticket")
50
- if action_type not in Action.__fields__["action_type"].type.__args__:
 
51
  logger.error("Invalid action_type: %s. Defaulting to 'close_ticket'.", action_type)
52
  action_type = "close_ticket"
53
  return Action(action_type=action_type, parameters=obj.get("parameters", {}))
@@ -164,7 +168,7 @@ async def run_task(task_id: str, client: OpenAI) -> None:
164
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
165
 
166
  async def main() -> None:
167
- api_key = os.getenv("OPENAI_API_KEY", "dummy-key")
168
  client = OpenAI(base_url=API_BASE_URL, api_key=api_key)
169
 
170
  tasks = ["task_easy_1", "task_medium_1", "task_hard_1"]
 
23
  print(f"[START] task={task} env={env} model={model}", flush=True)
24
 
25
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str] = None):
26
+ error_val = error if error else "null"
27
+ done_val = str(done).lower()
28
+ print(f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}", flush=True)
29
 
30
  def log_end(success: bool, steps: int, score: float, rewards: list):
31
+ rewards_str = ",".join(f"{r:.2f}" for r in rewards)
32
+ success_val = str(success).lower()
33
+ print(f"[END] success={success_val} steps={steps} score={score:.2f} rewards={rewards_str}", flush=True)
34
 
35
  def parse_action(text: str) -> Action:
36
  # Robustly extract the first JSON object from text and validate with Pydantic
 
50
  logger.warning("Action validation failed: %s", val_err)
51
  # Fallback to manual construction with validation
52
  action_type = obj.get("action_type", "close_ticket")
53
+ valid_actions = ["fetch_user_data", "check_policy", "issue_refund", "reply_to_customer", "escalate", "close_ticket"]
54
+ if action_type not in valid_actions:
55
  logger.error("Invalid action_type: %s. Defaulting to 'close_ticket'.", action_type)
56
  action_type = "close_ticket"
57
  return Action(action_type=action_type, parameters=obj.get("parameters", {}))
 
168
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
169
 
170
  async def main() -> None:
171
+ api_key = os.getenv("HF_TOKEN")
172
  client = OpenAI(base_url=API_BASE_URL, api_key=api_key)
173
 
174
  tasks = ["task_easy_1", "task_medium_1", "task_hard_1"]