samrat-rm committed on
Commit
b3d65f0
·
1 Parent(s): 90c8812

feat: init inference

Browse files
Files changed (1) hide show
  1. inference.py +69 -82
inference.py CHANGED
@@ -1,48 +1,19 @@
1
  """
2
- Inference Script Example
3
  ===================================
4
- MANDATORY
5
- - Before submitting, ensure the following variables are defined in your environment configuration:
6
- API_BASE_URL The API endpoint for the LLM.
7
- MODEL_NAME The model identifier to use for inference.
8
- HF_TOKEN Your Hugging Face / API key.
9
- LOCAL_IMAGE_NAME The name of the local image to use for the environment if you are using from_docker_image()
10
- method
11
-
12
- - Defaults are set only for API_BASE_URL and MODEL_NAME
13
- (and should reflect your active inference setup):
14
- API_BASE_URL = os.getenv("API_BASE_URL", "<your-active-endpoint>")
15
- MODEL_NAME = os.getenv("MODEL_NAME", "<your-active-model>")
16
-
17
- - The inference script must be named `inference.py` and placed in the root directory of the project
18
- - Participants must use OpenAI Client for all LLM calls using above variables
19
 
20
  STDOUT FORMAT
21
- - The script must emit exactly three line types to stdout, in this order:
22
-
23
  [START] task=<task_name> env=<benchmark> model=<model_name>
24
  [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
25
  [END] success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
26
-
27
- Rules:
28
- - One [START] line at episode begin.
29
- - One [STEP] line per step, immediately after env.step() returns.
30
- - One [END] line after env.close(), always emitted (even on exception).
31
- - reward and rewards are formatted to 2 decimal places.
32
- - done and success are lowercase booleans: true or false.
33
- - error is the raw last_action_error string, or null if none.
34
- - All fields on a single line with no newlines within a line.
35
- - Each tasks should return score in [0, 1]
36
-
37
- Example:
38
- [START] task=click-test env=miniwob model=Qwen3-VL-30B
39
- [STEP] step=1 action=click('123') reward=0.00 done=false error=null
40
- [STEP] step=2 action=fill('456','text') reward=0.00 done=false error=null
41
- [STEP] step=3 action=click('789') reward=1.00 done=true error=null
42
- [END] success=true steps=3 score=1.00 rewards=0.00,0.00,1.00
43
  """
44
 
45
  import asyncio
 
46
  import os
47
  import textwrap
48
  from typing import List, Optional
@@ -52,31 +23,38 @@ load_dotenv()
52
 
53
  from openai import OpenAI
54
 
55
- from WhyDidItFail.client import WhydiditfailEnv
56
- from WhyDidItFail.models import WhyDidItFailAction
57
- IMAGE_NAME = os.getenv("IMAGE_NAME") # If you are using docker image
58
- API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
59
 
 
 
60
  API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
61
  MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
62
  TASK_NAME = os.getenv("WHYDIDITFAIL_TASK", "whydiditfail")
63
  BENCHMARK = os.getenv("WHYDIDITFAIL_BENCHMARK", "whydiditfail")
64
  MAX_STEPS = 8
65
- TEMPERATURE = 0.7
66
- MAX_TOKENS = 150
67
- SUCCESS_SCORE_THRESHOLD = 0.1 # normalized score in [0, 1]
68
-
69
- # Max possible reward: each token contributes 0.1, across all steps
70
- _MAX_REWARD_PER_STEP = MAX_TOKENS * 0.1
71
- MAX_TOTAL_REWARD = MAX_STEPS * _MAX_REWARD_PER_STEP
72
 
73
  SYSTEM_PROMPT = textwrap.dedent(
74
  """
75
- You are interacting with a simple echo environment.
76
- Each turn you must send a message. The environment will echo it back.
77
- Reward is proportional to message length: reward = len(message) * 0.1
78
- Your goal is to maximize total reward by sending meaningful, substantive messages.
79
- Reply with exactly one message string — no quotes, no prefixes, just the message text.
 
 
 
 
 
 
 
 
 
 
 
80
  """
81
  ).strip()
82
 
@@ -87,11 +65,7 @@ def log_start(task: str, env: str, model: str) -> None:
87
 
88
  def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
89
  error_val = error if error else "null"
90
- done_val = str(done).lower()
91
- print(
92
- f"[STEP] step={step} action={action} reward={reward:.2f} done={done_val} error={error_val}",
93
- flush=True,
94
- )
95
 
96
 
97
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
@@ -99,22 +73,25 @@ def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> No
99
  print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
100
 
101
 
102
- def build_user_prompt(step: int, last_echoed: str, last_reward: float, history: List[str]) -> str:
103
  history_block = "\n".join(history[-4:]) if history else "None"
104
  return textwrap.dedent(
105
  f"""
106
  Step: {step}
107
- Last echoed message: {last_echoed!r}
108
- Last reward: {last_reward:.2f}
109
- Previous steps:
 
 
110
  {history_block}
111
- Send your next message.
 
112
  """
113
  ).strip()
114
 
115
 
116
- def get_model_message(client: OpenAI, step: int, last_echoed: str, last_reward: float, history: List[str]) -> str:
117
- user_prompt = build_user_prompt(step, last_echoed, last_reward, history)
118
  try:
119
  completion = client.chat.completions.create(
120
  model=MODEL_NAME,
@@ -127,16 +104,30 @@ def get_model_message(client: OpenAI, step: int, last_echoed: str, last_reward:
127
  stream=False,
128
  )
129
  text = (completion.choices[0].message.content or "").strip()
130
- return text if text else "hello"
 
131
  except Exception as exc:
132
- print(f"[DEBUG] Model request failed: {exc}", flush=True)
133
- return "hello"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
 
135
 
136
  async def main() -> None:
137
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
138
-
139
- env = await WhydiditfailEnv.from_docker_image(IMAGE_NAME or "")
140
 
141
  history: List[str] = []
142
  rewards: List[float] = []
@@ -147,44 +138,40 @@ async def main() -> None:
147
  log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
148
 
149
  try:
150
- result = await env.reset() # OpenENV.reset()
151
- last_echoed = result.observation.echoed_message
152
- last_reward = 0.0
153
 
154
  for step in range(1, MAX_STEPS + 1):
155
  if result.done:
156
  break
157
 
158
- message = get_model_message(client, step, last_echoed, last_reward, history)
 
159
 
160
- result = await env.step(WhyDidItFailAction(message=message))
161
  obs = result.observation
162
 
163
  reward = result.reward or 0.0
164
  done = result.done
165
- error = None
166
 
167
  rewards.append(reward)
168
  steps_taken = step
169
- last_echoed = obs.echoed_message
170
- last_reward = reward
171
-
172
- log_step(step=step, action=message, reward=reward, done=done, error=error)
173
 
174
- history.append(f"Step {step}: {message!r} -> reward {reward:+.2f}")
 
175
 
176
  if done:
177
  break
178
 
179
- score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
180
- score = min(max(score, 0.0), 1.0) # clamp to [0, 1]
181
  success = score >= SUCCESS_SCORE_THRESHOLD
182
 
183
  finally:
184
  try:
185
  await env.close()
186
  except Exception as e:
187
- print(f"[DEBUG] env.close() error (container cleanup): {e}", flush=True)
188
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
189
 
190
 
 
1
"""
Inference Script — WhyDidItFail
===================================
MANDATORY environment variables:
    API_BASE_URL        The API endpoint for the LLM.
    MODEL_NAME          The model identifier to use for inference.
    HF_TOKEN / API_KEY  Your Hugging Face / API key.

STDOUT FORMAT
    [START] task=<task_name> env=<benchmark> model=<model_name>
    [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null>
    [END] success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn>
"""
14
 
15
  import asyncio
16
+ import json
17
  import os
18
  import textwrap
19
  from typing import List, Optional
 
23
 
24
  from openai import OpenAI
25
 
26
+ from client import WhyDidItFailEnv
27
+ from models import WhyDidItFailAction
 
 
28
 
29
# Runtime configuration, resolved from the environment with fallbacks.
IMAGE_NAME = os.getenv("IMAGE_NAME")  # local docker image name (optional)
# HF_TOKEN takes precedence over a generic API_KEY.
API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY")
API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1"
MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct"
TASK_NAME = os.getenv("WHYDIDITFAIL_TASK", "whydiditfail")
BENCHMARK = os.getenv("WHYDIDITFAIL_BENCHMARK", "whydiditfail")

# Episode limits and sampling parameters.
MAX_STEPS = 8
TEMPERATURE = 0.3
MAX_TOKENS = 256
SUCCESS_SCORE_THRESHOLD = 0.5  # reward >= 0.5 counts as success
 
 
 
 
39
 
40
# Instructions given to the model: a JSON-action protocol for diagnosing a
# failed training run in the WhyDidItFail environment.
SYSTEM_PROMPT = textwrap.dedent(
    """
    You are a machine learning engineer diagnosing a failed training run.
    Each turn you will receive data from the training run and must decide what to investigate next.

    Available actions:
    - inspect_logs : examine training loss curves
    - inspect_config : examine hyperparameter config (lr, optimizer, etc.)
    - inspect_gradients : examine gradient statistics
    - submit_diagnosis : submit your final diagnosis (ends the episode)

    You must respond with a JSON object on a single line. Examples:
    {"action_type": "inspect_logs"}
    {"action_type": "inspect_config"}
    {"action_type": "submit_diagnosis", "diagnosis": "exploding gradients"}

    Only submit_diagnosis when you are confident. The diagnosis should describe the failure mode
    in plain terms (e.g. "exploding gradients", "overfitting", "vanishing gradients").
    """
).strip()
60
 
 
65
 
66
def log_step(step: int, action: str, reward: float, done: bool, error: Optional[str]) -> None:
    """Emit one [STEP] protocol line to stdout.

    Protocol: reward formatted to two decimals, `done` as a lowercase
    boolean, and `error` rendered as the literal string "null" when absent.
    """
    fields = [
        f"[STEP] step={step}",
        f"action={action}",
        f"reward={reward:.2f}",
        f"done={str(done).lower()}",
        f"error={error if error else 'null'}",
    ]
    # flush so the harness sees the line immediately, even if stdout is piped
    print(" ".join(fields), flush=True)
 
 
 
 
69
 
70
 
71
  def log_end(success: bool, steps: int, score: float, rewards: List[float]) -> None:
 
73
  print(f"[END] success={str(success).lower()} steps={steps} score={score:.3f} rewards={rewards_str}", flush=True)
74
 
75
 
76
+ def build_user_prompt(step: int, observation_summary: str, history: List[str]) -> str:
77
  history_block = "\n".join(history[-4:]) if history else "None"
78
  return textwrap.dedent(
79
  f"""
80
  Step: {step}
81
+
82
+ Current observation:
83
+ {observation_summary}
84
+
85
+ History:
86
  {history_block}
87
+
88
+ Respond with a JSON action.
89
  """
90
  ).strip()
91
 
92
 
93
+ def get_model_action(client: OpenAI, step: int, observation_summary: str, history: List[str]) -> WhyDidItFailAction:
94
+ user_prompt = build_user_prompt(step, observation_summary, history)
95
  try:
96
  completion = client.chat.completions.create(
97
  model=MODEL_NAME,
 
104
  stream=False,
105
  )
106
  text = (completion.choices[0].message.content or "").strip()
107
+ data = json.loads(text)
108
+ return WhyDidItFailAction(**data)
109
  except Exception as exc:
110
+ print(f"[DEBUG] Model request/parse failed: {exc}", flush=True)
111
+ # Fallback: inspect logs if early, otherwise give up and submit empty diagnosis
112
+ if step <= 2:
113
+ return WhyDidItFailAction(action_type="inspect_logs")
114
+ return WhyDidItFailAction(action_type="submit_diagnosis", diagnosis="unknown")
115
+
116
+
117
def summarize_observation(obs) -> str:
    """Render an environment observation as plain text for the model prompt.

    NOTE(review): assumes `obs` exposes task_description, feedback,
    available_actions, and visible_data — confirm against the env's
    observation model.
    """
    text = f"Task: {obs.task_description}\n"
    text += f"Feedback: {obs.feedback}\n"
    text += f"Available actions: {', '.join(obs.available_actions)}"
    # Raw run data is optional; pretty-print it when present.
    if obs.visible_data:
        text += f"\nData: {json.dumps(obs.visible_data, indent=2)}"
    return text
126
 
127
 
128
  async def main() -> None:
129
  client = OpenAI(base_url=API_BASE_URL, api_key=API_KEY)
130
+ env = await WhyDidItFailEnv.from_docker_image(IMAGE_NAME or "")
 
131
 
132
  history: List[str] = []
133
  rewards: List[float] = []
 
138
  log_start(task=TASK_NAME, env=BENCHMARK, model=MODEL_NAME)
139
 
140
  try:
141
+ result = await env.reset()
142
+ obs = result.observation
 
143
 
144
  for step in range(1, MAX_STEPS + 1):
145
  if result.done:
146
  break
147
 
148
+ obs_summary = summarize_observation(obs)
149
+ action = get_model_action(client, step, obs_summary, history)
150
 
151
+ result = await env.step(action)
152
  obs = result.observation
153
 
154
  reward = result.reward or 0.0
155
  done = result.done
156
+ action_str = action.model_dump_json(exclude_none=True)
157
 
158
  rewards.append(reward)
159
  steps_taken = step
 
 
 
 
160
 
161
+ log_step(step=step, action=action_str, reward=reward, done=done, error=None)
162
+ history.append(f"Step {step}: {action_str} -> reward={reward:.2f} feedback={obs.feedback!r}")
163
 
164
  if done:
165
  break
166
 
167
+ score = max(rewards) if rewards else 0.0 # final diagnosis reward is what matters
 
168
  success = score >= SUCCESS_SCORE_THRESHOLD
169
 
170
  finally:
171
  try:
172
  await env.close()
173
  except Exception as e:
174
+ print(f"[DEBUG] env.close() error: {e}", flush=True)
175
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
176
 
177